Diffstat (limited to 'drivers/net/ethernet/mellanox')
109 files changed, 7110 insertions, 3525 deletions
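A recurring change in the hunks below is the conversion of the mlx4/mlx5 completion-queue tasklets from the old tasklet_init() style, which passed the context as an unsigned long, to tasklet_setup() with from_tasklet(). A minimal sketch of that pattern follows; the foo_* names are hypothetical stand-ins for the driver's mlx4_eq_tasklet/mlx5_eq_tasklet context, while tasklet_setup(), from_tasklet() and tasklet_schedule() are the kernel APIs the patches adopt.

#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical per-EQ context embedding the tasklet, mirroring mlx4_eq_tasklet. */
struct foo_eq_tasklet {
	struct list_head	list;	/* CQs queued for completion processing */
	struct tasklet_struct	task;	/* embedded tasklet */
	spinlock_t		lock;
};

/* New-style callback: it receives the tasklet itself instead of an unsigned long. */
static void foo_cq_tasklet_cb(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of() from the embedded tasklet to its context. */
	struct foo_eq_tasklet *ctx = from_tasklet(ctx, t, task);

	/* ... walk ctx->list under ctx->lock and run each CQ's completion handler ... */
}

static void foo_eq_tasklet_init(struct foo_eq_tasklet *ctx)
{
	INIT_LIST_HEAD(&ctx->list);
	spin_lock_init(&ctx->lock);
	/* Replaces tasklet_init(&ctx->task, cb, (unsigned long)ctx). */
	tasklet_setup(&ctx->task, foo_cq_tasklet_cb);
}

An EQ interrupt handler would still call tasklet_schedule(&ctx->task) after queueing completed CQs on ctx->list; only the callback signature and the registration call change.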
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 65f8a4b6ed0c..3b8576b9c2f9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -55,11 +55,11 @@
 #define TASKLET_MAX_TIME 2
 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
 
-void mlx4_cq_tasklet_cb(unsigned long data)
+void mlx4_cq_tasklet_cb(struct tasklet_struct *t)
 {
 	unsigned long flags;
 	unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
-	struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data;
+	struct mlx4_eq_tasklet *ctx = from_tasklet(ctx, t, task);
 	struct mlx4_cq *mcq, *temp;
 
 	spin_lock_irqsave(&ctx->lock, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index b816154bc79a..23849f2b9c25 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1106,6 +1106,24 @@ static int mlx4_en_set_pauseparam(struct net_device *dev,
 	return err;
 }
 
+static void mlx4_en_get_pause_stats(struct net_device *dev,
+				    struct ethtool_pause_stats *stats)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct bitmap_iterator it;
+
+	bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+	spin_lock_bh(&priv->stats_lock);
+	if (test_bit(FLOW_PRIORITY_STATS_IDX_TX_FRAMES,
+		     priv->stats_bitmap.bitmap))
+		stats->tx_pause_frames = priv->tx_flowstats.tx_pause;
+	if (test_bit(FLOW_PRIORITY_STATS_IDX_RX_FRAMES,
+		     priv->stats_bitmap.bitmap))
+		stats->rx_pause_frames = priv->rx_flowstats.rx_pause;
+	spin_unlock_bh(&priv->stats_lock);
+}
+
 static void mlx4_en_get_pauseparam(struct net_device *dev,
 				   struct ethtool_pauseparam *pause)
 {
@@ -2138,6 +2156,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.set_msglevel = mlx4_en_set_msglevel,
 	.get_coalesce = mlx4_en_get_coalesce,
 	.set_coalesce = mlx4_en_set_coalesce,
+	.get_pause_stats = mlx4_en_get_pause_stats,
 	.get_pauseparam = mlx4_en_get_pauseparam,
 	.set_pauseparam = mlx4_en_set_pauseparam,
 	.get_ringparam = mlx4_en_get_ringparam,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b50c567ef508..502d1b97855c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		frags = ring->rx_info + (index << priv->log_rx_info);
 		va = page_address(frags[0].page) + frags[0].page_offset;
-		prefetchw(va);
+		net_prefetchw(va);
 		/*
 		 * make sure we read the CQE after we read the ownership bit
 		 */
@@ -943,6 +943,9 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	bool clean_complete = true;
 	int done;
 
+	if (!budget)
+		return 0;
+
 	if (priv->tx_ring_num[TX_XDP]) {
 		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
 		if (xdp_tx_cq->xdp_busy) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 9dff7b086c9f..3ddb7268e415 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -350,7 +350,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 		.dma = tx_info->map0_dma,
 	};
 
-	if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+	if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
 		dma_unmap_page(priv->ddev, tx_info->map0_dma,
 			       PAGE_SIZE, priv->dma_dir);
 		put_page(tx_info->page);
@@ -842,6 +842,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct mlx4_en_tx_desc *tx_desc;
 	struct mlx4_wqe_data_seg *data;
 	struct mlx4_en_tx_info *tx_info;
+	u32 __maybe_unused ring_cons;
 	int tx_ind;
 	int nr_txbb;
 	int desc_size;
@@ -855,7 +856,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	bool stop_queue;
 	bool inline_ok;
 	u8 data_offset;
-	u32 ring_cons;
 	bool bf_ok;
 
 	tx_ind = skb_get_queue_mapping(skb);
@@ -1075,7 +1075,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	smp_rmb();
 
-	ring_cons = READ_ONCE(ring->cons);
 	if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
 		netif_tx_wake_queue(ring->tx_queue);
 		ring->wake_queue++;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index ae305c2e9225..9e48509ed3b2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1057,8 +1057,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 	INIT_LIST_HEAD(&eq->tasklet_ctx.list);
 	INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
 	spin_lock_init(&eq->tasklet_ctx.lock);
-	tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb,
-		     (unsigned long)&eq->tasklet_ctx);
+	tasklet_setup(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb);
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 258c7a96f269..c326b434734e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3031,6 +3031,17 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 	if (err)
 		return err;
 
+	/* Ethernet and IB drivers will normally set the port type,
+	 * but if they are not built set the type now to prevent
+	 * devlink_port_type_warn() from firing.
+	 */
+	if (!IS_ENABLED(CONFIG_MLX4_EN) &&
+	    dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+		devlink_port_type_eth_set(&info->devlink_port, NULL);
+	else if (!IS_ENABLED(CONFIG_MLX4_INFINIBAND) &&
+		 dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+		devlink_port_type_ib_set(&info->devlink_port, NULL);
+
 	info->dev = dev;
 	info->port = port;
 	if (!mlx4_is_slave(dev)) {
@@ -3935,6 +3946,8 @@ static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
 			       struct devlink *devlink);
 
 static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
+				    enum devlink_reload_action action,
+				    enum devlink_reload_limit limit,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
@@ -3951,7 +3964,8 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
 	return 0;
 }
 
-static int mlx4_devlink_reload_up(struct devlink *devlink,
+static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+				  enum devlink_reload_limit limit, u32 *actions_performed,
 				  struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
@@ -3959,6 +3973,7 @@ static int mlx4_devlink_reload_up(struct devlink *devlink,
 	struct mlx4_dev_persistent *persist = dev->persist;
 	int err;
 
+	*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
 	err = mlx4_restart_one_up(persist->pdev, true, devlink);
 	if (err)
 		mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
@@ -3969,6 +3984,7 @@ static int mlx4_devlink_reload_up(struct devlink *devlink,
 
 static const struct devlink_ops mlx4_devlink_ops = {
 	.port_type_set = mlx4_devlink_port_type_set,
+	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
 	.reload_down = mlx4_devlink_reload_down,
 	.reload_up = mlx4_devlink_reload_up,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 527b52e48276..64bed7ac3836 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1217,7 +1217,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
 int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
 		  u16 op, unsigned long timeout);
 
-void mlx4_cq_tasklet_cb(unsigned long data);
+void mlx4_cq_tasklet_cb(struct tasklet_struct *t);
 void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
 void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index 86b6051da8ec..51d4eaab6a2f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -84,6 +84,11 @@ struct mlx4_en_flow_stats_rx {
 					  MLX4_NUM_PRIORITIES)
 };
 
+#define FLOW_PRIORITY_STATS_IDX_RX_FRAMES	(NUM_MAIN_STATS + \
+						 NUM_PORT_STATS + \
+						 NUM_PF_STATS + \
+						 NUM_FLOW_PRIORITY_STATS_RX)
+
 struct mlx4_en_flow_stats_tx {
 	u64 tx_pause;
 	u64 tx_pause_duration;
@@ -93,6 +98,13 @@ struct mlx4_en_flow_stats_tx {
 					  MLX4_NUM_PRIORITIES)
 };
 
+#define FLOW_PRIORITY_STATS_IDX_TX_FRAMES	(NUM_MAIN_STATS + \
+						 NUM_PORT_STATS + \
+						 NUM_PF_STATS + \
+						 NUM_FLOW_PRIORITY_STATS_RX + \
+						 NUM_FLOW_STATS_RX + \
+						 NUM_FLOW_PRIORITY_STATS_TX)
+
 #define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \
 			NUM_FLOW_PRIORITY_STATS_TX + \
 			NUM_FLOW_PRIORITY_STATS_RX)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 10e6886c96ba..2d477f9a8cb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o
+		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o
 
 #
 # Netdev basic
@@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
 		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
-		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
 		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o
 
 #
@@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag_mp.o lib/geneve.o lib/port_tun.o \
 					en_rep.o en/rep/bond.o en/mod_hdr.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
-					en/mapping.o esw/chains.o en/tc_tun.o \
+					en/mapping.o lib/fs_chains.o en/tc_tun.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
 					en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
 mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
@@ -49,7 +49,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 					ecpf.o rdma.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
-				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
+				      esw/devlink_port.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 8db4b5f0f963..291e427e9e4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -56,8 +56,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, size_t size, dma_addr_t *dma_handle, int node) { + struct device *device = mlx5_core_dma_dev(dev); struct mlx5_priv *priv = &dev->priv; - struct device *device = dev->device; int original_node; void *cpu_handle; @@ -111,7 +111,7 @@ EXPORT_SYMBOL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) { - dma_free_coherent(dev->device, buf->size, buf->frags->buf, + dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf, buf->frags->map); kfree(buf->frags); @@ -140,7 +140,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, if (!frag->buf) goto err_free_buf; if (frag->map & ((1 << buf->page_shift) - 1)) { - dma_free_coherent(dev->device, frag_sz, + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, buf->frags[i].map); mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", &frag->map, buf->page_shift); @@ -153,7 +153,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, err_free_buf: while (i--) - dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf, + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf, buf->frags[i].map); kfree(buf->frags); err_out: @@ -169,7 +169,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) for (i = 0; i < buf->npages; i++) { int frag_sz = min_t(int, size, PAGE_SIZE); - dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf, + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, buf->frags[i].map); size -= frag_sz; } @@ -275,7 +275,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) __set_bit(db->index, db->u.pgdir->bitmap); if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { - dma_free_coherent(dev->device, PAGE_SIZE, + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); bitmap_free(db->u.pgdir->bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2d1f4b3be9bf..e49387dbef98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1989,9 +1989,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev) static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = dev->device; - - cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; @@ -2004,9 +2002,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) return 0; } - dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, cmd->alloc_dma); - cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), 2 * MLX5_ADAPTER_PAGE_SIZE - 1, &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) @@ -2020,9 +2018,7 @@ static int alloc_cmd_page(struct 
mlx5_core_dev *dev, struct mlx5_cmd *cmd) static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = dev->device; - - dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf, + dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf, cmd->alloc_dma); } @@ -2054,7 +2050,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (!cmd->stats) return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); if (!cmd->pool) { err = -ENOMEM; goto dma_pool_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 8379b24cb838..df3e4938ecdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -42,11 +42,11 @@ #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) -void mlx5_cq_tasklet_cb(unsigned long data) +void mlx5_cq_tasklet_cb(struct tasklet_struct *t) { unsigned long flags; unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; - struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; + struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task); struct mlx5_core_cq *mcq; struct mlx5_core_cq *temp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index c709e9a385f6..a28f95df2901 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -4,22 +4,19 @@ #include <devlink.h> #include "mlx5_core.h" +#include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" static int mlx5_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); const struct firmware *fw; int err; - if (component) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + err = request_firmware_direct(&fw, params->file_name, &dev->pdev->dev); if (err) return err; @@ -88,21 +85,96 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } +static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level, reset_type, net_port_alive; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, &reset_type); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) { + NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot"); + return -EINVAL; + } + + net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); + if (err) + goto out; + + err = mlx5_fw_reset_wait_reset_done(dev); +out: + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); + return err; +} + +static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, NULL); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) { + NL_SET_ERR_MSG_MOD(extack, + "FW upgrade to the stored FW can't be done by FW live patching"); + return -EINVAL; + } + + return 
mlx5_fw_reset_set_live_patch(dev); +} + static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - mlx5_unload_one(dev, false); - return 0; + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + mlx5_unload_one(dev, false); + return 0; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + return mlx5_devlink_trigger_fw_live_patch(devlink, extack); + return mlx5_devlink_reload_fw_activate(devlink, extack); + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + return -EOPNOTSUPP; + } } -static int mlx5_devlink_reload_up(struct devlink *devlink, +static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - return mlx5_load_one(dev, false); + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + return mlx5_load_one(dev, false); + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + break; + /* On fw_activate action, also driver is reloaded and reinit performed */ + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + return mlx5_load_one(dev, false); + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + return -EOPNOTSUPP; + } + + return 0; } static const struct devlink_ops mlx5_devlink_ops = { @@ -118,6 +190,9 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), .reload_down = mlx5_devlink_reload_down, .reload_up = mlx5_devlink_reload_up, }; @@ -228,6 +303,24 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id } #endif +static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool); + return 0; +} + +static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev); + return 0; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, @@ -243,6 +336,9 @@ static const struct devlink_param mlx5_devlink_params[] = { NULL, NULL, mlx5_devlink_large_group_num_validate), #endif + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_enable_remote_dev_reset_get, + mlx5_devlink_enable_remote_dev_reset_set, NULL), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index ad3594c4afcb..2eb022ad7fd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -124,7 +124,7 @@ static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer) static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev = tracer->dev; - struct device *ddev = &dev->pdev->dev; + struct device *ddev; dma_addr_t dma; void *buff; gfp_t gfp; @@ -142,6 +142,7 @@ static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) } tracer->buff.log_buf = buff; + ddev = mlx5_core_dma_dev(dev); dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE); if (dma_mapping_error(ddev, dma)) { mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n", @@ -162,11 +163,12 @@ free_pages: static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev = tracer->dev; - struct device *ddev = &dev->pdev->dev; + struct device *ddev; if (!tracer->buff.log_buf) return; + ddev = mlx5_core_dma_dev(dev); dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE); free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); } @@ -1064,6 +1066,58 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kvfree(tracer); } +static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + + dev = tracer->dev; + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + return err; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err); + return err; + } + mlx5_fw_tracer_init_saved_traces_array(tracer); + + return 0; +} + +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return -EINVAL; + + dev = tracer->dev; + mlx5_fw_tracer_cleanup(tracer); + err = mlx5_fw_tracer_recreate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n"); + return err; + } + err = mlx5_fw_tracer_init(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n"); + return err; + } + + return 0; +} + static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 40601fba80ba..97252a85d65e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -191,5 +191,6 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, struct devlink_fmsg *fmsg); +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 4924a5658853..ed4fb79b4db7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -78,7 +78,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump struct page *page) { struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; - struct device *ddev = &dev->pdev->dev; + struct device *ddev = mlx5_core_dma_dev(dev); u32 out_seq_num; u32 in_seq_num; dma_addr_t dma; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index a894ea98c95a..3dc9dd3f24dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -43,19 +43,13 @@ static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) int mlx5_ec_init(struct mlx5_core_dev *dev) { - int err = 0; - if (!mlx5_core_is_ecpf(dev)) return 0; /* ECPF shall enable HCA for peer PF in the same way a PF * does this for its VFs. */ - err = mlx5_peer_pf_init(dev); - if (err) - return err; - - return 0; + return mlx5_peer_pf_init(dev); } void mlx5_ec_cleanup(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 356f5852955f..2f05b0f9de01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -226,6 +226,7 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_RX_STRIDING_RQ, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_PFLAG_SKB_TX_MPWQE, MLX5E_NUM_PFLAGS, /* Keep last */ }; @@ -270,6 +271,7 @@ enum { MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */ }; struct mlx5e_cq { @@ -309,6 +311,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_MPWQE, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, @@ -317,26 +320,40 @@ enum { MLX5E_SQ_STATE_PENDING_XSK_TX, }; +struct mlx5e_tx_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u32 bytes_count; + u8 ds_count; + u8 pkt_count; + u8 inline_on; +}; + struct mlx5e_txqsq { /* data path */ /* dirtied @completion */ u16 cc; + u16 skb_fifo_cc; u32 dma_fifo_cc; struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; + u16 skb_fifo_pc; u32 dma_fifo_pc; + struct mlx5e_tx_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ struct mlx5_wq_cyc wq; u32 dma_fifo_mask; + u16 skb_fifo_mask; struct mlx5e_sq_stats *stats; struct { struct mlx5e_sq_dma *dma_fifo; + struct sk_buff **skb_fifo; struct mlx5e_tx_wqe_info *wqe_info; } db; void __iomem *uar_map; @@ -403,7 +420,7 @@ struct mlx5e_xdp_info { }; }; -struct mlx5e_xdp_xmit_data { +struct mlx5e_xmit_data { dma_addr_t dma_addr; void *data; u32 len; @@ -416,18 +433,10 @@ struct mlx5e_xdp_info_fifo { u32 mask; }; -struct mlx5e_xdp_mpwqe { - /* Current MPWQE session */ - struct mlx5e_tx_wqe *wqe; - u8 ds_count; - u8 pkt_count; - u8 inline_on; -}; - struct mlx5e_xdpsq; typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, - struct mlx5e_xdp_xmit_data *, + struct mlx5e_xmit_data *, struct mlx5e_xdp_info *, int); @@ -442,12 +451,12 @@ struct mlx5e_xdpsq { u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; u16 pc; struct mlx5_wqe_ctrl_seg *doorbell_cseg; - struct mlx5e_xdp_mpwqe mpwqe; + struct mlx5e_tx_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ - struct xdp_umem *umem; + struct 
xsk_buff_pool *xsk_pool; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; @@ -611,7 +620,7 @@ struct mlx5e_rq { struct page_pool *page_pool; /* AF_XDP zero-copy */ - struct xdp_umem *umem; + struct xsk_buff_pool *xsk_pool; struct work_struct recover_work; @@ -735,12 +744,13 @@ struct mlx5e_hv_vhca_stats_agent { #endif struct mlx5e_xsk { - /* UMEMs are stored separately from channels, because we don't want to - * lose them when channels are recreated. The kernel also stores UMEMs, - * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, - * so rely on our mechanism. + /* XSK buffer pools are stored separately from channels, + * because we don't want to lose them when channels are + * recreated. The kernel also stores buffer pool, but it doesn't + * distinguish between zero-copy and non-zero-copy UMEMs, so + * rely on our mechanism. */ - struct xdp_umem **umems; + struct xsk_buff_pool **pools; u16 refcnt; bool ever_used; }; @@ -899,7 +909,7 @@ struct mlx5e_xsk_param; struct mlx5e_rq_param; int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, struct mlx5e_rq *rq); + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@ -909,7 +919,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 6fdcd5e69476..dc744702aee4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -12,9 +12,12 @@ enum { }; struct mlx5e_tc_table { - /* protects flow table */ + /* Protects the dynamic assignment of the t parameter + * which is the nic tc root table. 
+ */ struct mutex t_lock; struct mlx5_flow_table *t; + struct mlx5_fs_chains *chains; struct rhashtable ht; @@ -24,6 +27,8 @@ struct mlx5e_tc_table { struct notifier_block netdevice_nb; struct netdev_net_notifier netdevice_nn; + + struct mlx5_tc_ct_priv *ct; }; struct mlx5e_flow_table { @@ -231,6 +236,7 @@ struct mlx5e_accel_fs_tcp; struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *egress_ns; #ifdef CONFIG_MLX5_EN_RXNFC struct mlx5e_ethtool_steering ethtool; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3dc200bcfabd..69a05da0e3e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -242,8 +242,8 @@ static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, { u32 data_size; + int err = 0; u32 offset; - int err; for (offset = 0; offset < value_len; offset += data_size) { data_size = value_len - offset; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 79cc42d88eec..e36e505d38ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -12,7 +12,7 @@ #include "neigh.h" #include "en_rep.h" #include "eswitch.h" -#include "esw/chains.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #include "en/mapping.h" #include "en/tc_tun.h" @@ -191,7 +191,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: memcpy(&tmp, f, sizeof(*f)); - if (!mlx5_esw_chains_prios_supported(esw)) + if (!mlx5_chains_prios_supported(esw_chains(esw))) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the @@ -203,12 +203,12 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, * * We only support chain 0 of FT offload. */ - if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw))) return -EOPNOTSUPP; if (tmp.common.chain_index != 0) return -EOPNOTSUPP; - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); tmp.common.prio++; err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); @@ -378,12 +378,12 @@ static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, * * We only support chain 0 of FT offload. 
*/ - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + if (!mlx5_chains_prios_supported(esw_chains(esw)) || + tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) || tmp.common.chain_index) return -EOPNOTSUPP; - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); tmp.common.prio++; err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); @@ -612,7 +612,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct tc_skb_ext *tc_skb_ext; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - int tunnel_moffset; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); @@ -626,7 +625,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; - err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain); if (err) { netdev_dbg(priv->netdev, "Couldn't find chain for chain tag: %d, err: %d\n", @@ -647,13 +646,12 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, + if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, zone_restore_id)) return false; } - tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; - tunnel_id = reg_c1 >> (8 * tunnel_moffset); + tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG); return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); #endif /* CONFIG_NET_TC_SKB_EXT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a8be40cbe325..e521254d886e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -14,7 +14,7 @@ #include <linux/workqueue.h> #include <linux/xarray.h> -#include "esw/chains.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #include "en/mod_hdr.h" #include "en/mapping.h" @@ -39,8 +39,9 @@ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) struct mlx5_tc_ct_priv { - struct mlx5_eswitch *esw; + struct mlx5_core_dev *dev; const struct net_device *netdev; + struct mod_hdr_tbl *mod_hdr_tbl; struct idr fte_ids; struct xarray tuple_ids; struct rhashtable zone_ht; @@ -50,13 +51,16 @@ struct mlx5_tc_ct_priv { struct mlx5_flow_table *ct_nat; struct mlx5_flow_table *post_ct; struct mutex control_lock; /* guards parallel adds/dels */ + struct mutex shared_counter_lock; struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; }; struct mlx5_ct_flow { - struct mlx5_esw_flow_attr pre_ct_attr; - struct mlx5_esw_flow_attr post_ct_attr; + struct mlx5_flow_attr *pre_ct_attr; + struct mlx5_flow_attr *post_ct_attr; struct mlx5_flow_handle *pre_ct_rule; struct mlx5_flow_handle *post_ct_rule; struct mlx5_ct_ft *ft; @@ -67,12 +71,12 @@ struct mlx5_ct_flow { struct mlx5_ct_zone_rule { struct mlx5_flow_handle *rule; struct mlx5e_mod_hdr_handle *mh; - struct mlx5_esw_flow_attr attr; + struct mlx5_flow_attr *attr; bool nat; }; struct mlx5_tc_ct_pre { - struct mlx5_flow_table *fdb; + struct mlx5_flow_table *ft; struct mlx5_flow_group *flow_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *flow_rule; @@ -114,11 +118,16 
@@ struct mlx5_ct_tuple { u16 zone; }; +struct mlx5_ct_shared_counter { + struct mlx5_fc *counter; + refcount_t refcount; +}; + struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; - struct mlx5_fc *counter; + struct mlx5_ct_shared_counter *shared_counter; unsigned long cookie; unsigned long restore_cookie; struct mlx5_ct_tuple tuple; @@ -157,18 +166,6 @@ static const struct rhashtable_params tuples_nat_ht_params = { .min_size = 16 * 1024, }; -static struct mlx5_tc_ct_priv * -mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - return uplink_priv->ct_priv; -} - static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@ -397,20 +394,30 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, } static void +mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->shared_counter->refcount)) + return; + + mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter); + kfree(entry->shared_counter); +} + +static void mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry, bool nat) { struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; - struct mlx5_esw_flow_attr *attr = &zone_rule->attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_attr *attr = zone_rule->attr; ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); - mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); - mlx5e_mod_hdr_detach(ct_priv->esw->dev, - &esw->offloads.mod_hdr, zone_rule->mh); + mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); + mlx5e_mod_hdr_detach(ct_priv->dev, + ct_priv->mod_hdr_tbl, zone_rule->mh); mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + kfree(attr); } static void @@ -419,8 +426,6 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, { mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); - - mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); } static struct flow_action_entry * @@ -446,29 +451,40 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, u32 labels_id, u8 zone_restore_id) { - struct mlx5_eswitch *esw = ct_priv->esw; + enum mlx5_flow_namespace_type ns = ct_priv->ns_type; + struct mlx5_core_dev *dev = ct_priv->dev; int err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, CTSTATE_TO_REG, ct_state); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, MARK_TO_REG, mark); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, LABELS_TO_REG, labels_id); if (err) return err; - err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, ZONE_RESTORE_TO_REG, zone_restore_id); if (err) return err; + /* Make another copy of zone id in reg_b for + * NIC rx flows since we don't copy reg_c1 to + * reg_b upon miss. 
+ */ + if (ns != MLX5_FLOW_NAMESPACE_FDB) { + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + NIC_ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + } return 0; } @@ -549,7 +565,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_tc_mod_hdr_acts *mod_acts) { struct flow_action *flow_action = &flow_rule->action; - struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct mlx5_core_dev *mdev = ct_priv->dev; struct flow_action_entry *act; size_t action_size; char *modact; @@ -560,8 +576,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_MANGLE: { - err = alloc_mod_hdr_actions(mdev, - MLX5_FLOW_NAMESPACE_FDB, + err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type, mod_acts); if (err) return err; @@ -590,7 +605,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, static int mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule, struct mlx5e_mod_hdr_handle **mh, u8 zone_restore_id, bool nat) @@ -626,9 +641,9 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (err) goto err_mapping; - *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev, - &ct_priv->esw->offloads.mod_hdr, - MLX5_FLOW_NAMESPACE_FDB, + *mh = mlx5e_mod_hdr_attach(ct_priv->dev, + ct_priv->mod_hdr_tbl, + ct_priv->ns_type, &mod_acts); if (IS_ERR(*mh)) { err = PTR_ERR(*mh); @@ -652,9 +667,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, bool nat, u8 zone_restore_id) { struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; - struct mlx5_esw_flow_attr *attr = &zone_rule->attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5_flow_spec *spec = NULL; + struct mlx5_flow_attr *attr; int err; zone_rule->nat = nat; @@ -663,6 +678,12 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, if (!spec) return -ENOMEM; + attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!attr) { + err = -ENOMEM; + goto err_attr; + } + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &zone_rule->mh, zone_restore_id, nat); @@ -676,9 +697,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = 0; attr->dest_ft = ct_priv->post_ct; - attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; - attr->counter = entry->counter; + attr->counter = entry->shared_counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); @@ -686,39 +707,100 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, entry->tuple.zone & MLX5_CT_ZONE_MASK, MLX5_CT_ZONE_MASK); - zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); if (IS_ERR(zone_rule->rule)) { err = PTR_ERR(zone_rule->rule); ct_dbg("Failed to add ct entry rule, nat: %d", nat); goto err_rule; } + zone_rule->attr = attr; + kfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); return 0; err_rule: - mlx5e_mod_hdr_detach(ct_priv->esw->dev, - &esw->offloads.mod_hdr, zone_rule->mh); + mlx5e_mod_hdr_detach(ct_priv->dev, + ct_priv->mod_hdr_tbl, zone_rule->mh); mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); err_mod_hdr: + kfree(attr); +err_attr: kfree(spec); return err; } +static struct mlx5_ct_shared_counter * +mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + struct mlx5_ct_tuple rev_tuple = entry->tuple; + struct mlx5_ct_shared_counter *shared_counter; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_ct_entry *rev_entry; + __be16 tmp_port; + int ret; + + /* get the reversed tuple */ + tmp_port = rev_tuple.port.src; + rev_tuple.port.src = rev_tuple.port.dst; + rev_tuple.port.dst = tmp_port; + + if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + __be32 tmp_addr = rev_tuple.ip.src_v4; + + rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; + rev_tuple.ip.dst_v4 = tmp_addr; + } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct in6_addr tmp_addr = rev_tuple.ip.src_v6; + + rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; + rev_tuple.ip.dst_v6 = tmp_addr; + } else { + return ERR_PTR(-EOPNOTSUPP); + } + + /* Use the same counter as the reverse direction */ + mutex_lock(&ct_priv->shared_counter_lock); + rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, + tuples_ht_params); + if (rev_entry) { + if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) { + mutex_unlock(&ct_priv->shared_counter_lock); + return rev_entry->shared_counter; + } + } + mutex_unlock(&ct_priv->shared_counter_lock); + + shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL); + if (!shared_counter) + return ERR_PTR(-ENOMEM); + + shared_counter->counter = mlx5_fc_create(dev, true); + if (IS_ERR(shared_counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(shared_counter->counter); + kfree(shared_counter); + return ERR_PTR(ret); + } + + refcount_set(&shared_counter->refcount, 1); + return shared_counter; +} + static int mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, struct flow_rule *flow_rule, struct mlx5_ct_entry *entry, u8 zone_restore_id) { - struct mlx5_eswitch *esw = ct_priv->esw; int err; - entry->counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(entry->counter)) { - err = PTR_ERR(entry->counter); + entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + if (IS_ERR(entry->shared_counter)) { + err = PTR_ERR(entry->shared_counter); ct_dbg("Failed to create counter for ct entry"); return err; } @@ -738,7 +820,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, err_nat: mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 
err_orig: - mlx5_fc_destroy(esw->dev, entry->counter); + mlx5_tc_ct_shared_counter_put(ct_priv, entry); return err; } @@ -828,12 +910,16 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { mlx5_tc_ct_entry_del_rules(ct_priv, entry); + mutex_lock(&ct_priv->shared_counter_lock); if (entry->tuple_node.next) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, tuples_ht_params); + mutex_unlock(&ct_priv->shared_counter_lock); + mlx5_tc_ct_shared_counter_put(ct_priv, entry); + } static int @@ -870,7 +956,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, if (!entry) return -ENOENT; - mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); @@ -943,9 +1029,7 @@ out: return false; } -int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec) +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) { u32 ctstate = 0, ctstate_mask = 0; @@ -961,24 +1045,21 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, return 0; } -void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) +void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - - if (!ct_priv || !ct_attr->ct_labels_id) + if (!priv || !ct_attr->ct_labels_id) return; - mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id); + mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); } int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector_key_ct *mask, *key; bool trk, est, untrk, unest, new; @@ -991,7 +1072,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) return 0; - if (!ct_priv) { + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct matching isn't available"); return -EOPNOTSUPP; @@ -1047,7 +1128,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); @@ -1057,14 +1138,12 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, } int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - - if (!ct_priv) { + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); return -EOPNOTSUPP; @@ -1083,8 +1162,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, { struct 
mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; - struct mlx5_core_dev *dev = ct_priv->esw->dev; - struct mlx5_flow_table *fdb = pre_ct->fdb; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table *ft = pre_ct->ft; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_modify_hdr *mod_hdr; @@ -1099,14 +1178,14 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, return -ENOMEM; zone = ct_ft->zone & MLX5_CT_ZONE_MASK; - err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, + ZONE_TO_REG, zone); if (err) { ct_dbg("Failed to set zone register mapping"); goto err_mapping; } - mod_hdr = mlx5_modify_header_alloc(dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, pre_mod_acts.num_actions, pre_mod_acts.actions); @@ -1132,7 +1211,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); dest.ft = ct_priv->post_ct; - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add pre ct flow rule zone %d", zone); @@ -1143,7 +1222,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, /* add miss rule */ memset(spec, 0, sizeof(*spec)); dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add pre ct miss rule zone %d", zone); @@ -1170,7 +1249,7 @@ tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, struct mlx5_tc_ct_pre *pre_ct) { struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; - struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_core_dev *dev = ct_priv->dev; mlx5_del_flow_rules(pre_ct->flow_rule); mlx5_del_flow_rules(pre_ct->miss_rule); @@ -1184,7 +1263,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; - struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_core_dev *dev = ct_priv->dev; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; @@ -1194,10 +1273,10 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, void *misc; int err; - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); if (!ns) { err = -EOPNOTSUPP; - ct_dbg("Failed to get FDB flow namespace"); + ct_dbg("Failed to get flow namespace"); return err; } @@ -1206,7 +1285,8 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, return -ENOMEM; ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO; ft_attr.max_fte = 2; ft_attr.level = 1; ft = mlx5_create_flow_table(ns, &ft_attr); @@ -1215,7 +1295,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, ct_dbg("Failed to create pre ct table"); goto out_free; } - pre_ct->fdb = ft; + pre_ct->ft = ft; /* create flow group */ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1279,7 +1359,7 @@ mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, tc_ct_pre_ct_del_rules(ct_ft, pre_ct); mlx5_destroy_flow_group(pre_ct->miss_grp); mlx5_destroy_flow_group(pre_ct->flow_grp); - mlx5_destroy_flow_table(pre_ct->fdb); + mlx5_destroy_flow_table(pre_ct->ft); } static int @@ -1398,7 +1478,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * * +---------------------+ - * + fdb prio (tc chain) + + * + ft prio (tc chain) + * + original match + * +---------------------+ * | set chain miss mapping @@ -1428,17 +1508,17 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * +--------------+ */ static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); struct mlx5_flow_spec *post_ct_spec = NULL; - struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; @@ -1473,10 +1553,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, } ct_flow->fte_id = fte_id; - /* Base esw attributes of both rules on original rule attribute */ - pre_ct_attr = &ct_flow->pre_ct_attr; - memcpy(pre_ct_attr, attr, sizeof(*attr)); - memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); + /* Base flow attributes of both rules on original rule attribute */ + ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!ct_flow->pre_ct_attr) { + err = -ENOMEM; + goto err_alloc_pre; + } + + ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!ct_flow->post_ct_attr) { + err = -ENOMEM; + goto err_alloc_post; + } + + pre_ct_attr = ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, attr_sz); + memcpy(ct_flow->post_ct_attr, attr, attr_sz); /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1487,22 +1579,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, * don't go though all prios of this chain as normal tc rules * miss. 
*/ - err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, - &chain_mapping); + err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, + &chain_mapping); if (err) { ct_dbg("Failed to get chain register mapping for chain"); goto err_get_chain; } ct_flow->chain_mapping = chain_mapping; - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, CHAIN_TO_REG, chain_mapping); if (err) { ct_dbg("Failed to set chain register mapping"); goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, FTEID_TO_REG, fte_id); if (err) { ct_dbg("Failed to set fte_id register mapping"); @@ -1516,7 +1608,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, attr->chain == 0) { u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, + ct_priv->ns_type, TUNNEL_TO_REG, tun_id); if (err) { @@ -1525,8 +1618,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, } } - mod_hdr = mlx5_modify_header_alloc(esw->dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, pre_mod_acts.num_actions, pre_mod_acts.actions); if (IS_ERR(mod_hdr)) { @@ -1542,16 +1634,16 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, fte_id, MLX5_FTE_ID_MASK); - /* Put post_ct rule on post_ct fdb */ - ct_flow->post_ct_attr.chain = 0; - ct_flow->post_ct_attr.prio = 0; - ct_flow->post_ct_attr.fdb = ct_priv->post_ct; + /* Put post_ct rule on post_ct flow table */ + ct_flow->post_ct_attr->chain = 0; + ct_flow->post_ct_attr->prio = 0; + ct_flow->post_ct_attr->ft = ct_priv->post_ct; - ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; - ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); - rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, - &ct_flow->post_ct_attr); + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + rule = mlx5_tc_rule_insert(priv, post_ct_spec, + ct_flow->post_ct_attr); ct_flow->post_ct_rule = rule; if (IS_ERR(ct_flow->post_ct_rule)) { err = PTR_ERR(ct_flow->post_ct_rule); @@ -1561,10 +1653,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; - pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb; - ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, - orig_spec, - pre_ct_attr); + pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.ft : ft->pre_ct.ft; + ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, + pre_ct_attr); if (IS_ERR(ct_flow->pre_ct_rule)) { err = PTR_ERR(ct_flow->pre_ct_rule); ct_dbg("Failed to add pre ct rule"); @@ -1578,14 +1669,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, return rule; err_insert_orig: - mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, - &ct_flow->post_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, + ct_flow->post_ct_attr); err_insert_post_ct: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: dealloc_mod_hdr_actions(&pre_mod_acts); - mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: + kfree(ct_flow->post_ct_attr); +err_alloc_post: + kfree(ct_flow->pre_ct_attr); +err_alloc_pre: idr_remove(&ct_priv->fte_ids, fte_id); err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); @@ -1597,14 +1692,14 @@ err_ft: } static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, +__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_spec *orig_spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_acts) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); - struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); + struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; @@ -1615,8 +1710,13 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, return ERR_PTR(-ENOMEM); /* Base esw attributes on original rule attribute */ - pre_ct_attr = &ct_flow->pre_ct_attr; - memcpy(pre_ct_attr, attr, sizeof(*attr)); + pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!pre_ct_attr) { + err = -ENOMEM; + goto err_attr; + } + + memcpy(pre_ct_attr, attr, attr_sz); err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); if (err) { @@ -1624,8 +1724,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, goto err_set_registers; } - mod_hdr = mlx5_modify_header_alloc(esw->dev, - MLX5_FLOW_NAMESPACE_FDB, + mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, mod_acts->num_actions, mod_acts->actions); if (IS_ERR(mod_hdr)) { @@ -1638,7 +1737,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, pre_ct_attr->modify_hdr = mod_hdr; pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); + rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add ct clear rule"); @@ -1646,6 +1745,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, } attr->ct_attr.ct_flow = ct_flow; + ct_flow->pre_ct_attr = pre_ct_attr; ct_flow->pre_ct_rule = rule; return rule; @@ -1654,61 +1754,67 @@ err_insert: err_set_registers: netdev_warn(priv->netdev, "Failed to offload ct clear flow, err %d\n", err); + kfree(pre_ct_attr); +err_attr: + kfree(ct_flow); + return ERR_PTR(err); } struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts 
*mod_hdr_acts) { bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct mlx5_flow_handle *rule; - if (!ct_priv) + if (!priv) return ERR_PTR(-EOPNOTSUPP); - mutex_lock(&ct_priv->control_lock); + mutex_lock(&priv->control_lock); if (clear_action) rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); else rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); - mutex_unlock(&ct_priv->control_lock); + mutex_unlock(&priv->control_lock); return rule; } static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_flow *flow, struct mlx5_ct_flow *ct_flow) { - struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; - struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, - pre_ct_attr); - mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, + pre_ct_attr); + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); if (ct_flow->post_ct_rule) { - mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, - &ct_flow->post_ct_attr); - mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, + ct_flow->post_ct_attr); + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); } + kfree(ct_flow->pre_ct_attr); + kfree(ct_flow->post_ct_attr); kfree(ct_flow); } void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; /* We are called on error to clean up stuff from parsing @@ -1717,22 +1823,15 @@ mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, if (!ct_flow) return; - mutex_lock(&ct_priv->control_lock); - __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); - mutex_unlock(&ct_priv->control_lock); + mutex_lock(&priv->control_lock); + __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + mutex_unlock(&priv->control_lock); } static int -mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, - const char **err_msg) +mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, + const char **err_msg) { -#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - /* cannot restore chain ID on HW miss */ - - *err_msg = "tc skb extension missing"; - return -EOPNOTSUPP; -#endif - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { *err_msg = "firmware level support is missing"; return -EOPNOTSUPP; @@ -1766,44 +1865,61 @@ mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, return 0; } -static void -mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) +static int +mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv, + const char **err_msg) +{ + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + *err_msg = "firmware level support is missing"; + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + const char **err_msg) { - if (msg) - 
netdev_warn(rpriv->netdev, - "tc ct offload not supported, %s, err: %d\n", - msg, err); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + *err_msg = "tc skb extension missing"; + return -EOPNOTSUPP; +#endif + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + return mlx5_tc_ct_init_check_esw_support(esw, err_msg); else - netdev_warn(rpriv->netdev, - "tc ct offload not supported, err: %d\n", - err); + return mlx5_tc_ct_init_check_nic_support(priv, err_msg); } -int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +#define INIT_ERR_PREFIX "tc ct offload init failed" + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type) { struct mlx5_tc_ct_priv *ct_priv; - struct mlx5e_rep_priv *rpriv; - struct mlx5_eswitch *esw; - struct mlx5e_priv *priv; + struct mlx5_core_dev *dev; const char *msg; int err; - rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); - priv = netdev_priv(rpriv->netdev); - esw = priv->mdev->priv.eswitch; - - err = mlx5_tc_ct_init_check_support(esw, &msg); + dev = priv->mdev; + err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg); if (err) { - mlx5_tc_ct_init_err(rpriv, msg, err); + mlx5_core_warn(dev, + "tc ct offload not supported, %s\n", + msg); goto err_support; } ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); - if (!ct_priv) { - mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); + if (!ct_priv) goto err_alloc; - } ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); if (IS_ERR(ct_priv->zone_mapping)) { @@ -1817,46 +1933,51 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_mapping_labels; } - ct_priv->esw = esw; - ct_priv->netdev = rpriv->netdev; - ct_priv->ct = mlx5_esw_chains_create_global_table(esw); + ct_priv->ns_type = ns_type; + ct_priv->chains = chains; + ct_priv->netdev = priv->netdev; + ct_priv->dev = priv->mdev; + ct_priv->mod_hdr_tbl = mod_hdr; + ct_priv->ct = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->ct)) { err = PTR_ERR(ct_priv->ct); - mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); + mlx5_core_warn(dev, + "%s, failed to create ct table err: %d\n", + INIT_ERR_PREFIX, err); goto err_ct_tbl; } - ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); + ct_priv->ct_nat = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->ct_nat)) { err = PTR_ERR(ct_priv->ct_nat); - mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", - err); + mlx5_core_warn(dev, + "%s, failed to create ct nat table err: %d\n", + INIT_ERR_PREFIX, err); goto err_ct_nat_tbl; } - ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); + ct_priv->post_ct = mlx5_chains_create_global_table(chains); if (IS_ERR(ct_priv->post_ct)) { err = PTR_ERR(ct_priv->post_ct); - mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", - err); + mlx5_core_warn(dev, + "%s, failed to create post ct table err: %d\n", + INIT_ERR_PREFIX, err); goto err_post_ct_tbl; } idr_init(&ct_priv->fte_ids); mutex_init(&ct_priv->control_lock); + mutex_init(&ct_priv->shared_counter_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); - /* Done, set ct_priv to know it initializted */ - uplink_priv->ct_priv = ct_priv; - - return 0; + return ct_priv; err_post_ct_tbl: - 
mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: - mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: mapping_destroy(ct_priv->labels_mapping); err_mapping_labels: @@ -1866,20 +1987,22 @@ err_mapping_zone: err_alloc: err_support: - return 0; + return NULL; } void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) { - struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + struct mlx5_fs_chains *chains; if (!ct_priv) return; - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); - mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); + chains = ct_priv->chains; + + mlx5_chains_destroy_global_table(chains, ct_priv->post_ct); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); mapping_destroy(ct_priv->labels_mapping); @@ -1887,17 +2010,15 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); + mutex_destroy(&ct_priv->shared_counter_lock); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); - - uplink_priv->ct_priv = NULL; } bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id) { - struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; struct mlx5_ct_tuple tuple = {}; struct mlx5_ct_entry *entry; u16 zone; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 708c216325d3..6503b614337c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -10,12 +10,14 @@ #include "en.h" -struct mlx5_esw_flow_attr; +struct mlx5_flow_attr; struct mlx5e_tc_mod_hdr_acts; struct mlx5_rep_uplink_priv; struct mlx5e_tc_flow; struct mlx5e_priv; +struct mlx5_fs_chains; +struct mlx5_tc_ct_priv; struct mlx5_ct_flow; struct nf_flowtable; @@ -76,68 +78,78 @@ struct mlx5_ct_attr { misc_parameters_2.metadata_reg_c_1) + 3,\ } +#define nic_zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ + .moffset = 2,\ + .mlen = 1,\ +} + #define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) +#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) +#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8) #define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8) #define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) -int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type); void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); void -mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr); +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr); int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv 
*priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack); +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec); -int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack); struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); #else /* CONFIG_MLX5_TC_CT */ -static inline int -mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +static inline struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type) { - return 0; + return NULL; } static inline void -mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) { } static inline void -mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) {} +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {} static inline int -mlx5_tc_ct_match_add(struct mlx5e_priv *priv, +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct mlx5_ct_attr *ct_attr, @@ -149,47 +161,44 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv, return 0; NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); - netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } static inline int -mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec) +mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) { return 0; } static inline int -mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); - netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } static inline struct mlx5_flow_handle * -mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { return ERR_PTR(-EOPNOTSUPP); } static inline void -mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { } static inline bool -mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, 
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id) { if (!zone_restore_id) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 24336c60123a..07ee1d236ab3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -7,6 +7,21 @@ #include "en.h" #include <linux/indirect_call_wrapper.h> +#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) enum mlx5e_icosq_wqe_type { @@ -46,8 +61,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); -void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -110,6 +123,7 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; + u8 num_fifo_pkts; #ifdef CONFIG_MLX5_EN_TLS struct page *resync_dump_frag_page; #endif @@ -194,23 +208,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size) } static inline void -mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - memset(wi, 0, sizeof(*wi)); - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - -static inline void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, struct mlx5_wqe_ctrl_seg *ctrl) { @@ -228,29 +225,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5_write64((__be32 *)ctrl, uar_map); } -static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg) -{ - return cseg && !!cseg->tis_tir_num; -} - -static inline u8 -mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct sk_buff *skb) -{ - u8 mode; - - if (mlx5e_transport_inline_tx_wqe(cseg)) - return MLX5_INLINE_MODE_TCP_UDP; - - mode = sq->min_inline_mode; - - if (skb_vlan_tag_present(skb) && - test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) - mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); - - return mode; -} - static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) { struct mlx5_core_cq *mcq; @@ -276,6 +250,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, dma->type = map_type; } +static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i) +{ + return &sq->db.skb_fifo[i & sq->skb_fifo_mask]; +} + +static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, 
struct sk_buff *skb) +{ + struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++); + + *skb_item = skb; +} + +static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq) +{ + return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++); +} + static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) { @@ -291,6 +282,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); + +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +{ + return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; +} + static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b28df21981a1..ae90d533a350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -59,7 +59,7 @@ static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) { - struct mlx5e_xdp_xmit_data xdptxd; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; struct xdp_frame *xdpf; dma_addr_t dma_addr; @@ -194,18 +194,22 @@ static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size) static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS); - session->wqe = MLX5E_TX_FETCH_WQE(sq, pi); + pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); - prefetchw(session->wqe->data); - session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; - session->pkt_count = 0; - - mlx5e_xdp_update_inline_state(sq); + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on), + }; stats->mpwqe++; } @@ -213,7 +217,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; u16 ds_count = session->ds_count; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -258,10 +262,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq } INDIRECT_CALLABLE_SCOPE bool -mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, +mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; if (unlikely(xdptxd->len > sq->hw_mtu)) { @@ -284,8 +288,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *x mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || - session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) + if 
(unlikely(mlx5e_xdp_mpqwe_is_full(session))) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); @@ -306,7 +309,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) } INDIRECT_CALLABLE_SCOPE bool -mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; @@ -322,7 +325,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats = sq->stats; - prefetchw(wqe); + net_prefetchw(wqe); if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { stats->err++; @@ -445,7 +448,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xsk_frames) - xsk_umem_complete_tx(sq->umem, xsk_frames); + xsk_tx_completed(sq->xsk_pool, xsk_frames); sq->stats->cqes += i; @@ -475,7 +478,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) } if (xsk_frames) - xsk_umem_complete_tx(sq->umem, xsk_frames); + xsk_tx_completed(sq->xsk_pool, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -503,7 +506,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; - struct mlx5e_xdp_xmit_data xdptxd; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; bool ret; @@ -563,4 +566,3 @@ void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) sq->xmit_xdp_frame = is_mpw ? mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index e806c13d491f..d487e5e37162 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -38,27 +38,12 @@ #include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_EMPTY_DS_COUNT \ - (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) - -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) -#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ - DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) - -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. 
- */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif +#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */) -#define MLX5E_XDP_MPW_MAX_NUM_DS \ - (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16 +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \ + (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ + sizeof(struct mlx5_wqe_inline_seg)) struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); @@ -73,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdp_info *xdpi, int check_result)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); @@ -122,30 +107,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) /* Enable inline WQEs to shift some load from a congested HCA (HW) to * a less congested cpu (SW). */ -static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) +static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) { u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; #define MLX5E_XDP_INLINE_WATERMARK_LOW 10 #define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 - if (session->inline_on) { - if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) - session->inline_on = 0; - return; - } + if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + return false; + + if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + return true; - /* inline is false */ - if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) - session->inline_on = 1; + return cur; } -static inline bool -mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session) { - return session->inline_on && - session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; + if (session->inline_on) + return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > + MLX5E_TX_MPW_MAX_NUM_DS; + return mlx5e_tx_mpwqe_is_full(session); } struct mlx5e_xdp_wqe_info { @@ -155,15 +138,16 @@ struct mlx5e_xdp_wqe_info { static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { - struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; u32 dma_len = xdptxd->len; session->pkt_count++; + session->bytes_count += dma_len; if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 331ca2b0f8a4..71e8d66fa150 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -1,31 +1,31 @@ // 
SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2019 Mellanox Technologies. */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ #include <net/xdp_sock_drv.h> -#include "umem.h" +#include "pool.h" #include "setup.h" #include "en/params.h" -static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, - struct xdp_umem *umem) +static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) { - struct device *dev = priv->mdev->device; + struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_buff_dma_map(umem, dev, 0); + return xsk_pool_dma_map(pool, dev, 0); } -static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, - struct xdp_umem *umem) +static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) { - return xsk_buff_dma_unmap(umem, 0); + return xsk_pool_dma_unmap(pool, 0); } -static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) { - if (!xsk->umems) { - xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, - sizeof(*xsk->umems), GFP_KERNEL); - if (unlikely(!xsk->umems)) + if (!xsk->pools) { + xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->pools), GFP_KERNEL); + if (unlikely(!xsk->pools)) return -ENOMEM; } @@ -35,68 +35,68 @@ static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) return 0; } -static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk) { if (!--xsk->refcnt) { - kfree(xsk->umems); - xsk->umems = NULL; + kfree(xsk->pools); + xsk->pools = NULL; } } -static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix) { int err; - err = mlx5e_xsk_get_umems(xsk); + err = mlx5e_xsk_get_pools(xsk); if (unlikely(err)) return err; - xsk->umems[ix] = umem; + xsk->pools[ix] = pool; return 0; } -static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix) { - xsk->umems[ix] = NULL; + xsk->pools[ix] = NULL; - mlx5e_xsk_put_umems(xsk); + mlx5e_xsk_put_pools(xsk); } -static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool) { - return xsk_umem_get_headroom(umem) <= 0xffff && - xsk_umem_get_chunk_size(umem) <= 0xffff; + return xsk_pool_get_headroom(pool) <= 0xffff && + xsk_pool_get_chunk_size(pool) <= 0xffff; } -void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { - xsk->headroom = xsk_umem_get_headroom(umem); - xsk->chunk_size = xsk_umem_get_chunk_size(umem); + xsk->headroom = xsk_pool_get_headroom(pool); + xsk->chunk_size = xsk_pool_get_chunk_size(pool); } static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, - struct xdp_umem *umem, u16 ix) + struct xsk_buff_pool *pool, u16 ix) { struct mlx5e_params *params = &priv->channels.params; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; int err; - if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix))) return -EBUSY; - if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) return -EINVAL; - err = mlx5e_xsk_map_umem(priv, umem); + err = mlx5e_xsk_map_pool(priv, pool); if (unlikely(err)) return err; - err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + 
err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); if (unlikely(err)) - goto err_unmap_umem; + goto err_unmap_pool; - mlx5e_build_xsk_param(umem, &xsk); + mlx5e_build_xsk_param(pool, &xsk); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { /* XSK objects will be created on open. */ @@ -112,9 +112,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, c = priv->channels.c[ix]; - err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + err = mlx5e_open_xsk(priv, params, &xsk, pool, c); if (unlikely(err)) - goto err_remove_umem; + goto err_remove_pool; mlx5e_activate_xsk(c); @@ -132,11 +132,11 @@ err_deactivate: mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); -err_remove_umem: - mlx5e_xsk_remove_umem(&priv->xsk, ix); +err_remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); -err_unmap_umem: - mlx5e_xsk_unmap_umem(priv, umem); +err_unmap_pool: + mlx5e_xsk_unmap_pool(priv, pool); return err; @@ -146,7 +146,7 @@ validate_closed: */ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { err = -EINVAL; - goto err_remove_umem; + goto err_remove_pool; } return 0; @@ -154,45 +154,45 @@ validate_closed: static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) { - struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix); struct mlx5e_channel *c; - if (unlikely(!umem)) + if (unlikely(!pool)) return -EINVAL; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto remove_umem; + goto remove_pool; /* XSK RQ and SQ are only created if XDP program is set. */ if (!priv->channels.params.xdp_prog) - goto remove_umem; + goto remove_pool; c = priv->channels.c[ix]; mlx5e_xsk_redirect_rqt_to_drop(priv, ix); mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); -remove_umem: - mlx5e_xsk_remove_umem(&priv->xsk, ix); - mlx5e_xsk_unmap_umem(priv, umem); +remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + mlx5e_xsk_unmap_pool(priv, pool); return 0; } -static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, +static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, u16 ix) { int err; mutex_lock(&priv->state_lock); - err = mlx5e_xsk_enable_locked(priv, umem, ix); + err = mlx5e_xsk_enable_locked(priv, pool, ix); mutex_unlock(&priv->state_lock); return err; } -static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix) { int err; @@ -203,7 +203,7 @@ static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) return err; } -int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; @@ -212,6 +212,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) return -EINVAL; - return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : - mlx5e_xsk_disable_umem(priv, ix); + return pool ? 
mlx5e_xsk_enable_pool(priv, pool, ix) : + mlx5e_xsk_disable_pool(priv, ix); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h new file mode 100644 index 000000000000..dca0010a0866 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_XSK_POOL_H__ +#define __MLX5_EN_XSK_POOL_H__ + +#include "en.h" + +static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->pools) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->pools[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); + +#endif /* __MLX5_EN_XSK_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 40db27bf790b..8e7b877d8a12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -47,8 +47,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, xdp->data_end = xdp->data + cqe_bcnt32; xdp_set_data_meta_invalid(xdp); - xsk_buff_dma_sync_for_cpu(xdp); - prefetch(xdp->data); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); /* Possible flows: * - XDP_REDIRECT to XSKMAP: @@ -93,8 +93,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, xdp->data_end = xdp->data + cqe_bcnt; xdp_set_data_meta_invalid(xdp); - xsk_buff_dma_sync_for_cpu(xdp); - prefetch(xdp->data); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index d147b2f13b54..7f88ccf67fdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -19,10 +19,10 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, +static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - dma_info->xsk = xsk_buff_alloc(rq->umem); + dma_info->xsk = xsk_buff_alloc(rq->xsk_pool); if (!dma_info->xsk) return -ENOMEM; @@ -38,13 +38,13 @@ static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) { - if (!xsk_umem_uses_need_wakeup(rq->umem)) + if (!xsk_uses_need_wakeup(rq->xsk_pool)) return alloc_err; if (unlikely(alloc_err)) - xsk_set_rx_need_wakeup(rq->umem); + xsk_set_rx_need_wakeup(rq->xsk_pool); else - xsk_clear_rx_need_wakeup(rq->umem); + xsk_clear_rx_need_wakeup(rq->xsk_pool); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 55e65a438de7..4e574ac73019 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -45,7 
+45,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, struct mlx5e_channel *c) { struct mlx5e_channel_param *cparam; @@ -64,7 +64,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq; @@ -72,13 +72,13 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_close_rq; - /* Create a separate SQ, so that when the UMEM is disabled, we could + /* Create a separate SQ, so that when the buff pool is disabled, we could * close this SQ safely and stop receiving CQEs. In other case, e.g., if - * the XDPSQ was used instead, we might run into trouble when the UMEM + * the XDPSQ was used instead, we might run into trouble when the buff pool * is disabled and then reenabled, but the SQ continues receiving CQEs - * from the old UMEM. + * from the old buff pool. */ - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index 0dd11b81c046..ca20f1ff5e39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -12,7 +12,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev); int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, struct mlx5e_channel *c); void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 4d892f6cecb3..fb671a457129 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "tx.h" -#include "umem.h" +#include "pool.h" #include "en/xdp.h" #include "en/params.h" #include <net/xdp_sock_drv.h> @@ -66,9 +66,9 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) { - struct xdp_umem *umem = sq->umem; + struct xsk_buff_pool *pool = sq->xsk_pool; + struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - struct mlx5e_xdp_xmit_data xdptxd; bool work_done = true; bool flush = false; @@ -87,7 +87,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) break; } - if (!xsk_umem_consume_tx(umem, &desc)) { + if (!xsk_tx_peek_desc(pool, &desc)) { /* TX will get stuck until something wakes it up by * triggering NAPI. 
Currently it's expected that the * application calls sendto() if there are consumed, but @@ -96,11 +96,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) break; } - xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr); - xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr); + xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr); + xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr); xdptxd.len = desc.len; - xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len); + xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result); @@ -119,7 +119,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xmit_xdp_doorbell(sq); - xsk_umem_consume_tx_done(umem); + xsk_tx_release(pool); } return !(budget && work_done); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 39fa0a705856..a05085035f23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -15,13 +15,13 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) { - if (!xsk_umem_uses_need_wakeup(sq->umem)) + if (!xsk_uses_need_wakeup(sq->xsk_pool)) return; if (sq->pc != sq->cc) - xsk_clear_tx_need_wakeup(sq->umem); + xsk_clear_tx_need_wakeup(sq->xsk_pool); else - xsk_set_tx_need_wakeup(sq->umem); + xsk_set_tx_need_wakeup(sq->xsk_pool); } #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h deleted file mode 100644 index bada94973586..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2019 Mellanox Technologies. */ - -#ifndef __MLX5_EN_XSK_UMEM_H__ -#define __MLX5_EN_XSK_UMEM_H__ - -#include "en.h" - -static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, - struct mlx5e_xsk *xsk, u16 ix) -{ - if (!xsk || !xsk->umems) - return NULL; - - if (unlikely(ix >= params->num_channels)) - return NULL; - - return xsk->umems[ix]; -} - -struct mlx5e_xsk_param; -void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); - -/* .ndo_bpf callback. 
*/ -int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); - -int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); - -#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 110476bdeffb..899b98aca0d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -107,6 +107,9 @@ struct mlx5e_accel_tx_state { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_accel_tx_tls_state tls; #endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_accel_tx_ipsec_state ipsec; +#endif }; static inline bool mlx5e_accel_tx_begin(struct net_device *dev, @@ -125,27 +128,70 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, } #endif +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) { + if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec))) + return false; + } +#endif + return true; } -static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv, - struct mlx5e_txqsq *sq, - struct sk_buff *skb, +static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + return mlx5e_ipsec_is_tx_flow(&state->ipsec); +#endif + + return false; +} + +static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, + struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) + return mlx5e_ipsec_tx_ids_len(&state->ipsec); +#endif + + return 0; +} + +/* Part of the eseg touched by TX offloads */ +#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss) + +static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (xfrm_offload(skb)) + mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); +#endif + +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation) + mlx5e_tx_tunnel_accel(skb, eseg); +#endif + + return true; +} + +static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe *wqe, - struct mlx5e_accel_tx_state *state) + struct mlx5e_accel_tx_state *state, + struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC - if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) { - if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb))) - return false; - } + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && + state->ipsec.xo && state->ipsec.tailen) + mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg); #endif - - return true; } static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 4cdd9eac647d..97f1594cee11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -191,7 +191,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { - kvfree(ft->g); + kfree(ft->g); kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 
d39989cddd90..3d45341e2216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -560,6 +560,9 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) return; } + if (mlx5_is_ipsec_device(mdev)) + netdev->gso_partial_features |= NETIF_F_GSO_ESP; + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); netdev->features |= NETIF_F_GSO_ESP; netdev->hw_features |= NETIF_F_GSO_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 0fc8b4d4f4a3..6164c7f59efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -76,6 +76,7 @@ struct mlx5e_ipsec_stats { }; struct mlx5e_accel_fs_esp; +struct mlx5e_ipsec_tx; struct mlx5e_ipsec { struct mlx5e_priv *en_priv; @@ -87,6 +88,7 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_stats stats; struct workqueue_struct *wq; struct mlx5e_accel_fs_esp *rx_fs; + struct mlx5e_ipsec_tx *tx_fs; }; struct mlx5e_ipsec_esn_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 429428bbc903..0e45590662a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -34,6 +34,12 @@ struct mlx5e_accel_fs_esp { struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES]; }; +struct mlx5e_ipsec_tx { + struct mlx5_flow_table *ft; + struct mutex mutex; /* Protect IPsec TX steering */ + u32 refcnt; +}; + /* IPsec RX flow steering */ static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i) { @@ -228,8 +234,8 @@ static int rx_fs_create(struct mlx5e_priv *priv, fs_prot->miss_rule = miss_rule; out: - kfree(flow_group_in); - kfree(spec); + kvfree(flow_group_in); + kvfree(spec); return err; } @@ -323,6 +329,77 @@ out: mutex_unlock(&fs_prot->prot_mutex); } +/* IPsec TX flow steering */ +static int tx_create(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_ipsec *ipsec = priv->ipsec; + struct mlx5_flow_table *ft; + int err; + + priv->fs.egress_ns = + mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_EGRESS_KERNEL); + if (!priv->fs.egress_ns) + return -EOPNOTSUPP; + + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err); + return err; + } + ipsec->tx_fs->ft = ft; + return 0; +} + +static void tx_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (IS_ERR_OR_NULL(ipsec->tx_fs->ft)) + return; + + mlx5_destroy_flow_table(ipsec->tx_fs->ft); + ipsec->tx_fs->ft = NULL; +} + +static int tx_ft_get(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + int err = 0; + + mutex_lock(&tx_fs->mutex); + if (tx_fs->refcnt++) + goto out; + + err = tx_create(priv); + if (err) { + tx_fs->refcnt--; + goto out; + } + +out: + mutex_unlock(&tx_fs->mutex); + return err; +} + +static void tx_ft_put(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; + + mutex_lock(&tx_fs->mutex); + if (--tx_fs->refcnt) + goto out; + + tx_destroy(priv); + +out: + mutex_unlock(&tx_fs->mutex); +} + static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs 
*attrs, u32 ipsec_obj_id, struct mlx5_flow_spec *spec, @@ -457,6 +534,54 @@ out: return err; } +static int tx_add_rule(struct mlx5e_priv *priv, + struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 ipsec_obj_id, + struct mlx5e_ipsec_rule *ipsec_rule) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + err = tx_ft_get(priv); + if (err) + return err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act); + + /* Add IPsec indicator in metadata_reg_a */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_IPSEC); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT; + rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n", + attrs->action, err); + goto out; + } + + ipsec_rule->rule = rule; + +out: + kvfree(spec); + if (err) + tx_ft_put(priv); + return err; +} + static void rx_del_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5e_ipsec_rule *ipsec_rule) @@ -470,15 +595,27 @@ static void rx_del_rule(struct mlx5e_priv *priv, rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4); } +static void tx_del_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_rule *ipsec_rule) +{ + mlx5_del_flow_rules(ipsec_rule->rule); + ipsec_rule->rule = NULL; + + tx_ft_put(priv); +} + int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, u32 ipsec_obj_id, struct mlx5e_ipsec_rule *ipsec_rule) { - if (!priv->ipsec->rx_fs || attrs->action != MLX5_ACCEL_ESP_ACTION_DECRYPT) + if (!priv->ipsec->rx_fs) return -EOPNOTSUPP; - return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); + if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) + return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); + else + return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); } void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, @@ -488,7 +625,18 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, if (!priv->ipsec->rx_fs) return; - rx_del_rule(priv, attrs, ipsec_rule); + if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) + rx_del_rule(priv, attrs, ipsec_rule); + else + tx_del_rule(priv, ipsec_rule); +} + +static void fs_cleanup_tx(struct mlx5e_priv *priv) +{ + mutex_destroy(&priv->ipsec->tx_fs->mutex); + WARN_ON(priv->ipsec->tx_fs->refcnt); + kfree(priv->ipsec->tx_fs); + priv->ipsec->tx_fs = NULL; } static void fs_cleanup_rx(struct mlx5e_priv *priv) @@ -507,6 +655,17 @@ static void fs_cleanup_rx(struct mlx5e_priv *priv) priv->ipsec->rx_fs = NULL; } +static int fs_init_tx(struct mlx5e_priv *priv) +{ + priv->ipsec->tx_fs = + kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL); + if (!priv->ipsec->tx_fs) + return -ENOMEM; + + mutex_init(&priv->ipsec->tx_fs->mutex); + return 0; +} + static int fs_init_rx(struct mlx5e_priv *priv) { struct mlx5e_accel_fs_esp_prot *fs_prot; @@ -532,13 +691,24 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) if (!priv->ipsec->rx_fs) return; + fs_cleanup_tx(priv); fs_cleanup_rx(priv); } int 
mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { + int err; + if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec) return -EOPNOTSUPP; - return fs_init_rx(priv); + err = fs_init_tx(priv); + if (err) + return err; + + err = fs_init_rx(priv); + if (err) + fs_cleanup_tx(priv); + + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 93a8d68815ad..11e31a3db2be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -34,7 +34,7 @@ #include <crypto/aead.h> #include <net/xfrm.h> #include <net/esp.h> - +#include "accel/ipsec_offload.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/ipsec.h" #include "accel/accel.h" @@ -233,18 +233,94 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, ntohs(mdata->content.tx.seq)); } -bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, - struct mlx5_wqe_eth_seg *eseg, - struct sk_buff *skb) +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg) +{ + inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG); + esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto); +} + +static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct xfrm_state *x, + struct xfrm_offload *xo, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + unsigned int blksize, clen, alen, plen; + struct crypto_aead *aead; + unsigned int tailen; + + ipsec_st->x = x; + ipsec_st->xo = xo; + if (mlx5_is_ipsec_device(priv->mdev)) { + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; + } + + return 0; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) { struct xfrm_offload *xo = xfrm_offload(skb); - struct mlx5e_ipsec_metadata *mdata; - struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_encap_tmpl *encap; struct xfrm_state *x; struct sec_path *sp; + u8 l3_proto; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) + return; + + x = xfrm_input_state(skb); + if (unlikely(!x)) + return; + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) + return; + + mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); - if (!xo) - return true; + l3_proto = (x->props.family == AF_INET) ? + ((struct iphdr *)skb_network_header(skb))->protocol : + ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; + + if (mlx5_is_ipsec_device(priv->mdev)) { + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? 
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); + } + } +} + +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5e_ipsec_metadata *mdata; + struct xfrm_state *x; + struct sec_path *sp; sp = skb_sec_path(skb); if (unlikely(sp->len != 1)) { @@ -270,15 +346,21 @@ bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); goto drop; } - mdata = mlx5e_ipsec_add_metadata(skb); - if (IS_ERR(mdata)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); - goto drop; + + if (MLX5_CAP_GEN(priv->mdev, fpga)) { + mdata = mlx5e_ipsec_add_metadata(skb); + if (IS_ERR(mdata)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); + goto drop; + } } - mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; sa_entry->set_iv_op(skb, x, xo); - mlx5e_ipsec_set_metadata(skb, mdata, xo); + if (MLX5_CAP_GEN(priv->mdev, fpga)) + mlx5e_ipsec_set_metadata(skb, mdata, xo); + + mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index f96e786db158..056dacb612b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -43,6 +43,13 @@ #define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F) #define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF) +struct mlx5e_accel_tx_ipsec_state { + struct xfrm_offload *xo; + struct xfrm_state *x; + u32 tailen; + u32 plen; +}; + #ifdef CONFIG_MLX5_EN_IPSEC struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, @@ -55,16 +62,32 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); -bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv, - struct mlx5_wqe_eth_seg *eseg, - struct sk_buff *skb); +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st); +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg); void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, struct mlx5_cqe64 *cqe); +static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->tailen; +} + static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata)); } + +static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->x; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); #else static inline void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 6bbfcf18107d..ccaccb9fc2f7 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -253,7 +253,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, goto err_out; } - pdev = sq->channel->priv->mdev->device; + pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); buf->dma_addr = dma_map_single(pdev, &buf->progress, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { @@ -390,7 +390,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; resync = &priv_rx->resync; - dev = resync->priv->mdev->device; + dev = mlx5_core_dma_dev(resync->priv->mdev); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index f4861545b236..b140e13fdcc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -345,9 +345,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_sq_stats *stats; struct mlx5e_sq_dma *dma; - if (!wi->resync_dump_frag_page) - return; - dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); stats = sq->stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index ff4c740af10b..7521c9be735b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,12 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + if (unlikely(wi->resync_dump_frag_page)) { + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc); + return true; + } + return false; +} #else -static inline void -mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe_info *wi, - u32 *dma_fifo_cc) +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) { + return false; } #endif /* CONFIG_MLX5_EN_TLS */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index b0c31d49ff8d..6982b193ee8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, struct mlx5e_tls *tls) { u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); - struct mlx5e_tx_wqe *wqe; struct sync_info info; struct sk_buff *nskb; int linear_len = 0; int headln; - u16 pi; int i; sq->stats->tls_ooo++; @@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, sq->stats->tls_resync_bytes += nskb->len; mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); - pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); - wqe = MLX5E_TX_FETCH_WQE(sq, pi); - mlx5e_sq_xmit(sq, nskb, wqe, pi, true); + mlx5e_sq_xmit_simple(sq, nskb, true); return true; @@ -274,6 +270,8 @@ bool mlx5e_tls_handle_tx_skb(struct net_device 
*netdev, struct mlx5e_txqsq *sq, if (!datalen) return true; + mlx5e_tx_mpwqe_ensure_complete(sq); + tls_ctx = tls_get_ctx(skb->sk); if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) goto err_out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 08270987c506..d25a56ec6876 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,7 +32,7 @@ #include "en.h" #include "en/port.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -243,7 +243,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); - fallthrough; default: return -EOPNOTSUPP; } @@ -1341,6 +1340,14 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } +static void mlx5e_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, pause_stats); +} + void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam) { @@ -1901,7 +1908,7 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) return 0; } -static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -1913,7 +1920,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable); + MLX5E_SET_PFLAG(&new_channels.params, flag, enable); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1924,6 +1931,16 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) return err; } +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable); +} + +static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable); +} + static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, @@ -1931,6 +1948,7 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { { "rx_striding_rq", set_pflag_rx_striding_rq }, { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, + { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe }, }; static int mlx5e_handle_pflag(struct net_device *netdev, @@ -2033,6 +2051,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, + .get_pause_stats = mlx5e_get_pause_stats, .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 83c9b2bbc4af..b416a8ee2eed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -33,7 +33,7 @@ #include <linux/mlx5/fs.h> #include "en.h" #include "en/params.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" struct mlx5e_ethtool_rule { struct list_head list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 42ec28e29834..b3f02aac7f26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -57,7 +57,7 @@ #include "en/monitor_stats.h" #include "en/health.h" #include "en/params.h" -#include "en/xsk/umem.h" +#include "en/xsk/pool.h" #include "en/xsk/setup.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" @@ -393,7 +393,7 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -419,9 +419,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; - rq->umem = umem; + rq->xsk_pool = xsk_pool; - if (rq->umem) + if (rq->xsk_pool) rq->stats = &c->priv->channel_stats[c->ix].xskrq; else rq->stats = &c->priv->channel_stats[c->ix].rq; @@ -511,7 +511,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); - xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq); + xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ pp_params.order = 0; @@ -861,11 +861,11 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xdp_umem *umem, struct mlx5e_rq *rq) + struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); if (err) return err; @@ -893,6 +893,13 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + /* For CQE compression on striding RQ, use stride index provided by + * HW if capability is supported. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && + MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + return 0; err_destroy_rq: @@ -970,7 +977,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -986,9 +993,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->umem = umem; + sq->xsk_pool = xsk_pool; - sq->stats = sq->umem ? + sq->stats = sq->xsk_pool ? &c->priv->channel_stats[c->ix].xsksq : is_redirect ? 
&c->priv->channel_stats[c->ix].xdpsq : @@ -1085,6 +1092,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq) static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) { kvfree(sq->db.wqe_info); + kvfree(sq->db.skb_fifo); kvfree(sq->db.dma_fifo); } @@ -1096,15 +1104,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, sizeof(*sq->db.dma_fifo)), GFP_KERNEL, numa); + sq->db.skb_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.skb_fifo)), + GFP_KERNEL, numa); sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.wqe_info)), GFP_KERNEL, numa); - if (!sq->db.dma_fifo || !sq->db.wqe_info) { + if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) { mlx5e_free_txqsq_db(sq); return -ENOMEM; } sq->dma_fifo_mask = df_sz - 1; + sq->skb_fifo_mask = df_sz - 1; return 0; } @@ -1115,6 +1127,12 @@ static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) sq->stop_room = mlx5e_tls_get_stop_room(sq); sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) + /* A MPWQE can take up to the maximum-sized WQE + all the normal + * stop room can be taken if a new packet breaks the active + * MPWQE session and allocates its WQEs right away. + */ + sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); if (WARN_ON(sq->stop_room >= sq_size)) { netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", @@ -1156,6 +1174,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + if (param->is_mpw) + set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); if (err) return err; @@ -1449,13 +1469,13 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq) } int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); if (err) return err; @@ -1948,7 +1968,7 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, - struct xdp_umem *umem, + struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); @@ -1972,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->tstamp = &priv->tstamp; c->ix = ix; c->cpu = cpu; - c->pdev = priv->mdev->device; + c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = params->num_tc; @@ -1987,9 +2007,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (unlikely(err)) goto err_napi_del; - if (umem) { - mlx5e_build_xsk_param(umem, &xsk); - err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (xsk_pool) { + mlx5e_build_xsk_param(xsk_pool, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); if (unlikely(err)) goto err_close_queues; } @@ -2160,7 +2180,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(rqc, 
rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp); } @@ -2176,7 +2196,7 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - param->wq.buf_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); } void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, @@ -2188,7 +2208,7 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - param->wq.buf_numa_node = dev_to_node(priv->mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, @@ -2204,6 +2224,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); mlx5e_build_tx_cq_param(priv, params, &param->cqp); } @@ -2223,6 +2244,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; + bool hw_stridx = false; void *cqc = param->cqc; u8 log_cq_size; @@ -2230,6 +2252,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2237,7 +2260,8 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } @@ -2350,12 +2374,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - struct xdp_umem *umem = NULL; + struct xsk_buff_pool *xsk_pool = NULL; if (chs->params.xdp_prog) - umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -3222,8 +3246,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - param->wq.buf_numa_node = dev_to_node(mdev->device); - param->wq.db_numa_node = dev_to_node(mdev->device); + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -3927,13 +3951,14 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, u16 ix; for (ix = 0; ix < chs->params.num_channels; ix++) { - struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct xsk_buff_pool *xsk_pool = + mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); struct mlx5e_xsk_param xsk; - if (!umem) + if (!xsk_pool) continue; - mlx5e_build_xsk_param(umem, &xsk); + mlx5e_build_xsk_param(xsk_pool, &xsk); if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); @@ -4466,8 +4491,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return mlx5e_xdp_set(dev, xdp->prog); - case XDP_SETUP_XSK_UMEM: - return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + case XDP_SETUP_XSK_POOL: + return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; @@ -4758,6 +4783,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); /* XDP SQ */ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e979bff64c49..67247c33b9fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -39,7 +39,6 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" -#include "esw/chains.h" #include "en.h" #include "en_rep.h" #include "en/txrx.h" @@ -288,6 +287,14 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } +static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, stats); +} + static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pauseparam) { @@ -362,23 +369,11 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, + .get_pause_stats = mlx5e_uplink_rep_get_pause_stats, .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -static void mlx5e_rep_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) -{ - struct mlx5e_priv *priv; - u64 parent_id; - - priv = netdev_priv(dev); - - parent_id = mlx5_query_nic_system_image_guid(priv->mdev); - ppid->id_len = sizeof(parent_id); - memcpy(ppid->id, &parent_id, sizeof(parent_id)); -} - static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { @@ -603,12 +598,13 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *dev) +static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) { - struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_core_dev *dev = priv->mdev; - return &rpriv->dl_port; + return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); } static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier) @@ -1198,63 +1194,13 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .stats_grps_num = mlx5e_ul_rep_stats_grps_num, }; -static bool -is_devlink_port_supported(const struct mlx5_core_dev *dev, - const struct mlx5e_rep_priv *rpriv) -{ - return rpriv->rep->vport == MLX5_VPORT_UPLINK || - rpriv->rep->vport == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); -} - -static int register_devlink_port(struct mlx5_core_dev *dev, - struct mlx5e_rep_priv *rpriv) -{ - struct devlink *devlink = priv_to_devlink(dev); - struct mlx5_eswitch_rep *rep = rpriv->rep; - struct devlink_port_attrs attrs = {}; - struct netdev_phys_item_id ppid = {}; - unsigned int dl_port_index = 0; - u16 pfnum; - - if (!is_devlink_port_supported(dev, rpriv)) - return 0; - - mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, rep->vport); - pfnum = PCI_FUNC(dev->pdev->devfn); - if (rep->vport == MLX5_VPORT_UPLINK) { - attrs.flavour = 
DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pfnum; - memcpy(attrs.switch_id.id, &ppid.id[0], ppid.id_len); - attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_set(&rpriv->dl_port, &attrs); - } else if (rep->vport == MLX5_VPORT_PF) { - memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len); - rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum); - } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) { - memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len); - rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_vf_set(&rpriv->dl_port, - pfnum, rep->vport - 1); - } - return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index); -} - -static void unregister_devlink_port(struct mlx5_core_dev *dev, - struct mlx5e_rep_priv *rpriv) -{ - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_unregister(&rpriv->dl_port); -} - /* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { const struct mlx5e_profile *profile; struct mlx5e_rep_priv *rpriv; + struct devlink_port *dl_port; struct net_device *netdev; int nch, err; @@ -1304,28 +1250,19 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - err = register_devlink_port(dev, rpriv); - if (err) { - netdev_warn(netdev, "Failed to register devlink port %d\n", - rep->vport); - goto err_neigh_cleanup; - } - err = register_netdev(netdev); if (err) { netdev_warn(netdev, "Failed to register representor netdev for vport %d\n", rep->vport); - goto err_devlink_cleanup; + goto err_neigh_cleanup; } - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_type_eth_set(&rpriv->dl_port, netdev); + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, netdev); return 0; -err_devlink_cleanup: - unregister_devlink_port(dev, rpriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1349,12 +1286,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *dev = priv->mdev; + struct devlink_port *dl_port; void *ppriv = priv->ppriv; - if (is_devlink_port_supported(dev, rpriv)) - devlink_port_type_clear(&rpriv->dl_port); + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); unregister_netdev(netdev); - unregister_devlink_port(dev, rpriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); if (rep->vport == MLX5_VPORT_UPLINK) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 0d1562e20118..9020d1419bcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -101,7 +101,6 @@ struct mlx5e_rep_priv { struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ struct rtnl_link_stats64 prev_vf_vport_stats; - struct devlink_port dl_port; }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 64c8ac5eabf6..599f5b5ebc97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> @@ -139,8 +138,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, title->check_sum = mini_cqe->checksum; title->op_own &= 0xf0; title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); - title->wqe_counter = cpu_to_be16(cqd->wqe_counter); + /* state bit set implies linked-list striding RQ wq type and + * HW stride index capability supported + */ + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) { + title->wqe_counter = mini_cqe->stridx; + return; + } + + /* HW stride index capability not supported */ + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else @@ -282,8 +290,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - if (rq->umem) - return mlx5e_xsk_page_alloc_umem(rq, dma_info); + if (rq->xsk_pool) + return mlx5e_xsk_page_alloc_pool(rq, dma_info); else return mlx5e_page_alloc_pool(rq, dma_info); } @@ -314,7 +322,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle) { - if (rq->umem) + if (rq->xsk_pool) /* The `recycle` parameter is ignored, and the page is always * put into the Reuse Ring, because there is no way to return * the page to the userspace when the interface goes down. @@ -401,14 +409,14 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; - if (rq->umem) { + if (rq->xsk_pool) { int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; /* Check in advance that we have enough frames, instead of * allocating one-by-one, failing and moving frames to the * Reuse Ring. */ - if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired))) + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired))) return -ENOMEM; } @@ -506,8 +514,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) /* Check in advance that we have enough frames, instead of allocating * one-by-one, failing and moving frames to the Reuse Ring. */ - if (rq->umem && - unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) { + if (rq->xsk_pool && + unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) { err = -ENOMEM; goto err; } @@ -755,7 +763,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) * the driver when it refills the Fill Ring. * 2. Otherwise, busy poll by rescheduling the NAPI poll. 
*/ - if (unlikely(alloc_err == -ENOMEM && rq->umem)) + if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool)) return true; return false; @@ -1144,8 +1152,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, frag_size, DMA_FROM_DEVICE); - prefetchw(va); /* xdp_frame data area */ - prefetch(data); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(data); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) @@ -1184,7 +1192,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); while (byte_cnt) { u16 frag_consumed_bytes = @@ -1252,6 +1260,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); free_wqe: @@ -1399,7 +1412,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); if (unlikely(frag_offset >= PAGE_SIZE)) { di++; @@ -1451,8 +1464,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, frag_size, DMA_FROM_DEVICE); - prefetchw(va); /* xdp_frame data area */ - prefetch(data); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(data); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { @@ -1513,6 +1526,11 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq goto mpwrq_cqe_out; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb(cqe, skb)) + goto mpwrq_cqe_out; + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 46790216ce86..ce8ab1f01876 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/udp.h> #include <net/udp.h> @@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) return NULL; } - prefetchw(skb->data); + net_prefetchw(skb->data); skb_reserve(skb, NET_IP_ALIGN); /* Reserve for ethernet and IP header */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f6383bc2bc3f..78f6a6f0a7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -110,6 +110,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, @@ -365,6 +367,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_added_vlan_packets += sq_stats->added_vlan_packets; s->tx_nop += sq_stats->nop; + s->tx_mpwqe_blks += sq_stats->mpwqe_blks; + s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; @@ -689,6 +693,35 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +#define MLX5E_READ_CTR64_BE_F(ptr, c) \ + be64_to_cpu(*(__be64 *)((char *)ptr + \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high))) + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); + + pause_stats->tx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + a_pause_mac_ctrl_frames_transmitted); + pause_stats->rx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + a_pause_mac_ctrl_frames_received); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -1539,6 +1572,8 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 562263d62141..162daaadb0d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -105,6 +105,9 @@ void 
mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats); + /* Concrete NIC Stats */ struct mlx5e_sw_stats { @@ -118,6 +121,8 @@ struct mlx5e_sw_stats { u64 tx_tso_inner_bytes; u64 tx_added_vlan_packets; u64 tx_nop; + u64 tx_mpwqe_blks; + u64 tx_mpwqe_pkts; u64 rx_lro_packets; u64 rx_lro_bytes; u64 rx_mcast_packets; @@ -348,6 +353,8 @@ struct mlx5e_sq_stats { u64 csum_partial_inner; u64 added_vlan_packets; u64 nop; + u64 mpwqe_blks; + u64 mpwqe_pkts; #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1c93f92d9210..e3a968e9e2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -57,7 +57,6 @@ #include "en/rep/neigh.h" #include "en_tc.h" #include "eswitch.h" -#include "esw/chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" @@ -66,20 +65,11 @@ #include "en/mod_hdr.h" #include "lib/devcom.h" #include "lib/geneve.h" +#include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" +#define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) - -struct mlx5_nic_flow_attr { - u32 action; - u32 flow_tag; - struct mlx5_modify_hdr *modify_hdr; - u32 hairpin_tirn; - u8 match_level; - struct mlx5_flow_table *hairpin_ft; - struct mlx5_fc *counter; -}; - #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) enum { @@ -153,11 +143,7 @@ struct mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; int tunnel_id; /* the mapped tunnel id of this flow */ - - union { - struct mlx5_esw_flow_attr esw_attr[0]; - struct mlx5_nic_flow_attr nic_attr[0]; - }; + struct mlx5_flow_attr *attr; }; struct mlx5e_tc_flow_parse_attr { @@ -170,7 +156,7 @@ struct mlx5e_tc_flow_parse_attr { }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 -#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { @@ -191,6 +177,16 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [MARK_TO_REG] = mark_to_reg_ct, [LABELS_TO_REG] = labels_to_reg_ct, [FTEID_TO_REG] = fteid_to_reg_ct, + /* For NIC rules we store the retore metadata directly + * into reg_b that is passed to SW since we don't + * jump between steering domains. 
+ */ + [NIC_CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, + .moffset = 0, + .mlen = 2, + }, + [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, }; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); @@ -244,6 +240,7 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, enum mlx5e_tc_attr_to_reg type, u32 data) { @@ -253,8 +250,7 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, char *modact; int err; - err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_hdr_acts); + err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts); if (err) return err; @@ -275,6 +271,54 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, return 0; } +#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS) + +static struct mlx5_tc_ct_priv * +get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (esw_offloads_mode(esw)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->ct_priv; + } + + return priv->fs.tc.ct; +} + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw_offloads_mode(esw)) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); +} + +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw_offloads_mode(esw)) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + + return; + } + + mlx5e_del_offloaded_nic_rule(priv, rule, attr); +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -370,7 +414,7 @@ static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) #define flow_flag_test(flow, flag) __flow_flag_test(flow, \ MLX5E_TC_FLOW_FLAG_##flag) -static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, ESWITCH); } @@ -415,10 +459,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, return PTR_ERR(mh); modify_hdr = mlx5e_mod_hdr_get(mh); - if (mlx5e_is_eswitch_flow(flow)) - flow->esw_attr->modify_hdr = modify_hdr; - else - flow->nic_attr->modify_hdr = modify_hdr; + flow->attr->modify_hdr = modify_hdr; flow->mh = mh; return 0; @@ -858,9 +899,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { - flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; } flow->hpe = hpe; @@ -890,129 +931,212 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, flow->hpe = NULL; } -static int -mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr 
*attr) { - struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; - struct mlx5_nic_flow_attr *attr = flow->nic_attr; - struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_flow_context *flow_context = &spec->flow_context; + struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; + struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, .flags = FLOW_ACT_NO_APPEND, }; - struct mlx5_fc *counter = NULL; - int err, dest_ix = 0; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int dest_ix = 0; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - flow_context->flow_tag = attr->flow_tag; - - if (flow_flag_test(flow, HAIRPIN)) { - err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) - return err; + flow_context->flow_tag = nic_attr->flow_tag; - if (flow_flag_test(flow, HAIRPIN_RSS)) { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = attr->hairpin_ft; - } else { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest[dest_ix].tir_num = attr->hairpin_tirn; - } + if (attr->dest_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->dest_ft; + dest_ix++; + } else if (nic_attr->hairpin_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = nic_attr->hairpin_ft; + dest_ix++; + } else if (nic_attr->hairpin_tirn) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = nic_attr->hairpin_tirn; dest_ix++; } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = priv->fs.vlan.ft.t; + if (attr->dest_chain) { + dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + if (IS_ERR(dest[dest_ix].ft)) + return ERR_CAST(dest[dest_ix].ft); + } else { + dest[dest_ix].ft = priv->fs.vlan.ft.t; + } + dest_ix++; + } + + if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); dest_ix++; } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + mutex_lock(&tc->t_lock); + if (IS_ERR_OR_NULL(tc->t)) { + /* Create the root table here if doesn't exist yet */ + tc->t = + mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); + + if (IS_ERR(tc->t)) { + mutex_unlock(&tc->t_lock); + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + rule = ERR_CAST(priv->fs.tc.t); + goto err_ft_get; + } + } + mutex_unlock(&tc->t_lock); + + if (attr->chain || attr->prio) + ft = mlx5_chains_get_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + else + ft = attr->ft; + + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_ft_get; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dest, dest_ix); + if (IS_ERR(rule)) + goto err_rule; + + return rule; + +err_rule: + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); +err_ft_get: + if (attr->dest_chain) + 
mlx5_chains_put_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + + return ERR_CAST(rule); +} + +static int +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_fc *counter = NULL; + int err; + + if (flow_flag_test(flow, HAIRPIN)) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); + if (err) + return err; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return PTR_ERR(counter); - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter_id = mlx5_fc_id(counter); - dest_ix++; attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - flow_act.modify_hdr = attr->modify_hdr; dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } - mutex_lock(&priv->fs.tc.t_lock); - if (IS_ERR_OR_NULL(priv->fs.tc.t)) { - struct mlx5_flow_table_attr ft_attr = {}; - int tc_grp_size, tc_tbl_size, tc_num_grps; - u32 max_flow_counter; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - - tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); - - tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, - BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); - tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS; - - ft_attr.prio = MLX5E_TC_PRIO; - ft_attr.max_fte = tc_tbl_size; - ft_attr.level = MLX5E_TC_FT_LEVEL; - ft_attr.autogroup.max_num_groups = tc_num_grps; - priv->fs.tc.t = - mlx5_create_auto_grouped_flow_table(priv->fs.ns, - &ft_attr); - if (IS_ERR(priv->fs.tc.t)) { - mutex_unlock(&priv->fs.tc.t_lock); - NL_SET_ERR_MSG_MOD(extack, - "Failed to create tc offload table"); - netdev_err(priv->netdev, - "Failed to create tc offload table\n"); - return PTR_ERR(priv->fs.tc.t); - } - } + if (flow_flag_test(flow, CT)) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, + attr, &parse_attr->mod_hdr_acts); + else + flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, + attr); - if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + return PTR_ERR_OR_ZERO(flow->rule[0]); +} - flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); - mutex_unlock(&priv->fs.tc.t_lock); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_fs_chains *nic_chains = nic_chains(priv); - return PTR_ERR_OR_ZERO(flow->rule[0]); + mlx5_del_flow_rules(rule); + + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct mlx5_nic_flow_attr *attr = flow->nic_attr; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5e_tc_table *tc = &priv->fs.tc; + + flow_flag_clear(flow, OFFLOADED); - counter = attr->counter; - if (!IS_ERR_OR_NULL(flow->rule[0])) - mlx5_del_flow_rules(flow->rule[0]); - mlx5_fc_destroy(priv->mdev, 
counter); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); + /* Remove root table if no rules are left to avoid + * extra steering hops. + */ mutex_lock(&priv->fs.tc.t_lock); - if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { - mlx5_destroy_flow_table(priv->fs.tc.t); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && + !IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); priv->fs.tc.t = NULL; } mutex_unlock(&priv->fs.tc.t_lock); + kvfree(attr->parse_attr); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + mlx5_fc_destroy(priv->mdev, attr->counter); + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); + + kfree(flow->attr); } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -1035,7 +1159,7 @@ static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; @@ -1043,7 +1167,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr, + return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + flow, spec, attr, mod_hdr_acts); } @@ -1051,7 +1176,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (IS_ERR(rule)) return rule; - if (attr->split_count) { + if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); @@ -1065,16 +1190,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); return; } - if (attr->split_count) + if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); @@ -1085,18 +1210,24 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec) { - struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_attr *slow_attr; struct mlx5_flow_handle *rule; - memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); - slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr.split_count = 0; - slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) + return ERR_PTR(-ENOMEM); + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) flow_flag_set(flow, SLOW); + kfree(slow_attr); + return rule; } @@ -1104,14 +1235,21 @@ static void mlx5e_tc_unoffload_from_slow_path(struct 
mlx5_eswitch *esw, struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_attr *slow_attr; - memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); - slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr.split_count = 0; - slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr); + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) { + mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); + return; + } + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); + kfree(slow_attr); } /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this @@ -1169,9 +1307,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; struct net_device *out_dev, *encap_dev = NULL; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; @@ -1180,7 +1319,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int err = 0; int out_index; - if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { + if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) { NL_SET_ERR_MSG_MOD(extack, "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; @@ -1191,14 +1330,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * FDB_FT_CHAIN which is outside tc range. * See mlx5e_rep_setup_ft_cb(). 
*/ - max_chain = mlx5_esw_chains_get_chain_range(esw); + max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG_MOD(extack, "Requested chain is out of supported range"); return -EOPNOTSUPP; } - max_prio = mlx5_esw_chains_get_prio_range(esw); + max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); if (attr->prio > max_prio) { NL_SET_ERR_MSG_MOD(extack, "Requested priority is out of supported range"); @@ -1211,10 +1350,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return err; } + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; - if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; mirred_ifindex = parse_attr->mirred_ifindex[out_index]; @@ -1227,8 +1369,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - attr->dests[out_index].rep = rpriv->rep; - attr->dests[out_index].mdev = out_priv->mdev; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -1244,7 +1386,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(attr->counter_dev, true); + counter = mlx5_fc_create(esw_attr->counter_dev, true); if (IS_ERR(counter)) return PTR_ERR(counter); @@ -1270,7 +1412,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) { - struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -1285,7 +1427,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; int out_index; mlx5e_put_flow_tunnel_id(flow); @@ -1306,22 +1448,24 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); kfree(attr->parse_attr->tun_info[out_index]); } kvfree(attr->parse_attr); - mlx5_tc_ct_match_del(priv, &flow->esw_attr->ct_attr); + mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(attr->counter_dev, attr->counter); + mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter); if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + + kfree(flow->attr); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1331,6 +1475,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; @@ -1353,8 +1498,9 @@ void mlx5e_tc_encap_flows_add(struct 
mlx5e_priv *priv, if (!mlx5e_is_offloaded_flow(flow)) continue; - esw_attr = flow->esw_attr; - spec = &esw_attr->parse_attr->spec; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; @@ -1374,7 +1520,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, if (!all_flow_encaps_valid) continue; /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -1394,7 +1540,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; @@ -1402,12 +1550,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow)) continue; - spec = &flow->esw_attr->parse_attr->spec; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); /* mark the flow's encap dest as non-valid */ - flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1416,7 +1566,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); flow->rule[0] = rule; /* was unset when fast path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -1429,10 +1579,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (mlx5e_is_eswitch_flow(flow)) - return flow->esw_attr->counter; - else - return flow->nic_attr->counter; + return flow->attr->counter; } /* Takes reference to all flows attached to encap and adds the flows to @@ -1798,11 +1945,11 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct flow_match_enc_opts enc_opts_match; struct tunnel_match_enc_opts tun_enc_opts; struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5_flow_attr *attr = flow->attr; struct mlx5e_rep_priv *uplink_rpriv; struct tunnel_match_key tunnel_key; bool enc_opts_is_dont_care = true; @@ -1866,7 +2013,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, } else { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; err = mlx5e_tc_match_to_reg_set(priv->mdev, - mod_hdr_acts, + mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, TUNNEL_TO_REG, value); if (err) goto err_set; @@ -1952,8 +2099,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, if (!mlx5e_is_eswitch_flow(flow)) return -EOPNOTSUPP; - needs_mapping = !!flow->esw_attr->chain; - sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + needs_mapping = !!flow->attr->chain; + sets_mapping 
= !flow->attr->chain && flow_has_tc_fwd_action(f); *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && @@ -1965,7 +2112,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow->esw_attr->chain) { + if (!flow->attr->chain) { err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, match_level); if (err) { @@ -1980,7 +2127,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, * object */ if (!netif_is_bareudp(filter_dev)) - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -2483,12 +2630,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (is_eswitch_flow) { - flow->esw_attr->inner_match_level = inner_match_level; - flow->esw_attr->outer_match_level = outer_match_level; - } else { - flow->nic_attr->match_level = non_tunnel_match_level; - } + flow->attr->inner_match_level = inner_match_level; + flow->attr->outer_match_level = outer_match_level; + return err; } @@ -2614,6 +2758,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), @@ -3090,7 +3235,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, * we can't restore ct state */ if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(priv, spec)) { + mlx5_tc_ct_add_no_trk_match(spec)) { NL_SET_ERR_MSG_MOD(extack, "can't offload tuple modify header with ct matches"); netdev_info(priv->netdev, @@ -3121,12 +3266,13 @@ static bool actions_match_supported(struct mlx5e_priv *priv, bool ct_flow = false, ct_clear = false; u32 actions; + ct_clear = flow->attr->ct_attr.ct_action & + TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + actions = flow->attr->action; + if (mlx5e_is_eswitch_flow(flow)) { - actions = flow->esw_attr->action; - ct_clear = flow->esw_attr->ct_attr.ct_action & - TCA_CT_ACT_CLEAR; - ct_flow = flow_flag_test(flow, CT) && !ct_clear; - if (flow->esw_attr->split_count && ct_flow) { + if (flow->attr->esw_attr->split_count && ct_flow) { /* All registers used by ct are cleared when using * split rules. */ @@ -3134,8 +3280,6 @@ static bool actions_match_supported(struct mlx5e_priv *priv, "Can't offload mirroring with action ct"); return false; } - } else { - actions = flow->nic_attr->action; } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -3233,15 +3377,67 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, extack); } +static int validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + const struct flow_action_entry *act, + u32 actions, + struct netlink_ext_ack *extack) +{ + bool is_esw = mlx5e_is_eswitch_flow(flow); + struct mlx5_flow_attr *attr = flow->attr; + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : nic_chains(priv); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? 
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - struct mlx5_nic_flow_attr *attr = flow->nic_attr; + struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; + struct mlx5_nic_flow_attr *nic_attr; u32 action = 0; int err, i; @@ -3252,7 +3448,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; - attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + nic_attr = attr->nic_attr; + + nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -3273,8 +3471,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (err) return err; - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; break; case FLOW_ACTION_VLAN_MANGLE: err = add_vlan_rewrite_action(priv, @@ -3319,10 +3516,26 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EINVAL; } - attr->flow_tag = mark; + nic_attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } break; + case FLOW_ACTION_GOTO: + err = validate_goto_chain(priv, flow, act, action, + extack); + if (err) + return err; + + action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = act->chain_index; + break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); + break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3345,6 +3558,18 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } attr->action = action; + + if (attr->dest_chain) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; @@ -3476,8 +3701,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, bool *encap_valid) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct encap_key key; struct mlx5e_encap_entry *e; @@ -3563,8 +3788,8 @@ attach_flow: 
flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->dests[out_index].pkt_reformat = e->pkt_reformat; - attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; + attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; *encap_valid = true; } else { *encap_valid = false; @@ -3591,14 +3816,14 @@ static int mlx5e_attach_decap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; int err = 0; - parse_attr = attr->parse_attr; + parse_attr = flow->attr->parse_attr; if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { NL_SET_ERR_MSG_MOD(extack, "encap header larger than max supported"); @@ -3740,7 +3965,7 @@ static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, } static int add_vlan_push_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct net_device **out_dev, u32 *action) { @@ -3753,7 +3978,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); if (err) return err; @@ -3766,7 +3991,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, } static int add_vlan_pop_action(struct mlx5e_priv *priv, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, u32 *action) { struct flow_action_entry vlan_act = { @@ -3777,7 +4002,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, nest_level = attr->parse_attr->filter_dev->lower_level - priv->netdev->lower_level; while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); if (err) return err; } @@ -3838,59 +4063,20 @@ static bool is_duplicated_output_device(struct net_device *dev, return false; } -static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow, - const struct flow_action_entry *act, - u32 actions, - struct netlink_ext_ack *extack) -{ - u32 max_chain = mlx5_esw_chains_get_chain_range(esw); - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool ft_flow = mlx5e_is_ft_flow(flow); - u32 dest_chain = act->chain_index; - - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } - - if (!mlx5_esw_chains_backwards_supported(esw) && - dest_chain <= attr->chain) { - NL_SET_ERR_MSG_MOD(extack, - "Goto lower numbered chain isn't supported"); - return -EOPNOTSUPP; - } - if (dest_chain > max_chain) { - NL_SET_ERR_MSG_MOD(extack, - "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; - } - - if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) { - NL_SET_ERR_MSG_MOD(extack, - "Goto chain is not allowed if action has reformat or decap"); - return -EOPNOTSUPP; - } - - return 0; -} - static int verify_uplink_forwarding(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *out_dev, struct netlink_ext_ack *extack) { + struct 
mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rep_priv; /* Forwarding non encapsulated traffic between * uplink ports is allowed only if * termination_table_raw_traffic cap is set. * - * Input vport was stored esw_attr->in_rep. + * Input vport was stored attr->in_rep. * In LAG case, *priv* is the private data of * uplink which may be not the input vport. */ @@ -3925,13 +4111,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; const struct ip_tunnel_info *info = NULL; + struct mlx5_flow_attr *attr = flow->attr; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + struct mlx5_esw_flow_attr *esw_attr; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@ -3944,12 +4131,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; break; + case FLOW_ACTION_TRAP: + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, + "action trap is supported as a sole action only"); + return -EOPNOTSUPP; + } + action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT); + attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + break; case FLOW_ACTION_MPLS_PUSH: if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || @@ -3990,7 +4190,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; } break; case FLOW_ACTION_CSUM: @@ -4027,27 +4227,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more output ports, can't offload forwarding"); netdev_warn(priv->netdev, "can't support more than %d output ports, can't offload forwarding\n", - attr->out_count); + esw_attr->out_count); return -EOPNOTSUPP; } action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; if (encap) { - parse_attr->mirred_ifindex[attr->out_count] = + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = dup_tun_info(info); - if (!parse_attr->tun_info[attr->out_count]) + parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info); + if (!parse_attr->tun_info[esw_attr->out_count]) return -ENOMEM; encap = false; - attr->dests[attr->out_count].flags |= + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; - attr->out_count++; + esw_attr->out_count++; /* attr->dests[].rep is resolved when we * handle encap */ @@ -4096,9 +4296,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, out_priv = netdev_priv(out_dev); rpriv = 
out_priv->ppriv; - attr->dests[attr->out_count].rep = rpriv->rep; - attr->dests[attr->out_count].mdev = out_priv->mdev; - attr->out_count++; + esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + esw_attr->out_count++; } else if (parse_attr->filter_dev != priv->netdev) { /* All mlx5 devices are called to configure * high level device filters. Therefore, the @@ -4136,12 +4336,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, act, parse_attr, hdrs, &action, extack); } else { - err = parse_tc_vlan_action(priv, act, attr, &action); + err = parse_tc_vlan_action(priv, act, esw_attr, &action); } if (err) return err; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; break; case FLOW_ACTION_VLAN_MANGLE: err = add_vlan_rewrite_action(priv, @@ -4151,14 +4351,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - attr->split_count = attr->out_count; + esw_attr->split_count = esw_attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: decap = true; break; case FLOW_ACTION_GOTO: - err = mlx5_validate_goto_chain(esw, flow, act, action, - extack); + err = validate_goto_chain(priv, flow, act, action, + extack); if (err) return err; @@ -4166,7 +4366,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->dest_chain = act->chain_index; break; case FLOW_ACTION_CT: - err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); if (err) return err; @@ -4205,7 +4405,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) - attr->split_count = 0; + esw_attr->split_count = 0; } } @@ -4245,7 +4445,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); @@ -4296,25 +4496,37 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; + bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); - bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); if (!esw_paired) return false; - if ((mlx5_lag_is_sriov(attr->in_mdev) || - mlx5_lag_is_multipath(attr->in_mdev)) && + if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || + mlx5_lag_is_multipath(esw_attr->in_mdev)) && (is_rep_ingress || act_is_encap)) return true; return false; } +struct mlx5_flow_attr * +mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) +{ + u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 
+ sizeof(struct mlx5_esw_flow_attr) : + sizeof(struct mlx5_nic_flow_attr); + struct mlx5_flow_attr *attr; + + return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct flow_cls_offload *f, unsigned long flow_flags, @@ -4322,19 +4534,26 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr; struct mlx5e_tc_flow *flow; - int out_index, err; + int err = -ENOMEM; + int out_index; - flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); + flow = kzalloc(sizeof(*flow), GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); - if (!parse_attr || !flow) { - err = -ENOMEM; + if (!parse_attr || !flow) goto err_free; - } - flow->cookie = f->cookie; flow->flags = flow_flags; + flow->cookie = f->cookie; flow->priv = priv; + + attr = mlx5_alloc_flow_attr(get_flow_name_space(flow)); + if (!attr) + goto err_free; + + flow->attr = attr; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->hairpin); @@ -4354,7 +4573,17 @@ err_free: } static void -mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, +mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f) +{ + attr->parse_attr = parse_attr; + attr->chain = f->common.chain_index; + attr->prio = f->common.prio; +} + +static void +mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct flow_cls_offload *f, @@ -4362,10 +4591,9 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, struct mlx5_core_dev *in_mdev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - esw_attr->parse_attr = parse_attr; - esw_attr->chain = f->common.chain_index; - esw_attr->prio = f->common.prio; + mlx5e_flow_attr_init(attr, parse_attr, f); esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; @@ -4399,7 +4627,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; - mlx5e_flow_esw_attr_init(flow->esw_attr, + mlx5e_flow_esw_attr_init(flow->attr, priv, parse_attr, f, in_rep, in_mdev); @@ -4409,8 +4637,8 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; /* actions validation depends on parsing the ct matches first */ - err = mlx5_tc_ct_match_add(priv, &parse_attr->spec, f, - &flow->esw_attr->ct_attr, extack); + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); if (err) goto err_free; @@ -4441,6 +4669,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *peer_urpriv; @@ -4460,15 +4689,15 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, * original flow and packets redirected from uplink use the * peer mdev. 
*/ - if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) + if (attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; - parse_attr = flow->esw_attr->parse_attr; + parse_attr = flow->attr->parse_attr; peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, parse_attr->filter_dev, - flow->esw_attr->in_rep, in_mdev); + attr->in_rep, in_mdev); if (IS_ERR(peer_flow)) { err = PTR_ERR(peer_flow); goto out; @@ -4532,9 +4761,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int attr_size, err; - /* multi-chain not supported for NIC rules */ - if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { return -EOPNOTSUPP; + } flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); @@ -4544,11 +4776,18 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; + mlx5e_flow_attr_init(flow->attr, parse_attr, f); + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f, filter_dev); if (err) goto err_free; + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; @@ -4558,14 +4797,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, goto err_free; flow_flag_set(flow, OFFLOADED); - kvfree(parse_attr); *__flow = flow; return 0; err_free: mlx5e_flow_put(priv, flow); - kvfree(parse_attr); out: return err; } @@ -4940,9 +5177,27 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) +{ + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + + return tc_tbl_size; +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_chains_attr attr = {}; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@ -4954,6 +5209,27 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) if (err) return err; + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK; + } + attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; + attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); + attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; + attr.default_ft = priv->fs.vlan.ft.t; + + tc->chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(tc->chains)) { + err = PTR_ERR(tc->chains); + goto err_chains; + } + + tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, + MLX5_FLOW_NAMESPACE_KERNEL); + if (IS_ERR(tc->ct)) + goto err_ct; + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, @@ -4961,8 +5237,17 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) if (err) { 
tc->netdevice_nb.notifier_call = NULL; mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + goto err_reg; } + return 0; + +err_reg: + mlx5_tc_ct_clean(tc->ct); +err_ct: + mlx5_chains_destroy(tc->chains); +err_chains: + rhashtable_destroy(&tc->ht); return err; } @@ -4987,28 +5272,38 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); mutex_destroy(&tc->hairpin_tbl_lock); - rhashtable_destroy(&tc->ht); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { - mlx5_destroy_flow_table(tc->t); + mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); tc->t = NULL; } mutex_destroy(&tc->t_lock); + + mlx5_tc_ct_clean(tc->ct); + mlx5_chains_destroy(tc->chains); } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *priv; + struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; - int err; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int err = 0; uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); - priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; - err = mlx5_tc_ct_init(uplink_priv); - if (err) + uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), + esw_chains(esw), + &esw->offloads.mod_hdr, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(uplink_priv->ct_priv)) goto err_ct; mapping = mapping_create(sizeof(struct tunnel_match_key), @@ -5037,7 +5332,7 @@ err_ht_init: err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: - mlx5_tc_ct_clean(uplink_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); err_ct: netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); @@ -5051,10 +5346,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); - mlx5_tc_ct_clean(uplink_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -5119,3 +5415,44 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } } + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, chain_tag, reg_b, zone_restore_id; + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5e_tc_table *tc = &priv->fs.tc; + struct tc_skb_ext *tc_skb_ext; + int err; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + + err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + chain_tag, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON(!tc_skb_ext)) + return false; + + tc_skb_ext->chain = chain; + + zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & + ZONE_RESTORE_MAX; + + if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, + zone_restore_id)) + return false; + } +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 437f680728fd..3b979008143d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -35,17 +35,57 @@ #include <net/pkt_cls.h> #include "en.h" +#include "eswitch.h" +#include "en/tc_ct.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff #ifdef CONFIG_MLX5_ESWITCH +#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_nic_flow_attr)) +#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_esw_flow_attr)) +#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\ + ESW_FLOW_ATTR_SZ :\ + NIC_FLOW_ATTR_SZ) + + int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { struct net_device *tun_dev; }; +struct mlx5_nic_flow_attr { + u32 flow_tag; + u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; +}; + +struct mlx5_flow_attr { + u32 action; + struct mlx5_fc *counter; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_ct_attr ct_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 chain; + u16 prio; + u32 dest_chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *dest_ft; + u8 inner_match_level; + u8 outer_match_level; + u32 flags; + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 +#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) + #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) struct tunnel_match_key { @@ -90,6 +130,7 @@ enum { int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); @@ -133,6 +174,8 @@ enum mlx5e_tc_attr_to_reg { MARK_TO_REG, LABELS_TO_REG, FTEID_TO_REG, + NIC_CHAIN_TO_REG, + NIC_ZONE_RESTORE_TO_REG, }; struct mlx5e_tc_attr_to_reg_mapping { @@ -150,6 +193,7 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, enum mlx5e_tc_attr_to_reg type, u32 data); @@ -181,14 +225,42 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} static inline int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } + #endif /* CONFIG_MLX5_CLS_ACT */ +struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + 
struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} @@ -203,4 +275,29 @@ mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } #endif +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain, reg_b; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + if (chain) + return true; +#endif + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ return false; } +static inline bool +mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ return true; } +#endif + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index da596de3abba..82b4419af9d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -144,9 +144,29 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } +/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */ +static void +ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + sq->stats->csum_partial_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; + } +} + static inline void mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { + if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { + ipsec_txwqe_build_eseg_csum(sq, skb, eseg); + return; + } + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -232,131 +252,188 @@ dma_unmap_wqe_err: return -ENOMEM; } +struct mlx5e_tx_attr { + u32 num_bytes; + u16 headlen; + u16 ihs; + __be16 mss; + u16 insz; + u8 opcode; +}; + +struct mlx5e_tx_wqe_attr { + u16 ds_cnt; + u16 ds_cnt_inl; + u16 ds_cnt_ids; + u8 num_wqebbs; +}; + +static u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel) +{ + u8 mode; + +#ifdef CONFIG_MLX5_EN_TLS + if (accel && accel->tls.tls_tisn) + return MLX5_INLINE_MODE_TCP_UDP; +#endif + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; +} + +static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5e_tx_attr *attr) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + if (skb_is_gso(skb)) { + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_LSO, + .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), + .ihs = ihs, + .num_bytes = skb->len + 
(skb_shinfo(skb)->gso_segs - 1) * ihs, + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel); + u16 ihs = mlx5e_calc_min_inline(mode, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_SEND, + .mss = cpu_to_be16(0), + .ihs = ihs, + .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN), + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets++; + } + + attr->insz = mlx5e_accel_tx_ids_len(sq, accel); + stats->bytes += attr->num_bytes; +} + +static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT; + u16 ds_cnt_inl = 0; + u16 ds_cnt_ids = 0; + + if (attr->insz) + ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, + MLX5_SEND_WQE_DS); + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids; + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + if (skb_vlan_tag_present(skb)) + inl += VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .ds_cnt_ids = ds_cnt_ids, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} + +static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, - u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, + const struct mlx5e_tx_attr *attr, + const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; bool send_doorbell; - wi->num_bytes = num_bytes; - wi->num_dma = num_dma; - wi->num_wqebbs = num_wqebbs; - wi->skb = skb; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = skb, + .num_bytes = attr->num_bytes, + .num_dma = num_dma, + .num_wqebbs = wqe_attr->num_wqebbs, + .num_fifo_pkts = 0, + }; - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + mlx5e_tx_skb_update_hwts_flags(skb); sq->pc += wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { - netif_tx_stop_queue(sq->txq); - sq->stats->stopped++; - } - send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, - xmit_more); + mlx5e_tx_check_stop(sq); + + send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more); if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) +static void +mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { - 
struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 headlen, ihs, contig_wqebbs_room; - u16 ds_cnt, ds_cnt_inl = 0; - u8 num_wqebbs, opcode; - u32 num_bytes; int num_dma; - __be16 mss; - - /* Calc ihs and ds cnt, no writes to wqe yet */ - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - ihs = mlx5e_tx_get_gso_ihs(sq, skb); - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; - stats->packets += skb_shinfo(skb)->gso_segs; - } else { - u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb); - opcode = MLX5_OPCODE_SEND; - mss = 0; - ihs = mlx5e_calc_min_inline(mode, skb); - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - stats->packets++; - } - - stats->bytes += num_bytes; stats->xmit_more += xmit_more; - headlen = skb->len - ihs - skb->data_len; - ds_cnt += !!headlen; - ds_cnt += skb_shinfo(skb)->nr_frags; - - if (ihs) { - ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN; - - ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - } - - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); - if (unlikely(contig_wqebbs_room < num_wqebbs)) { -#ifdef CONFIG_MLX5_EN_IPSEC - struct mlx5_wqe_eth_seg cur_eth = wqe->eth; -#endif -#ifdef CONFIG_MLX5_EN_TLS - struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; -#endif - mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - wqe = MLX5E_TX_FETCH_WQE(sq, pi); -#ifdef CONFIG_MLX5_EN_IPSEC - wqe->eth = cur_eth; -#endif -#ifdef CONFIG_MLX5_EN_TLS - wqe->ctrl = cur_ctrl; -#endif - } - /* fill wqe */ wi = &sq->db.wqe_info[pi]; cseg = &wqe->ctrl; eseg = &wqe->eth; dseg = wqe->data; -#if IS_ENABLED(CONFIG_GENEVE) - if (skb->encapsulation) - mlx5e_tx_tunnel_accel(skb, eseg); -#endif - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + eseg->mss = attr->mss; - eseg->mss = mss; - - if (ihs) { - eseg->inline_hdr.sz = cpu_to_be16(ihs); + if (attr->ihs) { if (skb_vlan_tag_present(skb)) { - ihs -= VLAN_HLEN; - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs); + eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); stats->added_vlan_packets++; } else { - memcpy(eseg->inline_hdr.start, skb->data, ihs); + eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); + memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); } - dseg += ds_cnt_inl; + dseg += wqe_attr->ds_cnt_inl; } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) @@ -365,12 +442,13 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, stats->added_vlan_packets++; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + dseg += wqe_attr->ds_cnt_ids; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs, + attr->headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more); return; @@ -379,10 +457,173 @@ err_drop: dev_kfree_skb_any(skb); } +static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) +{ + 
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && + !attr->insz; +} + +static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + + /* Assumes the session is already running and has at least one packet. */ + return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); +} + +static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, + struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = 0, + }; + + memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); + + sq->stats->mpwqe_blks++; +} + +static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq) +{ + return sq->mpwqe.wqe; +} + +static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg; + + dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + + session->pkt_count++; + session->bytes_count += txd->len; + + dseg->addr = cpu_to_be64(txd->dma_addr); + dseg->byte_count = cpu_to_be32(txd->len); + dseg->lkey = sq->mkey_be; + session->ds_count++; + + sq->stats->mpwqe_pkts++; +} + +static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + u8 ds_count = session->ds_count; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_tx_wqe_info *wi; + u16 pi; + + cseg = &session->wqe->ctrl; + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = NULL, + .num_bytes = session->bytes_count, + .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS), + .num_dma = session->pkt_count, + .num_fifo_pkts = session->pkt_count, + }; + + sq->pc += wi->num_wqebbs; + + session->wqe = NULL; + + mlx5e_tx_check_stop(sq); + + return cseg; +} + +static void +mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_xmit_data txd; + + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { + mlx5e_tx_mpwqe_session_start(sq, eseg); + } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { + mlx5e_tx_mpwqe_session_complete(sq); + mlx5e_tx_mpwqe_session_start(sq, eseg); + } + + sq->stats->xmit_more += xmit_more; + + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + + mlx5e_skb_fifo_push(sq, skb); + + mlx5e_tx_mpwqe_add_dseg(sq, &txd); + + mlx5e_tx_skb_update_hwts_flags(skb); + + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { + /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. 
*/ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) { + /* Might stop the queue, but we were asked to ring the doorbell anyway. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } + + return; + +err_unmap: + mlx5e_dma_unmap_wqe_err(sq, 1); + sq->stats->dropped++; + dev_kfree_skb_any(skb); +} + +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) +{ + /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */ + if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq))) + mlx5e_tx_mpwqe_session_complete(sq); +} + +static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +{ + if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) + return false; + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + return true; +} + netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_accel_tx_state accel = {}; + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; struct mlx5e_tx_wqe *wqe; struct mlx5e_txqsq *sq; u16 pi; @@ -391,21 +632,92 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) - goto out; + return NETDEV_TX_OK; - pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); + + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) { + if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { + struct mlx5_wqe_eth_seg eseg = {}; + + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg))) + return NETDEV_TX_OK; + + mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); + return NETDEV_TX_OK; + } + + mlx5e_tx_mpwqe_ensure_complete(sq); + } + + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); /* May update the WQE, but may not post other WQEs. 
*/ - if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel))) - goto out; + mlx5e_accel_tx_finish(sq, wqe, &accel, + (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth))) + return NETDEV_TX_OK; - mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more()); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); -out: return NETDEV_TX_OK; } +void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) +{ + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more); +} + +static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + int i; + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } +} + +static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + u64 ts = get_cqe_ts(cqe); + + hwts.hwtstamp = mlx5_timecounter_cyc2time(sq->clock, ts); + skb_tstamp_tx(skb, &hwts); + } + + napi_consume_skb(skb, napi_budget); +} + +static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) { + struct sk_buff *skb = mlx5e_skb_fifo_pop(sq); + + mlx5e_consume_skb(sq, skb, cqe, napi_budget); + } +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq_stats *stats; @@ -451,42 +763,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); do { - struct sk_buff *skb; - int j; - last_wqe = (sqcc == wqe_counter); ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; - skb = wi->skb; - if (unlikely(!skb)) { - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); - sqcc += wi->num_wqebbs; - continue; - } + sqcc += wi->num_wqebbs; - if (unlikely(skb_shinfo(skb)->tx_flags & - SKBTX_HW_TSTAMP)) { - struct skb_shared_hwtstamps hwts = {}; + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); - hwts.hwtstamp = - mlx5_timecounter_cyc2time(sq->clock, - get_cqe_ts(cqe)); - skb_tstamp_tx(skb, &hwts); + npkts++; + nbytes += wi->num_bytes; + continue; } - for (j = 0; j < wi->num_dma; j++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, + &dma_fifo_cc))) + continue; - mlx5e_tx_dma_unmap(sq->pdev, dma); - } + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget); - npkts++; - nbytes += wi->num_bytes; - sqcc += wi->num_wqebbs; - napi_consume_skb(skb, napi_budget); + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } } while (!last_wqe); if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { @@ -525,13 +828,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == 
MLX5E_TX_CQ_POLL_BUDGET); } +static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) + dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq)); +} + void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; u32 dma_fifo_cc, nbytes = 0; u16 ci, sqcc, npkts = 0; - struct sk_buff *skb; - int i; sqcc = sq->cc; dma_fifo_cc = sq->dma_fifo_cc; @@ -539,25 +848,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) while (sqcc != sq->pc) { ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; - skb = wi->skb; - if (!skb) { - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); - sqcc += wi->num_wqebbs; + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + dev_kfree_skb_any(wi->skb); + + npkts++; + nbytes += wi->num_bytes; continue; } - for (i = 0; i < wi->num_dma; i++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc))) + continue; - mlx5e_tx_dma_unmap(sq->pdev, dma); - } + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_kfree_fifo_skbs(sq, wi); - dev_kfree_skb_any(skb); - npkts++; - nbytes += wi->num_bytes; - sqcc += wi->num_wqebbs; + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } } sq->dma_fifo_cc = dma_fifo_cc; @@ -576,9 +888,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); } +static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS; + u16 ds_cnt_inl = 0; + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags; + + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more) { + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; struct mlx5i_tx_wqe *wqe; struct mlx5_wqe_datagram_seg *datagram; @@ -588,47 +925,17 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 ds_cnt, ds_cnt_inl = 0; - u8 num_wqebbs, opcode; - u16 headlen, ihs, pi; - u32 num_bytes; int num_dma; - __be16 mss; + u16 pi; - /* Calc ihs and ds cnt, no writes to wqe yet */ - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (skb_is_gso(skb)) { - opcode = MLX5_OPCODE_LSO; - mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - ihs = mlx5e_tx_get_gso_ihs(sq, skb); - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; - stats->packets += skb_shinfo(skb)->gso_segs; - } else { - u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb); + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); - opcode = MLX5_OPCODE_SEND; - mss = 0; - ihs = mlx5e_calc_min_inline(mode, skb); - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - stats->packets++; - } + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5I_SQ_FETCH_WQE(sq, pi); - stats->bytes += num_bytes; stats->xmit_more += xmit_more; - headlen = 
skb->len - ihs - skb->data_len; - ds_cnt += !!headlen; - ds_cnt += skb_shinfo(skb)->nr_frags; - - if (ihs) { - ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - } - - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); - wqe = MLX5I_SQ_FETCH_WQE(sq, pi); - /* fill wqe */ wi = &sq->db.wqe_info[pi]; cseg = &wqe->ctrl; @@ -640,20 +947,20 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); - eseg->mss = mss; + eseg->mss = attr.mss; - if (ihs) { - memcpy(eseg->inline_hdr.start, skb->data, ihs); - eseg->inline_hdr.sz = cpu_to_be16(ihs); - dseg += ds_cnt_inl; + if (attr.ihs) { + memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); + eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); + dseg += wqe_attr.ds_cnt_inl; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg); + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs, + attr.headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 22a19d391e17..8ebfe782f95e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -828,8 +828,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&eq->tasklet_ctx.list); INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 07b2acd7e6b3..c3faae67e4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -148,6 +148,11 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) esw_acl_egress_vlan_grp_destroy(vport); } +static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_eswitch_is_vf_vport(esw, vport_num); +} + int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int table_size = 0; @@ -157,6 +162,9 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport !MLX5_CAP_GEN(esw->dev, prio_tag_required)) return 0; + if (!esw_acl_egress_needed(esw, vport->vport)) + return 0; + esw_acl_egress_ofld_rules_destroy(vport); if (mlx5_esw_acl_egress_fwd2vport_supported(esw)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c deleted file mode 100644 index d5bf908dfecd..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c +++ /dev/null @@ -1,944 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -// Copyright (c) 2020 Mellanox Technologies. 
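/*
 * A minimal sketch, for orientation only (this block is not a hunk of the
 * patch): the per-eswitch chain/prio bookkeeping deleted in this file is
 * superseded by the driver-generic lib/fs_chains API, which the eswitch.h
 * and eswitch_offloads.c hunks further down consume through the new
 * esw_chains(esw) accessor.  The variables esw, attr and fdb here are
 * assumed purely for illustration.
 */
struct mlx5_fs_chains *chains = esw_chains(esw);	/* accessor added in eswitch.h below */
struct mlx5_flow_table *fdb;

/* Old call sites, removed together with this file:
 *   fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0);
 *   mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
 */

/* New call sites, as mlx5_eswitch_add_offloaded_rule() uses them below: */
fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
if (!IS_ERR(fdb))
	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);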
- -#include <linux/mlx5/driver.h> -#include <linux/mlx5/mlx5_ifc.h> -#include <linux/mlx5/fs.h> - -#include "esw/chains.h" -#include "en/mapping.h" -#include "mlx5_core.h" -#include "fs_core.h" -#include "eswitch.h" -#include "en.h" -#include "en_tc.h" - -#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) -#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) -#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) -#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) -#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) -#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) -#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) -#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) -#define fdb_ignore_flow_level_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) -#define fdb_modify_header_fwd_to_table_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. We can allocate up to 16M of each pool, - * and we keep track of how much we used via get_next_avail_sz_from_pool. - * Firmware doesn't report any of this for now. - * ESW_POOL is expected to be sorted from large to small and match firmware - * pools. - */ -#define ESW_SIZE (16 * 1024 * 1024) -static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, - 1 * 1024 * 1024, - 64 * 1024, - 128 }; -#define ESW_FT_TBL_SZ (64 * 1024) - -struct mlx5_esw_chains_priv { - struct rhashtable chains_ht; - struct rhashtable prios_ht; - /* Protects above chains_ht and prios_ht */ - struct mutex lock; - - struct mlx5_flow_table *tc_end_fdb; - struct mapping_ctx *chains_mapping; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; -}; - -struct fdb_chain { - struct rhash_head node; - - u32 chain; - - int ref; - int id; - - struct mlx5_eswitch *esw; - struct list_head prios_list; - struct mlx5_flow_handle *restore_rule; - struct mlx5_modify_hdr *miss_modify_hdr; -}; - -struct fdb_prio_key { - u32 chain; - u32 prio; - u32 level; -}; - -struct fdb_prio { - struct rhash_head node; - struct list_head list; - - struct fdb_prio_key key; - - int ref; - - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct mlx5_flow_table *next_fdb; - struct mlx5_flow_group *miss_group; - struct mlx5_flow_handle *miss_rule; -}; - -static const struct rhashtable_params chain_params = { - .head_offset = offsetof(struct fdb_chain, node), - .key_offset = offsetof(struct fdb_chain, chain), - .key_len = sizeof_field(struct fdb_chain, chain), - .automatic_shrinking = true, -}; - -static const struct rhashtable_params prio_params = { - .head_offset = offsetof(struct fdb_prio, node), - .key_offset = offsetof(struct fdb_prio, key), - .key_len = sizeof_field(struct fdb_prio, key), - .automatic_shrinking = true, -}; - -bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) -{ - return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; -} - -bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_prios_supported(esw) && - fdb_ignore_flow_level_supported(esw); -} - -u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX - 1; - - return FDB_TC_MAX_CHAIN; -} - -u32 
mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_get_chain_range(esw) + 1; -} - -u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_MAX_PRIO; -} - -static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) -{ - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_LEVELS_PER_PRIO; -} - -#define POOL_NEXT_SIZE 0 -static int -mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, - int desired_size) -{ - int i, found_i = -1; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { - found_i = i; - if (desired_size != POOL_NEXT_SIZE) - break; - } - } - - if (found_i != -1) { - --fdb_pool_left(esw)[found_i]; - return ESW_POOLS[found_i]; - } - - return 0; -} - -static void -mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (sz == ESW_POOLS[i]) { - ++fdb_pool_left(esw)[i]; - return; - } - } - - WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); -} - -static void -mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) -{ - u32 fdb_max; - int i; - - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) - fdb_pool_left(esw)[i] = - ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; -} - -static struct mlx5_flow_table * -mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - int sz; - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : - mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); - if (!sz) - return ERR_PTR(-ENOSPC); - ft_attr.max_fte = sz; - - /* We use tc_slow_fdb(esw) as the table's next_ft till - * ignore_flow_level is allowed on FT creation and not just for FTEs. - * Instead caller should add an explicit miss rule if needed. - */ - ft_attr.next_ft = tc_slow_fdb(esw); - - /* The root table(chain 0, prio 1, level 0) is required to be - * connected to the previous prio (FDB_BYPASS_PATH if exists). - * We always create it, as a managed table, in order to align with - * fs_core logic. - */ - if (!fdb_ignore_flow_level_supported(esw) || - (chain == 0 && prio == 1 && level == 0)) { - ft_attr.level = level; - ft_attr.prio = prio - 1; - ns = mlx5_get_fdb_sub_ns(esw->dev, chain); - } else { - ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = FDB_TC_OFFLOAD; - /* Firmware doesn't allow us to create another level 0 table, - * so we create all unmanaged tables as level 1. - * - * To connect them, we use explicit miss rules with - * ignore_flow_level. Caller is responsible to create - * these rules (if needed). 
- */ - ft_attr.level = 1; - ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); - } - - ft_attr.autogroup.num_reserved_entries = 2; - ft_attr.autogroup.max_num_groups = esw->params.large_group_num; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, - "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), chain, prio, level, sz); - mlx5_esw_chains_put_sz_to_pool(esw, sz); - return fdb; - } - - return fdb; -} - -static void -mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *fdb) -{ - mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); - mlx5_destroy_flow_table(fdb); -} - -static int -create_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_modify_hdr *mod_hdr; - u32 index; - int err; - - if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || - !mlx5_esw_chains_prios_supported(esw)) - return 0; - - err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); - if (err) - return err; - if (index == MLX5_FS_DEFAULT_FLOW_TAG) { - /* we got the special default flow tag id, so we won't know - * if we actually marked the packet with the restore rule - * we create. - * - * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. - */ - err = mapping_add(esw_chains_mapping(esw), - &fdb_chain->chain, &index); - mapping_remove(esw_chains_mapping(esw), - MLX5_FS_DEFAULT_FLOW_TAG); - if (err) - return err; - } - - fdb_chain->id = index; - - MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, modact, field, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); - MLX5_SET(set_action_in, modact, offset, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); - MLX5_SET(set_action_in, modact, length, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); - MLX5_SET(set_action_in, modact, data, fdb_chain->id); - mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, - 1, modact); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - goto err_mod_hdr; - } - fdb_chain->miss_modify_hdr = mod_hdr; - - fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); - if (IS_ERR(fdb_chain->restore_rule)) { - err = PTR_ERR(fdb_chain->restore_rule); - goto err_rule; - } - - return 0; - -err_rule: - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); -err_mod_hdr: - /* Datapath can't find this mapping, so we can safely remove it */ - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); - return err; -} - -static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - if (!fdb_chain->miss_modify_hdr) - return; - - mlx5_del_flow_rules(fdb_chain->restore_rule); - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); -} - -static struct fdb_chain * -mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain = NULL; - int err; - - fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); - if (!fdb_chain) - return ERR_PTR(-ENOMEM); - - fdb_chain->esw = esw; - fdb_chain->chain = chain; - INIT_LIST_HEAD(&fdb_chain->prios_list); - - err = create_fdb_chain_restore(fdb_chain); - if (err) - goto err_restore; - - err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - if 
(err) - goto err_insert; - - return fdb_chain; - -err_insert: - destroy_fdb_chain_restore(fdb_chain); -err_restore: - kvfree(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - - destroy_fdb_chain_restore(fdb_chain); - kvfree(fdb_chain); -} - -static struct fdb_chain * -mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain; - - fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, - chain_params); - if (!fdb_chain) { - fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return fdb_chain; - } - - fdb_chain->ref++; - - return fdb_chain; -} - -static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, - struct mlx5_flow_table *fdb, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act act = {}; - - act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; - act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = next_fdb; - - if (next_fdb == tc_end_fdb(esw) && - mlx5_esw_chains_prios_supported(esw)) { - act.modify_hdr = fdb_chain->miss_modify_hdr; - act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - } - - return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); -} - -static int -mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - struct fdb_prio *pos; - int n = 0, err; - - if (fdb_prio->key.level) - return 0; - - /* Iterate in reverse order until reaching the level 0 rule of - * the previous priority, adding all the miss rules first, so we can - * revert them if any of them fails. - */ - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, - pos->fdb, - next_fdb); - if (IS_ERR(miss_rules[n])) { - err = PTR_ERR(miss_rules[n]); - goto err_prev_rule; - } - - n++; - if (!pos->key.level) - break; - } - - /* Success, delete old miss rules, and update the pointers. 
*/ - n = 0; - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - mlx5_del_flow_rules(pos->miss_rule); - - pos->miss_rule = miss_rules[n]; - pos->next_fdb = next_fdb; - - n++; - if (!pos->key.level) - break; - } - - return 0; - -err_prev_rule: - while (--n >= 0) - mlx5_del_flow_rules(miss_rules[n]); - - return err; -} - -static void -mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) -{ - if (--fdb_chain->ref == 0) - mlx5_esw_chains_destroy_fdb_chain(fdb_chain); -} - -static struct fdb_prio * -mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_handle *miss_rule = NULL; - struct mlx5_flow_group *miss_group; - struct fdb_prio *fdb_prio = NULL; - struct mlx5_flow_table *next_fdb; - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct list_head *pos; - u32 *flow_group_in; - int err; - - fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return ERR_CAST(fdb_chain); - - fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!fdb_prio || !flow_group_in) { - err = -ENOMEM; - goto err_alloc; - } - - /* Chain's prio list is sorted by prio and level. - * And all levels of some prio point to the next prio's level 0. - * Example list (prio, level): - * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) - * In hardware, we will we have the following pointers: - * (3,0) -> (5,0) -> (7,0) -> Slow path - * (3,1) -> (5,0) - * (5,1) -> (7,0) - * (6,1) -> (7,0) - */ - - /* Default miss for each chain: */ - next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - tc_slow_fdb(esw) : - tc_end_fdb(esw); - list_for_each(pos, &fdb_chain->prios_list) { - struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); - - /* exit on first pos that is larger */ - if (prio < p->key.prio || (prio == p->key.prio && - level < p->key.level)) { - /* Get next level 0 table */ - next_fdb = p->key.level == 0 ? 
p->fdb : p->next_fdb; - break; - } - } - - fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - goto err_create; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, - fdb->max_fte - 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - fdb->max_fte - 1); - miss_group = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(miss_group)) { - err = PTR_ERR(miss_group); - goto err_group; - } - - /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); - if (IS_ERR(miss_rule)) { - err = PTR_ERR(miss_rule); - goto err_miss_rule; - } - - fdb_prio->miss_group = miss_group; - fdb_prio->miss_rule = miss_rule; - fdb_prio->next_fdb = next_fdb; - fdb_prio->fdb_chain = fdb_chain; - fdb_prio->key.chain = chain; - fdb_prio->key.prio = prio; - fdb_prio->key.level = level; - fdb_prio->fdb = fdb; - - err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - if (err) - goto err_insert; - - list_add(&fdb_prio->list, pos->prev); - - /* Table is ready, connect it */ - err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); - if (err) - goto err_update; - - kvfree(flow_group_in); - return fdb_prio; - -err_update: - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); -err_insert: - mlx5_del_flow_rules(miss_rule); -err_miss_rule: - mlx5_destroy_flow_group(miss_group); -err_group: - mlx5_esw_chains_destroy_fdb_table(esw, fdb); -err_create: -err_alloc: - kvfree(fdb_prio); - kvfree(flow_group_in); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, - struct fdb_prio *fdb_prio) -{ - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - - WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, - fdb_prio->next_fdb)); - - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - mlx5_del_flow_rules(fdb_prio->miss_rule); - mlx5_destroy_flow_group(fdb_prio->miss_group); - mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - kvfree(fdb_prio); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct mlx5_flow_table *prev_fts; - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - int l = 0; - - if ((chain > mlx5_esw_chains_get_chain_range(esw) && - chain != mlx5_esw_chains_get_ft_chain(esw)) || - prio > mlx5_esw_chains_get_prio_range(esw) || - level > mlx5_esw_chains_get_level_range(esw)) - return ERR_PTR(-EOPNOTSUPP); - - /* create earlier levels for correct fs_core lookup when - * connecting tables. 
- */ - for (l = 0; l < level; l++) { - prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); - if (IS_ERR(prev_fts)) { - fdb_prio = ERR_CAST(prev_fts); - goto err_get_prevs; - } - } - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) { - fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, - prio, level); - if (IS_ERR(fdb_prio)) - goto err_create_prio; - } - - ++fdb_prio->ref; - mutex_unlock(&esw_chains_lock(esw)); - - return fdb_prio->fdb; - -err_create_prio: - mutex_unlock(&esw_chains_lock(esw)); -err_get_prevs: - while (--l >= 0) - mlx5_esw_chains_put_table(esw, chain, prio, l); - return ERR_CAST(fdb_prio); -} - -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) - goto err_get_prio; - - if (--fdb_prio->ref == 0) - mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); - mutex_unlock(&esw_chains_lock(esw)); - - while (level-- > 0) - mlx5_esw_chains_put_table(esw, chain, prio, level); - - return; - -err_get_prio: - mutex_unlock(&esw_chains_lock(esw)); - WARN_ONCE(1, - "Couldn't find table: (chain: %d prio: %d level: %d)", - chain, prio, level); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) -{ - return tc_end_fdb(esw); -} - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) -{ - u32 chain, prio, level; - int err; - - if (!fdb_ignore_flow_level_supported(esw)) { - err = -EOPNOTSUPP; - - esw_warn(esw->dev, - "Couldn't create global flow table, ignore_flow_level not supported."); - goto err_ignore; - } - - chain = mlx5_esw_chains_get_chain_range(esw), - prio = mlx5_esw_chains_get_prio_range(esw); - level = mlx5_esw_chains_get_level_range(esw); - - return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - -err_ignore: - return ERR_PTR(err); -} - -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft) -{ - mlx5_esw_chains_destroy_fdb_table(esw, ft); -} - -static int -mlx5_esw_chains_init(struct mlx5_eswitch *esw) -{ - struct mlx5_esw_chains_priv *chains_priv; - struct mlx5_core_dev *dev = esw->dev; - u32 max_flow_counter, fdb_max; - struct mapping_ctx *mapping; - int err; - - chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); - if (!chains_priv) - return -ENOMEM; - esw_chains_priv(esw) = chains_priv; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, - "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, esw->params.large_group_num, fdb_max); - - mlx5_esw_chains_init_sz_pool(esw); - - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); - } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities 
offload aren't supported\n"); - } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { - /* Disabled when ttl workaround is needed, e.g - * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig - */ - esw_warn(dev, - "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - } else { - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", - mlx5_esw_chains_get_chain_range(esw), - mlx5_esw_chains_get_prio_range(esw)); - } - - err = rhashtable_init(&esw_chains_ht(esw), &chain_params); - if (err) - goto init_chains_ht_err; - - err = rhashtable_init(&esw_prios_ht(esw), &prio_params); - if (err) - goto init_prios_ht_err; - - mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), - true); - if (IS_ERR(mapping)) { - err = PTR_ERR(mapping); - goto mapping_err; - } - esw_chains_mapping(esw) = mapping; - - mutex_init(&esw_chains_lock(esw)); - - return 0; - -mapping_err: - rhashtable_destroy(&esw_prios_ht(esw)); -init_prios_ht_err: - rhashtable_destroy(&esw_chains_ht(esw)); -init_chains_ht_err: - kfree(chains_priv); - return err; -} - -static void -mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) -{ - mutex_destroy(&esw_chains_lock(esw)); - mapping_destroy(esw_chains_mapping(esw)); - rhashtable_destroy(&esw_prios_ht(esw)); - rhashtable_destroy(&esw_chains_ht(esw)); - - kfree(esw_chains_priv(esw)); -} - -static int -mlx5_esw_chains_open(struct mlx5_eswitch *esw) -{ - struct mlx5_flow_table *ft; - int err; - - /* Create tc_end_fdb(esw) which is the always created ft chain */ - ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), - 1, 0); - if (IS_ERR(ft)) - return PTR_ERR(ft); - - tc_end_fdb(esw) = ft; - - /* Always open the root for fast path */ - ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - goto level_0_err; - } - - /* Open level 1 for split rules now if prios isn't supported */ - if (!mlx5_esw_chains_prios_supported(esw)) { - err = mlx5_esw_vport_tbl_get(esw); - if (err) - goto level_1_err; - } - - return 0; - -level_1_err: - mlx5_esw_chains_put_table(esw, 0, 1, 0); -level_0_err: - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); - return err; -} - -static void -mlx5_esw_chains_close(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - mlx5_esw_vport_tbl_put(esw); - mlx5_esw_chains_put_table(esw, 0, 1, 0); - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); -} - -int -mlx5_esw_chains_create(struct mlx5_eswitch *esw) -{ - int err; - - err = mlx5_esw_chains_init(esw); - if (err) - return err; - - err = mlx5_esw_chains_open(esw); - if (err) - goto err_open; - - return 0; - -err_open: - mlx5_esw_chains_cleanup(esw); - return err; -} - -void -mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) -{ - mlx5_esw_chains_close(esw); - mlx5_esw_chains_cleanup(esw); -} - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping) -{ - return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); -} - -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) -{ - return mapping_remove(esw_chains_mapping(esw), chain_mapping); -} - -int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, - u32 *chain) -{ - int err; - - err = mapping_find(esw_chains_mapping(esw), tag, chain); - if (err) { - 
esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h deleted file mode 100644 index 7679ac359e31..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020 Mellanox Technologies. */ - -#ifndef __ML5_ESW_CHAINS_H__ -#define __ML5_ESW_CHAINS_H__ - -#include "eswitch.h" - -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - -bool -mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); -bool -mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft); - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping); -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, - u32 chain_mapping); - -int mlx5_esw_chains_create(struct mlx5_eswitch *esw); -void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); - -int -mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); - -#else /* CONFIG_MLX5_CLS_ACT */ - -static inline struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) { return ERR_PTR(-EOPNOTSUPP); } -static inline void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) {} - -static inline struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); } - -static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; } -static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {} - -#endif /* CONFIG_MLX5_CLS_ACT */ - -#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c new file mode 100644 index 000000000000..ffff11baa3d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#include <linux/mlx5/driver.h> +#include "eswitch.h" + +static void +mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +static bool +mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_UPLINK || + (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + +static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + struct devlink_port *dl_port; + u32 controller_num = 0; + bool external; + u16 pfnum; + + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); + if (!dl_port) + return NULL; + + mlx5_esw_get_port_parent_id(dev, &ppid); + pfnum = PCI_FUNC(dev->pdev->devfn); + external = mlx5_core_is_ecpf_esw_manager(dev); + if (external) + controller_num = dev->priv.eswitch->offloads.host_number + 1; + + if (vport_num == MLX5_VPORT_UPLINK) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pfnum; + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_set(dl_port, &attrs); + } else if (vport_num == MLX5_VPORT_PF) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); + } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, external); + } + return dl_port; +} + +static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) +{ + kfree(dl_port); +} + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_core_dev *dev = esw->dev; + struct devlink_port *dl_port; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + int err; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return 0; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + dl_port = mlx5_esw_dl_port_alloc(esw, vport_num); + if (!dl_port) + return -ENOMEM; + + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devlink_port_register(devlink, dl_port, dl_port_index); + if (err) + goto reg_err; + + vport->dl_port = dl_port; + return 0; + +reg_err: + mlx5_esw_dl_port_free(dl_port); + return err; +} + +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + devlink_port_unregister(vport->dl_port); + mlx5_esw_dl_port_free(vport->dl_port); + vport->dl_port = NULL; +} + +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + return vport->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 867d8120b8a5..cf87de94418f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -42,6 +42,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#include "lib/fs_chains.h" #include "en/tc_ct.h" #ifdef CONFIG_MLX5_ESWITCH @@ -62,6 +63,9 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define esw_chains(esw) \ + ((esw)->fdb_table.offloads.esw_chains_priv) + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_handle *allow_rule; @@ -152,14 +156,9 @@ struct mlx5_vport { bool enabled; enum mlx5_eswitch_vport_event enabled_events; + struct devlink_port *dl_port; }; -enum offloads_fdb_flags { - ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), -}; - -struct mlx5_esw_chains_priv; - struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -183,7 +182,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; - struct mlx5_esw_chains_priv *esw_chains_priv; + struct mlx5_fs_chains *esw_chains_priv; struct { DECLARE_HASHTABLE(table, 8); /* Protects vports.table */ @@ -217,6 +216,7 @@ struct mlx5_esw_offload { atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; struct ida vport_metadata_ida; + unsigned int host_number; /* ECPF supports one external host */ }; /* E-Switch MC FDB table hash node */ @@ -329,7 +329,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -349,19 +349,19 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); struct mlx5_flow_handle * mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); void mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, @@ -401,7 +401,6 @@ struct mlx5_esw_flow_attr { int split_count; int out_count; - int action; __be16 vlan_proto[MLX5_FS_VLAN_DEPTH]; u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; @@ -413,19 +412,7 @@ struct mlx5_esw_flow_attr { struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5_modify_hdr *modify_hdr; - u8 inner_match_level; - u8 outer_match_level; - struct mlx5_fc *counter; - u32 chain; - u16 prio; - u32 dest_chain; - u32 flags; - struct mlx5_flow_table *fdb; - struct mlx5_flow_table *dest_ft; - struct mlx5_ct_attr ct_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; - struct mlx5e_tc_flow_parse_attr *parse_attr; }; int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, @@ -451,9 +438,9 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - 
struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_attr *attr); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); @@ -677,6 +664,9 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1bcf2609dca8..c9c2962ad49f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,12 +39,13 @@ #include "mlx5_core.h" #include "eswitch.h" #include "esw/acl/ofld.h" -#include "esw/chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" #include "lib/devcom.h" #include "lib/eq.h" +#include "lib/fs_chains.h" +#include "en_tc.h" /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@ -66,6 +67,12 @@ struct mlx5_vport_key { u16 vhca_id; } __packed; +struct mlx5_vport_tbl_attr { + u16 chain; + u16 prio; + u16 vport; +}; + struct mlx5_vport_table { struct hlist_node hlist; struct mlx5_flow_table *fdb; @@ -94,10 +101,10 @@ esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) } static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_vport_tbl_attr *attr, struct mlx5_vport_key *key) { - key->vport = attr->in_rep->vport; + key->vport = attr->vport; key->chain = attr->chain; key->prio = attr->prio; key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); @@ -118,7 +125,7 @@ esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 } static void -esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) { struct mlx5_vport_table *e; struct mlx5_vport_key key; @@ -138,7 +145,7 @@ out: } static struct mlx5_flow_table * -esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) { struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *ns; @@ -189,16 +196,15 @@ err_alloc: int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) { - struct mlx5_esw_flow_attr attr = {}; - struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport_tbl_attr attr; struct mlx5_flow_table *fdb; struct mlx5_vport *vport; int i; + attr.chain = 0; attr.prio = 1; - attr.in_rep = &rep; mlx5_esw_for_all_vports(esw, i, vport) { - attr.in_rep->vport = vport->vport; + attr.vport = vport->vport; fdb = esw_vport_tbl_get(esw, &attr); if (IS_ERR(fdb)) goto out; @@ -212,15 +218,14 @@ out: void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) { - struct mlx5_esw_flow_attr attr = {}; - struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport_tbl_attr attr; struct mlx5_vport *vport; int i; + 
attr.chain = 0; attr.prio = 1; - attr.in_rep = &rep; mlx5_esw_for_all_vports(esw, i, vport) { - attr.in_rep->vport = vport->vport; + attr.vport = vport->vport; esw_vport_tbl_put(esw, &attr); } } @@ -242,8 +247,11 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && - attr && attr->in_rep && attr->in_rep->vport == MLX5_VPORT_UPLINK) - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + attr && attr->in_rep) + spec->flow_context.flow_source = + attr->in_rep->vport == MLX5_VPORT_UPLINK ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; } static void @@ -290,11 +298,14 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; - bool split = !!(attr->split_count); + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = !!(esw_attr->split_count); + struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; @@ -308,13 +319,13 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { - flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]); - flow_act.vlan[0].vid = attr->vlan_vid[0]; - flow_act.vlan[0].prio = attr->vlan_prio[0]; + flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]); + flow_act.vlan[0].vid = esw_attr->vlan_vid[0]; + flow_act.vlan[0].prio = esw_attr->vlan_prio[0]; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { - flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]); - flow_act.vlan[1].vid = attr->vlan_vid[1]; - flow_act.vlan[1].prio = attr->vlan_prio[1]; + flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]); + flow_act.vlan[1].vid = esw_attr->vlan_vid[1]; + flow_act.vlan[1].prio = esw_attr->vlan_prio[1]; } } @@ -329,12 +340,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); i++; } else if (attr->dest_chain) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - ft = mlx5_esw_chains_get_table(esw, attr->dest_chain, - 1, 0); + ft = mlx5_chains_get_table(chains, attr->dest_chain, + 1, 0); if (IS_ERR(ft)) { rule = ERR_CAST(ft); goto err_create_goto_table; @@ -344,28 +355,29 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].ft = ft; i++; } else { - for (j = attr->split_count; j < attr->out_count; j++) { + for (j = esw_attr->split_count; j < esw_attr->out_count; j++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->dests[j].rep->vport; + dest[i].vport.num = esw_attr->dests[j].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id); + MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if 
(attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.pkt_reformat = attr->dests[j].pkt_reformat; + flow_act.pkt_reformat = + esw_attr->dests[j].pkt_reformat; dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; dest[i].vport.pkt_reformat = - attr->dests[j].pkt_reformat; + esw_attr->dests[j].pkt_reformat; } i++; } } } - if (attr->decap_pkt_reformat) - flow_act.pkt_reformat = attr->decap_pkt_reformat; + if (esw_attr->decap_pkt_reformat) + flow_act.pkt_reformat = esw_attr->decap_pkt_reformat; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; @@ -382,26 +394,30 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.modify_hdr = attr->modify_hdr; if (split) { - fdb = esw_vport_tbl_get(esw, attr); + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + + fdb = esw_vport_tbl_get(esw, &fwd_attr); } else { if (attr->chain || attr->prio) - fdb = mlx5_esw_chains_get_table(esw, attr->chain, - attr->prio, 0); + fdb = mlx5_chains_get_table(chains, attr->chain, + attr->prio, 0); else - fdb = attr->fdb; + fdb = attr->ft; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) - mlx5_eswitch_set_rule_source_port(esw, spec, attr); + mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - mlx5_eswitch_set_rule_flow_source(esw, spec, attr); + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) - rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, &flow_act, dest, i); else rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); @@ -414,12 +430,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, err_add_rule: if (split) - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) - mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); + mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); err_create_goto_table: return rule; } @@ -427,46 +443,51 @@ err_create_goto_table: struct mlx5_flow_handle * mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; int i; - fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0); + fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); goto err_get_fast; } - fwd_fdb = esw_vport_tbl_get(esw, attr); + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; } 
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - for (i = 0; i < attr->split_count; i++) { + for (i = 0; i < esw_attr->split_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->dests[i].rep->vport; + dest[i].vport.num = esw_attr->dests[i].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id); + MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat; + dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, i++; - mlx5_eswitch_set_rule_source_port(esw, spec, attr); - mlx5_eswitch_set_rule_flow_source(esw, spec, attr); + mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -481,9 +502,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); err_get_fwd: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: return rule; } @@ -491,10 +512,13 @@ err_get_fast: static void __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, bool fwd_rule) { - bool split = (attr->split_count > 0); + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = (esw_attr->split_count > 0); + struct mlx5_vport_tbl_attr fwd_attr; int i; mlx5_del_flow_rules(rule); @@ -502,31 +526,36 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (esw_attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl); } } atomic64_dec(&esw->offloads.num_flows); + if (fwd_rule || split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + } + if (fwd_rule) { - esw_vport_tbl_put(esw, attr); - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); + esw_vport_tbl_put(esw, &fwd_attr); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); } else { if (split) - esw_vport_tbl_put(esw, attr); + esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, - 0); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); if (attr->dest_chain) - mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); + mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); } } void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { __mlx5_eswitch_del_rule(esw, rule, attr, false); } @@ -534,7 +563,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, void mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, - struct mlx5_esw_flow_attr *attr) + struct 
mlx5_flow_attr *attr) { __mlx5_eswitch_del_rule(esw, rule, attr, true); } @@ -611,9 +640,10 @@ out_notsupp: } int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_eswitch_rep *vport = NULL; bool push, pop, fwd; int err = 0; @@ -629,17 +659,17 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); - err = esw_add_vlan_action_check(attr, push, pop, fwd); + err = esw_add_vlan_action_check(esw_attr, push, pop, fwd); if (err) goto unlock; attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; - vport = esw_vlan_action_get_vport(attr, push, pop); + vport = esw_vlan_action_get_vport(esw_attr, push, pop); if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; } @@ -662,11 +692,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (vport->vlan_refcount) goto skip_set_push; - err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0, - SET_VLAN_INSERT | SET_VLAN_STRIP); + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0], + 0, SET_VLAN_INSERT | SET_VLAN_STRIP); if (err) goto out; - vport->vlan = attr->vlan_vid[0]; + vport->vlan = esw_attr->vlan_vid[0]; skip_set_push: vport->vlan_refcount++; } @@ -679,9 +709,10 @@ unlock: } int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr) { struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_eswitch_rep *vport = NULL; bool push, pop, fwd; int err = 0; @@ -699,11 +730,11 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); - vport = esw_vlan_action_get_vport(attr, push, pop); + vport = esw_vlan_action_get_vport(esw_attr, push, pop); if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) + if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; goto out; @@ -1137,6 +1168,126 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, } } +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) +static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level)) + *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + + if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and 
priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + } else { + *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc chains and prios offload\n"); + } + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED; +} + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *nf_ft, *ft; + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + u32 fdb_max; + int err; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_init_chains_offload_flags(esw, &attr.flags); + attr.ns = MLX5_FLOW_NAMESPACE_FDB; + attr.max_ft_sz = fdb_max; + attr.max_grp_num = esw->params.large_group_num; + attr.default_ft = miss_fdb; + attr.max_restore_tag = esw_get_max_restore_tag(esw); + + chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(chains)) { + err = PTR_ERR(chains); + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + return err; + } + + esw->fdb_table.offloads.esw_chains_priv = chains; + + /* Create tc_end_ft which is the always created ft chain */ + nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains), + 1, 0); + if (IS_ERR(nf_ft)) { + err = PTR_ERR(nf_ft); + goto nf_ft_err; + } + + /* Always open the root for fast path */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split fdb rules now if prios isn't supported */ + if (!mlx5_chains_prios_supported(chains)) { + err = mlx5_esw_vport_tbl_get(esw); + if (err) + goto level_1_err; + } + + mlx5_chains_set_end_ft(chains, nf_ft); + + return 0; + +level_1_err: + mlx5_chains_put_table(chains, 0, 1, 0); +level_0_err: + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); +nf_ft_err: + mlx5_chains_destroy(chains); + esw->fdb_table.offloads.esw_chains_priv = NULL; + + return err; +} + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + mlx5_esw_vport_tbl_put(esw); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); + mlx5_chains_destroy(chains); +} + +#else /* CONFIG_MLX5_CLS_ACT */ + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ return 0; } + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{} + +#endif + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1192,9 +1343,9 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.slow_fdb = fdb; - err = mlx5_esw_chains_create(esw); + err = esw_chains_create(esw, fdb); if (err) { - esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + esw_warn(dev, "Failed to open fdb chains err(%d)\n", err); goto fdb_chains_err; } @@ -1288,7 +1439,7 @@ miss_err: peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - mlx5_esw_chains_destroy(esw); + esw_chains_destroy(esw, esw_chains(esw)); fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: @@ -1312,7 +1463,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) 
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); - mlx5_esw_chains_destroy(esw); + esw_chains_destroy(esw, esw_chains(esw)); + mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, @@ -1671,15 +1823,12 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) __esw_offloads_unload_rep(esw, rep, rep_type); } -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; int err; - if (esw->mode != MLX5_ESWITCH_OFFLOADS) - return 0; - rep = mlx5_eswitch_get_rep(esw, vport_num); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) if (atomic_cmpxchg(&rep->rep_data[rep_type].state, @@ -1698,19 +1847,46 @@ err_reps: return err; } -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; - if (esw->mode != MLX5_ESWITCH_OFFLOADS) - return; - rep = mlx5_eswitch_get_rep(esw, vport_num); for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) __esw_offloads_unload_rep(esw, rep, rep_type); } +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + int err; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto load_err; + return err; + +load_err: + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + return err; +} + +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); +} + #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) @@ -1868,53 +2044,38 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } -static bool -esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw) -{ - return mlx5_core_mp_enabled(esw->dev); -} - -static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) -{ - return esw_check_vport_match_metadata_mandatory(esw) && - esw_check_vport_match_metadata_supported(esw); -} - u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { - u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1; - u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); - u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - u32 start; - u32 end; + u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; + u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1; + u32 pf_num; int id; - /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ - WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); - - /* Trim vhca_id to ESW_VHCA_ID_BITS */ - vhca_id &= vhca_id_mask; - - start = (vhca_id << ESW_VPORT_BITS); - end = start + num_vports; - if (!vhca_id) - start += 1; /* zero is reserved/invalid metadata */ - id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL); + /* Only 4 bits of pf_num */ + pf_num = PCI_FUNC(esw->dev->pdev->devfn); + if (pf_num > max_pf_num) + return 0; - return (id < 0) ? 
0 : id; + /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ + /* Use only non-zero vport_id (1-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL); + if (id < 0) + return 0; + id = (pf_num << ESW_VPORT_BITS) | id; + return id; } void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) { - ida_free(&esw->offloads.vport_metadata_ida, metadata); + u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1; + + /* Metadata contains only 12 bits of actual ida id */ + ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask); } static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK) - return 0; - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); vport->metadata = vport->default_metadata; return vport->metadata ? 0 : -ENOSPC; @@ -1923,40 +2084,65 @@ static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata) + if (!vport->default_metadata) return; WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); } +static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return; + + mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_offloads_vport_metadata_cleanup(esw, vport); +} + +static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int err; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_offloads_vport_metadata_setup(esw, vport); + if (err) + goto metadata_err; + } + + return 0; + +metadata_err: + esw_offloads_metadata_uninit(esw); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; - err = esw_offloads_vport_metadata_setup(esw, vport); - if (err) - goto metadata_err; - err = esw_acl_ingress_ofld_setup(esw, vport); if (err) - goto ingress_err; + return err; - if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_acl_egress_ofld_setup(esw, vport); - if (err) - goto egress_err; - } + err = esw_acl_egress_ofld_setup(esw, vport); + if (err) + goto egress_err; return 0; egress_err: esw_acl_ingress_ofld_cleanup(esw, vport); -ingress_err: - esw_offloads_vport_metadata_cleanup(esw, vport); -metadata_err: return err; } @@ -1966,22 +2152,14 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, { esw_acl_egress_ofld_cleanup(vport); esw_acl_ingress_ofld_cleanup(esw, vport); - esw_offloads_vport_metadata_cleanup(esw, vport); } static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int err; - - if (esw_use_vport_metadata(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - err = esw_vport_create_offloads_acl_tables(esw, vport); - if (err) - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; - return err; + return esw_vport_create_offloads_acl_tables(esw, vport); } static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) @@ -1990,7 +2168,6 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) vport = 
mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); esw_vport_destroy_offloads_acl_tables(esw, vport); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw) @@ -2114,6 +2291,24 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type return NOTIFY_OK; } +static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + /* Mark non local controller with non zero controller number. */ + esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out, + host_params_context.host_number); + kvfree(query_host_out); + return 0; +} + int esw_offloads_enable(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; @@ -2128,6 +2323,17 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); + err = mlx5_esw_host_number_init(esw); + if (err) + goto err_metadata; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; @@ -2160,6 +2366,9 @@ err_uplink: err_steering_init: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: + esw_offloads_metadata_uninit(esw); +err_metadata: + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2193,6 +2402,8 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + esw_offloads_metadata_uninit(esw); + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 17a0d2bc102b..ec679560a95d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ #include <linux/mlx5/fs.h> #include "eswitch.h" +#include "en_tc.h" #include "fs_core.h" struct mlx5_termtbl_handle { @@ -228,10 +229,11 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, - struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int i; if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || @@ -244,8 +246,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, return true; /* hairpin */ - for (i = attr->split_count; i < attr->out_count; i++) - if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) return true; return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 831d2c39e153..80da50e12915 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -54,7 +54,7 @@ static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, if (unlikely(!buf->sg[0].data)) goto out; - dma_device = &conn->fdev->mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, buf->sg[0].size, buf->dma_dir); err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); @@ -86,7 +86,7 @@ static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, { struct device *dma_device; - dma_device = &conn->fdev->mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); if (buf->sg[1].data) dma_unmap_single(dma_device, buf->sg[1].dma_addr, buf->sg[1].size, buf->dma_dir); @@ -388,9 +388,9 @@ static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, mlx5_fpga_conn_arm_cq(conn); } -static void mlx5_fpga_conn_cq_tasklet(unsigned long data) +static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t) { - struct mlx5_fpga_conn *conn = (void *)data; + struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet); if (unlikely(!conn->qp.active)) return; @@ -478,8 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; - tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, - (unsigned long)conn); + tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index fee169732de7..babe3405132a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -776,6 +776,9 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, table_type = FS_FT_NIC_RX; break; case MLX5_FLOW_NAMESPACE_EGRESS: +#ifdef CONFIG_MLX5_IPSEC + case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL: +#endif max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 75fa44eee434..16091838bfcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -126,6 +126,10 @@ #define LAG_NUM_PRIOS 1 #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) +#define KERNEL_TX_IPSEC_NUM_PRIOS 1 +#define KERNEL_TX_IPSEC_NUM_LEVELS 1 +#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) + struct node_caps { size_t arr_sz; long *caps; @@ -180,13 +184,24 @@ static struct init_tree_node { static struct init_tree_node egress_root_fs = { .type = FS_TYPE_NAMESPACE, +#ifdef CONFIG_MLX5_IPSEC + .ar_size = 2, +#else .ar_size = 1, +#endif .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), +#ifdef CONFIG_MLX5_IPSEC + ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, + KERNEL_TX_IPSEC_NUM_LEVELS))), +#endif } }; @@ -1595,11 +1610,12 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; if (ignore_level) { - if (ft->type 
!= FS_FT_FDB) + if (ft->type != FS_FT_FDB && + ft->type != FS_FT_NIC_RX) return false; if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && - dest->ft->type != FS_FT_FDB) + ft->type != dest->ft->type) return false; } @@ -2164,8 +2180,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, break; } - if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + if (type == MLX5_FLOW_NAMESPACE_EGRESS || + type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) { root_ns = steering->egress_root_ns; + prio = type - MLX5_FLOW_NAMESPACE_EGRESS; } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_BYPASS_PRIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c new file mode 100644 index 000000000000..f9042e147c7f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "fw_reset.h" +#include "diag/fw_tracer.h" + +enum { + MLX5_FW_RESET_FLAGS_RESET_REQUESTED, + MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + MLX5_FW_RESET_FLAGS_PENDING_COMP +}; + +struct mlx5_fw_reset { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct workqueue_struct *wq; + struct work_struct fw_live_patch_work; + struct work_struct reset_request_work; + struct work_struct reset_reload_work; + struct work_struct reset_now_work; + struct work_struct reset_abort_work; + unsigned long reset_flags; + struct timer_list timer; + struct completion done; + int ret; +}; + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (enable) + clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + else + set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); +} + +static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, + u8 reset_type_sel, u8 sync_resp, bool sync_start) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + + MLX5_SET(mfrl_reg, in, reset_level, reset_level); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); +} + +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0); + if (err) + return err; + + if (reset_level) + *reset_level = MLX5_GET(mfrl_reg, out, reset_level); + if (reset_type) + *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + + return 0; +} + +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + return mlx5_reg_mfrl_query(dev, reset_level, reset_type); +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + int err; + + 
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); + if (err) + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); +} + +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + /* if this is the driver that initiated the fw reset, devlink completed the reload */ + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { + complete(&fw_reset->done); + } else { + mlx5_load_one(dev, false); + devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + } +} + +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev, false); + err = mlx5_health_wait_pci_up(dev); + if (err) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + del_timer(&fw_reset->timer); +} + +static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + mlx5_stop_sync_reset_poll(dev); + clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); + if (poll_health) + mlx5_start_health_poll(dev); +} + +#define MLX5_RESET_POLL_INTERVAL (HZ / 10) +static void poll_sync_reset(struct timer_list *t) +{ + struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_core_dev *dev = fw_reset->dev; + u32 fatal_error; + + if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + return; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error) { + mlx5_core_warn(dev, "Got Device Reset\n"); + mlx5_sync_reset_clear_reset_requested(dev, false); + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + return; + } + + mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL)); +} + +static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + timer_setup(&fw_reset->timer, poll_sync_reset, 0); + fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL); + add_timer(&fw_reset->timer); +} + +static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false); +} + +static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); +} + +static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + mlx5_stop_health_poll(dev, true); + set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); + mlx5_start_sync_reset_poll(dev); +} + +static void mlx5_fw_live_patch_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = 
container_of(work, struct mlx5_fw_reset, + fw_live_patch_work); + struct mlx5_core_dev *dev = fw_reset->dev; + struct mlx5_fw_tracer *tracer; + + mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + tracer = dev->tracer; + if (IS_ERR_OR_NULL(tracer)) + return; + + if (mlx5_fw_tracer_reload(tracer)) + mlx5_core_err(dev, "Failed to reload FW tracer\n"); +} + +static void mlx5_sync_reset_request_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_request_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) { + err = mlx5_fw_reset_set_reset_sync_nack(dev); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", + err ? "Failed" : "Sent"); + return; + } + mlx5_sync_reset_set_reset_requested(dev); + err = mlx5_fw_reset_set_reset_sync_ack(dev); + if (err) + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); + else + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); +} + +#define MLX5_PCI_LINK_UP_TIMEOUT 2000 + +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *bridge = bridge_bus->self; + u16 reg16, dev_id, sdev_id; + unsigned long timeout; + struct pci_dev *sdev; + int cap, err; + u32 reg32; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return err; + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return err; + if (sdev_id != dev_id) + return -EPERM; + } + + cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); + if (!cap) + return -EOPNOTSUPP; + + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_save_state(sdev); + pci_cfg_access_lock(sdev); + } + /* PCI link toggle */ + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16); + if (err) + return err; + reg16 |= PCI_EXP_LNKCTL_LD; + err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + if (err) + return err; + msleep(500); + reg16 &= ~PCI_EXP_LNKCTL_LD; + err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16); + if (err) + return err; + + /* Check link */ + err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32); + if (err) + return err; + if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) { + mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32); + msleep(1000); + goto restore; + } + + timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT); + do { + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16); + if (err) + return err; + if (reg16 & PCI_EXP_LNKSTA_DLLLA) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 & PCI_EXP_LNKSTA_DLLLA) { + mlx5_core_info(dev, "PCI Link up\n"); + } else { + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n", + reg16, MLX5_PCI_LINK_UP_TIMEOUT); + err = -ETIMEDOUT; + } + +restore: + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_cfg_access_unlock(sdev); + pci_restore_state(sdev); + } + + return err; +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct 
mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_sync_reset_clear_reset_requested(dev, false); + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); + goto done; + } + + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev, false); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_abort_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_abort_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_sync_reset_clear_reset_requested(dev, true); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); +} + +static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; + u8 sync_event_rst_type; + + sync_fw_update_eqe = &eqe->data.sync_fw_update; + sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK; + switch (sync_event_rst_type) { + case MLX5_SYNC_RST_STATE_RESET_REQUEST: + queue_work(fw_reset->wq, &fw_reset->reset_request_work); + break; + case MLX5_SYNC_RST_STATE_RESET_NOW: + queue_work(fw_reset->wq, &fw_reset->reset_now_work); + break; + case MLX5_SYNC_RST_STATE_RESET_ABORT: + queue_work(fw_reset->wq, &fw_reset->reset_abort_work); + break; + } +} + +static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + break; + case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: + mlx5_sync_reset_events_handle(fw_reset, eqe); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +#define MLX5_FW_RESET_TIMEOUT_MSEC 5000 +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + int err; + + if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { + mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n", + MLX5_FW_RESET_TIMEOUT_MSEC / 1000); + err = -ETIMEDOUT; + goto out; + } + err = fw_reset->ret; +out: + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); + mlx5_eq_notifier_register(dev, &fw_reset->nb); +} + +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); +} + +int mlx5_fw_reset_init(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + + if (!fw_reset) + return -ENOMEM; + fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); + if (!fw_reset->wq) { + kfree(fw_reset); + return -ENOMEM; + } + + fw_reset->dev = dev; + dev->priv.fw_reset = fw_reset; + + INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); + 
INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); + INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); + INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); + + init_completion(&fw_reset->done); + return 0; +} + +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + destroy_workqueue(fw_reset->wq); + kfree(dev->priv.fw_reset); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h new file mode 100644 index 000000000000..7761ee5fc7d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_FW_RESET_H +#define __MLX5_FW_RESET_H + +#include "mlx5_core.h" + +void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); +bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +int mlx5_fw_reset_init(struct mlx5_core_dev *dev); +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index b31f769d2df9..54523bed16cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -110,7 +110,7 @@ static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) return rfr && synd; } -static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) { if (sensor_pci_not_working(dev)) return MLX5_SENSOR_PCI_COMM_ERR; @@ -173,7 +173,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) * Check again to avoid a redundant 2nd reset. If the fatal erros was * PCI related a reset won't help. 
*/ - fatal_error = check_fatal_sensors(dev); + fatal_error = mlx5_health_check_fatal_sensors(dev); if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || fatal_error == MLX5_SENSOR_NIC_DISABLED || fatal_error == MLX5_SENSOR_NIC_SW_RESET) { @@ -195,7 +195,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) bool err_detected = false; /* Mark the device as fatal in order to abort FW commands */ - if ((check_fatal_sensors(dev) || force) && + if ((mlx5_health_check_fatal_sensors(dev) || force) && dev->state == MLX5_DEVICE_STATE_UP) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; err_detected = true; @@ -208,7 +208,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) goto unlock; } - if (check_fatal_sensors(dev) || force) { /* protected state setting */ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } @@ -231,7 +231,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) mlx5_core_err(dev, "start\n"); - if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { /* Get cr-dump and reset FW semaphore */ lock = lock_sem_sw_reset(dev, true); @@ -308,26 +308,31 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) /* How much time to wait until health resetting the driver (in msecs) */ #define MLX5_RECOVERY_WAIT_MSECS 60000 -static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) { unsigned long end; - mlx5_core_warn(dev, "handling bad device here\n"); - mlx5_handle_bad_state(dev); end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); while (sensor_pci_not_working(dev)) { - if (time_after(jiffies, end)) { - mlx5_core_err(dev, - "health recovery flow aborted, PCI reads still not working\n"); - return -EIO; - } + if (time_after(jiffies, end)) + return -ETIMEDOUT; msleep(100); } + return 0; +} +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +{ + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); + if (mlx5_health_wait_pci_up(dev)) { + mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } mlx5_core_err(dev, "starting health recovery flow\n"); mlx5_recover_device(dev); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || - check_fatal_sensors(dev)) { + mlx5_health_check_fatal_sensors(dev)) { mlx5_core_err(dev, "health recovery failed\n"); return -EIO; } @@ -696,7 +701,7 @@ static void poll_health(struct timer_list *t) if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto out; - fatal_error = check_fatal_sensors(dev); + fatal_error = mlx5_health_check_fatal_sensors(dev); if (fatal_error && !health->fatal_error) { mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 874c70e8cc54..33081b24f10a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -102,7 +102,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, if (ldev->pf[i].netdev == ndev) return i; - return -1; + return -ENOENT; } static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) @@ -271,7 +271,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) bool do_bond, roce_lag; int err; - if (!dev0 || !dev1) + if (!mlx5_lag_is_ready(ldev)) return; spin_lock(&lag_lock); @@ -355,7 +355,7 @@ 
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, { struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; - bool is_bonded; + bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; int idx; @@ -374,7 +374,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx > -1) + if (idx >= 0) bond_status |= (1 << idx); num_slaves++; @@ -391,13 +391,24 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. - * Lag mode must be activebackup or hash. */ - is_bonded = (num_slaves == MLX5_MAX_PORTS) && - (bond_status == 0x3) && - ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || - (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + if (!mlx5_lag_is_ready(ldev) && is_in_lag) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + return 0; + } + + /* Lag mode must be activebackup or hash. */ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + if (is_in_lag && !mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; return 1; @@ -418,7 +429,7 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 0; idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); - if (idx == -1) + if (idx < 0) return 0; /* This information is used to determine virtual to physical @@ -445,6 +456,10 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); + + if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) + return NOTIFY_DONE; + tracker = ldev->tracker; switch (event) { @@ -493,14 +508,14 @@ static void mlx5_lag_dev_free(struct mlx5_lag *ldev) kfree(ldev); } -static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) +static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) { unsigned int fn = PCI_FUNC(dev->pdev->devfn); if (fn >= MLX5_MAX_PORTS) - return; + return -EPERM; spin_lock(&lag_lock); ldev->pf[fn].dev = dev; @@ -511,6 +526,8 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, dev->priv.lag = ldev; spin_unlock(&lag_lock); + + return fn; } static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, @@ -537,11 +554,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - int err; + int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + if (!MLX5_CAP_GEN(dev, vport_group_manager)) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -556,7 +571,18 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } } - mlx5_lag_dev_add_pf(ldev, dev, netdev); + if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) + return; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + tmp_dev = 
ldev->pf[i].dev; + if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || + MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + break; + } + + if (i >= MLX5_MAX_PORTS) + ldev->flags |= MLX5_LAG_FLAG_READY; if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -587,6 +613,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) mlx5_lag_dev_remove_pf(ldev, dev); + ldev->flags &= ~MLX5_LAG_FLAG_READY; + for (i = 0; i < MLX5_MAX_PORTS; i++) if (ldev->pf[i].dev) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index f1068aac6406..8d8cf2d0bc6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -16,6 +16,7 @@ enum { MLX5_LAG_FLAG_ROCE = 1 << 0, MLX5_LAG_FLAG_SRIOV = 1 << 1, MLX5_LAG_FLAG_MULTIPATH = 1 << 2, + MLX5_LAG_FLAG_READY = 1 << 3, }; #define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ @@ -59,6 +60,12 @@ __mlx5_lag_is_active(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } +static inline bool +mlx5_lag_is_ready(struct mlx5_lag *ldev) +{ + return ldev->flags & MLX5_LAG_FLAG_READY; +} + void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 9e68f5926ab6..88e58ac902de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -11,7 +11,7 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) + if (!mlx5_lag_is_ready(ldev)) return false; return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, @@ -131,7 +131,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); - mlx5_lag_set_port_affinity(ldev, ++i); + if (i < 0) + i = MLX5_LAG_NORMAL_AFFINITY; + else + ++i; + + mlx5_lag_set_port_affinity(ldev, i); } return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 2d55b7c22c03..c70c1f0ca0c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -150,28 +150,30 @@ static void mlx5_pps_out(struct work_struct *work) static void mlx5_timestamp_overflow(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, - overflow_work); + struct mlx5_core_dev *mdev; + struct mlx5_clock *clock; unsigned long flags; + clock = container_of(dwork, struct mlx5_clock, overflow_work); + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); u64 ns = 
timespec64_to_ns(ts); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&clock->tc, &clock->cycles, ns); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -180,13 +182,12 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct ptp_system_timestamp *sts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; u64 cycles, ns; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); cycles = mlx5_read_internal_timer(mdev, sts); ns = timecounter_cyc2time(&clock->tc, cycles); @@ -199,13 +200,14 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&clock->tc, delta); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -213,12 +215,13 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) { - u64 adj; - u32 diff; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; int neg_adj = 0; - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + u32 diff; + u64 adj; + if (delta < 0) { neg_adj = 1; @@ -229,11 +232,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? 
clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -431,13 +435,11 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, default: return -EOPNOTSUPP; } - - return -EOPNOTSUPP; } static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, - .name = "mlx5_p2p", + .name = "mlx5_ptp", .max_adj = 100000000, .n_alarm = 0, .n_ext_ts = 0, @@ -465,7 +467,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) { - struct mlx5_core_dev *mdev = clock->mdev; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -538,20 +541,23 @@ static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); - struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta, ns; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; + struct mlx5_core_dev *mdev; struct timespec64 ts; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); + switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; - ptp_event.timestamp = timecounter_cyc2time(&clock->tc, - be64_to_cpu(eqe->data.pps.time_stamp)); + ptp_event.timestamp = + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); if (clock->pps_info.enabled) { ptp_event.type = PTP_CLOCK_PPSUSR; ptp_event.pps_times.ts_real = @@ -574,8 +580,8 @@ static int mlx5_pps_event(struct notifier_block *nb, cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, clock->cycles.mult); clock->pps_info.start[pin] = cycles_now + cycles_delta; - schedule_work(&clock->pps_info.out_work); write_sequnlock_irqrestore(&clock->lock, flags); + schedule_work(&clock->pps_info.out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -605,7 +611,6 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->cycles.shift); clock->nominal_c_mult = clock->cycles.mult; clock->cycles.mask = CLOCKSOURCE_MASK(41); - clock->mdev = mdev; timecounter_init(&clock->tc, &clock->cycles, ktime_to_ns(ktime_get_real())); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 5c681e31983b..81f2cc4ca1da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -78,7 +78,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); -void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_cq_tasklet_cb(struct tasklet_struct *t); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c new file mode 100644 index 000000000000..947f346bdc2d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -0,0 +1,911 @@ +// SPDX-License-Identifier: 
GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "lib/fs_chains.h" +#include "en/mapping.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" + +#define chains_lock(chains) ((chains)->lock) +#define chains_ht(chains) ((chains)->chains_ht) +#define chains_mapping(chains) ((chains)->chains_mapping) +#define prios_ht(chains) ((chains)->prios_ht) +#define ft_pool_left(chains) ((chains)->ft_left) +#define tc_default_ft(chains) ((chains)->tc_default_ft) +#define tc_end_ft(chains) ((chains)->tc_end_ft) +#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \ + FDB_TC_OFFLOAD : MLX5E_TC_PRIO) + +/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS), + * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via get_next_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. + */ +#define FT_SIZE (16 * 1024 * 1024) +static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 128 }; +#define FT_TBL_SZ (64 * 1024) + +struct mlx5_fs_chains { + struct mlx5_core_dev *dev; + + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_default_ft; + struct mlx5_flow_table *tc_end_ft; + struct mapping_ctx *chains_mapping; + + enum mlx5_flow_namespace_type ns; + u32 group_num; + u32 flags; + + int ft_left[ARRAY_SIZE(FT_POOLS)]; +}; + +struct fs_chain { + struct rhash_head node; + + u32 chain; + + int ref; + int id; + + struct mlx5_fs_chains *chains; + struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; +}; + +struct prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct prio { + struct rhash_head node; + struct list_head list; + + struct prio_key key; + + int ref; + + struct fs_chain *chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fs_chain, node), + .key_offset = offsetof(struct fs_chain, chain), + .key_len = sizeof_field(struct fs_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct prio, node), + .key_offset = offsetof(struct prio, key), + .key_len = sizeof_field(struct prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED; +} + +static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; +} + +bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_prios_supported(chains) && + mlx5_chains_ignore_flow_level_supported(chains); +} + +u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX - 1; + + /* We should 
get here only for eswitch case */ + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_get_chain_range(chains) + 1; +} + +u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* Same value for FDB and NIC RX tables */ + return FDB_TC_LEVELS_PER_PRIO; +} + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + tc_end_ft(chains) = ft; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_chains_get_avail_sz_from_pool(struct mlx5_fs_chains *chains, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (ft_pool_left(chains)[i] && FT_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --ft_pool_left(chains)[found_i]; + return FT_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_chains_put_sz_to_pool(struct mlx5_fs_chains *chains, int sz) +{ + int i; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (sz == FT_POOLS[i]) { + ++ft_pool_left(chains)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz); +} + +static void +mlx5_chains_init_sz_pool(struct mlx5_fs_chains *chains, u32 ft_max) +{ + int i; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) + ft_pool_left(chains)[i] = + FT_POOLS[i] <= ft_max ? FT_SIZE / FT_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_chains_create_table(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int sz; + + if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + mlx5_chains_get_avail_sz_from_pool(chains, FT_TBL_SZ) : + mlx5_chains_get_avail_sz_from_pool(chains, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_default_ft(chains) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_default_ft(chains); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous fs_core managed prio. + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!mlx5_chains_ignore_flow_level_supported(chains) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? + mlx5_get_fdb_sub_ns(chains->dev, chain) : + mlx5_get_flow_namespace(chains->dev, chains->ns); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = ns_to_chains_fs_prio(chains->ns); + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. 
Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(ft), chain, prio, level, sz); + mlx5_chains_put_sz_to_pool(chains, sz); + return ft; + } + + return ft; +} + +static void +mlx5_chains_destroy_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_chains_put_sz_to_pool(chains, ft->max_fte); + mlx5_destroy_flow_table(ft); +} + +static int +create_chain_restore(struct fs_chain *chain) +{ + struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; + char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + struct mlx5_fs_chains *chains = chain->chains; + enum mlx5e_tc_attr_to_reg chain_to_reg; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) || + !mlx5_chains_prios_supported(chains)) + return 0; + + err = mapping_add(chains_mapping(chains), &chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(chains_mapping(chains), + &chain->chain, &index); + mapping_remove(chains_mapping(chains), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + chain->id = index; + + if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { + chain_to_reg = CHAIN_TO_REG; + chain->restore_rule = esw_add_restore_rule(esw, chain->id); + if (IS_ERR(chain->restore_rule)) { + err = PTR_ERR(chain->restore_rule); + goto err_rule; + } + } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) { + /* For NIC RX we don't need a restore rule + * since we write the metadata to reg_b + * that is passed to SW directly. 
+ */ + chain_to_reg = NIC_CHAIN_TO_REG; + } else { + err = -EINVAL; + goto err_rule; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen * 8); + MLX5_SET(set_action_in, modact, data, chain->id); + mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + chain->miss_modify_hdr = mod_hdr; + + return 0; + +err_mod_hdr: + if (!IS_ERR_OR_NULL(chain->restore_rule)) + mlx5_del_flow_rules(chain->restore_rule); +err_rule: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(chains_mapping(chains), chain->id); + return err; +} + +static void destroy_chain_restore(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + if (!chain->miss_modify_hdr) + return; + + if (chain->restore_rule) + mlx5_del_flow_rules(chain->restore_rule); + + mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr); + mapping_remove(chains_mapping(chains), chain->id); +} + +static struct fs_chain * +mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s = NULL; + int err; + + chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL); + if (!chain_s) + return ERR_PTR(-ENOMEM); + + chain_s->chains = chains; + chain_s->chain = chain; + INIT_LIST_HEAD(&chain_s->prios_list); + + err = create_chain_restore(chain_s); + if (err) + goto err_restore; + + err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node, + chain_params); + if (err) + goto err_insert; + + return chain_s; + +err_insert: + destroy_chain_restore(chain_s); +err_restore: + kvfree(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_chain(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + rhashtable_remove_fast(&chains_ht(chains), &chain->node, + chain_params); + + destroy_chain_restore(chain); + kvfree(chain); +} + +static struct fs_chain * +mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s; + + chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain, + chain_params); + if (!chain_s) { + chain_s = mlx5_chains_create_chain(chains, chain); + if (IS_ERR(chain_s)) + return chain_s; + } + + chain_s->ref++; + + return chain_s; +} + +static struct mlx5_flow_handle * +mlx5_chains_add_miss_rule(struct fs_chain *chain, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_fs_chains *chains = chain->chains; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_NO_APPEND; + if (mlx5_chains_ignore_flow_level_supported(chain->chains)) + act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_ft; + + if (next_ft == tc_end_ft(chains) && + chain->chain != mlx5_chains_get_nf_ft_chain(chains) && + mlx5_chains_prios_supported(chains)) { + act.modify_hdr = chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1); +} + +static int +mlx5_chains_update_prio_prevs(struct prio *prio, + struct mlx5_flow_table *next_ft) +{ + struct 
mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fs_chain *chain = prio->chain; + struct prio *pos; + int n = 0, err; + + if (prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + miss_rules[n] = mlx5_chains_add_miss_rule(chain, + pos->ft, + next_ft); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_ft = next_ft; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_chains_put_chain(struct fs_chain *chain) +{ + if (--chain->ref == 0) + mlx5_chains_destroy_chain(chain); +} + +static struct prio * +mlx5_chains_create_prio(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *ft; + struct prio *prio_s = NULL; + struct fs_chain *chain_s; + struct list_head *pos; + u32 *flow_group_in; + int err; + + chain_s = mlx5_chains_get_chain(chains, chain); + if (IS_ERR(chain_s)) + return ERR_CAST(chain_s); + + prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!prio_s || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + tc_default_ft(chains) : + tc_end_ft(chains); + list_for_each(pos, &chain_s->prios_list) { + struct prio *p = list_entry(pos, struct prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_ft = p->key.level == 0 ? 
p->ft : p->next_ft; + break; + } + } + + ft = mlx5_chains_create_table(chains, chain, prio, level); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_ft */ + miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + prio_s->miss_group = miss_group; + prio_s->miss_rule = miss_rule; + prio_s->next_ft = next_ft; + prio_s->chain = chain_s; + prio_s->key.chain = chain; + prio_s->key.prio = prio; + prio_s->key.level = level; + prio_s->ft = ft; + + err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node, + prio_params); + if (err) + goto err_insert; + + list_add(&prio_s->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_chains_update_prio_prevs(prio_s, ft); + if (err) + goto err_update; + + kvfree(flow_group_in); + return prio_s; + +err_update: + list_del(&prio_s->list); + rhashtable_remove_fast(&prios_ht(chains), &prio_s->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_chains_destroy_table(chains, ft); +err_create: +err_alloc: + kvfree(prio_s); + kvfree(flow_group_in); + mlx5_chains_put_chain(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains, + struct prio *prio) +{ + struct fs_chain *chain = prio->chain; + + WARN_ON(mlx5_chains_update_prio_prevs(prio, + prio->next_ft)); + + list_del(&prio->list); + rhashtable_remove_fast(&prios_ht(chains), &prio->node, + prio_params); + mlx5_del_flow_rules(prio->miss_rule); + mlx5_destroy_flow_group(prio->miss_group); + mlx5_chains_destroy_table(chains, prio->ft); + mlx5_chains_put_chain(chain); + kvfree(prio); +} + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct prio *prio_s; + struct prio_key key; + int l = 0; + + if ((chain > mlx5_chains_get_chain_range(chains) && + chain != mlx5_chains_get_nf_ft_chain(chains)) || + prio > mlx5_chains_get_prio_range(chains) || + level > mlx5_chains_get_level_range(chains)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_chains_get_table(chains, chain, prio, l); + if (IS_ERR(prev_fts)) { + prio_s = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) { + prio_s = mlx5_chains_create_prio(chains, chain, + prio, level); + if (IS_ERR(prio_s)) + goto err_create_prio; + } + + ++prio_s->ref; + mutex_unlock(&chains_lock(chains)); + + return prio_s->ft; + +err_create_prio: + mutex_unlock(&chains_lock(chains)); +err_get_prevs: + while (--l >= 0) + mlx5_chains_put_table(chains, chain, prio, l); + return ERR_CAST(prio_s); +} + +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct prio *prio_s; + struct prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) + goto err_get_prio; + + if (--prio_s->ref == 0) + mlx5_chains_destroy_prio(chains, prio_s); + mutex_unlock(&chains_lock(chains)); + + while (level-- > 0) + mlx5_chains_put_table(chains, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&chains_lock(chains)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) +{ + return tc_end_ft(chains); +} + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) +{ + u32 chain, prio, level; + int err; + + if (!mlx5_chains_ignore_flow_level_supported(chains)) { + err = -EOPNOTSUPP; + + mlx5_core_warn(chains->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_chains_get_chain_range(chains), + prio = mlx5_chains_get_prio_range(chains); + level = mlx5_chains_get_level_range(chains); + + return mlx5_chains_create_table(chains, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_chains_destroy_table(chains, ft); +} + +static struct mlx5_fs_chains * +mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains_priv; + struct mapping_ctx *mapping; + u32 max_flow_counter; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return ERR_PTR(-ENOMEM); + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + mlx5_core_dbg(dev, + "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, attr->max_grp_num, attr->max_ft_sz); + + chains_priv->dev = dev; + chains_priv->flags = attr->flags; + chains_priv->ns = attr->ns; + chains_priv->group_num = attr->max_grp_num; + tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft; + + mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_chains_get_chain_range(chains_priv), + mlx5_chains_get_prio_range(chains_priv)); + + mlx5_chains_init_sz_pool(chains_priv, attr->max_ft_sz); + + err = rhashtable_init(&chains_ht(chains_priv), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&prios_ht(chains_priv), &prio_params); + if (err) + goto 
init_prios_ht_err; + + mapping = mapping_create(sizeof(u32), attr->max_restore_tag, + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + chains_mapping(chains_priv) = mapping; + + mutex_init(&chains_lock(chains_priv)); + + return chains_priv; + +mapping_err: + rhashtable_destroy(&prios_ht(chains_priv)); +init_prios_ht_err: + rhashtable_destroy(&chains_ht(chains_priv)); +init_chains_ht_err: + kfree(chains_priv); + return ERR_PTR(err); +} + +static void +mlx5_chains_cleanup(struct mlx5_fs_chains *chains) +{ + mutex_destroy(&chains_lock(chains)); + mapping_destroy(chains_mapping(chains)); + rhashtable_destroy(&prios_ht(chains)); + rhashtable_destroy(&chains_ht(chains)); + + kfree(chains); +} + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains; + + chains = mlx5_chains_init(dev, attr); + + return chains; +} + +void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_cleanup(chains); +} + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(chains_mapping(chains), &chain, chain_mapping); +} + +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) +{ + return mapping_remove(chains_mapping(chains), chain_mapping); +} + +int mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(chains_mapping(chains), tag, chain); + if (err) { + mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h new file mode 100644 index 000000000000..6d5be31b05dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +#include <linux/mlx5/fs.h> + +struct mlx5_fs_chains; + +enum mlx5_chains_flags { + MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1), + MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2), +}; + +struct mlx5_chains_attr { + enum mlx5_flow_namespace_type ns; + u32 flags; + u32 max_ft_sz; + u32 max_grp_num; + struct mlx5_flow_table *default_ft; + u32 max_restore_tag; +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +bool +mlx5_chains_prios_supported(struct mlx5_fs_chains *chains); +bool +mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains); +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping); +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, + u32 chain_mapping); + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr); +void mlx5_chains_destroy(struct mlx5_fs_chains *chains); + +int +mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, u32 *chain); + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) {}; + +static inline struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ return NULL; } +static inline void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) {}; + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ce43e3feccd9..8ff207aa1479 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -57,6 +57,7 @@ #include "lib/mpfs.h" #include "eswitch.h" #include "devlink.h" +#include "fw_reset.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -548,6 +549,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) + MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, set_hca_cap, @@ -739,7 +743,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, pci_set_drvdata(dev->pdev, dev); dev->bar_addr = 
pci_resource_start(pdev, 0); - priv->numa_node = dev_to_node(&dev->pdev->dev); + priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); err = mlx5_pci_enable_device(dev); if (err) { @@ -832,6 +836,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eq_cleanup; } + err = mlx5_fw_reset_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize fw reset events\n"); + goto err_events_cleanup; + } + mlx5_cq_debugfs_init(dev); mlx5_init_reserved_gids(dev); @@ -893,6 +903,8 @@ err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); +err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -920,6 +932,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); mlx5_irq_table_cleanup(dev); @@ -1078,6 +1091,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fw_tracer; } + mlx5_fw_reset_events_start(dev); mlx5_hv_vhca_init(dev->hv_vhca); err = mlx5_rsc_dump_init(dev); @@ -1139,6 +1153,7 @@ err_fpga_start: mlx5_rsc_dump_cleanup(dev); err_rsc_dump: mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: mlx5_eq_table_destroy(dev); @@ -1161,6 +1176,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fc1649dac11b..8cec85ab419d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -100,6 +100,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev) +{ + return &dev->pdev->dev; +} + enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -123,6 +128,8 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_error_sw_reset(struct mlx5_core_dev *dev); +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev); +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index c0e18f2ade99..150638814517 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -238,7 +238,7 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, rb_erase(&fwp->rb_node, root); if (in_free_list) list_del(&fwp->list); - dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK, + dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(fwp->page); kfree(fwp); @@ -265,7 +265,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, 
u32 func_id) static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) { - struct device *device = dev->device; + struct device *device = mlx5_core_dma_dev(dev); int nid = dev_to_node(device); struct page *page; u64 zero_addr = 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index c63f727273d8..7df883686d46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -203,7 +203,6 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_param mask = {}; - struct mlx5dr_match_misc3 *misc3; struct mlx5dr_ste_build *sb; bool inner, rx; int idx = 0; @@ -252,18 +251,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { - ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, - dmn, inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, + dmn, inner, rx); } if (dr_mask_is_smac_set(&mask.outer) && dr_mask_is_dmac_set(&mask.outer)) { - ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, - inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + inner, rx); } if (dr_mask_is_smac_set(&mask.outer)) @@ -313,8 +308,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); - misc3 = &mask.misc3; - if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) && + if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) && mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { @@ -340,10 +334,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.inner) && dr_mask_is_dmac_set(&mask.inner)) { - ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], - &mask, inner, rx); - if (ret) - return ret; + mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + &mask, inner, rx); } if (dr_mask_is_smac_set(&mask.inner)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 6ec5106bc472..b3c9dc032026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -242,7 +242,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); new_ste = &new_htbl->ste_arr[new_idx]; - if (mlx5dr_ste_not_used_ste(new_ste)) { + if (mlx5dr_ste_is_not_used(new_ste)) { mlx5dr_htbl_get(new_htbl); list_add_tail(&new_ste->miss_list_node, mlx5dr_ste_get_miss_list(new_ste)); @@ -335,7 +335,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, for (i = 0; i < cur_entries; i++) { cur_ste = &cur_htbl->ste_arr[i]; - if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */ + if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ continue; err = dr_rule_rehash_copy_miss_list(matcher, @@ -791,7 +791,7 @@ again: miss_list = &cur_htbl->chunk->miss_list[index]; ste = &cur_htbl->ste_arr[index]; - if (mlx5dr_ste_not_used_ste(ste)) 
{ + if (mlx5dr_ste_is_not_used(ste)) { if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, ste, ste_location, hw_ste, miss_list, @@ -985,31 +985,28 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec) static bool dr_rule_skip(enum mlx5dr_domain_type domain, enum mlx5dr_ste_entry_type ste_type, struct mlx5dr_match_param *mask, - struct mlx5dr_match_param *value) + struct mlx5dr_match_param *value, + u32 flow_source) { + bool rx = ste_type == MLX5DR_STE_TYPE_RX; + if (domain != MLX5DR_DOMAIN_TYPE_FDB) return false; if (mask->misc.source_port) { - if (ste_type == MLX5DR_STE_TYPE_RX) - if (value->misc.source_port != WIRE_PORT) - return true; + if (rx && value->misc.source_port != WIRE_PORT) + return true; - if (ste_type == MLX5DR_STE_TYPE_TX) - if (value->misc.source_port == WIRE_PORT) - return true; + if (!rx && value->misc.source_port == WIRE_PORT) + return true; } - /* Metadata C can be used to describe the source vport */ - if (mask->misc2.metadata_reg_c_0) { - if (ste_type == MLX5DR_STE_TYPE_RX) - if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT) - return true; + if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) + return true; + + if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) + return true; - if (ste_type == MLX5DR_STE_TYPE_TX) - if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT) - return true; - } return false; } @@ -1038,7 +1035,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, INIT_LIST_HEAD(&nic_rule->rule_members_list); - if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) + if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param, + rule->flow_source)) return 0; hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); @@ -1173,7 +1171,8 @@ static struct mlx5dr_rule * dr_rule_create_rule(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]) + struct mlx5dr_action *actions[], + u32 flow_source) { struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_param param = {}; @@ -1188,6 +1187,7 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher, return NULL; rule->matcher = matcher; + rule->flow_source = flow_source; INIT_LIST_HEAD(&rule->rule_actions_list); ret = dr_rule_add_action_members(rule, num_actions, actions); @@ -1232,13 +1232,14 @@ free_rule: struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]) + struct mlx5dr_action *actions[], + u32 flow_source) { struct mlx5dr_rule *rule; refcount_inc(&matcher->refcount); - rule = dr_rule_create_rule(matcher, value, num_actions, actions); + rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source); if (!rule) refcount_dec(&matcher->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 2ca79b9bde1f..24dede1b0a20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -466,10 +466,10 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, * need to add the bit_mask */ for (j = 0; j < num_stes_per_iter; j++) { - u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; + struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j]; u32 ste_off = j * DR_STE_SIZE; - if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { + if 
(mlx5dr_ste_is_not_used(ste)) { memcpy(data + ste_off, formatted_ste, DR_STE_SIZE); } else { @@ -831,7 +831,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, if (!mr) return NULL; - dma_device = &mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(mdev); dma_addr = dma_map_single(dma_device, buf, size, DMA_BIDIRECTIONAL); err = dma_mapping_error(dma_device, dma_addr); @@ -860,7 +860,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) { mlx5_core_destroy_mkey(mdev, &mr->mkey); - dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, + dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, DMA_BIDIRECTIONAL); kfree(mr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 00c2f598f034..b01aaec75622 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -155,6 +155,13 @@ static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) return byte_mask; } +static u8 *mlx5dr_ste_get_tag(u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + return hw_ste->tag; +} + void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) { struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; @@ -549,25 +556,6 @@ void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr) dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); } -/* The assumption here is that we don't update the ste->hw_ste if it is not - * used ste, so it will be all zero, checking the next_lu_type. - */ -bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste) -{ - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste; - - if (MLX5_GET(ste_general, hw_ste, next_lu_type) == - MLX5DR_STE_LU_TYPE_NOP) - return true; - - return false; -} - -bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste) -{ - return !ste->refcount; -} - /* Init one ste as a pattern for ste data array */ void mlx5dr_ste_set_formatted_ste(u16 gvmi, struct mlx5dr_domain_rx_tx *nic_dmn, @@ -728,7 +716,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, { if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { - mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); + mlx5dr_err(dmn, + "Partial mask source_port is not supported\n"); + return -EINVAL; + } + if (mask->misc.source_eswitch_owner_vhca_id && + mask->misc.source_eswitch_owner_vhca_id != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_eswitch_owner_vhca_id is not supported\n"); return -EINVAL; } } @@ -760,7 +755,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); - ret = sb->ste_build_tag_func(value, sb, ste_arr); + ret = sb->ste_build_tag_func(value, sb, mlx5dr_ste_get_tag(ste_arr)); if (ret) return ret; @@ -778,8 +773,8 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, return 0; } -static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, - bool inner, u8 *bit_mask) +static void dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) { struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; @@ -807,13 +802,6 @@ static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); mask->svlan_tag = 0; } - - if (mask->cvlan_tag || mask->svlan_tag) { - pr_info("Invalid c/svlan mask configuration\n"); - return -EINVAL; - } - - return 0; } static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) @@ -1059,11 +1047,9 @@ void mlx5dr_ste_copy_param(u8 match_criteria, static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); @@ -1104,23 +1090,17 @@ static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { - int ret; - - ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); - if (ret) - return ret; + dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); sb->rx = rx; sb->inner = inner; sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner); sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag; - - return 0; } static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value, @@ -1136,11 +1116,9 @@ static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); @@ -1176,11 +1154,9 @@ static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); @@ -1238,11 +1214,9 @@ static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); @@ -1328,12 +1302,10 @@ dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, } static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, - bool inner, u8 *hw_ste_p) + bool inner, u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer; struct mlx5dr_match_misc *misc_spec = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); @@ -1403,16 +1375,14 @@ static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); - return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, @@ -1440,16 +1410,14 @@ static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); - return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, @@ -1495,12 +1463,10 @@ static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); @@ -1561,11 +1527,9 @@ static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *va static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); @@ -1608,11 +1572,9 @@ static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); @@ -1647,7 +1609,7 @@ void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { return 0; } @@ -1673,11 +1635,9 @@ static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; - u8 *tag = hw_ste->tag; if (sb->inner) DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag); @@ -1716,11 +1676,9 @@ static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); @@ -1781,11 +1739,9 @@ static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; - u8 *tag = hw_ste->tag; if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, @@ -1903,11 +1859,9 @@ static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc_3 = &value->misc3; - u8 *tag = hw_ste->tag; u32 icmp_header_data; int dw0_location; int dw1_location; @@ -2007,11 +1961,9 @@ static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *val static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, misc_2_mask, metadata_reg_a); @@ -2052,11 +2004,9 @@ static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc3 = &value->misc3; - u8 *tag = 
hw_ste->tag; if (sb->inner) { DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); @@ -2102,11 +2052,9 @@ dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(struct mlx5dr_match_param *value static int dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc3 *misc3 = &value->misc3; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, outer_vxlan_gpe_flags, misc3, @@ -2158,11 +2106,9 @@ dr_ste_build_flex_parser_tnl_geneve_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, geneve_protocol_type, misc, geneve_protocol_type); @@ -2205,11 +2151,9 @@ static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2 = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); @@ -2249,11 +2193,9 @@ static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value, static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc2 *misc2 = &value->misc2; - u8 *tag = hw_ste->tag; DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); @@ -2276,38 +2218,25 @@ void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_build_register_1_tag; } -static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, - u8 *bit_mask) +static void dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) { struct mlx5dr_match_misc *misc_mask = &value->misc; - /* Partial misc source_port is not supported */ - if (misc_mask->source_port && misc_mask->source_port != 0xffff) - return -EINVAL; - - /* Partial misc source_eswitch_owner_vhca_id is not supported */ - if (misc_mask->source_eswitch_owner_vhca_id && - misc_mask->source_eswitch_owner_vhca_id != 0xffff) - return -EINVAL; - DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); misc_mask->source_eswitch_owner_vhca_id = 0; - - return 0; } static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p) + u8 *tag) { - struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; struct mlx5dr_match_misc *misc = &value->misc; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; u8 *bit_mask = sb->bit_mask; - u8 *tag = hw_ste->tag; bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2339,19 
+2268,15 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn, - bool inner, bool rx) +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx) { - int ret; - /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */ sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id; - ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); - if (ret) - return ret; + dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); sb->rx = rx; sb->dmn = dmn; @@ -2359,6 +2284,4 @@ int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP; sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag; - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 0883956c58c0..f50f3b107aa3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -194,7 +194,7 @@ struct mlx5dr_ste_build { u8 bit_mask[DR_STE_SIZE_MASK]; int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, struct mlx5dr_ste_build *sb, - u8 *hw_ste_p); + u8 *tag); }; struct mlx5dr_ste_htbl * @@ -227,7 +227,6 @@ void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi); void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size); void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr); void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); -bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste); bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, u8 ste_location); void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag); @@ -266,6 +265,11 @@ static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) ste->refcount++; } +static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste) +{ + return !ste->refcount; +} + void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl); bool mlx5dr_ste_equal_tag(void *src, void *dst); @@ -284,9 +288,9 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, struct mlx5dr_match_param *value, u8 *ste_arr); -int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); @@ -342,10 +346,10 @@ void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn, - bool inner, bool rx); +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx); void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); /* Actions utils */ @@ -793,6 +797,7 @@ struct mlx5dr_rule { struct 
mlx5dr_rule_rx_tx rx; struct mlx5dr_rule_rx_tx tx; struct list_head rule_actions_list; + u32 flow_source; }; void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, @@ -991,7 +996,6 @@ struct mlx5dr_icm_chunk * mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, enum mlx5dr_icm_chunk_size chunk_size); void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); -bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste); int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, struct mlx5dr_domain_rx_tx *nic_dmn, struct mlx5dr_ste_htbl *htbl, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 9b08eb557a31..96c39a17d026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -487,7 +487,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, ¶ms, num_actions, - actions); + actions, + fte->flow_context.flow_source); if (!rule) { err = -EINVAL; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 7deaca9ade3b..7914fe3fc68d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -67,7 +67,8 @@ struct mlx5dr_rule * mlx5dr_rule_create(struct mlx5dr_matcher *matcher, struct mlx5dr_match_parameters *value, size_t num_actions, - struct mlx5dr_action *actions[]); + struct mlx5dr_action *actions[], + u32 flow_source); int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index ec45a03140d7..7f77c2a71d1c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -20,11 +20,13 @@ #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/firmware.h> #include <asm/byteorder.h> #include <net/devlink.h> #include <trace/events/devlink.h> #include "core.h" +#include "core_env.h" #include "item.h" #include "cmd.h" #include "port.h" @@ -32,6 +34,7 @@ #include "emad.h" #include "reg.h" #include "resources.h" +#include "../mlxfw/mlxfw.h" static LIST_HEAD(mlxsw_core_driver_list); static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); @@ -82,6 +85,11 @@ struct mlxsw_core { struct mlxsw_core_port *ports; unsigned int max_ports; bool fw_flash_in_progress; + struct { + struct devlink_health_reporter *fw_fatal; + } health; + struct mlxsw_env *env; + bool is_initialized; /* Denotes if core was already initialized. 
*/ unsigned long driver_priv[]; /* driver_priv has to be always the last item */ }; @@ -128,6 +136,11 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_res_query_enabled); +bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->temp_warn_enabled; +} + bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev) @@ -864,6 +877,294 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) return mlxsw_driver; } +struct mlxsw_core_fw_info { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_core *mlxsw_core; +}; + +static int mlxsw_core_fw_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcqi_pl[MLXSW_REG_MCQI_LEN]; + int err; + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcqi), mcqi_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_core_fw_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcda_pl[MLXSW_REG_MCDA_LEN]; + + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcda), mcda_pl); +} + +static int mlxsw_core_fw_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, 
MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, fwhandle, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_core_fw_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + u8 error_code; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_core_fw_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static void mlxsw_core_fw_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_core_fw_info *mlxsw_core_fw_info = + container_of(mlxfw_dev, struct mlxsw_core_fw_info, mlxfw_dev); + struct mlxsw_core *mlxsw_core = mlxsw_core_fw_info->mlxsw_core; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, fwhandle, 0); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcc), mcc_pl); +} + +static const struct mlxfw_dev_ops mlxsw_core_fw_mlxsw_dev_ops = { + .component_query = mlxsw_core_fw_component_query, + .fsm_lock = mlxsw_core_fw_fsm_lock, + .fsm_component_update = mlxsw_core_fw_fsm_component_update, + .fsm_block_download = mlxsw_core_fw_fsm_block_download, + .fsm_component_verify = mlxsw_core_fw_fsm_component_verify, + .fsm_activate = mlxsw_core_fw_fsm_activate, + .fsm_query_state = mlxsw_core_fw_fsm_query_state, + .fsm_cancel = mlxsw_core_fw_fsm_cancel, + .fsm_release = mlxsw_core_fw_fsm_release, +}; + +static int mlxsw_core_fw_flash(struct mlxsw_core *mlxsw_core, const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core_fw_info mlxsw_core_fw_info = { + .mlxfw_dev = { + .ops = &mlxsw_core_fw_mlxsw_dev_ops, + .psid = mlxsw_core->bus_info->psid, + .psid_size = strlen(mlxsw_core->bus_info->psid), + .devlink = priv_to_devlink(mlxsw_core), + }, + .mlxsw_core = mlxsw_core + }; + int err; + + mlxsw_core->fw_flash_in_progress = true; + err = mlxfw_firmware_flash(&mlxsw_core_fw_info.mlxfw_dev, firmware, extack); + mlxsw_core->fw_flash_in_progress = false; + + return err; +} + +static int mlxsw_core_fw_rev_validate(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_fw_rev 
*req_rev, + const char *filename) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_bus_info->fw_rev; + union devlink_param_value value; + const struct firmware *firmware; + int err; + + /* Don't check if driver does not require it */ + if (!req_rev || !filename) + return 0; + + /* Don't check if devlink 'fw_load_policy' param is 'flash' */ + err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_core), + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + &value); + if (err) + return err; + if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) + return 0; + + /* Validate driver & FW are compatible */ + if (rev->major != req_rev->major) { + WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", + rev->major, req_rev->major); + return -EINVAL; + } + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) + return 0; + + dev_err(mlxsw_bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); + dev_info(mlxsw_bus_info->dev, "Flashing firmware using file %s\n", filename); + + err = request_firmware_direct(&firmware, filename, mlxsw_bus_info->dev); + if (err) { + dev_err(mlxsw_bus_info->dev, "Could not request firmware file %s\n", filename); + return err; + } + + err = mlxsw_core_fw_flash(mlxsw_core, firmware, NULL); + release_firmware(firmware); + if (err) + dev_err(mlxsw_bus_info->dev, "Could not upgrade firmware\n"); + + /* On FW flash success, tell the caller FW reset is needed + * if current FW supports it. + */ + if (rev->minor >= req_rev->can_reset_minor) + return err ? err : -EAGAIN; + else + return 0; +} + +static int mlxsw_core_fw_flash_update(struct mlxsw_core *mlxsw_core, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + const struct firmware *firmware; + int err; + + err = request_firmware_direct(&firmware, params->file_name, mlxsw_core->bus_info->dev); + if (err) + return err; + err = mlxsw_core_fw_flash(mlxsw_core, firmware, extack); + release_firmware(firmware); + + return err; +} + +static int mlxsw_core_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER && + val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) { + NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlxsw_core_fw_devlink_params[] = { + DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlxsw_core_devlink_param_fw_load_policy_validate), +}; + +static int mlxsw_core_fw_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = devlink_params_register(devlink, mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); + if (err) + return err; + + value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; + devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, value); + return 0; +} + +static void mlxsw_core_fw_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), mlxsw_core_fw_devlink_params, + ARRAY_SIZE(mlxsw_core_fw_devlink_params)); +} + static int mlxsw_devlink_port_split(struct devlink *devlink, unsigned int 
port_index, unsigned int count, @@ -1113,7 +1414,8 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, static int mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, - bool netns_change, + bool netns_change, enum devlink_reload_action action, + enum devlink_reload_limit limit, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); @@ -1126,11 +1428,14 @@ mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, } static int -mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, +mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); return mlxsw_core_bus_device_register(mlxsw_core->bus_info, mlxsw_core->bus, mlxsw_core->bus_priv, true, @@ -1138,17 +1443,12 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, } static int mlxsw_devlink_flash_update(struct devlink *devlink, - const char *file_name, - const char *component, + struct devlink_flash_update_params *params, struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; - if (!mlxsw_driver->flash_update) - return -EOPNOTSUPP; - return mlxsw_driver->flash_update(mlxsw_core, file_name, - component, extack); + return mlxsw_core_fw_flash_update(mlxsw_core, params, extack); } static int mlxsw_devlink_trap_init(struct devlink *devlink, @@ -1268,6 +1568,8 @@ mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink, } static const struct devlink_ops mlxsw_devlink_ops = { + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, .port_type_set = mlxsw_devlink_port_type_set, @@ -1296,6 +1598,263 @@ static const struct devlink_ops mlxsw_devlink_ops = { .trap_policer_counter_get = mlxsw_devlink_trap_policer_counter_get, }; +static int mlxsw_core_params_register(struct mlxsw_core *mlxsw_core) +{ + int err; + + err = mlxsw_core_fw_params_register(mlxsw_core); + if (err) + return err; + + if (mlxsw_core->driver->params_register) { + err = mlxsw_core->driver->params_register(mlxsw_core); + if (err) + goto err_params_register; + } + return 0; + +err_params_register: + mlxsw_core_fw_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core_fw_params_unregister(mlxsw_core); + if (mlxsw_core->driver->params_register) + mlxsw_core->driver->params_unregister(mlxsw_core); +} + +struct mlxsw_core_health_event { + struct mlxsw_core *mlxsw_core; + char mfde_pl[MLXSW_REG_MFDE_LEN]; + struct work_struct work; +}; + +static void mlxsw_core_health_event_work(struct work_struct *work) +{ + struct mlxsw_core_health_event *event; + struct mlxsw_core *mlxsw_core; + + event = container_of(work, struct mlxsw_core_health_event, work); + mlxsw_core = event->mlxsw_core; + devlink_health_report(mlxsw_core->health.fw_fatal, "FW fatal event occurred", + event->mfde_pl); + kfree(event); +} + +static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, + char *mfde_pl, void *priv) +{ + struct mlxsw_core_health_event *event; + 
struct mlxsw_core *mlxsw_core = priv; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->mlxsw_core = mlxsw_core; + memcpy(event->mfde_pl, mfde_pl, sizeof(event->mfde_pl)); + INIT_WORK(&event->work, mlxsw_core_health_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_core_health_listener = + MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); + +static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + char *mfde_pl = priv_ctx; + char *val_str; + u8 event_id; + u32 val; + int err; + + if (!priv_ctx) + /* User-triggered dumps are not possible */ + return -EOPNOTSUPP; + + val = mlxsw_reg_mfde_irisc_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_id", val); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "event"); + if (err) + return err; + + event_id = mlxsw_reg_mfde_event_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id); + if (err) + return err; + switch (event_id) { + case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO: + val_str = "CR space timeout"; + break; + case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: + val_str = "KVD insertion machine stopped"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + val = mlxsw_reg_mfde_method_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_METHOD_QUERY: + val_str = "query"; + break; + case MLXSW_REG_MFDE_METHOD_WRITE: + val_str = "write"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "method", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_long_process_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "long_process", val); + if (err) + return err; + + val = mlxsw_reg_mfde_command_type_get(mfde_pl); + switch (val) { + case MLXSW_REG_MFDE_COMMAND_TYPE_MAD: + val_str = "mad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_EMAD: + val_str = "emad"; + break; + case MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF: + val_str = "cmdif"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "command_type", val_str); + if (err) + return err; + } + + val = mlxsw_reg_mfde_reg_attr_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "reg_attr_id", val); + if (err) + return err; + + if (event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO) { + val = mlxsw_reg_mfde_log_address_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); + if (err) + return err; + val = mlxsw_reg_mfde_log_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); + if (err) + return err; + } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) { + val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_test(struct devlink_health_reporter *reporter, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_health_reporter_priv(reporter); + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. 
*/ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_trigger_test_set(mfgd_pl, true); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static const struct devlink_health_reporter_ops +mlxsw_core_health_fw_fatal_ops = { + .name = "fw_fatal", + .dump = mlxsw_core_health_fw_fatal_dump, + .test = mlxsw_core_health_fw_fatal_test, +}; + +static int mlxsw_core_health_fw_fatal_config(struct mlxsw_core *mlxsw_core, + bool enable) +{ + char mfgd_pl[MLXSW_REG_MFGD_LEN]; + int err; + + /* Read the register first to make sure no other bits are changed. */ + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); + if (err) + return err; + mlxsw_reg_mfgd_fatal_event_mode_set(mfgd_pl, enable); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl); +} + +static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_health_reporter *fw_fatal; + int err; + + if (!mlxsw_core->driver->fw_fatal_enabled) + return 0; + + fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, + 0, mlxsw_core); + if (IS_ERR(fw_fatal)) { + dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter"); + return PTR_ERR(fw_fatal); + } + mlxsw_core->health.fw_fatal = fw_fatal; + + err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + if (err) + goto err_trap_register; + + err = mlxsw_core_health_fw_fatal_config(mlxsw_core, true); + if (err) + goto err_fw_fatal_config; + + return 0; + +err_fw_fatal_config: + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); +err_trap_register: + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); + return err; +} + +static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core) +{ + if (!mlxsw_core->driver->fw_fatal_enabled) + return; + + mlxsw_core_health_fw_fatal_config(mlxsw_core, false); + mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core); + /* Make sure there is no more event work scheduled */ + mlxsw_core_flush_owq(); + devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal); +} + static int __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, @@ -1368,12 +1927,21 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_devlink_register; } - if (mlxsw_driver->params_register && !reload) { - err = mlxsw_driver->params_register(mlxsw_core); + if (!reload) { + err = mlxsw_core_params_register(mlxsw_core); if (err) goto err_register_params; } + err = mlxsw_core_fw_rev_validate(mlxsw_core, mlxsw_bus_info, mlxsw_driver->fw_req_rev, + mlxsw_driver->fw_filename); + if (err) + goto err_fw_rev_validate; + + err = mlxsw_core_health_init(mlxsw_core); + if (err) + goto err_health_init; + if (mlxsw_driver->init) { err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); if (err) @@ -1389,22 +1957,31 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) - devlink_params_publish(devlink); + err = mlxsw_env_init(mlxsw_core, &mlxsw_core->env); + if (err) + goto err_env_init; + + mlxsw_core->is_initialized = true; + devlink_params_publish(devlink); if (!reload) devlink_reload_enable(devlink); return 0; +err_env_init: + mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: 
mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); err_driver_init: - if (mlxsw_driver->params_unregister && !reload) - mlxsw_driver->params_unregister(mlxsw_core); + mlxsw_core_health_fini(mlxsw_core); +err_health_init: +err_fw_rev_validate: + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); err_register_params: if (!reload) devlink_unregister(devlink); @@ -1469,14 +2046,16 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } - if (mlxsw_core->driver->params_unregister) - devlink_params_unpublish(devlink); + devlink_params_unpublish(devlink); + mlxsw_core->is_initialized = false; + mlxsw_env_fini(mlxsw_core->env); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); - if (mlxsw_core->driver->params_unregister && !reload) - mlxsw_core->driver->params_unregister(mlxsw_core); + mlxsw_core_health_fini(mlxsw_core); + if (!reload) + mlxsw_core_params_unregister(mlxsw_core); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); @@ -1489,8 +2068,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; reload_fail_deinit: - if (mlxsw_core->driver->params_unregister) - mlxsw_core->driver->params_unregister(mlxsw_core); + mlxsw_core_params_unregister(mlxsw_core); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); devlink_free(devlink); @@ -2274,6 +2852,16 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get); +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->env; +} + +bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->is_initialized; +} + int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module) { enum mlxsw_reg_pmtm_module_type module_type; @@ -2410,18 +2998,6 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get); -void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core) -{ - mlxsw_core->fw_flash_in_progress = true; -} -EXPORT_SYMBOL(mlxsw_core_fw_flash_start); - -void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core) -{ - mlxsw_core->fw_flash_in_progress = false; -} -EXPORT_SYMBOL(mlxsw_core_fw_flash_end); - int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, struct mlxsw_res *res) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 11af3308f8cc..92f7398287be 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -32,6 +32,8 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); +bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core); + bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev); @@ -221,6 +223,8 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, struct devlink_port * mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, u8 local_port); +struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core); +bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core); int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module); int 
mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); @@ -280,6 +284,8 @@ struct mlxsw_driver { struct list_head list; const char *kind; size_t priv_size; + const struct mlxsw_fw_rev *fw_req_rev; + const char *fw_filename; int (*init)(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info, struct netlink_ext_ack *extack); @@ -324,9 +330,6 @@ struct mlxsw_driver { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); - int (*flash_update)(struct mlxsw_core *mlxsw_core, - const char *file_name, const char *component, - struct netlink_ext_ack *extack); int (*trap_init)(struct mlxsw_core *mlxsw_core, const struct devlink_trap *trap, void *trap_ctx); void (*trap_fini)(struct mlxsw_core *mlxsw_core, @@ -371,6 +374,8 @@ struct mlxsw_driver { u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; + bool fw_fatal_enabled; + bool temp_warn_enabled; }; int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, @@ -378,9 +383,6 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, u64 *p_single_size, u64 *p_double_size, u64 *p_linear_size); -void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); -void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); - u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 056eeb85be60..dd26865bd587 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -10,6 +10,18 @@ #include "item.h" #include "reg.h" +struct mlxsw_env_module_info { + u64 module_overheat_counter; + bool is_overheat; +}; + +struct mlxsw_env { + struct mlxsw_core *core; + u8 module_count; + spinlock_t module_info_lock; /* Protects 'module_info'. */ + struct mlxsw_env_module_info module_info[]; +}; + static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, bool *cmis) { @@ -293,3 +305,359 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev, return 0; } EXPORT_SYMBOL(mlxsw_env_get_module_eeprom); + +static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core, + u8 module, + bool *p_has_temp_sensor) +{ + char mtbr_pl[MLXSW_REG_MTBR_LEN]; + u16 temp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + case MLXSW_REG_MTBR_NO_CONN: + case MLXSW_REG_MTBR_NO_TEMP_SENS: + case MLXSW_REG_MTBR_INDEX_NA: + *p_has_temp_sensor = false; + break; + default: + *p_has_temp_sensor = temp ? true : false; + } + return 0; +} + +static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core, + u16 sensor_index, bool enable) +{ + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + enum mlxsw_reg_mtmp_tee tee; + int err, threshold_hi; + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, sensor_index); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + if (enable) { + err = mlxsw_env_module_temp_thresholds_get(mlxsw_core, + sensor_index - + MLXSW_REG_MTMP_MODULE_INDEX_MIN, + SFP_TEMP_HIGH_WARN, + &threshold_hi); + /* In case it is not possible to query the module's threshold, + * use the default value. 
+ */ + if (err) + threshold_hi = MLXSW_REG_MTMP_THRESH_HI; + else + /* mlxsw_env_module_temp_thresholds_get() multiplies + * Celsius degrees by 1000 whereas MTMP expects + * temperature in 0.125 Celsius degrees units. + * Convert threshold_hi to correct units. + */ + threshold_hi = threshold_hi / 1000 * 8; + + mlxsw_reg_mtmp_temperature_threshold_hi_set(mtmp_pl, threshold_hi); + mlxsw_reg_mtmp_temperature_threshold_lo_set(mtmp_pl, threshold_hi - + MLXSW_REG_MTMP_HYSTERESIS_TEMP); + } + tee = enable ? MLXSW_REG_MTMP_TEE_GENERATE_EVENT : MLXSW_REG_MTMP_TEE_NO_EVENT; + mlxsw_reg_mtmp_tee_set(mtmp_pl, tee); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); +} + +static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core, + u8 module_count) +{ + int i, err, sensor_index; + bool has_temp_sensor; + + for (i = 0; i < module_count; i++) { + err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i, + &has_temp_sensor); + if (err) + return err; + + if (!has_temp_sensor) + continue; + + sensor_index = i + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + err = mlxsw_env_temp_event_set(mlxsw_core, sensor_index, true); + if (err) + return err; + } + + return 0; +} + +static void mlxsw_env_mtwe_event_func(const struct mlxsw_reg_info *reg, + char *mtwe_pl, void *priv) +{ + struct mlxsw_env *mlxsw_env = priv; + int i, sensor_warning; + bool is_overheat; + + for (i = 0; i < mlxsw_env->module_count; i++) { + /* 64-127 of sensor_index are mapped to the port modules + * sequentially (module 0 is mapped to sensor_index 64, + * module 1 to sensor_index 65 and so on) + */ + sensor_warning = + mlxsw_reg_mtwe_sensor_warning_get(mtwe_pl, + i + MLXSW_REG_MTMP_MODULE_INDEX_MIN); + spin_lock(&mlxsw_env->module_info_lock); + is_overheat = + mlxsw_env->module_info[i].is_overheat; + + if ((is_overheat && sensor_warning) || + (!is_overheat && !sensor_warning)) { + /* Current state is "warning" and MTWE still reports + * warning OR current state in "no warning" and MTWE + * does not report warning. + */ + spin_unlock(&mlxsw_env->module_info_lock); + continue; + } else if (is_overheat && !sensor_warning) { + /* MTWE reports "no warning", turn is_overheat off. + */ + mlxsw_env->module_info[i].is_overheat = false; + spin_unlock(&mlxsw_env->module_info_lock); + } else { + /* Current state is "no warning" and MTWE reports + * "warning", increase the counter and turn is_overheat + * on. 
+ */ + mlxsw_env->module_info[i].is_overheat = true; + mlxsw_env->module_info[i].module_overheat_counter++; + spin_unlock(&mlxsw_env->module_info_lock); + } + } +} + +static const struct mlxsw_listener mlxsw_env_temp_warn_listener = + MLXSW_EVENTL(mlxsw_env_mtwe_event_func, MTWE, MTWE); + +static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) + return 0; + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_temp_warn_listener, + mlxsw_env); +} + +static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env) +{ + if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) + return; + + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_temp_warn_listener, mlxsw_env); +} + +struct mlxsw_env_module_plug_unplug_event { + struct mlxsw_env *mlxsw_env; + u8 module; + struct work_struct work; +}; + +static void mlxsw_env_pmpe_event_work(struct work_struct *work) +{ + struct mlxsw_env_module_plug_unplug_event *event; + struct mlxsw_env *mlxsw_env; + bool has_temp_sensor; + u16 sensor_index; + int err; + + event = container_of(work, struct mlxsw_env_module_plug_unplug_event, + work); + mlxsw_env = event->mlxsw_env; + + spin_lock_bh(&mlxsw_env->module_info_lock); + mlxsw_env->module_info[event->module].is_overheat = false; + spin_unlock_bh(&mlxsw_env->module_info_lock); + + err = mlxsw_env_module_has_temp_sensor(mlxsw_env->core, event->module, + &has_temp_sensor); + /* Do not disable events on modules without sensors or faulty sensors + * because FW returns errors. + */ + if (err) + goto out; + + if (!has_temp_sensor) + goto out; + + sensor_index = event->module + MLXSW_REG_MTMP_MODULE_INDEX_MIN; + mlxsw_env_temp_event_set(mlxsw_env->core, sensor_index, true); + +out: + kfree(event); +} + +static void +mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl, + void *priv) +{ + struct mlxsw_env_module_plug_unplug_event *event; + enum mlxsw_reg_pmpe_module_status module_status; + u8 module = mlxsw_reg_pmpe_module_get(pmpe_pl); + struct mlxsw_env *mlxsw_env = priv; + + if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) + return; + + module_status = mlxsw_reg_pmpe_module_status_get(pmpe_pl); + if (module_status != MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED) + return; + + event = kmalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + + event->mlxsw_env = mlxsw_env; + event->module = module; + INIT_WORK(&event->work, mlxsw_env_pmpe_event_work); + mlxsw_core_schedule_work(&event->work); +} + +static const struct mlxsw_listener mlxsw_env_module_plug_listener = + MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE); + +static int +mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) + return 0; + + return mlxsw_core_trap_register(mlxsw_core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static void +mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) +{ + if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) + return; + + mlxsw_core_trap_unregister(mlxsw_env->core, + &mlxsw_env_module_plug_listener, + mlxsw_env); +} + +static int +mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core, + u8 module_count) +{ + int i, err; + + for (i = 0; i < module_count; i++) { + char pmaos_pl[MLXSW_REG_PMAOS_LEN]; + + mlxsw_reg_pmaos_pack(pmaos_pl, i, + 
MLXSW_REG_PMAOS_E_GENERATE_EVENT); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); + if (err) + return err; + } + return 0; +} + +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, + u64 *p_counter) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + + /* Prevent switch driver from accessing uninitialized data. */ + if (!mlxsw_core_is_initialized(mlxsw_core)) { + *p_counter = 0; + return 0; + } + + if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) + return -EINVAL; + + spin_lock_bh(&mlxsw_env->module_info_lock); + *p_counter = mlxsw_env->module_info[module].module_overheat_counter; + spin_unlock_bh(&mlxsw_env->module_info_lock); + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_module_overheat_counter_get); + +int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) +{ + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + struct mlxsw_env *env; + u8 module_count; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count); + + env = kzalloc(struct_size(env, module_info, module_count), GFP_KERNEL); + if (!env) + return -ENOMEM; + + spin_lock_init(&env->module_info_lock); + env->core = mlxsw_core; + env->module_count = module_count; + *p_env = env; + + err = mlxsw_env_temp_warn_event_register(mlxsw_core); + if (err) + goto err_temp_warn_event_register; + + err = mlxsw_env_module_plug_event_register(mlxsw_core); + if (err) + goto err_module_plug_event_register; + + err = mlxsw_env_module_oper_state_event_enable(mlxsw_core, + env->module_count); + if (err) + goto err_oper_state_event_enable; + + err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count); + if (err) + goto err_temp_event_enable; + + return 0; + +err_temp_event_enable: +err_oper_state_event_enable: + mlxsw_env_module_plug_event_unregister(env); +err_module_plug_event_register: + mlxsw_env_temp_warn_event_unregister(env); +err_temp_warn_event_register: + kfree(env); + return err; +} + +void mlxsw_env_fini(struct mlxsw_env *env) +{ + mlxsw_env_module_plug_event_unregister(env); + /* Make sure there is no more event work scheduled. 
*/ + mlxsw_core_flush_owq(); + mlxsw_env_temp_warn_event_unregister(env); + kfree(env); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index 064d0e770c01..8e36a2634ef5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -14,4 +14,10 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev, struct mlxsw_core *mlxsw_core, int module, struct ethtool_eeprom *ee, u8 *data); +int +mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, + u64 *p_counter); +int mlxsw_env_init(struct mlxsw_core *core, struct mlxsw_env **p_env); +void mlxsw_env_fini(struct mlxsw_env *env); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 61719ec89808..2196c946698a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -12,8 +12,17 @@ #include "core.h" #include "core_env.h" -#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 -#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ +#define MLXSW_HWMON_SENSORS_MAX_COUNT 64 +#define MLXSW_HWMON_MODULES_MAX_COUNT 64 +#define MLXSW_HWMON_GEARBOXES_MAX_COUNT 32 + +#define MLXSW_HWMON_ATTR_PER_SENSOR 3 +#define MLXSW_HWMON_ATTR_PER_MODULE 7 +#define MLXSW_HWMON_ATTR_PER_GEARBOX 4 + +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_SENSORS_MAX_COUNT * MLXSW_HWMON_ATTR_PER_SENSOR + \ + MLXSW_HWMON_MODULES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_MODULE + \ + MLXSW_HWMON_GEARBOXES_MAX_COUNT * MLXSW_HWMON_ATTR_PER_GEARBOX + \ MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) struct mlxsw_hwmon_attr { @@ -97,7 +106,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - char mtmp_pl[MLXSW_REG_MTMP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; unsigned long val; int index; int err; @@ -110,7 +119,13 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); - mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); @@ -205,25 +220,39 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, return len; } -static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static int mlxsw_hwmon_module_temp_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; - int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(dev, "Failed to query module temperature\n"); + return err; + } 
+ mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL); + + return 0; +} + +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); if (err) return err; - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); return sprintf(buf, "%d\n", temp); } @@ -270,48 +299,72 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, return sprintf(buf, "%u\n", fault); } -static ssize_t -mlxsw_hwmon_module_temp_critical_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int mlxsw_hwmon_module_temp_critical_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - int temp; u8 module; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, - SFP_TEMP_HIGH_WARN, &temp); + SFP_TEMP_HIGH_WARN, p_temp); if (err) { dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } - return sprintf(buf, "%u\n", temp); + return 0; } static ssize_t -mlxsw_hwmon_module_temp_emergency_show(struct device *dev, - struct device_attribute *attr, - char *buf) +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &temp); + if (err) + return err; + + return sprintf(buf, "%u\n", temp); +} + +static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev, + struct device_attribute *attr, + int *p_temp) { struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; u8 module; - int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, - SFP_TEMP_HIGH_ALARM, &temp); + SFP_TEMP_HIGH_ALARM, p_temp); if (err) { dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } + return 0; +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp; + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &temp); + if (err) + return err; + return sprintf(buf, "%u\n", temp); } @@ -341,6 +394,53 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, return sprintf(buf, "gearbox %03u\n", index); } +static ssize_t mlxsw_hwmon_temp_critical_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp, critic_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + if (temp >= emergency_temp) + return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_critical_get(dev, attr, &critic_temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp >= critic_temp); +} + +static ssize_t mlxsw_hwmon_temp_emergency_alarm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, temp, emergency_temp; + + err = mlxsw_hwmon_module_temp_get(dev, attr, &temp); + if (err) + return err; + + if (temp <= 0) + 
return sprintf(buf, "%d\n", false); + + err = mlxsw_hwmon_module_temp_emergency_get(dev, attr, &emergency_temp); + if (err) + return err; + + return sprintf(buf, "%d\n", temp >= emergency_temp); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -354,6 +454,8 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -444,6 +546,20 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_label", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_critical_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit_alarm", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_temp_emergency_alarm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency_alarm", num + 1); + break; default: WARN_ON(1); } @@ -460,7 +576,6 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; - char mtmp_pl[MLXSW_REG_MTMP_LEN]; int i; int err; @@ -471,7 +586,15 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) } mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); for (i = 0; i < mlxsw_hwmon->sensor_count; i++) { - mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); + char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; + + mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), + mtmp_pl); + if (err) + return err; + mlxsw_reg_mtmp_mte_set(mtmp_pl, true); + mlxsw_reg_mtmp_mtr_set(mtmp_pl, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -566,6 +689,12 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_CRIT_ALARM, + i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_EMERGENCY_ALARM, + i, i); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1c64b03ff48e..641cdd81882b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -620,9 +620,9 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) return elem; } -static void mlxsw_pci_cq_tasklet(unsigned long data) +static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; char *cqe; int items = 0; @@ -733,9 +733,9 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) return elem; } -static void mlxsw_pci_eq_tasklet(unsigned long data) +static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci_queue *q = 
from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci); unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; @@ -792,7 +792,7 @@ struct mlxsw_pci_queue_ops { struct mlxsw_pci_queue *q); void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); - void (*tasklet)(unsigned long data); + void (*tasklet)(struct tasklet_struct *t); u16 (*elem_count_f)(const struct mlxsw_pci_queue *q); u8 (*elem_size_f)(const struct mlxsw_pci_queue *q); u16 elem_count; @@ -855,7 +855,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, q->pci = mlxsw_pci; if (q_ops->tasklet) - tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q); + tasklet_setup(&q->tasklet, q_ops->tasklet); mem_item->size = MLXSW_PCI_AQ_SIZE; mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 079b080de7f7..39eff6a57ba2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4174,7 +4174,6 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M BIT(0) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII BIT(1) -#define MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII BIT(2) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R BIT(3) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G BIT(4) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G BIT(5) @@ -4197,7 +4196,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4) -#define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) @@ -4210,10 +4208,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) -#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) -#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX BIT(24) -#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(25) -#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T BIT(26) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) @@ -5411,6 +5405,64 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) mlxsw_reg_pspa_sub_port_set(payload, 0); } +/* PMAOS - Ports Module Administrative and Operational Status + * ---------------------------------------------------------- + * This register configures and retrieves the per module status. + */ +#define MLXSW_REG_PMAOS_ID 0x5012 +#define MLXSW_REG_PMAOS_LEN 0x10 + +MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN); + +/* reg_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, slot_index, 0x00, 24, 4); + +/* reg_pmaos_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmaos, module, 0x00, 16, 8); + +/* reg_pmaos_ase + * Admin state update enable. + * If this bit is set, admin state will be updated based on admin_state field. + * Only relevant on Set() operations. 
+ * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ase, 0x04, 31, 1); + +/* reg_pmaos_ee + * Event update enable. + * If this bit is set, event generation will be updated based on the e field. + * Only relevant on Set operations. + * Access: WO + */ +MLXSW_ITEM32(reg, pmaos, ee, 0x04, 30, 1); + +enum mlxsw_reg_pmaos_e { + MLXSW_REG_PMAOS_E_DO_NOT_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_EVENT, + MLXSW_REG_PMAOS_E_GENERATE_SINGLE_EVENT, +}; + +/* reg_pmaos_e + * Event Generation on operational state change. + * Access: RW + */ +MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2); + +static inline void mlxsw_reg_pmaos_pack(char *payload, u8 module, + enum mlxsw_reg_pmaos_e e) +{ + MLXSW_REG_ZERO(pmaos, payload); + mlxsw_reg_pmaos_module_set(payload, module); + mlxsw_reg_pmaos_e_set(payload, e); + mlxsw_reg_pmaos_ee_set(payload, true); +} + /* PPLR - Port Physical Loopback Register * -------------------------------------- * This register allows configuration of the port's loopback mode. @@ -5447,6 +5499,50 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port, MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0); } +/* PMPE - Port Module Plug/Unplug Event Register + * --------------------------------------------- + * This register reports any operational status change of a module. + * A change in the module’s state will generate an event only if the change + * happens after arming the event mechanism. Any changes to the module state + * while the event mechanism is not armed will not be reported. Software can + * query the PMPE register for module status. + */ +#define MLXSW_REG_PMPE_ID 0x5024 +#define MLXSW_REG_PMPE_LEN 0x10 + +MLXSW_REG_DEFINE(pmpe, MLXSW_REG_PMPE_ID, MLXSW_REG_PMPE_LEN); + +/* reg_pmpe_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, slot_index, 0x00, 24, 4); + +/* reg_pmpe_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmpe, module, 0x00, 16, 8); + +enum mlxsw_reg_pmpe_module_status { + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED = 1, + MLXSW_REG_PMPE_MODULE_STATUS_UNPLUGGED, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ERROR, + MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_DISABLED, +}; + +/* reg_pmpe_module_status + * Module status. + * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, module_status, 0x00, 0, 4); + +/* reg_pmpe_error_type + * Module error details. + * Access: RO + */ +MLXSW_ITEM32(reg, pmpe, error_type, 0x04, 8, 4); + /* PDDR - Port Diagnostics Database Register * ----------------------------------------- * The PDDR enables to read the Phy debug database @@ -5585,6 +5681,9 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_MFDE, + MLXSW_REG_HTGT_TRAP_GROUP_MTWE, + MLXSW_REG_HTGT_TRAP_GROUP_PMPE, MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, @@ -8418,6 +8517,13 @@ MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); * 2 - Generate single event * Access: RW */ + +enum mlxsw_reg_mtmp_tee { + MLXSW_REG_MTMP_TEE_NO_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_EVENT, + MLXSW_REG_MTMP_TEE_GENERATE_SINGLE_EVENT, +}; + MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); #define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */ @@ -8428,6 +8534,7 @@ MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); */ MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16); +#define MLXSW_REG_MTMP_HYSTERESIS_TEMP 0x28 /* 5 Celsius */ /* reg_mtmp_temperature_threshold_lo * Low threshold for Temperature Warning Event. In 0.125 Celsius. 
* Access: RW @@ -8471,6 +8578,23 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MTWE - Management Temperature Warning Event + * ------------------------------------------- + * This register is used for over temperature warning. + */ +#define MLXSW_REG_MTWE_ID 0x900B +#define MLXSW_REG_MTWE_LEN 0x10 + +MLXSW_REG_DEFINE(mtwe, MLXSW_REG_MTWE_ID, MLXSW_REG_MTWE_LEN); + +/* reg_mtwe_sensor_warning + * Bit vector indicating which of the sensor reading is above threshold. + * Address 00h bit31 is sensor_warning[127]. + * Address 0Ch bit0 is sensor_warning[0]. + * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1); + /* MTBR - Management Temperature Bulk Register * ------------------------------------------- * This register is used for bulk temperature reading. @@ -9827,6 +9951,26 @@ static inline void mlxsw_reg_mtptptp_pack(char *payload, mlxsw_reg_mtptpt_message_type_set(payload, message_type); } +/* MFGD - Monitoring FW General Debug Register + * ------------------------------------------- + */ +#define MLXSW_REG_MFGD_ID 0x90F0 +#define MLXSW_REG_MFGD_LEN 0x0C + +MLXSW_REG_DEFINE(mfgd, MLXSW_REG_MFGD_ID, MLXSW_REG_MFGD_LEN); + +/* reg_mfgd_fw_fatal_event_mode + * 0 - don't check FW fatal (default) + * 1 - check FW fatal - enable MFDE trap + * Access: RW + */ +MLXSW_ITEM32(reg, mfgd, fatal_event_mode, 0x00, 9, 2); + +/* reg_mfgd_trigger_test + * Access: WO + */ +MLXSW_ITEM32(reg, mfgd, trigger_test, 0x00, 11, 1); + /* MGPIR - Management General Peripheral Information Register * ---------------------------------------------------------- * MGPIR register allows software to query the hardware and @@ -9886,6 +10030,84 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); } +/* MFDE - Monitoring FW Debug Register + * ----------------------------------- + */ +#define MLXSW_REG_MFDE_ID 0x9200 +#define MLXSW_REG_MFDE_LEN 0x18 + +MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); + +/* reg_mfde_irisc_id + * Which irisc triggered the event + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 8, 4); + +enum mlxsw_reg_mfde_event_id { + MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1, + /* KVD insertion machine stopped */ + MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP, +}; + +/* reg_mfde_event_id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 8); + +enum mlxsw_reg_mfde_method { + MLXSW_REG_MFDE_METHOD_QUERY, + MLXSW_REG_MFDE_METHOD_WRITE, +}; + +/* reg_mfde_method + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, method, 0x04, 29, 1); + +/* reg_mfde_long_process + * Indicates if the command is in long_process mode. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, long_process, 0x04, 28, 1); + +enum mlxsw_reg_mfde_command_type { + MLXSW_REG_MFDE_COMMAND_TYPE_MAD, + MLXSW_REG_MFDE_COMMAND_TYPE_EMAD, + MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF, +}; + +/* reg_mfde_command_type + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, command_type, 0x04, 24, 2); + +/* reg_mfde_reg_attr_id + * EMAD - register id, MAD - attibute id + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, reg_attr_id, 0x04, 0, 16); + +/* reg_mfde_log_address + * crspace address accessed, which resulted in timeout. + * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32); + +/* reg_mfde_log_id + * Which irisc triggered the timeout. 
+ * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4); + +/* reg_mfde_pipes_mask + * Bit per kvh pipe. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, pipes_mask, 0x10, 0, 16); + /* TNGCR - Tunneling NVE General Configuration Register * ---------------------------------------------------- * The TNGCR register is used for setting up the NVE Tunneling configuration. @@ -10948,7 +11170,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pptb), MLXSW_REG(pbmc), MLXSW_REG(pspa), + MLXSW_REG(pmaos), MLXSW_REG(pplr), + MLXSW_REG(pmpe), MLXSW_REG(pddr), MLXSW_REG(pmtm), MLXSW_REG(htgt), @@ -10978,6 +11202,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(fore), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mtwe), MLXSW_REG(mtbr), MLXSW_REG(mcia), MLXSW_REG(mpat), @@ -10999,7 +11224,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mtpppc), MLXSW_REG(mtpptr), MLXSW_REG(mtptpt), + MLXSW_REG(mfgd), MLXSW_REG(mgpir), + MLXSW_REG(mfde), MLXSW_REG(tngcr), MLXSW_REG(tnumt), MLXSW_REG(tnqcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f3c0e241e1b4..16b47fce540b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -42,11 +42,10 @@ #include "spectrum_span.h" #include "spectrum_ptp.h" #include "spectrum_trap.h" -#include "../mlxfw/mlxfw.h" #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 2007 -#define MLXSW_SP1_FWREV_SUBMINOR 1168 +#define MLXSW_SP1_FWREV_MINOR 2008 +#define MLXSW_SP1_FWREV_SUBMINOR 1310 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -62,8 +61,8 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP2_FWREV_MAJOR 29 -#define MLXSW_SP2_FWREV_MINOR 2007 -#define MLXSW_SP2_FWREV_SUBMINOR 1168 +#define MLXSW_SP2_FWREV_MINOR 2008 +#define MLXSW_SP2_FWREV_SUBMINOR 1310 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, @@ -77,8 +76,8 @@ static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { "." 
__stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP3_FWREV_MAJOR 30 -#define MLXSW_SP3_FWREV_MINOR 2007 -#define MLXSW_SP3_FWREV_SUBMINOR 1168 +#define MLXSW_SP3_FWREV_MINOR 2008 +#define MLXSW_SP3_FWREV_SUBMINOR 1310 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { .major = MLXSW_SP3_FWREV_MAJOR, @@ -170,274 +169,6 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); -struct mlxsw_sp_mlxfw_dev { - struct mlxfw_dev mlxfw_dev; - struct mlxsw_sp *mlxsw_sp; -}; - -static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, - u16 component_index, u32 *p_max_size, - u8 *p_align_bits, u16 *p_max_write_size) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcqi_pl[MLXSW_REG_MCQI_LEN]; - int err; - - mlxsw_reg_mcqi_pack(mcqi_pl, component_index); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcqi), mcqi_pl); - if (err) - return err; - mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, - p_max_write_size); - - *p_align_bits = max_t(u8, *p_align_bits, 2); - *p_max_write_size = min_t(u16, *p_max_write_size, - MLXSW_REG_MCDA_MAX_DATA_LEN); - return 0; -} - -static int mlxsw_sp_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - u8 control_state; - int err; - - mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); - if (err) - return err; - - mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); - if (control_state != MLXFW_FSM_STATE_IDLE) - return -EBUSY; - - mlxsw_reg_mcc_pack(mcc_pl, - MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, - 0, *fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_component_update(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u16 component_index, - u32 component_size) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, - component_index, fwhandle, component_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_block_download(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u8 *data, u16 size, - u32 offset) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcda_pl[MLXSW_REG_MCDA_LEN]; - - mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcda), mcda_pl); -} - -static int mlxsw_sp_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, - u32 fwhandle, u16 component_index) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, - component_index, fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int 
mlxsw_sp_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, - fwhandle, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static int mlxsw_sp_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - enum mlxfw_fsm_state *fsm_state, - enum mlxfw_fsm_state_err *fsm_state_err) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - u8 control_state; - u8 error_code; - int err; - - mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); - if (err) - return err; - - mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); - *fsm_state = control_state; - *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, - MLXFW_FSM_STATE_ERR_MAX); - return 0; -} - -static void mlxsw_sp_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, - fwhandle, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - char mcc_pl[MLXSW_REG_MCC_LEN]; - - mlxsw_reg_mcc_pack(mcc_pl, - MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, - fwhandle, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); -} - -static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { - .component_query = mlxsw_sp_component_query, - .fsm_lock = mlxsw_sp_fsm_lock, - .fsm_component_update = mlxsw_sp_fsm_component_update, - .fsm_block_download = mlxsw_sp_fsm_block_download, - .fsm_component_verify = mlxsw_sp_fsm_component_verify, - .fsm_activate = mlxsw_sp_fsm_activate, - .fsm_query_state = mlxsw_sp_fsm_query_state, - .fsm_cancel = mlxsw_sp_fsm_cancel, - .fsm_release = mlxsw_sp_fsm_release, -}; - -static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, - const struct firmware *firmware, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { - .mlxfw_dev = { - .ops = &mlxsw_sp_mlxfw_dev_ops, - .psid = mlxsw_sp->bus_info->psid, - .psid_size = strlen(mlxsw_sp->bus_info->psid), - .devlink = priv_to_devlink(mlxsw_sp->core), - }, - .mlxsw_sp = mlxsw_sp - }; - int err; - - mlxsw_core_fw_flash_start(mlxsw_sp->core); - err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, - firmware, extack); - mlxsw_core_fw_flash_end(mlxsw_sp->core); - - return err; -} - -static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) -{ - const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; - const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev; - const char *fw_filename = mlxsw_sp->fw_filename; - union devlink_param_value value; - const struct firmware *firmware; - int err; - - /* Don't check if driver does not require it */ - if 
(!req_rev || !fw_filename) - return 0; - - /* Don't check if devlink 'fw_load_policy' param is 'flash' */ - err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_sp->core), - DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - &value); - if (err) - return err; - if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) - return 0; - - /* Validate driver & FW are compatible */ - if (rev->major != req_rev->major) { - WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", - rev->major, req_rev->major); - return -EINVAL; - } - if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) - return 0; - - dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", - rev->major, rev->minor, rev->subminor, req_rev->major, - req_rev->minor, req_rev->subminor); - dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", - fw_filename); - - err = request_firmware_direct(&firmware, fw_filename, - mlxsw_sp->bus_info->dev); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n", - fw_filename); - return err; - } - - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL); - release_firmware(firmware); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); - - /* On FW flash success, tell the caller FW reset is needed - * if current FW supports it. - */ - if (rev->minor >= req_rev->can_reset_minor) - return err ? err : -EAGAIN; - else - return 0; -} - -static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core, - const char *file_name, const char *component, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - const struct firmware *firmware; - int err; - - if (component) - return -EOPNOTSUPP; - - err = request_firmware_direct(&firmware, file_name, - mlxsw_sp->bus_info->dev); - if (err) - return err; - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack); - release_firmware(firmware); - - return err; -} - int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -590,21 +321,28 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); } -static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmtu_pl[MLXSW_REG_PMTU_LEN]; - int max_mtu; int err; - mtu += MLXSW_TXHDR_LEN + ETH_HLEN; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); if (err) return err; - max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); - if (mtu > max_mtu) + *p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + return 0; +} + +static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + if (mtu > mlxsw_sp_port->max_mtu) return -EINVAL; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); @@ -872,133 +610,25 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } -static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, - int mtu) -{ - return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); -} - -#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size 
/ (IPG + cell_size + 1) */ - -static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, - u16 delay) -{ - delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay, - BITS_PER_BYTE)); - return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp, - mtu); -} - -/* Maximum delay buffer needed in case of PAUSE frames, in bytes. - * Assumes 100m cable and maximum MTU. - */ -#define MLXSW_SP_PAUSE_DELAY 58752 - -static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, - u16 delay, bool pfc, bool pause) -{ - if (pfc) - return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay); - else if (pause) - return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY); - else - return 0; -} - -static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, - bool lossy) +static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) { - if (lossy) - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); - else - mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, - thres); -} + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int err; -int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, - u8 *prio_tc, bool pause_en, - struct ieee_pfc *my_pfc) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; - u16 delay = !!my_pfc ? my_pfc->delay : 0; - char pbmc_pl[MLXSW_REG_PBMC_LEN]; - u32 taken_headroom_cells = 0; - u32 max_headroom_cells; - int i, j, err; + orig_hdroom = *mlxsw_sp_port->hdroom; - max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp); + hdroom = orig_hdroom; + hdroom.mtu = mtu; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); - if (err) + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); return err; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - bool configure = false; - bool pfc = false; - u16 thres_cells; - u16 delay_cells; - u16 total_cells; - bool lossy; - - for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { - if (prio_tc[j] == i) { - pfc = pfc_en & BIT(j); - configure = true; - break; - } - } - - if (!configure) - continue; - - lossy = !(pfc || pause_en); - thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); - delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, - pfc, pause_en); - delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); - total_cells = thres_cells + delay_cells; - - taken_headroom_cells += total_cells; - if (taken_headroom_cells > max_headroom_cells) - return -ENOBUFS; - - mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells, - thres_cells, lossy); } - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); -} - -int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, - int mtu, bool pause_en) -{ - u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0}; - bool dcb_en = !!mlxsw_sp_port->dcb.ets; - struct ieee_pfc *my_pfc; - u8 *prio_tc; - - prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc; - my_pfc = dcb_en ? 
mlxsw_sp_port->dcb.pfc : NULL; - - return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc, - pause_en, my_pfc); -} - -static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - int err; - - err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); - if (err) - return err; - err = mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, mtu); - if (err) - goto err_span_port_mtu_update; err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); if (err) goto err_port_mtu_set; @@ -1006,9 +636,7 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) return 0; err_port_mtu_set: - mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, dev->mtu); -err_span_port_mtu_update: - mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } @@ -1737,6 +1365,22 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtctm), qtctm_pl); } +static int mlxsw_sp_port_overheat_init_val_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 module = mlxsw_sp_port->mapping.module; + u64 overheat_counter; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_sp->core, module, + &overheat_counter); + if (err) + return err; + + mlxsw_sp_port->module_overheat_initial_val = overheat_counter; + return 0; +} + static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 split_base_local_port, struct mlxsw_sp_port_mapping *port_mapping) @@ -1842,6 +1486,21 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_speed_by_width_set; } + err = mlxsw_sp->port_type_speed_ops->ptys_max_speed(mlxsw_sp_port, + &mlxsw_sp_port->max_speed); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum speed\n", + mlxsw_sp_port->local_port); + goto err_max_speed_get; + } + + err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n", + mlxsw_sp_port->local_port); + goto err_port_max_mtu_get; + } + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", @@ -1930,10 +1589,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, mlxsw_sp->ptp_ops->shaper_work); - INIT_DELAYED_WORK(&mlxsw_sp_port->span.speed_update_dw, - mlxsw_sp_span_speed_update_work); mlxsw_sp->ports[local_port] = mlxsw_sp_port; + + err = mlxsw_sp_port_overheat_init_val_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set overheat initial value\n", + mlxsw_sp_port->local_port); + goto err_port_overheat_init_val_set; + } + err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -1947,6 +1612,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, return 0; err_register_netdev: +err_port_overheat_init_val_set: mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); err_port_vlan_create: @@ -1963,9 +1629,12 @@ err_port_dcb_init: mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); err_port_tc_mc_mode: err_port_ets_init: + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); 
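
The port-creation changes above also snapshot the module's overheat counter, so that the new "transceiver_overheat" ethtool statistic (added further below in spectrum_ethtool.c) reports events since the netdev was created rather than a lifetime total. A minimal editorial sketch of that baseline-and-delta pattern follows; every name in it is made up, and a stubbed read stands in for mlxsw_env_module_overheat_counter_get():

	/* Editorial sketch, not part of the patch. */
	#include <stdint.h>

	static uint64_t fake_module_overheat;	/* stands in for the hardware counter */

	static int example_overheat_read(uint64_t *counter)
	{
		*counter = fake_module_overheat;
		return 0;			/* a real read can fail */
	}

	struct example_port {
		uint64_t overheat_initial;	/* snapshot taken at port creation */
	};

	static void example_port_create(struct example_port *port)
	{
		/* Record the lifetime counter now, so the stat can later be
		 * reported relative to when the netdev came into existence.
		 */
		if (example_overheat_read(&port->overheat_initial))
			port->overheat_initial = 0;
	}

	static uint64_t example_overheat_stat(const struct example_port *port)
	{
		uint64_t now;

		/* On a read error this sketch reports 0; the patch instead
		 * falls back to returning the stored initial value.
		 */
		if (example_overheat_read(&now))
			now = port->overheat_initial;

		return now - port->overheat_initial;
	}
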
err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: +err_port_max_mtu_get: +err_max_speed_get: err_port_speed_by_width_set: err_port_system_port_mapping_set: err_dev_addr_init: @@ -1986,7 +1655,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); - cancel_delayed_work_sync(&mlxsw_sp_port->span.speed_update_dw); cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); @@ -1998,6 +1666,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false); + mlxsw_sp_port_buffers_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp_port); free_percpu(mlxsw_sp_port->pcpu_stats); @@ -2390,7 +2059,6 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); - mlxsw_core_schedule_dw(&mlxsw_sp_port->span.speed_update_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); @@ -2783,11 +2451,36 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) { char htgt_pl[MLXSW_REG_HTGT_LEN]; + int err; mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD, MLXSW_REG_HTGT_INVALID_POLICER, MLXSW_REG_HTGT_DEFAULT_PRIORITY, MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE, + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } @@ -2836,10 +2529,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; - err = mlxsw_sp_fw_rev_validate(mlxsw_sp); - if (err) - return err; - mlxsw_core_emad_string_tlv_enable(mlxsw_core); err = mlxsw_sp_base_mac_get(mlxsw_sp); @@ -3039,8 +2728,6 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; @@ -3051,6 +2738,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; + mlxsw_sp->sb_ops = 
&mlxsw_sp1_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; @@ -3069,8 +2757,6 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp2_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP2_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -3081,6 +2767,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; @@ -3097,8 +2784,6 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - mlxsw_sp->req_rev = &mlxsw_sp3_fw_rev; - mlxsw_sp->fw_filename = MLXSW_SP3_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -3109,6 +2794,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; @@ -3451,52 +3137,6 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, } static int -mlxsw_sp_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - if ((val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER) && - (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH)) { - NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); - return -EINVAL; - } - - return 0; -} - -static const struct devlink_param mlxsw_sp_devlink_params[] = { - DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, - BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, - mlxsw_sp_devlink_param_fw_load_policy_validate), -}; - -static int mlxsw_sp_params_register(struct mlxsw_core *mlxsw_core) -{ - struct devlink *devlink = priv_to_devlink(mlxsw_core); - union devlink_param_value value; - int err; - - err = devlink_params_register(devlink, mlxsw_sp_devlink_params, - ARRAY_SIZE(mlxsw_sp_devlink_params)); - if (err) - return err; - - value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - value); - return 0; -} - -static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core) -{ - devlink_params_unregister(priv_to_devlink(mlxsw_core), - mlxsw_sp_devlink_params, - ARRAY_SIZE(mlxsw_sp_devlink_params)); -} - -static int mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { @@ -3533,24 +3173,16 @@ static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) union devlink_param_value value; int err; - err = mlxsw_sp_params_register(mlxsw_core); - if (err) - return err; - err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, ARRAY_SIZE(mlxsw_sp2_devlink_params)); if (err) - goto err_devlink_params_register; + 
return err; value.vu32 = 0; devlink_param_driverinit_value_set(devlink, MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, value); return 0; - -err_devlink_params_register: - mlxsw_sp_params_unregister(mlxsw_core); - return err; } static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) @@ -3558,7 +3190,6 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) devlink_params_unregister(priv_to_devlink(mlxsw_core), mlxsw_sp2_devlink_params, ARRAY_SIZE(mlxsw_sp2_devlink_params)); - mlxsw_sp_params_unregister(mlxsw_core); } static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, @@ -3573,6 +3204,8 @@ static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp1_fw_rev, + .fw_filename = MLXSW_SP1_FW_FILENAME, .init = mlxsw_sp1_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3588,7 +3221,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3601,17 +3233,19 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, - .params_register = mlxsw_sp_params_register, - .params_unregister = mlxsw_sp_params_unregister, .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp2_driver = { .kind = mlxsw_sp2_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp2_fw_rev, + .fw_filename = MLXSW_SP2_FW_FILENAME, .init = mlxsw_sp2_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3627,7 +3261,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3645,11 +3278,15 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp3_driver = { .kind = mlxsw_sp3_driver_name, .priv_size = sizeof(struct mlxsw_sp), + .fw_req_rev = &mlxsw_sp3_fw_rev, + .fw_filename = MLXSW_SP3_FW_FILENAME, .init = mlxsw_sp3_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, @@ -3665,7 +3302,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, - .flash_update = mlxsw_sp_flash_update, .trap_init = mlxsw_sp_trap_init, .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, @@ -3683,6 +3319,8 @@ static struct mlxsw_driver 
mlxsw_sp3_driver = { .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, + .fw_fatal_enabled = true, + .temp_warn_enabled = true, }; bool mlxsw_sp_port_dev_check(const struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5240bf11b6c4..3e26eb6cb140 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -125,6 +125,7 @@ struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_rulei_ops; struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; +struct mlxsw_sp_sb_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; @@ -162,8 +163,6 @@ struct mlxsw_sp { struct mlxsw_sp_counter_pool *counter_pool; struct mlxsw_sp_span *span; struct mlxsw_sp_trap *trap; - const struct mlxsw_fw_rev *req_rev; - const char *fw_filename; const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; const struct mlxsw_afk_ops *afk_ops; @@ -173,6 +172,7 @@ struct mlxsw_sp { const struct mlxsw_sp_nve_ops **nve_ops_arr; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; + const struct mlxsw_sp_sb_ops *sb_ops; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; const struct mlxsw_sp_ptp_ops *ptp_ops; const struct mlxsw_sp_span_ops *span_ops; @@ -316,9 +316,10 @@ struct mlxsw_sp_port { struct mlxsw_sp_ptp_port_stats stats; } ptp; u8 split_base_local_port; - struct { - struct delayed_work speed_update_dw; - } span; + int max_mtu; + u32 max_speed; + struct mlxsw_sp_hdroom *hdroom; + u64 module_overheat_initial_val; }; struct mlxsw_sp_port_type_speed_ops { @@ -331,6 +332,7 @@ struct mlxsw_sp_port_type_speed_ops { void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); + int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); @@ -414,34 +416,73 @@ mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, return NULL; } -static inline u32 -mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, - u32 size_cells) -{ - /* Ports with eight lanes use two headroom buffers between which the - * configured headroom size is split. Therefore, multiply the calculated - * headroom size by two. - */ - return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; -} - enum mlxsw_sp_flood_type { MLXSW_SP_FLOOD_TYPE_UC, MLXSW_SP_FLOOD_TYPE_BC, MLXSW_SP_FLOOD_TYPE_MC, }; -int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, - int mtu, bool pause_en); int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, int prio, char *ppcnt_pl); int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_up); /* spectrum_buffers.c */ +struct mlxsw_sp_hdroom_prio { + /* Number of port buffer associated with this priority. This is the + * actually configured value. + */ + u8 buf_idx; + /* Value of buf_idx deduced from the DCB ETS configuration. */ + u8 ets_buf_idx; + /* Value of buf_idx taken from the dcbnl_setbuffer configuration. */ + u8 set_buf_idx; + bool lossy; +}; + +struct mlxsw_sp_hdroom_buf { + u32 thres_cells; + u32 size_cells; + /* Size requirement form dcbnl_setbuffer. 
*/ + u32 set_size_cells; + bool lossy; +}; + +enum mlxsw_sp_hdroom_mode { + MLXSW_SP_HDROOM_MODE_DCB, + MLXSW_SP_HDROOM_MODE_TC, +}; + +#define MLXSW_SP_PB_COUNT 10 + +struct mlxsw_sp_hdroom { + enum mlxsw_sp_hdroom_mode mode; + + struct { + struct mlxsw_sp_hdroom_prio prio[IEEE_8021Q_MAX_PRIORITIES]; + } prios; + struct { + struct mlxsw_sp_hdroom_buf buf[MLXSW_SP_PB_COUNT]; + } bufs; + struct { + /* Size actually configured for the internal buffer. Equal to + * reserve when internal buffer is enabled. + */ + u32 size_cells; + /* Space reserved in the headroom for the internal buffer. Port + * buffers are not allowed to grow into this space. + */ + u32 reserve_cells; + bool enable; + } int_buf; + int delay_bytes; + int mtu; +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -477,11 +518,20 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, u32 *p_cur, u32 *p_max); u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); -u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom); +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom); +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom); extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; +extern const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops; +extern const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops; + /* spectrum_switchdev.c */ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); @@ -519,9 +569,6 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, bool dwrr, u8 dwrr_weight); int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 switch_prio, u8 tclass); -int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, - u8 *prio_tc, bool pause_en, - struct ieee_pfc *my_pfc); int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate, u8 burst_size); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 6f84557a5a6f..37ff29a1686e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -121,6 +121,10 @@ struct mlxsw_sp_sb_vals { unsigned int cms_cpu_count; }; +struct mlxsw_sp_sb_ops { + u32 (*int_buf_size_get)(int mtu, u32 speed); +}; + u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) { return mlxsw_sp->sb->cell_size * cells; @@ -131,9 +135,14 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); } -u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp) +static u32 
mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, + u32 size_cells) { - return mlxsw_sp->sb->max_headroom_cells; + /* Ports with eight lanes use two headroom buffers between which the + * configured headroom size is split. Therefore, multiply the calculated + * headroom size by two. + */ + return mlxsw_sp_port->mapping.width == 8 ? 2 * size_cells : size_cells; } static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, @@ -291,55 +300,308 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, (unsigned long) pm); } -/* 1/4 of a headroom necessary for 100Gbps port and 100m cable. */ -#define MLXSW_SP_PB_HEADROOM 25632 +void mlxsw_sp_hdroom_prios_reset_buf_idx(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + switch (hdroom->mode) { + case MLXSW_SP_HDROOM_MODE_DCB: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].ets_buf_idx; + break; + case MLXSW_SP_HDROOM_MODE_TC: + hdroom->prios.prio[prio].buf_idx = hdroom->prios.prio[prio].set_buf_idx; + break; + } + } +} + +void mlxsw_sp_hdroom_bufs_reset_lossiness(struct mlxsw_sp_hdroom *hdroom) +{ + int prio; + int i; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom->bufs.buf[i].lossy = true; + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) { + if (!hdroom->prios.prio[prio].lossy) + hdroom->bufs.buf[hdroom->prios.prio[prio].buf_idx].lossy = false; + } +} + +static u16 mlxsw_sp_hdroom_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, int mtu) +{ + return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); +} + +static void mlxsw_sp_hdroom_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, bool lossy) +{ + if (lossy) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); + else + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, + thres); +} + +static u16 mlxsw_sp_hdroom_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) +{ + u16 delay_cells; + + delay_cells = mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->delay_bytes); + + /* In the worst case scenario the delay will be made up of packets that + * are all of size CELL_SIZE + 1, which means each packet will require + * almost twice its true size when buffered in the switch. We therefore + * multiply this value by the "cell factor", which is close to 2. + * + * Another MTU is added in case the transmitting host already started + * transmitting a maximum length frame when the PFC packet was received. + */ + return 2 * delay_cells + mlxsw_sp_bytes_cells(mlxsw_sp, hdroom->mtu); +} + +static u32 mlxsw_sp_hdroom_int_buf_size_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) +{ + u32 buffsize = mlxsw_sp->sb_ops->int_buf_size_get(speed, mtu); + + return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; +} + +static bool mlxsw_sp_hdroom_buf_is_used(const struct mlxsw_sp_hdroom *hdroom, int buf) +{ + int prio; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (hdroom->prios.prio[prio].buf_idx == buf) + return true; + } + return false; +} + +void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_hdroom *hdroom) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 reserve_cells; + int i; + + /* Internal buffer. 
*/ + reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_speed, + mlxsw_sp_port->max_mtu); + reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells); + hdroom->int_buf.reserve_cells = reserve_cells; + + if (hdroom->int_buf.enable) + hdroom->int_buf.size_cells = reserve_cells; + else + hdroom->int_buf.size_cells = 0; + + /* PG buffers. */ + for (i = 0; i < DCBX_MAX_BUFFERS; i++) { + struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; + u16 thres_cells; + u16 delay_cells; + + if (!mlxsw_sp_hdroom_buf_is_used(hdroom, i)) { + thres_cells = 0; + delay_cells = 0; + } else if (buf->lossy) { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = 0; + } else { + thres_cells = mlxsw_sp_hdroom_buf_threshold_get(mlxsw_sp, hdroom->mtu); + delay_cells = mlxsw_sp_hdroom_buf_delay_get(mlxsw_sp, hdroom); + } + + thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); + delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); + + buf->thres_cells = thres_cells; + if (hdroom->mode == MLXSW_SP_HDROOM_MODE_DCB) { + buf->size_cells = thres_cells + delay_cells; + } else { + /* Do not allow going below the minimum size, even if + * the user requested it. + */ + buf->size_cells = max(buf->set_size_cells, buf->thres_cells); + } + } +} + #define MLXSW_SP_PB_UNUSED 8 -static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_buffers(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) { - const u32 pbs[] = { - [0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width, - [9] = MLXSW_PORT_MAX_MTU, - }; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; + bool dirty; + int err; int i; - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, - 0xffff, 0xffff / 2); - for (i = 0; i < ARRAY_SIZE(pbs); i++) { - u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, pbs[i]); + dirty = memcmp(&mlxsw_sp_port->hdroom->bufs, &hdroom->bufs, sizeof(hdroom->bufs)); + if (!dirty && !force) + return 0; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + const struct mlxsw_sp_hdroom_buf *buf = &hdroom->bufs.buf[i]; if (i == MLXSW_SP_PB_UNUSED) continue; - size = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, size); - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); + + mlxsw_sp_hdroom_buf_pack(pbmc_pl, i, buf->size_cells, buf->thres_cells, buf->lossy); } - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, - MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->bufs = hdroom->bufs; + return 0; } -static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_priomap(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) { char pptb_pl[MLXSW_REG_PPTB_LEN]; - int i; + bool dirty; + int prio; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->prios, &hdroom->prios, sizeof(hdroom->prios)); + if (!dirty && !force) + return 0; mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0); - return 
mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), - pptb_pl); + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, prio, hdroom->prios.prio[prio].buf_idx); + + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), pptb_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->prios = hdroom->prios; + return 0; } -static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_hdroom_configure_int_buf(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + char sbib_pl[MLXSW_REG_SBIB_LEN]; + bool dirty; + int err; + + dirty = memcmp(&mlxsw_sp_port->hdroom->int_buf, &hdroom->int_buf, sizeof(hdroom->int_buf)); + if (!dirty && !force) + return 0; + + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, hdroom->int_buf.size_cells); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) + return err; + + mlxsw_sp_port->hdroom->int_buf = hdroom->int_buf; + return 0; +} + +static bool mlxsw_sp_hdroom_bufs_fit(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_hdroom *hdroom) { + u32 taken_headroom_cells = 0; + int i; + + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) + taken_headroom_cells += hdroom->bufs.buf[i].size_cells; + + taken_headroom_cells += hdroom->int_buf.reserve_cells; + return taken_headroom_cells <= mlxsw_sp->sb->max_headroom_cells; +} + +static int __mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom, bool force) +{ + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom tmp_hdroom; int err; + int i; + + /* Port buffers need to be configured in three steps. First, all buffers + * with non-zero size are configured. Then, prio-to-buffer map is + * updated, allowing traffic to flow to the now non-zero buffers. + * Finally, zero-sized buffers are configured, because now no traffic + * should be directed to them anymore. This way, in a non-congested + * system, no packet drops are introduced by the reconfiguration. 
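
A condensed sketch of that make-before-break ordering, with toy apply helpers and a plain struct standing in for the driver's hdroom state (illustrative only, not the patch's code):

	#include <string.h>

	#define NBUF  10
	#define NPRIO 8

	struct cfg {
		unsigned int buf_size[NBUF];	/* per-buffer size, 0 = unused */
		unsigned int prio2buf[NPRIO];	/* priority -> buffer index */
	};

	static struct cfg hw;			/* stands in for the switch state */

	static int apply_buffers(const struct cfg *c)
	{
		memcpy(hw.buf_size, c->buf_size, sizeof(hw.buf_size));
		return 0;			/* a real write can fail */
	}

	static int apply_priomap(const struct cfg *c)
	{
		memcpy(hw.prio2buf, c->prio2buf, sizeof(hw.prio2buf));
		return 0;
	}

	static int reconfigure(struct cfg *cur, const struct cfg *target)
	{
		struct cfg grow_only = *cur;
		int err, i;

		/* Step 1: configure every buffer that is non-zero in the
		 * target, but do not zero anything yet - traffic may still be
		 * mapped to the old buffers.
		 */
		for (i = 0; i < NBUF; i++)
			if (target->buf_size[i])
				grow_only.buf_size[i] = target->buf_size[i];
		err = apply_buffers(&grow_only);
		if (err)
			return err;

		/* Step 2: repoint priorities at their new buffers. */
		err = apply_priomap(target);
		if (err)
			goto err_priomap;

		/* Step 3: now shrink or zero the buffers nothing uses. */
		err = apply_buffers(target);
		if (err)
			goto err_buffers;

		*cur = *target;
		return 0;

	err_buffers:
		apply_priomap(cur);
	err_priomap:
		apply_buffers(cur);
		return err;
	}

The patch's __mlxsw_sp_hdroom_configure() additionally skips register writes when the relevant sub-state is unchanged (the memcmp "dirty" checks) and rejects the change up front if either the intermediate or the final configuration would exceed the port's maximum headroom.
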
+ */ - err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + orig_hdroom = *mlxsw_sp_port->hdroom; + tmp_hdroom = orig_hdroom; + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + if (hdroom->bufs.buf[i].size_cells) + tmp_hdroom.bufs.buf[i] = hdroom->bufs.buf[i]; + } + + if (!mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, &tmp_hdroom) || + !mlxsw_sp_hdroom_bufs_fit(mlxsw_sp_port->mlxsw_sp, hdroom)) + return -ENOBUFS; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, force); if (err) return err; - return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); + + err = mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, hdroom, force); + if (err) + goto err_configure_priomap; + + err = mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_buffers; + + err = mlxsw_sp_hdroom_configure_int_buf(mlxsw_sp_port, hdroom, false); + if (err) + goto err_configure_int_buf; + + *mlxsw_sp_port->hdroom = *hdroom; + return 0; + +err_configure_int_buf: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_buffers: + mlxsw_sp_hdroom_configure_priomap(mlxsw_sp_port, &tmp_hdroom, false); +err_configure_priomap: + mlxsw_sp_hdroom_configure_buffers(mlxsw_sp_port, &orig_hdroom, false); + return err; +} + +int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_hdroom *hdroom) +{ + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, hdroom, false); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom = {}; + u32 size9; + int prio; + + hdroom.mtu = mlxsw_sp_port->dev->mtu; + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = true; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + /* Buffer 9 is used for control traffic. 
*/ + size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, mlxsw_sp_port->max_mtu); + hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9); + + return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true); } static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, @@ -916,6 +1178,46 @@ const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; +static u32 mlxsw_sp1_pb_int_buf_size_get(int mtu, u32 speed) +{ + return mtu * 5 / 2; +} + +static u32 __mlxsw_sp_pb_int_buf_size_get(int mtu, u32 speed, u32 buffer_factor) +{ + return 3 * mtu + buffer_factor * speed / 1000; +} + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 + +static u32 mlxsw_sp2_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static u32 mlxsw_sp3_pb_int_buf_size_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_pb_int_buf_size_get(mtu, speed, factor); +} + +const struct mlxsw_sp_sb_ops mlxsw_sp1_sb_ops = { + .int_buf_size_get = mlxsw_sp1_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp2_sb_ops = { + .int_buf_size_get = mlxsw_sp2_pb_int_buf_size_get, +}; + +const struct mlxsw_sp_sb_ops mlxsw_sp3_sb_ops = { + .int_buf_size_get = mlxsw_sp3_pb_int_buf_size_get, +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { u32 max_headroom_size; @@ -995,17 +1297,34 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) { int err; + mlxsw_sp_port->hdroom = kzalloc(sizeof(*mlxsw_sp_port->hdroom), GFP_KERNEL); + if (!mlxsw_sp_port->hdroom) + return -ENOMEM; + mlxsw_sp_port->hdroom->mtu = mlxsw_sp_port->dev->mtu; + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); if (err) - return err; + goto err_headroom_init; err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); if (err) - return err; + goto err_port_sb_cms_init; err = mlxsw_sp_port_sb_pms_init(mlxsw_sp_port); + if (err) + goto err_port_sb_pms_init; + return 0; +err_port_sb_pms_init: +err_port_sb_cms_init: +err_headroom_init: + kfree(mlxsw_sp_port->hdroom); return err; } +void mlxsw_sp_port_buffers_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->hdroom); +} + int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 0d3fb2e51ea5..5f92b1691360 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -64,87 +64,28 @@ static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, - u8 *prio_tc) -{ - char pptb_pl[MLXSW_REG_PPTB_LEN]; - int i; - - mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]); - - return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), - pptb_pl); -} - -static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg) -{ - int i; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (prio_tc[i] == pg) - return true; - return false; -} - -static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - u8 *old_prio_tc, u8 *new_prio_tc) -{ - struct 
mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char pbmc_pl[MLXSW_REG_PBMC_LEN]; - int err, i; - - mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); - if (err) - return err; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - u8 pg = old_prio_tc[i]; - - if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg)) - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0); - } - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); -} - static int mlxsw_sp_port_headroom_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, struct ieee_ets *ets) { - bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; - /* Create the required PGs, but don't destroy existing ones, as - * traffic is still directed to them. - */ - err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - ets->prio_tc, pause_en, - mlxsw_sp_port->dcb.pfc); + hdroom = *mlxsw_sp_port->hdroom; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].ets_buf_idx = ets->prio_tc[prio]; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom\n"); return err; } - err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc); - if (err) { - netdev_err(dev, "Failed to set PG-priority mapping\n"); - goto err_port_prio_pg_map; - } - - err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc, - ets->prio_tc); - if (err) - netdev_warn(dev, "Failed to remove unused PGs\n"); - return 0; - -err_port_prio_pg_map: - mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc); - return err; } static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, @@ -605,6 +546,9 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; if (pause_en && pfc->pfc_en) { @@ -612,9 +556,21 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, return -EINVAL; } - err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, - pause_en, pfc); + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pfc->pfc_en) + hdroom.delay_bytes = DIV_ROUND_UP(pfc->delay, BITS_PER_BYTE); + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !(pfc->pfc_en & BIT(prio)); + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom for PFC\n"); return err; @@ -632,12 +588,66 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, return 0; err_port_pfc_set: - __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, pause_en, - mlxsw_sp_port->dcb.pfc); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } +static int mlxsw_sp_dcbnl_getbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port 
= netdev_priv(dev); + struct mlxsw_sp_hdroom *hdroom = mlxsw_sp_port->hdroom; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int prio; + int i; + + buf->total_size = 0; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < MLXSW_SP_PB_COUNT; i++) { + u32 bytes = mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->bufs.buf[i].size_cells); + + if (i < DCBX_MAX_BUFFERS) + buf->buffer_size[i] = bytes; + buf->total_size += bytes; + } + + buf->total_size += mlxsw_sp_cells_bytes(mlxsw_sp, hdroom->int_buf.size_cells); + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + buf->prio2buffer[prio] = hdroom->prios.prio[prio].buf_idx; + + return 0; +} + +static int mlxsw_sp_dcbnl_setbuffer(struct net_device *dev, struct dcbnl_buffer *buf) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_hdroom hdroom; + int prio; + int i; + + hdroom = *mlxsw_sp_port->hdroom; + + if (hdroom.mode != MLXSW_SP_HDROOM_MODE_TC) { + netdev_err(dev, "The use of dcbnl_setbuffer is only allowed if egress is configured using TC\n"); + return -EINVAL; + } + + for (prio = 0; prio < IEEE_8021Q_MAX_PRIORITIES; prio++) + hdroom.prios.prio[prio].set_buf_idx = buf->prio2buffer[prio]; + + BUILD_BUG_ON(DCBX_MAX_BUFFERS > MLXSW_SP_PB_COUNT); + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + hdroom.bufs.buf[i].set_size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, + buf->buffer_size[i]); + + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); +} + static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, @@ -650,6 +660,9 @@ static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { .getdcbx = mlxsw_sp_dcbnl_getdcbx, .setdcbx = mlxsw_sp_dcbnl_setdcbx, + + .dcbnl_getbuffer = mlxsw_sp_dcbnl_getbuffer, + .dcbnl_setbuffer = mlxsw_sp_dcbnl_setbuffer, }; static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 14c78f73bb65..2096b6478958 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ #include "reg.h" +#include "core.h" #include "spectrum.h" #include "core_env.h" @@ -192,11 +193,19 @@ static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, pfcc_pl); } +/* Maximum delay buffer needed in case of PAUSE frames. Similar to PFC delay, but is + * measured in bytes. Assumes 100m cable and does not take into account MTU. 
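
To contrast the two flow-control paths touched here: PFC carries a peer-advertised delay allowance, expressed in bit times, which ieee_setpfc above converts to bytes, while plain PAUSE has no negotiated figure and falls back to the fixed byte budget defined just below. A small illustrative sketch with example names (the constant merely mirrors the value defined below):

	#include <stdint.h>

	#define EXAMPLE_PAUSE_DELAY_BYTES 19476	/* fixed worst case for PAUSE */
	#define EXAMPLE_BITS_PER_BYTE 8

	/* PFC: the allowance arrives in bit times and is rounded up to bytes
	 * before it is used to size the lossless buffers.
	 */
	static int pfc_delay_bytes(uint16_t pfc_delay_bit_times)
	{
		return (pfc_delay_bit_times + EXAMPLE_BITS_PER_BYTE - 1) /
		       EXAMPLE_BITS_PER_BYTE;
	}

	/* PAUSE: no negotiated allowance, so a fixed budget assuming a 100m
	 * cable is used instead.
	 */
	static int pause_delay_bytes(int pause_en)
	{
		return pause_en ? EXAMPLE_PAUSE_DELAY_BYTES : 0;
	}

Either way, delay_bytes is only an input: mlxsw_sp_hdroom_bufs_reset_sizes() later converts it to cells, roughly doubles it for the cell factor, and adds one more MTU before it reaches the lossless buffers' sizes.
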
+ */ +#define MLXSW_SP_PAUSE_DELAY_BYTES 19476 + static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); bool pause_en = pause->tx_pause || pause->rx_pause; + struct mlxsw_sp_hdroom orig_hdroom; + struct mlxsw_sp_hdroom hdroom; + int prio; int err; if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { @@ -209,7 +218,21 @@ static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, return -EINVAL; } - err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + orig_hdroom = *mlxsw_sp_port->hdroom; + + hdroom = orig_hdroom; + if (pause_en) + hdroom.delay_bytes = MLXSW_SP_PAUSE_DELAY_BYTES; + else + hdroom.delay_bytes = 0; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + hdroom.prios.prio[prio].lossy = !pause_en; + + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); if (err) { netdev_err(dev, "Failed to configure port's headroom\n"); return err; @@ -227,8 +250,7 @@ static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, return 0; err_port_pause_configure: - pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); - mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); return err; } @@ -531,6 +553,37 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { #define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) +struct mlxsw_sp_port_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(struct mlxsw_sp_port *mlxsw_sp_port); +}; + +static u64 +mlxsw_sp_port_get_transceiver_overheat_stats(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_mapping port_mapping = mlxsw_sp_port->mapping; + struct mlxsw_core *mlxsw_core = mlxsw_sp_port->mlxsw_sp->core; + u64 stats; + int err; + + err = mlxsw_env_module_overheat_counter_get(mlxsw_core, + port_mapping.module, + &stats); + if (err) + return mlxsw_sp_port->module_overheat_initial_val; + + return stats - mlxsw_sp_port->module_overheat_initial_val; +} + +static struct mlxsw_sp_port_stats mlxsw_sp_port_transceiver_stats[] = { + { + .str = "transceiver_overheat", + .getter = mlxsw_sp_port_get_transceiver_overheat_stats, + }, +}; + +#define MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_transceiver_stats) + #define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ @@ -540,7 +593,8 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ IEEE_8021QAZ_MAX_TCS) + \ (MLXSW_SP_PORT_HW_TC_STATS_LEN * \ - TC_MAX_QUEUE)) + TC_MAX_QUEUE) + \ + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN) static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) { @@ -616,6 +670,12 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, mlxsw_sp_port_get_tc_strings(&p, i); mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p); + + for (i = 0; i < MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_transceiver_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } break; } } @@ -711,6 +771,17 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, } } +static void __mlxsw_sp_port_get_env_stats(struct net_device *dev, u64 *data, int data_index, + struct mlxsw_sp_port_stats *port_stats, + int len) +{ + struct 
mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int i; + + for (i = 0; i < len; i++) + data[data_index + i] = port_stats[i].getter(mlxsw_sp_port); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -765,6 +836,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port, data, data_index); data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); + + /* Transceiver counters */ + __mlxsw_sp_port_get_env_stats(dev, data, data_index, mlxsw_sp_port_transceiver_stats, + MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN); + data_index += MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) @@ -842,6 +918,29 @@ mlxsw_sp_port_connector_port(enum mlxsw_reg_ptys_connector_type connector_type) } } +static int mlxsw_sp_port_ptys_query(struct mlxsw_sp_port *mlxsw_sp_port, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper, u8 *p_connector_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, p_eth_proto_cap, p_eth_proto_admin, + p_eth_proto_oper); + if (p_connector_type) + *p_connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); + return 0; +} + static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -849,21 +948,17 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; const struct mlxsw_sp_port_type_speed_ops *ops; - char ptys_pl[MLXSW_REG_PTYS_LEN]; u8 connector_type; bool autoneg; int err; - ops = mlxsw_sp->port_type_speed_ops; - - autoneg = mlxsw_sp_port->link.autoneg; - ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, - 0, false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, ð_proto_cap, ð_proto_admin, + ð_proto_oper, &connector_type); if (err) return err; - ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, ð_proto_cap, - ð_proto_admin, ð_proto_oper); + + ops = mlxsw_sp->port_type_speed_ops; + autoneg = mlxsw_sp_port->link.autoneg; mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, mlxsw_sp_port->mapping.width, cmd); @@ -872,7 +967,6 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, mlxsw_sp_port->mapping.width, cmd); cmd->base.autoneg = autoneg ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; - connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); cmd->base.port = mlxsw_sp_port_connector_port(connector_type); ops->from_ptys_speed_duplex(mlxsw_sp, netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -993,22 +1087,12 @@ struct mlxsw_sp1_port_link_mode { static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, - .speed = SPEED_100, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, .speed = SPEED_1000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - .speed = SPEED_10000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, @@ -1023,11 +1107,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .speed = SPEED_10000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, - .speed = SPEED_20000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, .speed = SPEED_40000, @@ -1092,11 +1171,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, .speed = SPEED_100000, }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, - .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, - .speed = SPEED_100000, - }, }; #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) @@ -1164,6 +1238,27 @@ mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, cmd->base.duplex = DUPLEX_FULL; } +static int mlxsw_sp1_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, &eth_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp1_port_link_mode[i].mask) && + mlxsw_sp1_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp1_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + static u32 mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) @@ -1213,6 +1308,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_link = mlxsw_sp1_from_ptys_link, .from_ptys_speed = mlxsw_sp1_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, + .ptys_max_speed = mlxsw_sp1_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, @@ -1237,14 +1333,6 @@ mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = { ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii) static const enum ethtool_link_mode_bit_indices -mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii[] = { - ETHTOOL_LINK_MODE_2500baseX_Full_BIT, -}; - -#define MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN \ - ARRAY_SIZE(mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii) - -static const enum ethtool_link_mode_bit_indices mlxsw_sp2_mask_ethtool_5gbase_r[] = { ETHTOOL_LINK_MODE_5000baseT_Full_BIT, }; @@ -1408,16 +1496,6 @@ static const struct mlxsw_sp2_port_link_mode 
mlxsw_sp2_port_link_mode[] = { .speed = SPEED_1000, }, { - .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII, - .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii, - .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | - MLXSW_SP_PORT_MASK_WIDTH_2X | - MLXSW_SP_PORT_MASK_WIDTH_4X | - MLXSW_SP_PORT_MASK_WIDTH_8X, - .speed = SPEED_2500, - }, - { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, @@ -1568,6 +1646,27 @@ mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, cmd->base.duplex = DUPLEX_FULL; } +static int mlxsw_sp2_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) +{ + u32 eth_proto_cap; + u32 max_speed = 0; + int err; + int i; + + err = mlxsw_sp_port_ptys_query(mlxsw_sp_port, &eth_proto_cap, NULL, NULL, NULL); + if (err) + return err; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if ((eth_proto_cap & mlxsw_sp2_port_link_mode[i].mask) && + mlxsw_sp2_port_link_mode[i].speed > max_speed) + max_speed = mlxsw_sp2_port_link_mode[i].speed; + } + + *p_max_speed = max_speed; + return 0; +} + static bool mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, const unsigned long *mode) @@ -1637,6 +1736,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_link = mlxsw_sp2_from_ptys_link, .from_ptys_speed = mlxsw_sp2_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, + .ptys_max_speed = mlxsw_sp2_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 9650562fc0ef..ca8090a28dec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -314,11 +314,9 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, u8 *p_message_type, u16 *p_sequence_id) { - unsigned int offset = 0; unsigned int ptp_class; - u8 *data; + struct ptp_header *hdr; - data = skb_mac_header(skb); ptp_class = ptp_classify_raw(skb); switch (ptp_class & PTP_CLASS_VMASK) { @@ -329,30 +327,14 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, return -ERANGE; } - if (ptp_class & PTP_CLASS_VLAN) - offset += VLAN_HLEN; - - switch (ptp_class & PTP_CLASS_PMASK) { - case PTP_CLASS_IPV4: - offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; - break; - case PTP_CLASS_IPV6: - offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; - break; - case PTP_CLASS_L2: - offset += ETH_HLEN; - break; - default: - return -ERANGE; - } - - /* PTP header is 34 bytes. 
*/ - if (skb->len < offset + 34) + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) return -EINVAL; - *p_message_type = data[offset] & 0x0f; - *p_domain_number = data[offset + 4]; - *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + *p_message_type = ptp_get_msgtype(hdr, ptp_class); + *p_domain_number = hdr->domain_number; + *p_sequence_id = be16_to_cpu(hdr->sequence_id); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 964fd444bb10..fd672c6c9133 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -140,18 +140,31 @@ static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + int err_hdroom = 0; int err = 0; if (!mlxsw_sp_qdisc) return 0; + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = *mlxsw_sp_port->hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_DCB; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + } + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy) err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, mlxsw_sp_qdisc); mlxsw_sp_qdisc->handle = TC_H_UNSPEC; mlxsw_sp_qdisc->ops = NULL; - return err; + + return err_hdroom ?: err; } static int @@ -159,6 +172,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct mlxsw_sp_qdisc_ops *ops, void *params) { + struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; + struct mlxsw_sp_hdroom orig_hdroom; int err; if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) @@ -168,6 +183,21 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, * new one. 
*/ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + orig_hdroom = *mlxsw_sp_port->hdroom; + if (root_qdisc == mlxsw_sp_qdisc) { + struct mlxsw_sp_hdroom hdroom = orig_hdroom; + + hdroom.mode = MLXSW_SP_HDROOM_MODE_TC; + mlxsw_sp_hdroom_prios_reset_buf_idx(&hdroom); + mlxsw_sp_hdroom_bufs_reset_lossiness(&hdroom); + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + err = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); + if (err) + goto err_hdroom_configure; + } + err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params); if (err) goto err_bad_param; @@ -191,6 +221,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, err_bad_param: err_config: + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); +err_hdroom_configure: if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 460cb523312f..4381f8c6c3fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8038,7 +8038,6 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; - int err; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; @@ -8047,10 +8046,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); mlxsw_reg_rgcr_usp_set(rgcr_pl, usp); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - return err; - return 0; + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 1d18e41ab255..c6c5826aba41 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -968,42 +968,26 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, return 0; } -static u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, - u32 speed) +static int mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { - u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); + struct mlxsw_sp_hdroom hdroom; - return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; + hdroom = *mlxsw_sp_port->hdroom; + hdroom.int_buf.enable = enable; + mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); + + return mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); } static int -mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +mlxsw_sp_span_port_buffer_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - u32 buffsize; - u32 speed; - int err; - - err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); - if (err) - return err; - if (speed == SPEED_UNKNOWN) - speed = 0; - - buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); - buffsize = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, buffsize); - mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + return mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, 
true); } -static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp *mlxsw_sp, - u8 local_port) +static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp_port *mlxsw_sp_port) { - char sbib_pl[MLXSW_REG_SBIB_LEN]; - - mlxsw_reg_sbib_pack(sbib_pl, local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, false); } static struct mlxsw_sp_span_analyzed_port * @@ -1021,48 +1005,6 @@ mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u8 local_port, return NULL; } -int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - int err = 0; - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the mtu value - */ - mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); - - if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, port->local_port, - false)) - err = mlxsw_sp_span_port_buffer_update(port, mtu); - - mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); - - return err; -} - -void mlxsw_sp_span_speed_update_work(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - - mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, - span.speed_update_dw); - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the speed value. - */ - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - mutex_lock(&mlxsw_sp->span->analyzed_ports_lock); - - if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, - mlxsw_sp_port->local_port, false)) - mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, - mlxsw_sp_port->dev->mtu); - - mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); -} - static const struct mlxsw_sp_span_entry_ops * mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev) @@ -1180,9 +1122,7 @@ mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span, * does the mirroring. */ if (!ingress) { - u16 mtu = mlxsw_sp_port->dev->mtu; - - err = mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, mtu); + err = mlxsw_sp_span_port_buffer_enable(mlxsw_sp_port); if (err) goto err_buffer_update; } @@ -1196,18 +1136,15 @@ err_buffer_update: } static void -mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_span *span, +mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_span_analyzed_port * analyzed_port) { - struct mlxsw_sp *mlxsw_sp = span->mlxsw_sp; - /* Remove egress mirror buffer now that port is no longer analyzed * at egress. 
*/ if (!analyzed_port->ingress) - mlxsw_sp_span_port_buffer_disable(mlxsw_sp, - analyzed_port->local_port); + mlxsw_sp_span_port_buffer_disable(mlxsw_sp_port); list_del(&analyzed_port->list); kfree(analyzed_port); @@ -1258,7 +1195,7 @@ void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port, if (!refcount_dec_and_test(&analyzed_port->ref_count)) goto out_unlock; - mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp->span, analyzed_port); + mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp_port, analyzed_port); out_unlock: mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock); @@ -1712,11 +1649,6 @@ static int mlxsw_sp1_span_init(struct mlxsw_sp *mlxsw_sp) return 0; } -static u32 mlxsw_sp1_span_buffsize_get(int mtu, u32 speed) -{ - return mtu * 5 / 2; -} - static int mlxsw_sp1_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base) { @@ -1725,7 +1657,6 @@ static int mlxsw_sp1_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { .init = mlxsw_sp1_span_init, - .buffsize_get = mlxsw_sp1_span_buffsize_get, .policer_id_base_set = mlxsw_sp1_span_policer_id_base_set, }; @@ -1750,18 +1681,6 @@ static int mlxsw_sp2_span_init(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 #define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 -static u32 __mlxsw_sp_span_buffsize_get(int mtu, u32 speed, u32 buffer_factor) -{ - return 3 * mtu + buffer_factor * speed / 1000; -} - -static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) -{ - int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; - - return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); -} - static int mlxsw_sp2_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base) { @@ -1778,19 +1697,10 @@ static int mlxsw_sp2_span_policer_id_base_set(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { .init = mlxsw_sp2_span_init, - .buffsize_get = mlxsw_sp2_span_buffsize_get, .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, }; -static u32 mlxsw_sp3_span_buffsize_get(int mtu, u32 speed) -{ - int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; - - return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); -} - const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = { .init = mlxsw_sp2_span_init, - .buffsize_get = mlxsw_sp3_span_buffsize_get, .policer_id_base_set = mlxsw_sp2_span_policer_id_base_set, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 1c746dd3b1bd..d907718bc8c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -47,7 +47,6 @@ struct mlxsw_sp_span_entry_ops; struct mlxsw_sp_span_ops { int (*init)(struct mlxsw_sp *mlxsw_sp); - u32 (*buffsize_get)(int mtu, u32 speed); int (*policer_id_base_set)(struct mlxsw_sp *mlxsw_sp, u16 policer_id_base); }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 2e41c5519c1b..433f14ade464 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -291,7 +291,7 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port, static const struct mlxsw_sp_trap_policer_item mlxsw_sp_trap_policer_items_arr[] = { { - .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 4096), }, { .policer = MLXSW_SP_TRAP_POLICER(2, 128, 128), @@ 
-303,25 +303,25 @@ mlxsw_sp_trap_policer_items_arr[] = { .policer = MLXSW_SP_TRAP_POLICER(4, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 8192), }, { .policer = MLXSW_SP_TRAP_POLICER(6, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(7, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 1024), + .policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 8192), }, { .policer = MLXSW_SP_TRAP_POLICER(9, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(10, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(11, 360, 128), + .policer = MLXSW_SP_TRAP_POLICER(11, 256, 128), }, { .policer = MLXSW_SP_TRAP_POLICER(12, 128, 128), @@ -330,19 +330,19 @@ mlxsw_sp_trap_policer_items_arr[] = { .policer = MLXSW_SP_TRAP_POLICER(13, 128, 128), }, { - .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(14, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(15, 1024, 512), }, { - .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 4096), + .policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 16384), }, { - .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 4096), + .policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 8192), }, { - .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128), + .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 512), }, { .policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512), diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 6f9a725662fb..5023d91269f4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -551,16 +551,6 @@ struct mlxsw_sx_port_link_mode { static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, - .speed = 100, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .supported = SUPPORTED_1000baseKX_Full, @@ -568,12 +558,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { .speed = 1000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 10000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .supported = SUPPORTED_10000baseKX4_Full, @@ -590,12 +574,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { .speed = 10000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, .supported = SUPPORTED_40000baseCR4_Full, .advertised = ADVERTISED_40000baseCR4_Full, @@ -634,8 +612,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, .speed = 100000, }, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 33909887d0ac..57f9e24602d0 100644 
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -120,8 +120,14 @@ enum { }; enum mlxsw_event_trap_id { + /* Fatal Event generated by FW */ + MLXSW_TRAP_ID_MFDE = 0x3, /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* Port Module Plug/Unplug Event generated by hardware */ + MLXSW_TRAP_ID_PMPE = 0x9, + /* Temperature Warning event generated by hardware */ + MLXSW_TRAP_ID_MTWE = 0xC, /* PTP Ingress FIFO has a new entry */ MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, /* PTP Egress FIFO has a new entry */