From 530b69a26952c95ffc9e6dcd1cff6f249032780a Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 3 Dec 2024 22:49:15 +0200 Subject: [PATCH 1/6] net/mlx5: HWS: Fix memory leak in mlx5hws_definer_calc_layout It allocates a match template, which creates a compressed definer fc struct, but that is not deallocated. This commit fixes that. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Cosmin Ratiu Reviewed-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index c00010ca86bdc1..9fb059a6511f7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -39,6 +39,8 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, } else { mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n"); } + } else { + kfree(mt->fc); } mlx5hws_match_template_destroy(mt); From 10e0f0c018d5ce5ba4f349875c67f99c3253b5c3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 3 Dec 2024 22:49:16 +0200 Subject: [PATCH 2/6] net/mlx5: HWS: Properly set bwc queue locks lock classes The mentioned "Fixes" patch forgot to do that. Fixes: 9addffa34359 ("net/mlx5: HWS, use lock classes for bwc locks") Signed-off-by: Cosmin Ratiu Reviewed-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 424797b6d8020d..883b4ed30892c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -990,6 +990,7 @@ static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx) for (i = 0; i < bwc_queues; i++) { mutex_init(&ctx->bwc_send_queue_locks[i]); lockdep_register_key(ctx->bwc_lock_class_keys + i); + lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i); } return 0; From 5f9b2bf019b7fb58e883dca7522c606c52612ea4 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 3 Dec 2024 22:49:17 +0200 Subject: [PATCH 3/6] net/mlx5: E-Switch, Fix switching to switchdev mode with IB device disabled In case that IB device is already disabled when moving to switchdev mode, which can happen when working with LAG, need to do rescan_drivers() before leaving in order to add ethernet representor auxiliary device. Fixes: ab85ebf43723 ("net/mlx5: E-switch, refactor eswitch mode change") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d6ff2dc4c19e9d..5213d5b2cad58d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2338,6 +2338,7 @@ static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) { esw->mode = mode; + mlx5_rescan_drivers_locked(esw->dev); mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); return; } From d04c81a3e3ced59f68018a7906c82e421bf748a5 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 3 Dec 2024 22:49:18 +0200 Subject: [PATCH 4/6] net/mlx5: E-Switch, Fix switching to switchdev mode in MPV Fix the mentioned commit change for MPV mode, since in MPV mode the IB device is shared between different core devices, so under this change when moving both devices simultaneously to switchdev mode the IB device removal and re-addition can race with itself causing unexpected behavior. In such case do rescan_drivers() only once in order to add the ethernet representor auxiliary device, and skip adding and removing IB devices. Fixes: ab85ebf43723 ("net/mlx5: E-switch, refactor eswitch mode change") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5213d5b2cad58d..d5b42b3a19fdf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2335,8 +2335,8 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) { mlx5_devcom_comp_lock(esw->dev->priv.hca_devcom_comp); - - if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) { + if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV || + mlx5_core_mp_enabled(esw->dev)) { esw->mode = mode; mlx5_rescan_drivers_locked(esw->dev); mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); From 31f114c3d158dacbeda33f2afb0ca41f4ec6c9f9 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 3 Dec 2024 22:49:19 +0200 Subject: [PATCH 5/6] net/mlx5e: SD, Use correct mdev to build channel param In a multi-PF netdev, each traffic channel creates its own resources against a specific PF. In the cited commit, where this support was added, the channel_param logic was mistakenly kept unchanged, so it always used the primary PF which is found at priv->mdev. In this patch we fix this by moving the logic to be per-channel, and passing the correct mdev instance. This bug happened to be usually harmless, as the resulting cparam structures would be the same for all channels, due to identical FW logic and decisions. However, in some use cases, like fwreset, this gets broken. This could lead to different symptoms. Example: Error cqe on cqn 0x428, ci 0x0, qn 0x10a9, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 Fixes: e4f9686bdee7 ("net/mlx5e: Let channels be SD-aware") Signed-off-by: Tariq Toukan Reviewed-by: Lama Kayal Reviewed-by: Gal Pressman Link: https://patch.msgid.link/20241203204920.232744-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d0b80b520397f9..dd16d73000c312 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2680,11 +2680,11 @@ void mlx5e_trigger_napi_sched(struct napi_struct *napi) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { struct net_device *netdev = priv->netdev; + struct mlx5e_channel_param *cparam; struct mlx5_core_dev *mdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; @@ -2706,8 +2706,15 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return err; c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!c || !cparam) { + err = -ENOMEM; + goto err_free; + } + + err = mlx5e_build_channel_param(mdev, params, cparam); + if (err) + goto err_free; c->priv = priv; c->mdev = mdev; @@ -2741,6 +2748,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, *cp = c; + kvfree(cparam); return 0; err_close_queues: @@ -2749,6 +2757,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_napi_del: netif_napi_del(&c->napi); +err_free: + kvfree(cparam); kvfree(c); return err; @@ -2807,20 +2817,14 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { - struct mlx5e_channel_param *cparam; int err = -ENOMEM; int i; chs->num = chs->params.num_channels; chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); - cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); - if (!chs->c || !cparam) - goto err_free; - - err = mlx5e_build_channel_param(priv->mdev, &chs->params, cparam); - if (err) - goto err_free; + if (!chs->c) + goto err_out; for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2828,7 +2832,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (chs->params.xdp_prog) xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -2846,7 +2850,6 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, } mlx5e_health_channels_update(priv); - kvfree(cparam); return 0; err_close_ptp: @@ -2857,9 +2860,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); -err_free: kfree(chs->c); - kvfree(cparam); +err_out: chs->num = 0; return err; } From 5085f861b414e4a51ce28a891dfa32a10a54b64e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 3 Dec 2024 22:49:20 +0200 Subject: [PATCH 6/6] net/mlx5e: Remove workaround to avoid syndrome for internal port Previously a workaround was added to avoid syndrome 0xcdb051. It is triggered when offload a rule with tunnel encapsulation, and forwarding to another table, but not matching on the internal port in firmware steering mode. The original workaround skips internal tunnel port logic, which is not correct as not all cases are considered. As an example, if vlan is configured on the uplink port, traffic can't pass because vlan header is not added with this workaround. Besides, there is no such issue for software steering. So, this patch removes that, and returns error directly if trying to offload such rule for firmware steering. Fixes: 06b4eac9c4be ("net/mlx5e: Don't offload internal port if filter device is out device") Signed-off-by: Jianbo Liu Tested-by: Frode Nordahl Reviewed-by: Chris Mi Reviewed-by: Ariel Levkovich Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241203204920.232744-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 878cbdbf5ec8b4..e7e01f3298efb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -5,6 +5,7 @@ #include #include #include "tc_tun_encap.h" +#include "fs_core.h" #include "en_tc.h" #include "tc_tun.h" #include "rep/tc.h" @@ -24,10 +25,18 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev) || - attr->parse_attr->filter_dev == e->out_dev) + if (!route_dev || !netif_is_ovs_master(route_dev)) goto out; + if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS && + mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) && + (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) { + mlx5_core_warn(priv->mdev, + "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n"); + err = -EINVAL; + goto out; + } + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, MLX5E_TC_INT_PORT_EGRESS, &attr->action, out_index);