From 195624d9c26b64c6856863da30ec578a790feec4 Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Tue, 20 Sep 2022 12:48:25 -0700 Subject: [PATCH 01/30] tun: support not enabling carrier in TUNSETIFF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds support for not enabling carrier during TUNSETIFF interface creation by specifying the IFF_NO_CARRIER flag. Our tests make heavy use of tun interfaces. In some scenarios, the test process creates the interface but another process brings it up after the interface is discovered via netlink notification. In that case, it is not possible to create a tun/tap interface with carrier off without it racing against the bring up. Immediately setting carrier off via TUNSETCARRIER is still too late. Signed-off-by: Patrick Rohr Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: Jason Wang Cc: Stephen Hemminger Cc: Nicolas Dichtel Reviewed-by: Maciej Żenczykowski Acked-by: Jason Wang Reviewed-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/tun.c | 9 ++++++--- include/uapi/linux/if_tun.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 259b2b84b2b3..db736b944016 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2828,7 +2828,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) rcu_assign_pointer(tfile->tun, tun); } - netif_carrier_on(tun->dev); + if (ifr->ifr_flags & IFF_NO_CARRIER) + netif_carrier_off(tun->dev); + else + netif_carrier_on(tun->dev); /* Make sure persistent devices do not get stuck in * xoff state. @@ -3056,8 +3059,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, * This is needed because we never checked for invalid flags on * TUNSETIFF. */ - return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES, - (unsigned int __user*)argp); + return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER | + TUN_FEATURES, (unsigned int __user*)argp); } else if (cmd == TUNSETQUEUE) { return tun_set_queue(file, &ifr); } else if (cmd == SIOCGSKNS) { diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 2ec07de1d73b..b6d7b868f290 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -67,6 +67,8 @@ #define IFF_TAP 0x0002 #define IFF_NAPI 0x0010 #define IFF_NAPI_FRAGS 0x0020 +/* Used in TUNSETIFF to bring up tun/tap without carrier */ +#define IFF_NO_CARRIER 0x0040 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 From f22bd29ba19a43e758b192429613e04aa7abb70d Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Wed, 21 Sep 2022 19:06:10 +0530 Subject: [PATCH 02/30] net: macb: Fix ZynqMP SGMII non-wakeup source resume failure When GEM is in SGMII mode and disabled as a wakeup source, the power management controller can power down the entire full power domain(FPD) if none of the FPD devices are in use. Incase of FPD off, there are below ethernet link up issues on non-wakeup suspend/resume. To fix it add phy_exit() in suspend and phy_init() in the resume path which reinitializes PS GTR SGMII lanes. $ echo +20 > /sys/class/rtc/rtc0/wakealarm $ echo mem > /sys/power/state After resume: $ ifconfig eth0 up xilinx-psgtr fd400000.phy: lane 0 (type 10, protocol 5): PLL lock timeout phy phy-fd400000.phy.0: phy poweron failed --> -110 xilinx-psgtr fd400000.phy: lane 0 (type 10, protocol 5): PLL lock timeout SIOCSIFFLAGS: Connection timed out phy phy-fd400000.phy.0: phy poweron failed --> -110 Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization") Signed-off-by: Radhey Shyam Pandey Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 66c7d08d376a..a2897549f9c4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5109,6 +5109,7 @@ static int __maybe_unused macb_suspend(struct device *dev) if (!(bp->wol & MACB_WOL_ENABLED)) { rtnl_lock(); phylink_stop(bp->phylink); + phy_exit(bp->sgmii_phy); rtnl_unlock(); spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); @@ -5198,6 +5199,9 @@ static int __maybe_unused macb_resume(struct device *dev) macb_set_rx_mode(netdev); macb_restore_features(bp); rtnl_lock(); + if (!device_may_wakeup(&bp->dev->dev)) + phy_init(bp->sgmii_phy); + phylink_start(bp->phylink); rtnl_unlock(); From f8497b3e9650a0d094a3995c644dfedd32e8cde7 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 22 Sep 2022 13:40:53 +0200 Subject: [PATCH 03/30] MAINTAINERS: rectify file entry in TEAM DRIVER Commit bbb774d921e2 ("net: Add tests for bonding and team address list management") adds the net team driver tests in the directory: tools/testing/selftests/drivers/net/team/ The file entry in MAINTAINERS for the TEAM DRIVER however refers to: tools/testing/selftests/net/team/ Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken file pattern. Repair this file entry in TEAM DRIVER. Signed-off-by: Lukas Bulwahn Acked-by: Benjamin Poirier Link: https://lore.kernel.org/r/20220922114053.10883-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index efada49e2e2e..67d5079ec703 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19960,7 +19960,7 @@ S: Supported F: drivers/net/team/ F: include/linux/if_team.h F: include/uapi/linux/if_team.h -F: tools/testing/selftests/net/team/ +F: tools/testing/selftests/drivers/net/team/ TECHNOLOGIC SYSTEMS TS-5500 PLATFORM SUPPORT M: "Savoir-faire Linux Inc." From 42bc4fafe359ed6b73602b7a2dba0dd99588f8ce Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Sat, 17 Sep 2022 02:07:33 +0200 Subject: [PATCH 04/30] net: mt7531: only do PLL once after the reset Move the PLL init of the switch out of the pad configuration of the port 6 (usally cpu port). Fix a unidirectional 100 mbit limitation on 1 gbit or 2.5 gbit links for outbound traffic on port 5 or port 6. Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Cc: stable@vger.kernel.org Signed-off-by: Alexander Couzens Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 835807911be0..95a57aeb466e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -506,14 +506,19 @@ static bool mt7531_dual_sgmii_supported(struct mt7530_priv *priv) static int mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface) { - struct mt7530_priv *priv = ds->priv; + return 0; +} + +static void +mt7531_pll_setup(struct mt7530_priv *priv) +{ u32 top_sig; u32 hwstrap; u32 xtal; u32 val; if (mt7531_dual_sgmii_supported(priv)) - return 0; + return; val = mt7530_read(priv, MT7531_CREV); top_sig = mt7530_read(priv, MT7531_TOP_SIG_SR); @@ -592,8 +597,6 @@ mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface) val |= EN_COREPLL; mt7530_write(priv, MT7531_PLLGP_EN, val); usleep_range(25, 35); - - return 0; } static void @@ -2331,6 +2334,8 @@ mt7531_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); + mt7531_pll_setup(priv); + if (mt7531_dual_sgmii_supported(priv)) { priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII; @@ -2887,8 +2892,6 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port) case 6: interface = PHY_INTERFACE_MODE_2500BASEX; - mt7531_pad_setup(ds, interface); - priv->p6_interface = interface; break; default: From 728c2af6ad8c809f66639b70e4fd8c67c57c66e7 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Sat, 17 Sep 2022 02:07:34 +0200 Subject: [PATCH 05/30] net: mt7531: ensure all MACs are powered down before reset The datasheet [1] explicit describes it as requirement for a reset. [1] MT7531 Reference Manual for Development Board rev 1.0, page 735 Signed-off-by: Alexander Couzens Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 95a57aeb466e..409d5c3d76ea 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2329,6 +2329,10 @@ mt7531_setup(struct dsa_switch *ds) return -ENODEV; } + /* all MACs must be forced link-down before sw reset */ + for (i = 0; i < MT7530_NUM_PORTS; i++) + mt7530_write(priv, MT7530_PMCR_P(i), MT7531_FORCE_LNK); + /* Reset the switch through internal reset */ mt7530_write(priv, MT7530_SYS_CTRL, SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | From b7ca8d5f56e6c57b671ceb8de1361d2a5a495087 Mon Sep 17 00:00:00 2001 From: Andy Moreton Date: Thu, 22 Sep 2022 22:12:18 +0100 Subject: [PATCH 06/30] sfc: correct filter_table_remove method for EF10 PFs A previous patch added a wrapper function to take a lock around efx_mcdi_filter_table_remove(), but only changed EF10 VFs' method table to call it. Change it in the PF method table too. Fixes: 77eb40749d73 ("sfc: move table locking into filter_table_{probe,remove} methods") Signed-off-by: Andy Moreton Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/20220922211218.814-1-ecree@xilinx.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ee734b69150f..d1e1aa19a68e 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4213,7 +4213,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .ev_test_generate = efx_ef10_ev_test_generate, .filter_table_probe = efx_ef10_filter_table_probe, .filter_table_restore = efx_mcdi_filter_table_restore, - .filter_table_remove = efx_mcdi_filter_table_remove, + .filter_table_remove = efx_ef10_filter_table_remove, .filter_update_rx_scatter = efx_mcdi_update_rx_scatter, .filter_insert = efx_mcdi_filter_insert, .filter_remove_safe = efx_mcdi_filter_remove_safe, From 6052a4c11fd893234e085edf7bf2e00a33a79d4e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 23 Sep 2022 19:47:36 -0400 Subject: [PATCH 07/30] Revert "net: mvpp2: debugfs: fix memory leak when using debugfs_lookup()" This reverts commit fe2c9c61f668cde28dac2b188028c5299cedcc1e. On Tue, Sep 13, 2022 at 05:48:58PM +0100, Russell King (Oracle) wrote: >What happens if this is built as a module, and the module is loaded, >binds (and creates the directory), then is removed, and then re- >inserted? Nothing removes the old directory, so doesn't >debugfs_create_dir() fail, resulting in subsequent failure to add >any subsequent debugfs entries? > >I don't think this patch should be backported to stable trees until >this point is addressed. Revert until a proper fix is available as the original behavior was better. Cc: Marcin Wojtas Cc: Eric Dumazet Cc: Paolo Abeni Cc: stable@kernel.org Reported-by: Russell King Fixes: fe2c9c61f668 ("net: mvpp2: debugfs: fix memory leak when using debugfs_lookup()") Signed-off-by: Sasha Levin Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220923234736.657413-1-sashal@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c index 0eec05d905eb..4a3baa7e0142 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c @@ -700,10 +700,10 @@ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) { - static struct dentry *mvpp2_root; - struct dentry *mvpp2_dir; + struct dentry *mvpp2_dir, *mvpp2_root; int ret, i; + mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL); if (!mvpp2_root) mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL); From a43206156263fbaf1f2b7f96257441f331e91bb7 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 22 Sep 2022 21:25:51 -0700 Subject: [PATCH 08/30] usbnet: Fix memory leak in usbnet_disconnect() Currently usbnet_disconnect() unanchors and frees all deferred URBs using usb_scuttle_anchored_urbs(), which does not free urb->context, causing a memory leak as reported by syzbot. Use a usb_get_from_anchor() while loop instead, similar to what we did in commit 19cfe912c37b ("Bluetooth: btusb: Fix memory leak in play_deferred"). Also free urb->sg. Reported-and-tested-by: syzbot+dcd3e13cf4472f2e0ba1@syzkaller.appspotmail.com Fixes: 69ee472f2706 ("usbnet & cdc-ether: Autosuspend for online devices") Fixes: 638c5115a794 ("USBNET: support DMA SG") Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/20220923042551.2745-1-yepeilin.cs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index aaa89b4cfd50..e368b0780753 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1598,6 +1598,7 @@ void usbnet_disconnect (struct usb_interface *intf) struct usbnet *dev; struct usb_device *xdev; struct net_device *net; + struct urb *urb; dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); @@ -1614,7 +1615,11 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); - usb_scuttle_anchored_urbs(&dev->deferred); + while ((urb = usb_get_from_anchor(&dev->deferred))) { + dev_kfree_skb(urb->context); + kfree(urb->sg); + usb_free_urb(urb); + } if (dev->driver_info->unbind) dev->driver_info->unbind(dev, intf); From 6e23ec0ba92d426c77a73a9ccab16346e5e0ef49 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 23 Sep 2022 10:00:46 +0800 Subject: [PATCH 09/30] net: sched: act_ct: fix possible refcount leak in tcf_ct_init() nf_ct_put need to be called to put the refcount got by tcf_ct_fill_params to avoid possible refcount leak when tcf_ct_flow_table_get fails. Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20220923020046.8021-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/act_ct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index d55afb8d14be..5950974ae8f6 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1394,7 +1394,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, err = tcf_ct_flow_table_get(net, params); if (err) - goto cleanup; + goto cleanup_params; spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -1409,6 +1409,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return res; +cleanup_params: + if (params->tmpl) + nf_ct_put(params->tmpl); cleanup: if (goto_ch) tcf_chain_put_by_act(goto_ch); From c635ebe8d911a93bd849a9419b01a58783de76f1 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Thu, 22 Sep 2022 14:51:08 -0300 Subject: [PATCH 10/30] cxgb4: fix missing unlock on ETHOFLD desc collect fail path The label passed to the QDESC_GET for the ETHOFLD TXQ, RXQ, and FLQ, is the 'out' one, which skips the 'out_unlock' label, and thus doesn't unlock the 'uld_mutex' before returning. Additionally, since commit 5148e5950c67 ("cxgb4: add EOTID tracking and software context dump"), the access to these ETHOFLD hardware queues should be protected by the 'mqprio_mutex' instead. Fixes: 2d0cb84dd973 ("cxgb4: add ETHOFLD hardware queue support") Fixes: 5148e5950c67 ("cxgb4: add EOTID tracking and software context dump") Signed-off-by: Rafael Mendonca Reviewed-by: Rahul Lakkireddy Link: https://lore.kernel.org/r/20220922175109.764898-1-rafaelmendsr@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index a7f291c89702..557c591a6ce3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -14,6 +14,7 @@ #include "cudbg_entity.h" #include "cudbg_lib.h" #include "cudbg_zlib.h" +#include "cxgb4_tc_mqprio.h" static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */ @@ -3458,7 +3459,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < utxq->ntxq; i++) QDESC_GET_TXQ(&utxq->uldtxq[i].q, cudbg_uld_txq_to_qtype(j), - out_unlock); + out_unlock_uld); } } @@ -3475,7 +3476,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nrxq; i++) QDESC_GET_RXQ(&urxq->uldrxq[i].rspq, cudbg_uld_rxq_to_qtype(j), - out_unlock); + out_unlock_uld); } /* ULD FLQ */ @@ -3487,7 +3488,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nrxq; i++) QDESC_GET_FLQ(&urxq->uldrxq[i].fl, cudbg_uld_flq_to_qtype(j), - out_unlock); + out_unlock_uld); } /* ULD CIQ */ @@ -3500,29 +3501,34 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nciq; i++) QDESC_GET_RXQ(&urxq->uldrxq[base + i].rspq, cudbg_uld_ciq_to_qtype(j), - out_unlock); + out_unlock_uld); } } + mutex_unlock(&uld_mutex); + if (!padap->tc_mqprio) + goto out; + + mutex_lock(&padap->tc_mqprio->mqprio_mutex); /* ETHOFLD TXQ */ if (s->eohw_txq) for (i = 0; i < s->eoqsets; i++) QDESC_GET_TXQ(&s->eohw_txq[i].q, - CUDBG_QTYPE_ETHOFLD_TXQ, out); + CUDBG_QTYPE_ETHOFLD_TXQ, out_unlock_mqprio); /* ETHOFLD RXQ and FLQ */ if (s->eohw_rxq) { for (i = 0; i < s->eoqsets; i++) QDESC_GET_RXQ(&s->eohw_rxq[i].rspq, - CUDBG_QTYPE_ETHOFLD_RXQ, out); + CUDBG_QTYPE_ETHOFLD_RXQ, out_unlock_mqprio); for (i = 0; i < s->eoqsets; i++) QDESC_GET_FLQ(&s->eohw_rxq[i].fl, - CUDBG_QTYPE_ETHOFLD_FLQ, out); + CUDBG_QTYPE_ETHOFLD_FLQ, out_unlock_mqprio); } -out_unlock: - mutex_unlock(&uld_mutex); +out_unlock_mqprio: + mutex_unlock(&padap->tc_mqprio->mqprio_mutex); out: qdesc_info->qdesc_entry_size = sizeof(*qdesc_entry); @@ -3559,6 +3565,10 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, #undef QDESC_GET return rc; + +out_unlock_uld: + mutex_unlock(&uld_mutex); + goto out; } int cudbg_collect_flash(struct cudbg_init *pdbg_init, From 4774db8dfc6a2e6649920ebb2fc8e2f062c2080d Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Fri, 23 Sep 2022 02:36:40 +0000 Subject: [PATCH 11/30] net/mlxbf_gige: Fix an IS_ERR() vs NULL bug in mlxbf_gige_mdio_probe The devm_ioremap() function returns NULL on error, it doesn't return error pointers. Fixes: 3a1a274e933f ("mlxbf_gige: compute MDIO period based on i1clk") Signed-off-by: Peng Wu Link: https://lore.kernel.org/r/20220923023640.116057-1-wupeng58@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c index 4aeb927c3715..aa780b1614a3 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c @@ -246,8 +246,8 @@ int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv) } priv->clk_io = devm_ioremap(dev, res->start, resource_size(res)); - if (IS_ERR(priv->clk_io)) - return PTR_ERR(priv->clk_io); + if (!priv->clk_io) + return -ENOMEM; mlxbf_gige_mdio_cfg(priv); From 0dc383796f9c59d7297190aa864b986eaf2f73e6 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Fri, 23 Sep 2022 17:43:20 +0800 Subject: [PATCH 12/30] net: hippi: Add missing pci_disable_device() in rr_init_one() Add missing pci_disable_device() if rr_init_one() fails Signed-off-by: ruanjinjie Link: https://lore.kernel.org/r/20220923094320.3109154-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/hippi/rrunner.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 74e845fa2e07..aa8f828a0ae7 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -213,6 +213,7 @@ static int rr_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_iounmap(pdev, rrpriv->regs); if (pdev) pci_release_regions(pdev); + pci_disable_device(pdev); out2: free_netdev(dev); out3: From 64e966d1e84b29c9fa916cfeaabbf4013703942e Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Raja Date: Thu, 8 Sep 2022 23:40:34 +0530 Subject: [PATCH 13/30] wifi: cfg80211: fix MCS divisor value The Bitrate for HE/EHT MCS6 is calculated wrongly due to the incorrect MCS divisor value for mcs6. Fix it with the proper value. previous mcs_divisor value = (11769/6144) = 1.915527 fixed mcs_divisor value = (11377/6144) = 1.851725 Fixes: 9c97c88d2f4b ("cfg80211: Add support to calculate and report 4096-QAM HE rates") Signed-off-by: Tamizh Chelvam Raja Link: https://lore.kernel.org/r/20220908181034.9936-1-quic_tamizhr@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 2c127951764a..775836f6785a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1361,7 +1361,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) 25599, /* 4.166666... */ 17067, /* 2.777777... */ 12801, /* 2.083333... */ - 11769, /* 1.851851... */ + 11377, /* 1.851725... */ 10239, /* 1.666666... */ 8532, /* 1.388888... */ 7680, /* 1.250000... */ @@ -1444,7 +1444,7 @@ static u32 cfg80211_calculate_bitrate_eht(struct rate_info *rate) 25599, /* 4.166666... */ 17067, /* 2.777777... */ 12801, /* 2.083333... */ - 11769, /* 1.851851... */ + 11377, /* 1.851725... */ 10239, /* 1.666666... */ 8532, /* 1.388888... */ 7680, /* 1.250000... */ From b7ce33df1ce2b6631234233c5367bcfe9c67fbe9 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Thu, 15 Sep 2022 14:41:20 +0200 Subject: [PATCH 14/30] wifi: mac80211: don't start TX with fq->lock to fix deadlock ieee80211_txq_purge() calls fq_tin_reset() and ieee80211_purge_tx_queue(); Both are then calling ieee80211_free_txskb(). Which can decide to TX the skb again. There are at least two ways to get a deadlock: 1) When we have a TDLS teardown packet queued in either tin or frags ieee80211_tdls_td_tx_handle() will call ieee80211_subif_start_xmit() while we still hold fq->lock. ieee80211_txq_enqueue() will thus deadlock. 2) A variant of the above happens if aggregation is up and running: In that case ieee80211_iface_work() will deadlock with the original task: The original tasks already holds fq->lock and tries to get sta->lock after kicking off ieee80211_iface_work(). But the worker can get sta->lock prior to the original task and will then spin for fq->lock. Avoid these deadlocks by not sending out any skbs when called via ieee80211_free_txskb(). Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20220915124120.301918-1-alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/status.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8e77fd2e9fdf..3f9ddd7f04b6 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -729,7 +729,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (!sdata) { skb->dev = NULL; - } else { + } else if (!dropped) { unsigned int hdr_size = ieee80211_hdrlen(hdr->frame_control); From 527008e5e87600a389feb8a57042c928ecca195d Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Thu, 15 Sep 2022 15:09:46 +0200 Subject: [PATCH 15/30] wifi: mac80211: ensure vif queues are operational after start Make sure local->queue_stop_reasons and vif.txqs_stopped stay in sync. When a new vif is created the queues may end up in an inconsistent state and be inoperable: Communication not using iTXQ will work, allowing to e.g. complete the association. But the 4-way handshake will time out. The sta will not send out any skbs queued in iTXQs. All normal attempts to start the queues will fail when reaching this state. local->queue_stop_reasons will have marked all queues as operational but vif.txqs_stopped will still be set, creating an inconsistent internal state. In reality this seems to be race between the mac80211 function ieee80211_do_open() setting SDATA_STATE_RUNNING and the wake_txqs_tasklet: Depending on the driver and the timing the queues may end up to be operational or not. Cc: stable@vger.kernel.org Fixes: f856373e2f31 ("wifi: mac80211: do not wake queues on a vif that is being stopped") Signed-off-by: Alexander Wetzel Acked-by: Felix Fietkau Link: https://lore.kernel.org/r/20220915130946.302803-1-alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/mac80211/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 53826c663723..efcefb2dd882 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -301,14 +301,14 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); + sdata->vif.txqs_stopped[ac] = false; + if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) goto out; if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; - sdata->vif.txqs_stopped[ac] = false; - list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sdata != sta->sdata) continue; From d873697ef2b7e1b6fdd8e9d449d9354bd5d29a4a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 18 Sep 2022 21:20:52 +0200 Subject: [PATCH 16/30] wifi: mac80211: fix regression with non-QoS drivers Commit 10cb8e617560 ("mac80211: enable QoS support for nl80211 ctrl port") changed ieee80211_tx_control_port() to aways call __ieee80211_select_queue() without checking local->hw.queues. __ieee80211_select_queue() returns a queue-id between 0 and 3, which means that now ieee80211_tx_control_port() may end up setting the queue-mapping for a skb to a value higher then local->hw.queues if local->hw.queues is less then 4. Specifically this is a problem for ralink rt2500-pci cards where local->hw.queues is 2. There this causes rt2x00queue_get_tx_queue() to return NULL and the following error to be logged: "ieee80211 phy0: rt2x00mac_tx: Error - Attempt to send packet over invalid queue 2", after which association with the AP fails. Other callers of __ieee80211_select_queue() skip calling it when local->hw.queues < IEEE80211_NUM_ACS, add the same check to ieee80211_tx_control_port(). This fixes ralink rt2500-pci and similar cards when less then 4 tx-queues no longer working. Fixes: 10cb8e617560 ("mac80211: enable QoS support for nl80211 ctrl port") Cc: Markus Theil Suggested-by: Stanislaw Gruszka Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220918192052.443529-1-hdegoede@redhat.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bf7fe6cd9dfc..13249e97a069 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5878,6 +5878,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, skb_reset_network_header(skb); skb_reset_mac_header(skb); + if (local->hw.queues < IEEE80211_NUM_ACS) + goto start_xmit; + /* update QoS header to prioritize control port frames if possible, * priorization also happens for control port frames send over * AF_PACKET @@ -5905,6 +5908,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, } rcu_read_unlock(); +start_xmit: /* mutex lock is only needed for incrementing the cookie counter */ mutex_lock(&local->mtx); From be92292b90bfdc31f332c962882b6d3ea0285fdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Lenkow?= Date: Mon, 19 Sep 2022 17:01:35 +0200 Subject: [PATCH 17/30] wifi: mac80211: fix memory corruption in minstrel_ht_update_rates() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During our testing of WFM200 module over SDIO on i.MX6Q-based platform, we discovered a memory corruption on the system, tracing back to the wfx driver. Using kfence, it was possible to trace it back to the root cause, which is hw->max_rates set to 8 in wfx_init_common, while the maximum defined by IEEE80211_TX_TABLE_SIZE is 4. This causes array out-of-bounds writes during updates of the rate table, as seen below: BUG: KFENCE: memory corruption in kfree_rcu_work+0x320/0x36c Corrupted memory at 0xe0a4ffe0 [ 0x03 0x03 0x03 0x03 0x01 0x00 0x00 0x02 0x02 0x02 0x09 0x00 0x21 0xbb 0xbb 0xbb ] (in kfence-#81): kfree_rcu_work+0x320/0x36c process_one_work+0x3ec/0x920 worker_thread+0x60/0x7a4 kthread+0x174/0x1b4 ret_from_fork+0x14/0x2c 0x0 kfence-#81: 0xe0a4ffc0-0xe0a4ffdf, size=32, cache=kmalloc-64 allocated by task 297 on cpu 0 at 631.039555s: minstrel_ht_update_rates+0x38/0x2b0 [mac80211] rate_control_tx_status+0xb4/0x148 [mac80211] ieee80211_tx_status_ext+0x364/0x1030 [mac80211] ieee80211_tx_status+0xe0/0x118 [mac80211] ieee80211_tasklet_handler+0xb0/0xe0 [mac80211] tasklet_action_common.constprop.0+0x11c/0x148 __do_softirq+0x1a4/0x61c irq_exit+0xcc/0x104 call_with_stack+0x18/0x20 __irq_svc+0x80/0xb0 wq_worker_sleeping+0x10/0x100 wq_worker_sleeping+0x10/0x100 schedule+0x50/0xe0 schedule_timeout+0x2e0/0x474 wait_for_completion+0xdc/0x1ec mmc_wait_for_req_done+0xc4/0xf8 mmc_io_rw_extended+0x3b4/0x4ec sdio_io_rw_ext_helper+0x290/0x384 sdio_memcpy_toio+0x30/0x38 wfx_sdio_copy_to_io+0x88/0x108 [wfx] wfx_data_write+0x88/0x1f0 [wfx] bh_work+0x1c8/0xcc0 [wfx] process_one_work+0x3ec/0x920 worker_thread+0x60/0x7a4 kthread+0x174/0x1b4 ret_from_fork+0x14/0x2c 0x0 After discussion on the wireless mailing list it was clarified that the issue has been introduced by: commit ee0e16ab756a ("mac80211: minstrel_ht: fill all requested rates") and fix shall be in minstrel_ht_update_rates in rc80211_minstrel_ht.c. Fixes: ee0e16ab756a ("mac80211: minstrel_ht: fill all requested rates") Link: https://lore.kernel.org/all/12e5adcd-8aed-f0f7-70cc-4fb7b656b829@camlingroup.com/ Link: https://lore.kernel.org/linux-wireless/20220915131445.30600-1-lech.perczak@camlingroup.com/ Cc: Jérôme Pouiller Cc: Johannes Berg Cc: Peter Seiderer Cc: Kalle Valo Cc: Krzysztof Drobiński , Signed-off-by: Paweł Lenkow Signed-off-by: Lech Perczak Reviewed-by: Peter Seiderer Reviewed-by: Jérôme Pouiller Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5f27e6746762..788a82f9c74d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "rate.h" #include "sta_info.h" @@ -1550,6 +1551,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; int i = 0; + int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE); rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) @@ -1559,10 +1561,10 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); /* Fill up remaining, keep one entry for max_probe_rate */ - for (; i < (mp->hw->max_rates - 1); i++) + for (; i < (max_rates - 1); i++) minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); - if (i < mp->hw->max_rates) + if (i < max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); if (i < IEEE80211_TX_RATE_TABLE_SIZE) From 883b8dc1a8766464d5bde4d97e1d7c795d990d31 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Sat, 24 Sep 2022 15:40:41 -0300 Subject: [PATCH 18/30] wifi: mac80211: mlme: Fix missing unlock on beacon RX Commit 98b0b467466c ("wifi: mac80211: mlme: use correct link_sta") switched to link station instead of deflink and added some checks to do that, which are done with the 'sta_mtx' mutex held. However, the error path of these checks does not unlock 'sta_mtx' before returning. Fixes: 98b0b467466c ("wifi: mac80211: mlme: use correct link_sta") Signed-off-by: Rafael Mendonca Link: https://lore.kernel.org/r/20220924184042.778676-1-rafaelmendsr@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5265d2b6db12..c0fbffd9b153 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5589,12 +5589,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); - if (WARN_ON(!sta)) + if (WARN_ON(!sta)) { + mutex_unlock(&local->sta_mtx); goto free; + } link_sta = rcu_dereference_protected(sta->link[link->link_id], lockdep_is_held(&local->sta_mtx)); - if (WARN_ON(!link_sta)) + if (WARN_ON(!link_sta)) { + mutex_unlock(&local->sta_mtx); goto free; + } changed |= ieee80211_recalc_twt_req(link, link_sta, elems); From 6546646a7fb0d7fe1caef947889497c16aaecc8c Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Sun, 25 Sep 2022 11:34:19 -0300 Subject: [PATCH 19/30] wifi: mac80211: mlme: Fix double unlock on assoc success handling Commit 6911458dc428 ("wifi: mac80211: mlme: refactor assoc success handling") moved the per-link setup out of ieee80211_assoc_success() into a new function ieee80211_assoc_config_link() but missed to remove the unlock of 'sta_mtx' in case of HE capability/operation missing on HE AP, which leads to a double unlock: ieee80211_assoc_success() { ... ieee80211_assoc_config_link() { ... if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && (!elems->he_cap || !elems->he_operation)) { mutex_unlock(&sdata->local->sta_mtx); ... } ... } ... mutex_unlock(&sdata->local->sta_mtx); ... } Fixes: 6911458dc428 ("wifi: mac80211: mlme: refactor assoc success handling") Signed-off-by: Rafael Mendonca Link: https://lore.kernel.org/r/20220925143420.784975-1-rafaelmendsr@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c0fbffd9b153..fc764984d687 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4040,7 +4040,6 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && (!elems->he_cap || !elems->he_operation)) { - mutex_unlock(&sdata->local->sta_mtx); sdata_info(sdata, "HE AP is missing HE capability/operation\n"); ret = false; From 49725ffc15fc4e9fae68c55b691fd25168cbe5c1 Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Fri, 23 Sep 2022 13:04:48 +0800 Subject: [PATCH 20/30] net: stmmac: power up/down serdes in stmmac_open/release This commit fixes DMA engine reset timeout issue in suspend/resume with ADLink I-Pi SMARC Plus board which dmesg shows: ... [ 54.678271] PM: suspend exit [ 54.754066] intel-eth-pci 0000:00:1d.2 enp0s29f2: PHY [stmmac-3:01] driver [Maxlinear Ethernet GPY215B] (irq=POLL) [ 54.755808] intel-eth-pci 0000:00:1d.2 enp0s29f2: Register MEM_TYPE_PAGE_POOL RxQ-0 ... [ 54.780482] intel-eth-pci 0000:00:1d.2 enp0s29f2: Register MEM_TYPE_PAGE_POOL RxQ-7 [ 55.784098] intel-eth-pci 0000:00:1d.2: Failed to reset the dma [ 55.784111] intel-eth-pci 0000:00:1d.2 enp0s29f2: stmmac_hw_setup: DMA engine initialization failed [ 55.784115] intel-eth-pci 0000:00:1d.2 enp0s29f2: stmmac_open: Hw setup failed ... The issue is related with serdes which impacts clock. There is serdes in ADLink I-Pi SMARC board ethernet controller. Please refer to commit b9663b7ca6ff78 ("net: stmmac: Enable SERDES power up/down sequence") for detial. When issue is reproduced, DMA engine clock is not ready because serdes is not powered up. To reproduce DMA engine reset timeout issue with hardware which has serdes in GBE controller, install Ubuntu. In Ubuntu GUI, click "Power Off/Log Out" -> "Suspend" menu, it disables network interface, then goes to sleep mode. When it wakes up, it enables network interface again. Stmmac driver is called in this way: 1. stmmac_release: Stop network interface. In this function, it disables DMA engine and network interface; 2. stmmac_suspend: It is called in kernel suspend flow. But because network interface has been disabled(netif_running(ndev) is false), it does nothing and returns directly; 3. System goes into S3 or S0ix state. Some time later, system is waken up by keyboard or mouse; 4. stmmac_resume: It does nothing because network interface has been disabled; 5. stmmac_open: It is called to enable network interace again. DMA engine is initialized in this API, but serdes is not power on so there will be DMA engine reset timeout issue. Similarly, serdes powerdown should be added in stmmac_release. Network interface might be disabled by cmd "ifconfig eth0 down", DMA engine, phy and mac have been disabled in ndo_stop callback, serdes should be powered down as well. It doesn't make sense that serdes is on while other components have been turned off. If ethernet interface is in enabled state(netif_running(ndev) is true) before suspend/resume, the issue couldn't be reproduced because serdes could be powered up in stmmac_resume. Because serdes_powerup is added in stmmac_open, it doesn't need to be called in probe function. Fixes: b9663b7ca6ff78 ("net: stmmac: Enable SERDES power up/down sequence") Signed-off-by: Junxiao Chang Reviewed-by: Voon Weifeng Tested-by: Jimmy JS Chen Tested-by: Looi, Hong Aun Link: https://lore.kernel.org/r/20220923050448.1220250-1-junxiao.chang@intel.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 592d29abcb1c..9083159b93f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3801,6 +3801,15 @@ static int __stmmac_open(struct net_device *dev, stmmac_reset_queues_param(priv); + if (priv->plat->serdes_powerup) { + ret = priv->plat->serdes_powerup(dev, priv->plat->bsp_priv); + if (ret < 0) { + netdev_err(priv->dev, "%s: Serdes powerup failed\n", + __func__); + goto init_error; + } + } + ret = stmmac_hw_setup(dev, true); if (ret < 0) { netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); @@ -3904,6 +3913,10 @@ static int stmmac_release(struct net_device *dev) /* Disable the MAC Rx/Tx */ stmmac_mac_set(priv, priv->ioaddr, false); + /* Powerdown Serdes if there is */ + if (priv->plat->serdes_powerdown) + priv->plat->serdes_powerdown(dev, priv->plat->bsp_priv); + netif_carrier_off(dev); stmmac_release_ptp(priv); @@ -7293,14 +7306,6 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; } - if (priv->plat->serdes_powerup) { - ret = priv->plat->serdes_powerup(ndev, - priv->plat->bsp_priv); - - if (ret < 0) - goto error_serdes_powerup; - } - #ifdef CONFIG_DEBUG_FS stmmac_init_fs(ndev); #endif @@ -7315,8 +7320,6 @@ int stmmac_dvr_probe(struct device *device, return ret; -error_serdes_powerup: - unregister_netdev(ndev); error_netdev_register: phylink_destroy(priv->phylink); error_xpcs_setup: From ea64cdfad124922c931633e39287c5a31a9b14a1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 23 Sep 2022 06:09:52 +0200 Subject: [PATCH 21/30] net: phy: Don't WARN for PHY_UP state in mdio_bus_phy_resume() Commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") introduced a WARN() on resume from system sleep if a PHY is not in PHY_HALTED state. Commit 6dbe852c379f ("net: phy: Don't WARN for PHY_READY state in mdio_bus_phy_resume()") added an exemption for PHY_READY state from the WARN(). It turns out PHY_UP state needs to be exempted as well because the following may happen on suspend: mdio_bus_phy_suspend() phy_stop_machine() phydev->state = PHY_UP # if (phydev->state >= PHY_UP) Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Link: https://lore.kernel.org/netdev/2b1a1588-505e-dff3-301d-bfc1fb14d685@samsung.com/ Signed-off-by: Lukas Wunner Acked-by: Florian Fainelli Cc: Xiaolei Wang Link: https://lore.kernel.org/r/8128fdb51eeebc9efbf3776a4097363a1317aaf1.1663905575.git.lukas@wunner.de Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 12ff276b80ae..4df8c337221b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -316,11 +316,13 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) phydev->suspended_by_mdio_bus = 0; - /* If we manged to get here with the PHY state machine in a state neither - * PHY_HALTED nor PHY_READY this is an indication that something went wrong - * and we should most likely be using MAC managed PM and we are not. + /* If we managed to get here with the PHY state machine in a state + * neither PHY_HALTED, PHY_READY nor PHY_UP, this is an indication + * that something went wrong and we should most likely be using + * MAC managed PM, but we are not. */ - WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY); + WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY && + phydev->state != PHY_UP); ret = phy_init_hw(phydev); if (ret < 0) From bc7a319844891746135dc1f34ab9df78d636a3ac Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 23 Sep 2022 15:02:37 +0800 Subject: [PATCH 22/30] selftests: Fix the if conditions of in test_extra_filter() The socket 2 bind the addr in use, bind should fail with EADDRINUSE. So if bind success or errno != EADDRINUSE, testcase should be failed. Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1663916557-10730-1-git-send-email-wangyufen@huawei.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/reuseport_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 072d709c96b4..65aea27d761c 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -328,7 +328,7 @@ static void test_extra_filter(const struct test_params p) if (bind(fd1, addr, sockaddr_size())) error(1, errno, "failed to bind recv socket 1"); - if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE) + if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE) error(1, errno, "bind socket 2 should fail with EADDRINUSE"); free(addr); From 797666cd5af041ffb66642fff62f7389f08566a2 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 26 Sep 2022 17:07:40 +0200 Subject: [PATCH 23/30] net: usb: qmi_wwan: Add new usb-id for Dell branded EM7455 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Dell 5811e (EM7455) with USB-id 0x413c:0x81c2. Signed-off-by: Frank Wunderlich Cc: stable@vger.kernel.org Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20220926150740.6684-3-linux@fw-web.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 0cb187def5bc..26c34a7c21bd 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1402,6 +1402,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81c2, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81cc, 8)}, /* Dell Wireless 5816e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ From 29322791bc8b4f42fc65734840826e3ddc30921e Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 1 Sep 2022 12:40:39 +0200 Subject: [PATCH 24/30] ice: xsk: change batched Tx descriptor cleaning AF_XDP Tx descriptor cleaning in ice driver currently works in a "lazy" way - descriptors are not cleaned immediately after send. We rather hold on with cleaning until we see that free space in ring drops below particular threshold. This was supposed to reduce the amount of unnecessary work related to cleaning and instead of keeping the ring empty, ring was rather saturated. In AF_XDP realm cleaning Tx descriptors implies producing them to CQ. This is a way of letting know user space that particular descriptor has been sent, as John points out in [0]. We tried to implement serial descriptor cleaning which would be used in conjunction with batched cleaning but it made code base more convoluted and probably harder to maintain in future. Therefore we step away from batched cleaning in a current form in favor of an approach where we set RS bit on every last descriptor from a batch and clean always at the beginning of ice_xmit_zc(). This means that we give up a bit of Tx performance, but this doesn't hurt l2fwd scenario which is way more meaningful than txonly as this can be treaten as AF_XDP based packet generator. l2fwd is not hurt due to the fact that Tx side is much faster than Rx and Rx is the one that has to catch Tx up. FWIW Tx descriptors are still produced in a batched way. [0]: https://lore.kernel.org/bpf/62b0a20232920_3573208ab@john.notmuch/ Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 145 +++++++++------------- drivers/net/ethernet/intel/ice/ice_xsk.h | 7 +- 3 files changed, 65 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 97453d1dfafe..dd2285d4bef4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1467,7 +1467,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); + wd = ice_xmit_zc(tx_ring); else if (ice_ring_is_xdp(tx_ring)) wd = true; else diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 03ce85f6e6df..8833b66b4e54 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -788,69 +788,57 @@ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) } /** - * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring - * @xdp_ring: XDP ring to clean - * @napi_budget: amount of descriptors that NAPI allows us to clean - * - * Returns count of cleaned descriptors + * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ + * @xdp_ring: XDP Tx ring */ -static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) +static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); - int budget = napi_budget / tx_thresh; - u16 next_dd = xdp_ring->next_dd; - u16 ntc, cleared_dds = 0; + u16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + u16 cnt = xdp_ring->count; + struct ice_tx_buf *tx_buf; + u16 xsk_frames = 0; + u16 last_rs; + int i; - do { - struct ice_tx_desc *next_dd_desc; - u16 desc_cnt = xdp_ring->count; - struct ice_tx_buf *tx_buf; - u32 xsk_frames; - u16 i; + last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1; + tx_desc = ICE_TX_DESC(xdp_ring, last_rs); + if ((tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { + if (last_rs >= ntc) + xsk_frames = last_rs - ntc + 1; + else + xsk_frames = last_rs + cnt - ntc + 1; + } - next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); - if (!(next_dd_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + if (!xsk_frames) + return; - cleared_dds++; - xsk_frames = 0; - if (likely(!xdp_ring->xdp_tx_active)) { - xsk_frames = tx_thresh; - goto skip; + if (likely(!xdp_ring->xdp_tx_active)) + goto skip; + + ntc = xdp_ring->next_to_clean; + for (i = 0; i < xsk_frames; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; + + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; } - ntc = xdp_ring->next_to_clean; - - for (i = 0; i < tx_thresh; i++) { - tx_buf = &xdp_ring->tx_buf[ntc]; - - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } - - ntc++; - if (ntc >= xdp_ring->count) - ntc = 0; - } + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } skip: - xdp_ring->next_to_clean += tx_thresh; - if (xdp_ring->next_to_clean >= desc_cnt) - xdp_ring->next_to_clean -= desc_cnt; - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); - next_dd_desc->cmd_type_offset_bsz = 0; - next_dd = next_dd + tx_thresh; - if (next_dd >= desc_cnt) - next_dd = tx_thresh - 1; - } while (--budget); - - xdp_ring->next_dd = next_dd; - - return cleared_dds * tx_thresh; + tx_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_to_clean += xsk_frames; + if (xdp_ring->next_to_clean >= cnt) + xdp_ring->next_to_clean -= cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } /** @@ -885,7 +873,6 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, unsigned int *total_bytes) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; u32 i; @@ -905,13 +892,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de } xdp_ring->next_to_use = ntu; - - if (xdp_ring->next_to_use > xdp_ring->next_rs) { - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += tx_thresh; - } } /** @@ -924,7 +904,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts, unsigned int *total_bytes) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); @@ -933,54 +912,54 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); for (; i < batched + leftover; i++) ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); +} - if (xdp_ring->next_to_use > xdp_ring->next_rs) { - struct ice_tx_desc *tx_desc; +/** + * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + */ +static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring) +{ + u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1; + struct ice_tx_desc *tx_desc; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += tx_thresh; - } + tx_desc = ICE_TX_DESC(xdp_ring, ntu); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on - * @budget: number of free descriptors on HW Tx ring that can be used - * @napi_budget: amount of descriptors that NAPI allows us to clean * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) { struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; + int budget; - if (budget < tx_thresh) - budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + ice_clean_xdp_irq_zc(xdp_ring); + + budget = ICE_DESC_UNUSED(xdp_ring); + budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { - struct ice_tx_desc *tx_desc; - nb_processed = xdp_ring->count - xdp_ring->next_to_use; ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = tx_thresh - 1; xdp_ring->next_to_use = 0; } ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, &total_bytes); + ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4edbe81eb646..6fa181f080ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -26,13 +26,10 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool -ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, - u32 __always_unused budget, - int __always_unused napi_budget) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) { return false; } From b3056ae2b57858b02b376b3fed6077040caf14b4 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 1 Sep 2022 12:40:40 +0200 Subject: [PATCH 25/30] ice: xsk: drop power of 2 ring size restriction for AF_XDP We had multiple customers in the past months that reported commit 296f13ff3854 ("ice: xsk: Force rings to be sized to power of 2") makes them unable to use ring size of 8160 in conjunction with AF_XDP. Remove this restriction. Fixes: 296f13ff3854 ("ice: xsk: Force rings to be sized to power of 2") CC: Alasdair McWilliam Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8833b66b4e54..056c904b83cc 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -392,13 +392,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) goto failure; } - if (!is_power_of_2(vsi->rx_rings[qid]->count) || - !is_power_of_2(vsi->tx_rings[qid]->count)) { - netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); - pool_failure = -EINVAL; - goto failure; - } - if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); if (if_running) { @@ -534,11 +527,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); - u16 batched, leftover, i, tail_bumps; + u16 leftover, i, tail_bumps; - batched = ALIGN_DOWN(count, rx_thresh); - tail_bumps = batched / rx_thresh; - leftover = count & (rx_thresh - 1); + tail_bumps = count / rx_thresh; + leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) @@ -1037,14 +1029,16 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) */ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { - u16 count_mask = rx_ring->count - 1; u16 ntc = rx_ring->next_to_clean; u16 ntu = rx_ring->next_to_use; - for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + while (ntc != ntu) { struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc); xsk_buff_free(xdp); + ntc++; + if (ntc >= rx_ring->count) + ntc = 0; } } From 81d192c2ce74157e717e1fc4b68791f82f7499d4 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 23 Sep 2022 13:42:23 +0200 Subject: [PATCH 26/30] can: c_can: don't cache TX messages for C_CAN cores As Jacob noticed, the optimization introduced in 387da6bc7a82 ("can: c_can: cache frames to operate as a true FIFO") doesn't properly work on C_CAN, but on D_CAN IP cores. The exact reasons are still unknown. For now disable caching if CAN frames in the TX path for C_CAN cores. Fixes: 387da6bc7a82 ("can: c_can: cache frames to operate as a true FIFO") Link: https://lore.kernel.org/all/20220928083354.1062321-1-mkl@pengutronix.de Link: https://lore.kernel.org/all/15a8084b-9617-2da1-6704-d7e39d60643b@gmail.com Reported-by: Jacob Kroon Tested-by: Jacob Kroon Cc: stable@vger.kernel.org # v5.15 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.h | 17 +++++++++++++++-- drivers/net/can/c_can/c_can_main.c | 11 +++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index f23a03300a81..029cd8194ed5 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -235,9 +235,22 @@ static inline u8 c_can_get_tx_tail(const struct c_can_tx_ring *ring) return ring->tail & (ring->obj_num - 1); } -static inline u8 c_can_get_tx_free(const struct c_can_tx_ring *ring) +static inline u8 c_can_get_tx_free(const struct c_can_priv *priv, + const struct c_can_tx_ring *ring) { - return ring->obj_num - (ring->head - ring->tail); + u8 head = c_can_get_tx_head(ring); + u8 tail = c_can_get_tx_tail(ring); + + if (priv->type == BOSCH_D_CAN) + return ring->obj_num - (ring->head - ring->tail); + + /* This is not a FIFO. C/D_CAN sends out the buffers + * prioritized. The lowest buffer number wins. + */ + if (head < tail) + return 0; + + return ring->obj_num - head; } #endif /* C_CAN_H */ diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index dc8132862f33..d6605dbb7737 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -429,7 +429,7 @@ static void c_can_setup_receive_object(struct net_device *dev, int iface, static bool c_can_tx_busy(const struct c_can_priv *priv, const struct c_can_tx_ring *tx_ring) { - if (c_can_get_tx_free(tx_ring) > 0) + if (c_can_get_tx_free(priv, tx_ring) > 0) return false; netif_stop_queue(priv->dev); @@ -437,7 +437,7 @@ static bool c_can_tx_busy(const struct c_can_priv *priv, /* Memory barrier before checking tx_free (head and tail) */ smp_mb(); - if (c_can_get_tx_free(tx_ring) == 0) { + if (c_can_get_tx_free(priv, tx_ring) == 0) { netdev_dbg(priv->dev, "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n", tx_ring->head, tx_ring->tail, @@ -465,7 +465,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, idx = c_can_get_tx_head(tx_ring); tx_ring->head++; - if (c_can_get_tx_free(tx_ring) == 0) + if (c_can_get_tx_free(priv, tx_ring) == 0) netif_stop_queue(dev); if (idx < c_can_get_tx_tail(tx_ring)) @@ -748,7 +748,7 @@ static void c_can_do_tx(struct net_device *dev) return; tx_ring->tail += pkts; - if (c_can_get_tx_free(tx_ring)) { + if (c_can_get_tx_free(priv, tx_ring)) { /* Make sure that anybody stopping the queue after * this sees the new tx_ring->tail. */ @@ -760,8 +760,7 @@ static void c_can_do_tx(struct net_device *dev) stats->tx_packets += pkts; tail = c_can_get_tx_tail(tx_ring); - - if (tail == 0) { + if (priv->type == BOSCH_D_CAN && tail == 0) { u8 head = c_can_get_tx_head(tx_ring); /* Start transmission for all cached messages */ From 276d37eb449133bc22872b8f0a6f878e120deeff Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Sep 2022 15:20:42 +0300 Subject: [PATCH 27/30] net: mscc: ocelot: fix tagged VLAN refusal while under a VLAN-unaware bridge Currently the following set of commands fails: $ ip link add br0 type bridge # vlan_filtering 0 $ ip link set swp0 master br0 $ bridge vlan port vlan-id swp0 1 PVID Egress Untagged $ bridge vlan add dev swp0 vid 10 Error: mscc_ocelot_switch_lib: Port with more than one egress-untagged VLAN cannot have egress-tagged VLANs. Dumping ocelot->vlans, one can see that the 2 egress-untagged VLANs on swp0 are vid 1 (the bridge PVID) and vid 4094, a PVID used privately by the driver for VLAN-unaware bridging. So this is why bridge vid 10 is refused, despite 'bridge vlan' showing a single egress untagged VLAN. As mentioned in the comment added, having this private VLAN does not impose restrictions to the hardware configuration, yet it is a bookkeeping problem. There are 2 possible solutions. One is to make the functions that operate on VLAN-unaware pvids: - ocelot_add_vlan_unaware_pvid() - ocelot_del_vlan_unaware_pvid() - ocelot_port_setup_dsa_8021q_cpu() - ocelot_port_teardown_dsa_8021q_cpu() call something different than ocelot_vlan_member_(add|del)(), the latter being the real problem, because it allocates a struct ocelot_bridge_vlan *vlan which it adds to ocelot->vlans. We don't really *need* the private VLANs in ocelot->vlans, it's just that we have the extra convenience of having the vlan->portmask cached in software (whereas without these structures, we'd have to create a raw ocelot_vlant_rmw_mask() procedure which reads back the current port mask from hardware). The other solution is to filter out the private VLANs from ocelot_port_num_untagged_vlans(), since they aren't what callers care about. We only need to do this to the mentioned function and not to ocelot_port_num_tagged_vlans(), because private VLANs are never egress-tagged. Nothing else seems to be broken in either solution, but the first one requires more rework which will conflict with the net-next change 36a0bf443585 ("net: mscc: ocelot: set up tag_8021q CPU ports independent of user port affinity"), and I'd like to avoid that. So go with the other one. Fixes: 54c319846086 ("net: mscc: ocelot: enforce FDB isolation when VLAN-unaware") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220927122042.1100231-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 306026e6aa11..8f8005bc6eea 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -290,6 +290,13 @@ static int ocelot_port_num_untagged_vlans(struct ocelot *ocelot, int port) if (!(vlan->portmask & BIT(port))) continue; + /* Ignore the VLAN added by ocelot_add_vlan_unaware_pvid(), + * because this is never active in hardware at the same time as + * the bridge VLANs, which only matter in VLAN-aware mode. + */ + if (vlan->vid >= OCELOT_RSV_VLAN_RANGE_START) + continue; + if (vlan->untagged & BIT(port)) num_untagged++; } From c9da02bfb1112461e048d3b736afb1873f6f4ccf Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 27 Sep 2022 16:30:02 +0100 Subject: [PATCH 28/30] net: ethernet: mtk_eth_soc: fix mask of RX_DMA_GET_SPORT{,_V2} The bitmasks applied in RX_DMA_GET_SPORT and RX_DMA_GET_SPORT_V2 macros were swapped. Fix that. Reported-by: Chen Minqiang Fixes: 160d3a9b192985 ("net: ethernet: mtk_eth_soc: introduce MTK_NETSYS_V2 support") Acked-by: Lorenzo Bianconi Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/YzMW+mg9UsaCdKRQ@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index ecf85e9ed824..0f9668a4079d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -319,8 +319,8 @@ #define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18) #define MTK_RXD5_SRC_PORT GENMASK(29, 26) -#define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0xf) -#define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0x7) +#define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0x7) +#define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0xf) /* PDMA V2 descriptor rxd3 */ #define RX_DMA_VTAG_V2 BIT(0) From 26d3e21ce1aab6cb19069c510fac8e7474445b18 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 27 Sep 2022 12:31:57 -0700 Subject: [PATCH 29/30] mptcp: factor out __mptcp_close() without socket lock Factor out __mptcp_close() from mptcp_close(). The caller of __mptcp_close() should hold the socket lock, and cancel mptcp work when __mptcp_close() returns true. This function will be used in the next commit. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Fixes: 6aeed9045071 ("mptcp: fix race on unaccepted mptcp sockets") Cc: stable@vger.kernel.org Reviewed-by: Jiang Biao Reviewed-by: Mengen Sun Acked-by: Paolo Abeni Signed-off-by: Menglong Dong Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 14 ++++++++++++-- net/mptcp/protocol.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 969b33a9dd64..f7690414320a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2802,13 +2802,12 @@ static void __mptcp_destroy_sock(struct sock *sk) sock_put(sk); } -static void mptcp_close(struct sock *sk, long timeout) +bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; - lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { @@ -2850,6 +2849,17 @@ static void mptcp_close(struct sock *sk, long timeout) } else { mptcp_reset_timeout(msk, 0); } + + return do_cancel_work; +} + +static void mptcp_close(struct sock *sk, long timeout) +{ + bool do_cancel_work; + + lock_sock(sk); + + do_cancel_work = __mptcp_close(sk, timeout); release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 132d50833df1..8f123d450c76 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -612,6 +612,7 @@ void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +bool __mptcp_close(struct sock *sk, long timeout); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); From 30e51b923e436b631e8d5b77fa5e318c6b066dc7 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 27 Sep 2022 12:31:58 -0700 Subject: [PATCH 30/30] mptcp: fix unreleased socket in accept queue The mptcp socket and its subflow sockets in accept queue can't be released after the process exit. While the release of a mptcp socket in listening state, the corresponding tcp socket will be released too. Meanwhile, the tcp socket in the unaccept queue will be released too. However, only init subflow is in the unaccept queue, and the joined subflow is not in the unaccept queue, which makes the joined subflow won't be released, and therefore the corresponding unaccepted mptcp socket will not be released to. This can be reproduced easily with following steps: 1. create 2 namespace and veth: $ ip netns add mptcp-client $ ip netns add mptcp-server $ sysctl -w net.ipv4.conf.all.rp_filter=0 $ ip netns exec mptcp-client sysctl -w net.mptcp.enabled=1 $ ip netns exec mptcp-server sysctl -w net.mptcp.enabled=1 $ ip link add red-client netns mptcp-client type veth peer red-server \ netns mptcp-server $ ip -n mptcp-server address add 10.0.0.1/24 dev red-server $ ip -n mptcp-server address add 192.168.0.1/24 dev red-server $ ip -n mptcp-client address add 10.0.0.2/24 dev red-client $ ip -n mptcp-client address add 192.168.0.2/24 dev red-client $ ip -n mptcp-server link set red-server up $ ip -n mptcp-client link set red-client up 2. configure the endpoint and limit for client and server: $ ip -n mptcp-server mptcp endpoint flush $ ip -n mptcp-server mptcp limits set subflow 2 add_addr_accepted 2 $ ip -n mptcp-client mptcp endpoint flush $ ip -n mptcp-client mptcp limits set subflow 2 add_addr_accepted 2 $ ip -n mptcp-client mptcp endpoint add 192.168.0.2 dev red-client id \ 1 subflow 3. listen and accept on a port, such as 9999. The nc command we used here is modified, which makes it use mptcp protocol by default. $ ip netns exec mptcp-server nc -l -k -p 9999 4. open another *two* terminal and use each of them to connect to the server with the following command: $ ip netns exec mptcp-client nc 10.0.0.1 9999 Input something after connect to trigger the connection of the second subflow. So that there are two established mptcp connections, with the second one still unaccepted. 5. exit all the nc command, and check the tcp socket in server namespace. And you will find that there is one tcp socket in CLOSE_WAIT state and can't release forever. Fix this by closing all of the unaccepted mptcp socket in mptcp_subflow_queue_clean() with __mptcp_close(). Now, we can ensure that all unaccepted mptcp sockets will be cleaned by __mptcp_close() before they are released, so mptcp_sock_destruct(), which is used to clean the unaccepted mptcp socket, is not needed anymore. The selftests for mptcp is ran for this commit, and no new failures. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Fixes: 6aeed9045071 ("mptcp: fix race on unaccepted mptcp sockets") Cc: stable@vger.kernel.org Reviewed-by: Jiang Biao Reviewed-by: Mengen Sun Acked-by: Paolo Abeni Signed-off-by: Menglong Dong Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 33 +++++++-------------------------- 3 files changed, 9 insertions(+), 27 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f7690414320a..f8897a70c11d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2662,7 +2662,7 @@ static void __mptcp_clear_xmit(struct sock *sk) dfrag_clear(sk, dfrag); } -static void mptcp_cancel_work(struct sock *sk) +void mptcp_cancel_work(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 8f123d450c76..8f372b8f059c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -613,6 +613,7 @@ void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); +void mptcp_cancel_work(struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c7d49fb6e7bd..07dd23d0fe04 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -602,30 +602,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } -static void mptcp_sock_destruct(struct sock *sk) -{ - /* if new mptcp socket isn't accepted, it is free'd - * from the tcp listener sockets request queue, linked - * from req->sk. The tcp socket is released. - * This calls the ULP release function which will - * also remove the mptcp socket, via - * sock_put(ctx->conn). - * - * Problem is that the mptcp socket will be in - * ESTABLISHED state and will not have the SOCK_DEAD flag. - * Both result in warnings from inet_sock_destruct. - */ - if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { - sk->sk_state = TCP_CLOSE; - WARN_ON_ONCE(sk->sk_socket); - sock_orphan(sk); - } - - /* We don't need to clear msk->subflow, as it's still NULL at this point */ - mptcp_destroy_common(mptcp_sk(sk), 0); - inet_sock_destruct(sk); -} - static void mptcp_force_close(struct sock *sk) { /* the msk is not yet exposed to user-space */ @@ -768,7 +744,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* new mpc subflow takes ownership of the newly * created mptcp socket */ - new_msk->sk_destruct = mptcp_sock_destruct; mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); @@ -1763,13 +1738,19 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow; + bool slow, do_cancel_work; + sock_hold(sk); slow = lock_sock_fast_nested(sk); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; + + do_cancel_work = __mptcp_close(sk, 0); unlock_sock_fast(sk, slow); + if (do_cancel_work) + mptcp_cancel_work(sk); + sock_put(sk); } /* we are still under the listener msk socket lock */