From d92e9ea2f0f918d7b01cbacb838288bffccc8954 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 4 Sep 2024 11:26:55 -0700 Subject: [PATCH 001/281] arm64: dts: qcom: msm8939: revert use of APCS mbox for RPM Commit 22e4e43484c4 ("arm64: dts: qcom: msm8939: Use mboxes properties for APCS") broke the boot on msm8939 platforms. The issue comes from the SMD driver failing to request the mbox channel because of circular dependencies: 1. rpm -> apcs1_mbox -> rpmcc (RPM_SMD_XO_CLK_SRC) -> rpm. 2. rpm -> apcs1_mbox -> gcc -> rpmcc (RPM_SMD_XO_CLK_SRC) -> rpm 3. rpm -> apcs1_mbox -> apcs2 -> gcc -> rpmcc (RPM_SMD_XO_CLK_SRC) -> rpm To fix this issue let's switch back to using the deprecated qcom,ipc property for the RPM node. Fixes: 22e4e43484c4 ("arm64: dts: qcom: msm8939: Use mboxes properties for APCS") Signed-off-by: Fabien Parent Link: https://lore.kernel.org/r/20240904-msm8939-rpm-apcs-fix-v1-1-b608e7e48fe1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/msm8939.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8939.dtsi b/arch/arm64/boot/dts/qcom/msm8939.dtsi index 46d9480cd464..39405713329b 100644 --- a/arch/arm64/boot/dts/qcom/msm8939.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8939.dtsi @@ -248,7 +248,7 @@ smd-edge { interrupts = ; - mboxes = <&apcs1_mbox 0>; + qcom,ipc = <&apcs1_mbox 8 0>; qcom,smd-edge = <15>; rpm_requests: rpm-requests { From 2f39bba3b4f037d6c3c9174eed5befcef1c79abb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Sep 2024 15:48:40 +0200 Subject: [PATCH 002/281] arm64: dts: rockchip: Fix rt5651 compatible value on rk3399-eaidk-610 There are no DT bindings and driver support for a "rockchip,rt5651" codec. Replace "rockchip,rt5651" by "realtek,rt5651", which matches the "simple-audio-card,name" property in the "rt5651-sound" node. Fixes: 904f983256fdd24b ("arm64: dts: rockchip: Add dts for a rk3399 based board EAIDK-610") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/a9877b8b1bd0de279d2ec8294d5be14587203a82.1727358193.git.geert+renesas@glider.be Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts index 1489eb32e266..4feb78797982 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts @@ -541,7 +541,7 @@ status = "okay"; rt5651: audio-codec@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; From 577b5761679da90e691acc939ebbe7879fff5f31 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Sep 2024 15:48:41 +0200 Subject: [PATCH 003/281] arm64: dts: rockchip: Fix rt5651 compatible value on rk3399-sapphire-excavator There are no DT bindings and driver support for a "rockchip,rt5651" codec. Replace "rockchip,rt5651" by "realtek,rt5651", which matches the "simple-audio-card,name" property in the "rt5651-sound" node. Fixes: 0a3c78e251b3a266 ("arm64: dts: rockchip: Add support for rk3399 excavator main board") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/abc6c89811b3911785601d6d590483eacb145102.1727358193.git.geert+renesas@glider.be Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index dbec2b7173a0..31ea3d0182c0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -163,7 +163,7 @@ status = "okay"; rt5651: rt5651@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; From df5f6f2f62b9b50cef78f32909485b00fc7cf7f2 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Thu, 26 Sep 2024 12:29:13 +0200 Subject: [PATCH 004/281] arm64: dts: rockchip: Move L3 cache outside CPUs in RK3588(S) SoC dtsi Move the "l3_cache" node outside the "cpus" node in the base dtsi file for Rockchip RK3588(S) SoCs. The A55 and A76 CPU cores in these SoCs belong to the ARM DynamIQ IP core lineup, which places the L3 cache outside the CPUs and into the DynamIQ Shared Unit (DSU). [1] Thus, moving the L3 cache DT node one level higher in the DT improves the way the physical topology of the RK3588(S) SoCs is represented in the SoC dtsi files. While there, add a comment that explains it briefly, to save curious readers from the need to reference the repository log for a clarification. [1] ARM DynamIQ Shared Unit revision r4p0 TRM, version 0400-02 Fixes: c9211fa2602b ("arm64: dts: rockchip: Add base DT for rk3588 SoC") Helped-by: Robin Murphy Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/84264d0713fb51ae2b9b731e28fc14681beea853.1727345965.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index d97d84b88837..fc67585b64b7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -337,15 +337,19 @@ cache-unified; next-level-cache = <&l3_cache>; }; + }; - l3_cache: l3-cache { - compatible = "cache"; - cache-size = <3145728>; - cache-line-size = <64>; - cache-sets = <4096>; - cache-level = <3>; - cache-unified; - }; + /* + * The L3 cache belongs to the DynamIQ Shared Unit (DSU), + * so it's represented here, outside the "cpus" node + */ + l3_cache: l3-cache { + compatible = "cache"; + cache-size = <3145728>; + cache-line-size = <64>; + cache-sets = <4096>; + cache-level = <3>; + cache-unified; }; display_subsystem: display-subsystem { From 6be82067254cba14f7b9ca00613bdb7caac9501f Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sat, 21 Sep 2024 23:39:05 +0200 Subject: [PATCH 005/281] arm64: dts: rockchip: Start cooling maps numbering from zero on ROCK 5B The package cooling maps for the Radxa ROCK 5B were mistakenly named map1 and map2. Their numbering should start from zero instead, because there are no package cooling maps defined in the parent RK3588 SoC dtsi file, so let's rename these cooling maps to map0 and map1. Fixes: 4a152231b050 ("arm64: dts: rockchip: enable automatic fan control on Rock 5B") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/335ecd5841ab55f333e17bb391d0e1264fac257b.1726954592.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index 966bbc582d89..6bd06e46a101 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -304,12 +304,12 @@ }; cooling-maps { - map1 { + map0 { trip = <&package_fan0>; cooling-device = <&fan THERMAL_NO_LIMIT 1>; }; - map2 { + map1 { trip = <&package_fan1>; cooling-device = <&fan 2 THERMAL_NO_LIMIT>; }; From 875ea82c75f56697fa500f30fabaa49f82f9b229 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Thu, 12 Sep 2024 11:01:48 -0700 Subject: [PATCH 006/281] arm64: dts: rockchip: Designate Turing RK1's system power controller Currently, the Turing RK1 board reboots when told to power off. Resolve this by designating the RK806 as the system power controller, so that the relevant driver can handle system shutdown requests. Fixes: 2806a69f3fef ("arm64: dts: rockchip: Add Turing RK1 SoM support") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20240912180148.205957-1-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index dbaa94ca69f4..432133251e31 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -296,6 +296,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; From 274e58cc226c54c849760d9a6ec7be23b221cb12 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 8 Sep 2024 21:14:14 +0100 Subject: [PATCH 007/281] MAINTAINERS: Qualcomm SoC: Match reserved-memory bindings commit 8b90269ee6d7 ("MAINTAINERS: Split Qualcomm SoC and linux-arm-msm entries") included an entry for .../bindings/reserved-memory/qcom. However, it appears that this should have been a glob as although there are files that start with that path, no file matches that exact path. Address this by making the entry a glob. Flagged by make htmldocs as: Warning: MAINTAINERS references a file that doesn't exist: Documentation/devicetree/bindings/reserved-memory/qcom Cc: Konrad Dybcio Cc: Bjorn Andersson Cc: Rob Herring Cc: Krzysztof Kozlowski Cc: Conor Dooley Cc: linux-arm-msm@vger.kernel.org Cc: devicetree@vger.kernel.org Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240908-qcom-glob-v1-1-94a390f36744@kernel.org Signed-off-by: Bjorn Andersson --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..2396d053a69f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2865,7 +2865,7 @@ F: Documentation/devicetree/bindings/arm/qcom.yaml F: Documentation/devicetree/bindings/bus/qcom* F: Documentation/devicetree/bindings/cache/qcom,llcc.yaml F: Documentation/devicetree/bindings/firmware/qcom,scm.yaml -F: Documentation/devicetree/bindings/reserved-memory/qcom +F: Documentation/devicetree/bindings/reserved-memory/qcom* F: Documentation/devicetree/bindings/soc/qcom/ F: arch/arm/boot/dts/qcom/ F: arch/arm/configs/qcom_defconfig From e694d2b5c58ba2d1e995d068707c8d966e7f5f2a Mon Sep 17 00:00:00 2001 From: Charles Han Date: Sun, 29 Sep 2024 15:23:49 +0800 Subject: [PATCH 008/281] soc: qcom: Add check devm_kasprintf() returned value devm_kasprintf() can return a NULL pointer on failure but this returned value in qcom_socinfo_probe() is not checked. Signed-off-by: Charles Han Link: https://lore.kernel.org/r/20240929072349.202520-1-hanchunchao@inspur.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/socinfo.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index 24c3971f2ef1..e42d86d5f6f9 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -786,10 +786,16 @@ static int qcom_socinfo_probe(struct platform_device *pdev) qs->attr.revision = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%u.%u", SOCINFO_MAJOR(le32_to_cpu(info->ver)), SOCINFO_MINOR(le32_to_cpu(info->ver))); - if (offsetof(struct socinfo, serial_num) <= item_size) + if (!qs->attr.soc_id || qs->attr.revision) + return -ENOMEM; + + if (offsetof(struct socinfo, serial_num) <= item_size) { qs->attr.serial_number = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%u", le32_to_cpu(info->serial_num)); + if (!qs->attr.serial_number) + return -ENOMEM; + } qs->soc_dev = soc_device_register(&qs->attr); if (IS_ERR(qs->soc_dev)) From 27727cb6604e0998d03d9ec063b517b239d2bb0f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 16 Sep 2024 10:23:06 +0200 Subject: [PATCH 009/281] arm64: dts: qcom: x1e80100: fix PCIe4 and PCIe6a PHY clocks Add the missing clkref enable and pipediv2 clocks to the PCIe4 and PCIe6a PHYs. Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Cc: stable@vger.kernel.org # 6.9 Cc: Abel Vesa Signed-off-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240916082307.29393-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index a36076e3c56b..6a91c46ee687 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3002,14 +3002,16 @@ clocks = <&gcc GCC_PCIE_6A_PHY_AUX_CLK>, <&gcc GCC_PCIE_6A_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_4L_CLKREF_EN>, <&gcc GCC_PCIE_6A_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_6A_PIPE_CLK>; + <&gcc GCC_PCIE_6A_PIPE_CLK>, + <&gcc GCC_PCIE_6A_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_6A_PHY_BCR>, <&gcc GCC_PCIE_6A_NOCSR_COM_PHY_BCR>; @@ -3254,14 +3256,16 @@ clocks = <&gcc GCC_PCIE_4_AUX_CLK>, <&gcc GCC_PCIE_4_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_2L_4_CLKREF_EN>, <&gcc GCC_PCIE_4_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_4_PIPE_CLK>; + <&gcc GCC_PCIE_4_PIPE_CLK>, + <&gcc GCC_PCIE_4_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_4_PHY_BCR>; reset-names = "phy"; From 0b80b3c0f6d20f1bc1f7fea6176a8df15619e884 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 16 Sep 2024 10:23:07 +0200 Subject: [PATCH 010/281] arm64: dts: qcom: x1e80100: fix PCIe5 PHY clocks Add the missing clkref enable and pipediv2 clocks to the PCIe5 PHY. Fixes: 62ab23e15508 ("arm64: dts: qcom: x1e80100: add PCIe5 nodes") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240916082307.29393-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 6a91c46ee687..88a2e2f92599 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3126,14 +3126,16 @@ clocks = <&gcc GCC_PCIE_5_AUX_CLK>, <&gcc GCC_PCIE_5_CFG_AHB_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_PCIE_2L_5_CLKREF_EN>, <&gcc GCC_PCIE_5_PHY_RCHNG_CLK>, - <&gcc GCC_PCIE_5_PIPE_CLK>; + <&gcc GCC_PCIE_5_PIPE_CLK>, + <&gcc GCC_PCIE_5_PIPEDIV2_CLK>; clock-names = "aux", "cfg_ahb", "ref", "rchng", - "pipe"; + "pipe", + "pipediv2"; resets = <&gcc GCC_PCIE_5_PHY_BCR>; reset-names = "phy"; From d67907154808745b0fae5874edc7b0f78d33991c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Oct 2024 12:01:21 +0200 Subject: [PATCH 011/281] firmware: qcom: scm: suppress download mode error Stop spamming the logs with errors about missing mechanism for setting the so called download (or dump) mode for users that have not requested that feature to be enabled in the first place. This avoids the follow error being logged on boot as well as on shutdown when the feature it not available and download mode has not been enabled on the kernel command line: qcom_scm firmware:scm: No available mechanism for setting download mode Fixes: 79cb2cb8d89b ("firmware: qcom: scm: Disable SDI and write no dump to dump mode") Fixes: 781d32d1c970 ("firmware: qcom_scm: Clear download bit during reboot") Cc: Mukesh Ojha Cc: stable@vger.kernel.org # 6.4 Signed-off-by: Johan Hovold Reviewed-by: Mukesh Ojha Link: https://lore.kernel.org/r/20241002100122.18809-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index 10986cb11ec0..e2ac595902ed 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -545,7 +545,7 @@ static void qcom_scm_set_download_mode(u32 dload_mode) } else if (__qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_BOOT, QCOM_SCM_BOOT_SET_DLOAD_MODE)) { ret = __qcom_scm_set_dload_mode(__scm->dev, !!dload_mode); - } else { + } else if (dload_mode) { dev_err(__scm->dev, "No available mechanism for setting download mode\n"); } From ca61d6836e6f4442a77762e1074d2706a2a6e578 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 30 Sep 2024 10:33:28 +0200 Subject: [PATCH 012/281] firmware: qcom: scm: fix a NULL-pointer dereference Some SCM calls can be invoked with __scm being NULL (the driver may not have been and will not be probed as there's no SCM entry in device-tree). Make sure we don't dereference a NULL pointer. Fixes: 449d0d84bcd8 ("firmware: qcom: scm: smc: switch to using the SCM allocator") Reported-by: Rudraksha Gupta Closes: https://lore.kernel.org/lkml/692cfe9a-8c05-4ce4-813e-82b3f310019a@gmail.com/ Reviewed-by: Konrad Dybcio Tested-by: Rudraksha Gupta Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephan Gerhold Signed-off-by: Bartosz Golaszewski Reviewed-by: Kuldeep Singh Link: https://lore.kernel.org/r/20240930083328.17904-1-brgl@bgdev.pl Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index e2ac595902ed..f47fdf32d2da 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -216,7 +216,7 @@ static DEFINE_SPINLOCK(scm_query_lock); struct qcom_tzmem_pool *qcom_scm_get_tzmem_pool(void) { - return __scm->mempool; + return __scm ? __scm->mempool : NULL; } static enum qcom_scm_convention __get_convention(void) From 0a97195d2181caced187acd7454464b8e37021d7 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Tue, 3 Sep 2024 15:45:10 +0530 Subject: [PATCH 013/281] EDAC/qcom: Make irq configuration optional On most modern qualcomm SoCs, the configuration necessary to enable the Tag/Data RAM related irqs being propagated to the SoC irq controller is already done in firmware (in DSF or 'DDR System Firmware') On some like the x1e80100, these registers aren't even accesible to the kernel causing a crash when edac device is probed. Hence, make the irq configuration optional in the driver and mark x1e80100 as the SoC on which this should be avoided. Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Reported-by: Bjorn Andersson Signed-off-by: Rajendra Nayak Reviewed-by: Manivannan Sadhasivam Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240903101510.3452734-1-quic_rjendra@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/edac/qcom_edac.c | 8 +++++--- drivers/soc/qcom/llcc-qcom.c | 3 +++ include/linux/soc/qcom/llcc-qcom.h | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index d3cd4cc54ace..a9a8ba067007 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -342,9 +342,11 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) int ecc_irq; int rc; - rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); - if (rc) - return rc; + if (!llcc_driv_data->ecc_irq_configured) { + rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); + if (rc) + return rc; + } /* Allocate edac control info */ edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank", diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 8fa4ffd3a9b5..28bcc65e91be 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -139,6 +139,7 @@ struct qcom_llcc_config { int size; bool need_llcc_cfg; bool no_edac; + bool irq_configured; }; struct qcom_sct_config { @@ -718,6 +719,7 @@ static const struct qcom_llcc_config x1e80100_cfg[] = { .need_llcc_cfg = true, .reg_offset = llcc_v2_1_reg_offset, .edac_reg_offset = &llcc_v2_1_edac_reg_offset, + .irq_configured = true, }, }; @@ -1345,6 +1347,7 @@ static int qcom_llcc_probe(struct platform_device *pdev) drv_data->cfg = llcc_cfg; drv_data->cfg_size = sz; drv_data->edac_reg_offset = cfg->edac_reg_offset; + drv_data->ecc_irq_configured = cfg->irq_configured; mutex_init(&drv_data->lock); platform_set_drvdata(pdev, drv_data); diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e9f528b1370..2f20281d4ad4 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -125,6 +125,7 @@ struct llcc_edac_reg_offset { * @num_banks: Number of llcc banks * @bitmap: Bit map to track the active slice ids * @ecc_irq: interrupt for llcc cache error detection and reporting + * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation * @version: Indicates the LLCC version */ struct llcc_drv_data { @@ -139,6 +140,7 @@ struct llcc_drv_data { u32 num_banks; unsigned long *bitmap; int ecc_irq; + bool ecc_irq_configured; u32 version; }; From de50a7e3681771c6b990238af82bf1dea9b11b21 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 8 Oct 2024 13:15:37 +0200 Subject: [PATCH 014/281] arm64: dts: rockchip: Remove hdmi's 2nd interrupt on rk3328 The "synopsys,dw-hdmi.yaml" binding specifies that the interrupts property of the hdmi node has 'maxItems: 1', so the hdmi node in rk3328.dtsi having 2 is incorrect. Paragraph 1.3 ("System Interrupt connection") of the RK3328 TRM v1.1 page 16 and 17 define the following hdmi related interrupts: - 67 hdmi_intr - 103 hdmi_intr_wakeup The difference of 32 is due to a different base used in the TRM. The RK3399 (which uses the same binding) has '23: hdmi_irq' and '24: hdmi_wakeup_irq' according to its TRM (page 19). The RK3568 (also same binding) has '76: hdmi_wakeup' and '77: hdmi' according to page 17 of its TRM. In both cases the non-wakeup IRQ was used, so use that too for rk3328. Helped-by: Heiko Stuebner Fixes: 725e351c265a ("arm64: dts: rockchip: add rk3328 display nodes") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241008113344.23957-3-didi.debian@cknow.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 16b4faa22e4f..c01a4cad48f3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -754,8 +754,7 @@ compatible = "rockchip,rk3328-dw-hdmi"; reg = <0x0 0xff3c0000 0x0 0x20000>; reg-io-width = <4>; - interrupts = , - ; + interrupts = ; clocks = <&cru PCLK_HDMI>, <&cru SCLK_HDMI_SFC>, <&cru SCLK_RTC32K>; From 87299d6ee95a37d2d576dd8077ea6860f77ad8e2 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 8 Oct 2024 13:15:38 +0200 Subject: [PATCH 015/281] arm64: dts: rockchip: Fix wakeup prop names on PineNote BT node The "brcm,bluetooth.yaml" binding has 'device-wakeup-gpios' and 'host-wakeup-gpios' property names, not '*-wake-gpios'. Fix the incorrect property names. Note that the "realtek,bluetooth.yaml" binding does use the '*-wake-gpios' property names. Fixes: d449121e5e8a ("arm64: dts: rockchip: Add Pine64 PineNote board") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241008113344.23957-4-didi.debian@cknow.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index ae2536c65a83..ca7666bf5c0a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -684,8 +684,8 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wake-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; - host-wake-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; pinctrl-0 = <&bt_enable_h>, <&bt_host_wake_l>, <&bt_wake_h>; pinctrl-names = "default"; From 2b6a3f857550e52b1cd4872ebb13cb3e3cf12f5f Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 8 Oct 2024 13:15:39 +0200 Subject: [PATCH 016/281] arm64: dts: rockchip: Fix reset-gpios property on brcm BT nodes For most compatibles, the "brcm,bluetooth.yaml" binding doesn't allow the 'reset-gpios' property, but there is a 'shutdown-gpios' property. Page 12 of the AzureWave-CM256SM datasheet (v1.9) has the following wrt pin 34 'BT_REG_ON' (connected to GPIO0_C4_d on the PineNote): Used by PMU to power up or power down the internal regulators used by the Bluetooth section. Also, when deasserted, this pin holds the Bluetooth section in reset. This pin has an internal 200k ohm pull down resistor that is enabled by default. So it is safe to replace 'reset-gpios' with 'shutdown-gpios'. Fixes: d449121e5e8a ("arm64: dts: rockchip: Add Pine64 PineNote board") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241008113344.23957-5-didi.debian@cknow.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index ca7666bf5c0a..a477bd992b40 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -686,9 +686,9 @@ clock-names = "lpo"; device-wakeup-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; pinctrl-0 = <&bt_enable_h>, <&bt_host_wake_l>, <&bt_wake_h>; pinctrl-names = "default"; + shutdown-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; vbat-supply = <&vcc_wl>; vddio-supply = <&vcca_1v8_pmu>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi index 45de2630bb50..e9fa9bee995a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi @@ -402,9 +402,9 @@ clock-names = "lpo"; device-wakeup-gpios = <&gpio2 RK_PB2 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio2 RK_PB1 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_h &bt_reg_on_h &bt_wake_host_h>; + shutdown-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_LOW>; vbat-supply = <&vcc_3v3>; vddio-supply = <&vcc_1v8>; }; From f903663a8dcd6e1656e52856afbf706cc14cbe6d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 1 Sep 2024 11:30:24 +0200 Subject: [PATCH 017/281] clk: qcom: videocc-sm8350: use HW_CTRL_TRIGGER for vcodec GDSCs A recent change in the venus driver results in a stuck clock on the Lenovo ThinkPad X13s, for example, when streaming video in firefox: video_cc_mvs0_clk status stuck at 'off' WARNING: CPU: 6 PID: 2885 at drivers/clk/qcom/clk-branch.c:87 clk_branch_wait+0x144/0x15c ... Call trace: clk_branch_wait+0x144/0x15c clk_branch2_enable+0x30/0x40 clk_core_enable+0xd8/0x29c clk_enable+0x2c/0x4c vcodec_clks_enable.isra.0+0x94/0xd8 [venus_core] coreid_power_v4+0x464/0x628 [venus_core] vdec_start_streaming+0xc4/0x510 [venus_dec] vb2_start_streaming+0x6c/0x180 [videobuf2_common] vb2_core_streamon+0x120/0x1dc [videobuf2_common] vb2_streamon+0x1c/0x6c [videobuf2_v4l2] v4l2_m2m_ioctl_streamon+0x30/0x80 [v4l2_mem2mem] v4l_streamon+0x24/0x30 [videodev] using the out-of-tree sm8350/sc8280xp venus support. [1] Update also the sm8350/sc8280xp GDSC definitions so that the hw control mode can be changed at runtime as the venus driver now requires. Fixes: ec9a652e5149 ("venus: pm_helpers: Use dev_pm_genpd_set_hwmode to switch GDSC mode on V6") Link: https://lore.kernel.org/lkml/20230731-topic-8280_venus-v1-0-8c8bbe1983a5@linaro.org/ # [1] Cc: Jagadeesh Kona Cc: Taniya Das Cc: Abel Vesa Cc: Konrad Dybcio Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Tested-by: Steev Klimaszewski Link: https://lore.kernel.org/r/20240901093024.18841-1-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/videocc-sm8350.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/videocc-sm8350.c b/drivers/clk/qcom/videocc-sm8350.c index 5bd6fe3e1298..874d4da95ff8 100644 --- a/drivers/clk/qcom/videocc-sm8350.c +++ b/drivers/clk/qcom/videocc-sm8350.c @@ -452,7 +452,7 @@ static struct gdsc mvs0_gdsc = { .pd = { .name = "mvs0_gdsc", }, - .flags = HW_CTRL | RETAIN_FF_ENABLE, + .flags = HW_CTRL_TRIGGER | RETAIN_FF_ENABLE, .pwrsts = PWRSTS_OFF_ON, }; @@ -461,7 +461,7 @@ static struct gdsc mvs1_gdsc = { .pd = { .name = "mvs1_gdsc", }, - .flags = HW_CTRL | RETAIN_FF_ENABLE, + .flags = HW_CTRL_TRIGGER | RETAIN_FF_ENABLE, .pwrsts = PWRSTS_OFF_ON, }; From f94b934336e30cebae75d4fbe04a2109a3c8fdec Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:27 +0200 Subject: [PATCH 018/281] arm64: dts: rockchip: fix i2c2 pinctrl-names property on anbernic-rg353p/v We want to control pins, not beer mugs, so rename pintctrl-names to the expected pinctrl-names. This was not affecting functionality, because the i2c2 controller already had a set of pinctrl properties. Fixes: 523adb553573 ("arm64: dts: rockchip: add Anbernic RG353P and RG503") Fixes: 1e141cf12726 ("arm64: dts: rockchip: add Anbernic RG353V and RG353VS") Cc: Chris Morgan Acked-by: Chris Morgan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-2-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts index a73cf30801ec..9816a4ed4599 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353p.dts @@ -92,7 +92,7 @@ }; &i2c2 { - pintctrl-names = "default"; + pinctrl-names = "default"; pinctrl-0 = <&i2c2m1_xfer>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts index e9954a33e8cd..a79a5614bcc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rg353v.dts @@ -79,7 +79,7 @@ }; &i2c2 { - pintctrl-names = "default"; + pinctrl-names = "default"; pinctrl-0 = <&i2c2m1_xfer>; status = "okay"; From 98c3f4a2d61a29a53244ce45e50655140bd47afb Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:28 +0200 Subject: [PATCH 019/281] arm64: dts: rockchip: Drop regulator-init-microvolt from two boards rk3568-roc-pc and rk3588-toybrick-x0 re-introduced this property despite previous patches removing older instances already. regulator-init-microvolt is not part of any regulator binding and is only used in the Rockchip vendor kernel. So drop it. It is used by u-boot in some places to setup initial regulator-state, but that should happen in the existing -u-boot devicetree additions. Fixes: 007b4bb47f44 ("arm64: dts: rockchip: add dts for Firefly Station P2 aka rk3568-roc-pc") Cc: Furkan Kardame Fixes: 8ffe365f8dc7 ("arm64: dts: rockchip: Add devicetree support for TB-RK3588X board") Cc: Elon Zhang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-3-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts | 3 --- arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts | 1 - 2 files changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts index e333449ead04..2fa89a0eeafc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-roc-pc.dts @@ -272,7 +272,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -285,7 +284,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -309,7 +307,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts index d0021524e7f9..328dcb894ccb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-toybrick-x0.dts @@ -428,7 +428,6 @@ regulator-boot-on; regulator-min-microvolt = <550000>; regulator-max-microvolt = <950000>; - regulator-init-microvolt = <750000>; regulator-ramp-delay = <12500>; regulator-state-mem { From 2fa98dcc8d3ea2ebbd9e6be778f8bb19231c28be Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:29 +0200 Subject: [PATCH 020/281] arm64: dts: rockchip: Fix bluetooth properties on rk3566 box demo The expected clock-name is different, and extclk also is deprecated in favor of txco for clocks that are not crystals. The wakeup gpio properties are named differently too, when changing from vendor-tree to mainline. So fix those to match the binding. Fixes: 2e0537b16b25 ("arm64: dts: rockchip: Add dts for rockchip rk3566 box demo board") Cc: Andy Yan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-4-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts index 0c18406e4c59..7d4680933823 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts @@ -449,9 +449,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&pmucru CLK_RTC_32K>; - clock-names = "ext_clock"; - device-wake-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wake-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + clock-names = "txco"; + device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; From ea74528aaea5a1dfc8e3de09ef2af37530eca526 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:30 +0200 Subject: [PATCH 021/281] arm64: dts: rockchip: Fix bluetooth properties on Rock960 boards The expected clock-name is different, and extclk also is deprecated in favor of txco for clocks that are not crystals. So fix it to match the binding. Fixes: c72235c288c8 ("arm64: dts: rockchip: Add on-board WiFi/BT support for Rock960 boards") Cc: Manivannan Sadhasivam Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-5-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 8146f870d2bd..ab890e7b6c59 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -576,7 +576,7 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&rk808 1>; - clock-names = "ext_clock"; + clock-names = "txco"; device-wakeup-gpios = <&gpio2 RK_PD3 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; From 1b670212ee3dd9d14c6d39a042dfe4ae79b49b4e Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:31 +0200 Subject: [PATCH 022/281] arm64: dts: rockchip: Remove undocumented supports-emmc property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit supports-emmc is an undocumented property that slipped into the mainline kernel devicetree for some boards. Drop it. Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard") Cc: Quentin Schulz Fixes: b8c028782922 ("arm64: dts: rockchip: Add DTS for FriendlyARM NanoPi R2S Plus") Cc: Sergey Bostandzhyan Fixes: 8d94da58de53 ("arm64: dts: rockchip: Add EmbedFire LubanCat 1") Cc: Wenhao Cui Fixes: cdf46cdbabfc ("arm64: dts: rockchip: Add dts for EmbedFire rk3568 LubanCat 2") Cc: Andy Yan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-6-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi | 1 - arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts | 1 - arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts | 1 - arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index bb1aea82e666..b7163ed74232 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -66,7 +66,6 @@ bus-width = <8>; cap-mmc-highspeed; mmc-hs200-1_8v; - supports-emmc; mmc-pwrseq = <&emmc_pwrseq>; non-removable; vmmc-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts index cb81ba3f23ff..3093f607f282 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts @@ -27,6 +27,5 @@ num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&emmc_clk &emmc_cmd &emmc_bus8>; - supports-emmc; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts index c1194d1e438d..9a2f59a351de 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts @@ -507,7 +507,6 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; - supports-emmc; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts index a3112d5df200..b505a4537ee8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts @@ -589,7 +589,6 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; - supports-emmc; status = "okay"; }; From 5ed96580568c4f79a0aff11a67f10b3e9229ba86 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:32 +0200 Subject: [PATCH 023/281] arm64: dts: rockchip: Remove #cooling-cells from fan on Theobroma lion All Theobroma boards use a ti,amc6821 as fan controller. It normally runs in an automatically controlled way and while it may be possible to use it as part of a dt-based thermal management, this is not yet specified in the binding, nor implemented in any kernel. Newer boards already don't contain that #cooling-cells property, but older ones do. So remove them for now, they can be re-added if thermal integration gets implemented in the future. There are two further occurences in v6.12-rc in px30-ringneck and rk3399-puma, but those already get removed by the i2c-mux conversion scheduled for 6.13 . As the undocumented property is in the kernel so long, I opted for not causing extra merge conflicts between 6.12 and 6.13 Fixes: d99a02bcfa81 ("arm64: dts: rockchip: add RK3368-uQ7 (Lion) SoM") Cc: Quentin Schulz Cc: Klaus Goger Reviewed-by: Quentin Schulz Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-7-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi index 8ac8acf4082d..ab3fda69a1fb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi @@ -61,7 +61,6 @@ fan: fan@18 { compatible = "ti,amc6821"; reg = <0x18>; - #cooling-cells = <2>; }; rtc_twi: rtc@6f { From 3a53a7187f41ec3db12cf4c2cb0db4ba87c2f3a1 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:33 +0200 Subject: [PATCH 024/281] arm64: dts: rockchip: Fix LED triggers on rk3308-roc-cc There are two LEDs on the board, power and user events. Currently both are assigned undocumented IR(-remote) triggers that are probably only part of the vendor-kernel. To make dtbs check happier, assign the power-led to a generic default-on trigger and the user led to the documented rc-feedback trigger that should mostly match its current usage. Fixes: 4403e1237be3 ("arm64: dts: rockchip: Add devicetree for board roc-rk3308-cc") Cc: Andy Yan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-8-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 9232357f4fec..d9e191ad1d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -36,14 +36,14 @@ power_led: led-0 { label = "firefly:red:power"; - linux,default-trigger = "ir-power-click"; + linux,default-trigger = "default-on"; default-state = "on"; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; }; user_led: led-1 { label = "firefly:blue:user"; - linux,default-trigger = "ir-user-click"; + linux,default-trigger = "rc-feedback"; default-state = "off"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_HIGH>; }; From b1f8d3b81d9289e171141a7120093ddefe7bd2f4 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:34 +0200 Subject: [PATCH 025/281] arm64: dts: rockchip: remove num-slots property from rk3328-nanopi-r2s-plus num-slots was not part of the dw-mmc binding and the last slipage of one of them seeping in from the vendor kernel was removed way back in 2017. Somehow the nanopi-r2s-plus managed to smuggle another on in the kernel, so remove that as well. Fixes: b8c028782922 ("arm64: dts: rockchip: Add DTS for FriendlyARM NanoPi R2S Plus") Cc: Sergey Bostandzhyan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-9-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts index 3093f607f282..4b9ced67742d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s-plus.dts @@ -24,7 +24,6 @@ disable-wp; mmc-hs200-1_8v; non-removable; - num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&emmc_clk &emmc_cmd &emmc_bus8>; status = "okay"; From 80fe25fcc605209b707583e3337e3cd40b7ed0bf Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 14 Oct 2024 10:38:20 +0300 Subject: [PATCH 026/281] arm64: dts: qcom: x1e80100: Add Broadcast_AND region in LLCC block Add missing Broadcast_AND region to the LLCC block for x1e80100, as the LLCC version on this platform is 4.1 and it provides the region. This also fixes the following error caused by the missing region: [ 3.797768] qcom-llcc 25000000.system-cache-controller: error -EINVAL: invalid resource (null) This error started showing up only after the new regmap region called Broadcast_AND that has been added to the llcc-qcom driver. Cc: stable@vger.kernel.org # 6.11: 055afc34fd21: soc: qcom: llcc: Add regmap for Broadcast_AND region Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20241014-x1e80100-dts-llcc-add-broadcastand_region-v2-1-5ee6ac128627@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 88a2e2f92599..27796ba1216f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -6090,7 +6090,8 @@ <0 0x25a00000 0 0x200000>, <0 0x25c00000 0 0x200000>, <0 0x25e00000 0 0x200000>, - <0 0x26000000 0 0x200000>; + <0 0x26000000 0 0x200000>, + <0 0x26200000 0 0x200000>; reg-names = "llcc0_base", "llcc1_base", "llcc2_base", @@ -6099,7 +6100,8 @@ "llcc5_base", "llcc6_base", "llcc7_base", - "llcc_broadcast_base"; + "llcc_broadcast_base", + "llcc_broadcast_and_base"; interrupts = ; }; From 5d3d966400d0a094359009147d742b3926a2ea53 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 6 Oct 2024 19:47:56 +0300 Subject: [PATCH 027/281] arm64: dts: qcom: sm8450 fix PIPE clock specification for pcie1 For historical reasons on SM8450 the second PCIe host (pcie1) also keeps a reference to the PIPE clock coming from the PHY. Commit e76862840660 ("arm64: dts: qcom: sm8450: correct pcie1 phy clocks inputs to gcc") has updated the PHY to use #clock-cells = <1>, making just <&pcie1_phy> clock specification invalid. Update corresponding clock entry in the PCIe1 host node. /soc@0/pcie@1c08000: Failed to get clk index: 2 ret: -22 qcom-pcie 1c08000.pcie: Failed to get clocks qcom-pcie 1c08000.pcie: probe with driver qcom-pcie failed with error -22 Fixes: e76862840660 ("arm64: dts: qcom: sm8450: correct pcie1 phy clocks inputs to gcc") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20241006-fix-sm8450-pcie1-v1-1-4f227c9082ed@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 9bafb3b350ff..38cb524cc568 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -1973,7 +1973,7 @@ clocks = <&gcc GCC_PCIE_1_PIPE_CLK>, <&gcc GCC_PCIE_1_PIPE_CLK_SRC>, - <&pcie1_phy>, + <&pcie1_phy QMP_PCIE_PIPE_CLK>, <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_PCIE_1_AUX_CLK>, <&gcc GCC_PCIE_1_CFG_AHB_CLK>, From e02bfea4d7ef587bb285ad5825da4e1973ac8263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Sun, 6 Oct 2024 22:51:58 +0200 Subject: [PATCH 028/281] clk: qcom: clk-alpha-pll: Fix pll post div mask when width is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many qcom clock drivers do not have .width set. In that case value of (p)->width - 1 will be negative which breaks clock tree. Fix this by checking if width is zero, and pass 3 to GENMASK if that's the case. Fixes: 1c3541145cbf ("clk: qcom: support for 2 bit PLL post divider") Signed-off-by: Barnabás Czémán Reviewed-by: Dmitry Baryshkov Reviewed-by: Christopher Obbard Tested-by: Christopher Obbard Link: https://lore.kernel.org/r/20241006-fix-postdiv-mask-v3-1-160354980433@mainlining.org Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/clk-alpha-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index f9105443d7db..be9bee6ab65f 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -40,7 +40,7 @@ #define PLL_USER_CTL(p) ((p)->offset + (p)->regs[PLL_OFF_USER_CTL]) # define PLL_POST_DIV_SHIFT 8 -# define PLL_POST_DIV_MASK(p) GENMASK((p)->width - 1, 0) +# define PLL_POST_DIV_MASK(p) GENMASK((p)->width ? (p)->width - 1 : 3, 0) # define PLL_ALPHA_MSB BIT(15) # define PLL_ALPHA_EN BIT(24) # define PLL_ALPHA_MODE BIT(25) From fa88dc7db176c79b50adb132a56120a1d4d9d18b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 1 Oct 2024 11:01:34 +0200 Subject: [PATCH 029/281] media: dvb-core: add missing buffer index check dvb_vb2_expbuf() didn't check if the given buffer index was for a valid buffer. Add this check. Signed-off-by: Hans Verkuil Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-media/?q=WARNING+in+vb2_core_reqbufs Fixes: 7dc866df4012 ("media: dvb-core: Use vb2_get_buffer() instead of directly access to buffers array") Reviewed-by: Benjamin Gaignard Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_vb2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c index 192a8230c4aa..29edaaff7a5c 100644 --- a/drivers/media/dvb-core/dvb_vb2.c +++ b/drivers/media/dvb-core/dvb_vb2.c @@ -366,9 +366,15 @@ int dvb_vb2_querybuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) int dvb_vb2_expbuf(struct dvb_vb2_ctx *ctx, struct dmx_exportbuffer *exp) { struct vb2_queue *q = &ctx->vb_q; + struct vb2_buffer *vb2 = vb2_get_buffer(q, exp->index); int ret; - ret = vb2_core_expbuf(&ctx->vb_q, &exp->fd, q->type, q->bufs[exp->index], + if (!vb2) { + dprintk(1, "[%s] invalid buffer index\n", ctx->name); + return -EINVAL; + } + + ret = vb2_core_expbuf(&ctx->vb_q, &exp->fd, q->type, vb2, 0, exp->flags); if (ret) { dprintk(1, "[%s] index=%d errno=%d\n", ctx->name, From fed13a5478680614ba97fc87e71f16e2e197912e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 7 Oct 2024 13:38:12 +0200 Subject: [PATCH 030/281] dm: fix a crash if blk_alloc_disk fails If blk_alloc_disk fails, the variable md->disk is set to an error value. cleanup_mapped_device will see that md->disk is non-NULL and it will attempt to access it, causing a crash on this statement "md->disk->private_data = NULL;". Signed-off-by: Mikulas Patocka Reported-by: Chenyuan Yang Closes: https://marc.info/?l=dm-devel&m=172824125004329&w=2 Cc: stable@vger.kernel.org Reviewed-by: Nitesh Shetty --- drivers/md/dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ff4a6b570b76..19230404d8c2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2290,8 +2290,10 @@ static struct mapped_device *alloc_dev(int minor) * override accordingly. */ md->disk = blk_alloc_disk(NULL, md->numa_node_id); - if (IS_ERR(md->disk)) + if (IS_ERR(md->disk)) { + md->disk = NULL; goto bad; + } md->queue = md->disk->queue; init_waitqueue_head(&md->wait); From eed2d8e8d0051a6551e4dffba99e16eb88c676ac Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 4 Sep 2024 13:41:03 +0200 Subject: [PATCH 031/281] arm64: dts: imx8-ss-vpu: Fix imx8qm VPU IRQs imx8-ss-vpu only contained imx8qxp IRQ numbers, only mu2_m0 uses the correct imx8qm IRQ number, as imx8qxp lacks this MU. Fix this by providing imx8qm IRQ numbers in the main imx8-ss-vpu.dtsi and override the IRQ numbers in SoC-specific imx8qxp-ss-vpu.dtsi, similar to reg property for VPU core devices. Fixes: 0d9968d98467d ("arm64: dts: freescale: imx8q: add imx vpu codec entries") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi | 4 ++-- arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi index c6540768bdb9..87211c18d65a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi @@ -15,7 +15,7 @@ vpu: vpu@2c000000 { mu_m0: mailbox@2d000000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d000000 0x20000>; - interrupts = ; + interrupts = ; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_0>; status = "disabled"; @@ -24,7 +24,7 @@ vpu: vpu@2c000000 { mu1_m0: mailbox@2d020000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d020000 0x20000>; - interrupts = ; + interrupts = ; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_1>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi index 7894a3ab26d6..f81937b5fb72 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi @@ -5,6 +5,14 @@ * Author: Alexander Stein */ +&mu_m0 { + interrupts = ; +}; + +&mu1_m0 { + interrupts = ; +}; + &vpu_core0 { reg = <0x2d040000 0x10000>; }; From 409dc5196d5b6eb67468a06bf4d2d07d7225a67b Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 5 Sep 2024 17:43:38 +0800 Subject: [PATCH 032/281] arm64: dts: imx8ulp: correct the flexspi compatible string The flexspi on imx8ulp only has 16 LUTs, and imx8mm flexspi has 32 LUTs, so correct the compatible string here, otherwise will meet below error: [ 1.119072] ------------[ cut here ]------------ [ 1.123926] WARNING: CPU: 0 PID: 1 at drivers/spi/spi-nxp-fspi.c:855 nxp_fspi_exec_op+0xb04/0xb64 [ 1.133239] Modules linked in: [ 1.136448] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc6-next-20240902-00001-g131bf9439dd9 #69 [ 1.146821] Hardware name: NXP i.MX8ULP EVK (DT) [ 1.151647] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1.158931] pc : nxp_fspi_exec_op+0xb04/0xb64 [ 1.163496] lr : nxp_fspi_exec_op+0xa34/0xb64 [ 1.168060] sp : ffff80008002b2a0 [ 1.171526] x29: ffff80008002b2d0 x28: 0000000000000000 x27: 0000000000000000 [ 1.179002] x26: ffff2eb645542580 x25: ffff800080610014 x24: ffff800080610000 [ 1.186480] x23: ffff2eb645548080 x22: 0000000000000006 x21: ffff2eb6455425e0 [ 1.193956] x20: 0000000000000000 x19: ffff80008002b5e0 x18: ffffffffffffffff [ 1.201432] x17: ffff2eb644467508 x16: 0000000000000138 x15: 0000000000000002 [ 1.208907] x14: 0000000000000000 x13: ffff2eb6400d8080 x12: 00000000ffffff00 [ 1.216378] x11: 0000000000000000 x10: ffff2eb6400d8080 x9 : ffff2eb697adca80 [ 1.223850] x8 : ffff2eb697ad3cc0 x7 : 0000000100000000 x6 : 0000000000000001 [ 1.231324] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000000007a6 [ 1.238795] x2 : 0000000000000000 x1 : 00000000000001ce x0 : 00000000ffffff92 [ 1.246267] Call trace: [ 1.248824] nxp_fspi_exec_op+0xb04/0xb64 [ 1.253031] spi_mem_exec_op+0x3a0/0x430 [ 1.257139] spi_nor_read_id+0x80/0xcc [ 1.261065] spi_nor_scan+0x1ec/0xf10 [ 1.264901] spi_nor_probe+0x108/0x2fc [ 1.268828] spi_mem_probe+0x6c/0xbc [ 1.272574] spi_probe+0x84/0xe4 [ 1.275958] really_probe+0xbc/0x29c [ 1.279713] __driver_probe_device+0x78/0x12c [ 1.284277] driver_probe_device+0xd8/0x15c [ 1.288660] __device_attach_driver+0xb8/0x134 [ 1.293316] bus_for_each_drv+0x88/0xe8 [ 1.297337] __device_attach+0xa0/0x190 [ 1.301353] device_initial_probe+0x14/0x20 [ 1.305734] bus_probe_device+0xac/0xb0 [ 1.309752] device_add+0x5d0/0x790 [ 1.313408] __spi_add_device+0x134/0x204 [ 1.317606] of_register_spi_device+0x3b4/0x590 [ 1.322348] spi_register_controller+0x47c/0x754 [ 1.327181] devm_spi_register_controller+0x4c/0xa4 [ 1.332289] nxp_fspi_probe+0x1cc/0x2b0 [ 1.336307] platform_probe+0x68/0xc4 [ 1.340145] really_probe+0xbc/0x29c [ 1.343893] __driver_probe_device+0x78/0x12c [ 1.348457] driver_probe_device+0xd8/0x15c [ 1.352838] __driver_attach+0x90/0x19c [ 1.356857] bus_for_each_dev+0x7c/0xdc [ 1.360877] driver_attach+0x24/0x30 [ 1.364624] bus_add_driver+0xe4/0x208 [ 1.368552] driver_register+0x5c/0x124 [ 1.372573] __platform_driver_register+0x28/0x34 [ 1.377497] nxp_fspi_driver_init+0x1c/0x28 [ 1.381888] do_one_initcall+0x80/0x1c8 [ 1.385908] kernel_init_freeable+0x1c4/0x28c [ 1.390472] kernel_init+0x20/0x1d8 [ 1.394138] ret_from_fork+0x10/0x20 [ 1.397885] ---[ end trace 0000000000000000 ]--- [ 1.407908] ------------[ cut here ]------------ Fixes: ef89fd56bdfc ("arm64: dts: imx8ulp: add flexspi node") Cc: stable@kernel.org Signed-off-by: Haibo Chen Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8ulp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index e32d5afcf4a9..43f543768444 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -384,7 +384,7 @@ }; flexspi2: spi@29810000 { - compatible = "nxp,imx8mm-fspi"; + compatible = "nxp,imx8ulp-fspi"; reg = <0x29810000 0x10000>, <0x60000000 0x10000000>; reg-names = "fspi_base", "fspi_mmap"; #address-cells = <1>; From d7425f3cfada8c4a3bc72bdd203c4fec7f77b7a6 Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Tue, 17 Sep 2024 08:58:01 +0200 Subject: [PATCH 033/281] arm64: dts: imx8: Fix lvds0 device tree Some clock output names on lvds0 device tree were duplicated from mipi1, which caused an -EEXIST when registering these clocks during probe. Fixes: 0fba24b3b956 ("arm64: dts: imx8: add basic lvds0 and lvds1 subsystem") Signed-off-by: Diogo Silva Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi index d00036204a8c..dad0dc8fb431 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lvds0.dtsi @@ -14,7 +14,7 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x56243000 0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_lis_lpcg_ipg_clk"; + clock-output-names = "lvds0_lis_lpcg_ipg_clk"; power-domains = <&pd IMX_SC_R_MIPI_1>; }; @@ -22,9 +22,9 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x5624300c 0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_pwm_lpcg_clk", - "mipi1_pwm_lpcg_ipg_clk", - "mipi1_pwm_lpcg_32k_clk"; + clock-output-names = "lvds0_pwm_lpcg_clk", + "lvds0_pwm_lpcg_ipg_clk", + "lvds0_pwm_lpcg_32k_clk"; power-domains = <&pd IMX_SC_R_MIPI_1_PWM_0>; }; @@ -32,8 +32,8 @@ lvds0_subsys: bus@56240000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x56243010 0x4>; #clock-cells = <1>; - clock-output-names = "mipi1_i2c0_lpcg_clk", - "mipi1_i2c0_lpcg_ipg_clk"; + clock-output-names = "lvds0_i2c0_lpcg_clk", + "lvds0_i2c0_lpcg_ipg_clk"; power-domains = <&pd IMX_SC_R_MIPI_1_I2C_0>; }; From bf0a800415a7397617765fe5f5278a645195c75a Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 11 Oct 2024 03:41:39 -0700 Subject: [PATCH 034/281] clk: qcom: gcc-x1e80100: Fix halt_check for pipediv2 clocks The pipediv2_clk's source from the same mux as pipe clock. So they have same limitation, which is that the PHY sequence requires to enable these local CBCs before the PHY is actually outputting a clock to them. This means the clock won't actually turn on when we vote them. Hence, let's skip the halt bit check of the pipediv2_clk, otherwise pipediv2_clk may stuck at off state during bootup. Cc: stable@vger.kernel.org Fixes: 161b7c401f4b ("clk: qcom: Add Global Clock controller (GCC) driver for X1E80100") Suggested-by: Mike Tipton Signed-off-by: Qiang Yu Reviewed-by: Konrad Dybcio Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20241011104142.1181773-6-quic_qianyu@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/gcc-x1e80100.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/qcom/gcc-x1e80100.c b/drivers/clk/qcom/gcc-x1e80100.c index 0f578771071f..81ba5ceab342 100644 --- a/drivers/clk/qcom/gcc-x1e80100.c +++ b/drivers/clk/qcom/gcc-x1e80100.c @@ -3123,7 +3123,7 @@ static struct clk_branch gcc_pcie_3_pipe_clk = { static struct clk_branch gcc_pcie_3_pipediv2_clk = { .halt_reg = 0x58060, - .halt_check = BRANCH_HALT_VOTED, + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52020, .enable_mask = BIT(5), @@ -3248,7 +3248,7 @@ static struct clk_branch gcc_pcie_4_pipe_clk = { static struct clk_branch gcc_pcie_4_pipediv2_clk = { .halt_reg = 0x6b054, - .halt_check = BRANCH_HALT_VOTED, + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52010, .enable_mask = BIT(27), @@ -3373,7 +3373,7 @@ static struct clk_branch gcc_pcie_5_pipe_clk = { static struct clk_branch gcc_pcie_5_pipediv2_clk = { .halt_reg = 0x2f054, - .halt_check = BRANCH_HALT_VOTED, + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52018, .enable_mask = BIT(19), @@ -3511,7 +3511,7 @@ static struct clk_branch gcc_pcie_6a_pipe_clk = { static struct clk_branch gcc_pcie_6a_pipediv2_clk = { .halt_reg = 0x31060, - .halt_check = BRANCH_HALT_VOTED, + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52018, .enable_mask = BIT(28), @@ -3649,7 +3649,7 @@ static struct clk_branch gcc_pcie_6b_pipe_clk = { static struct clk_branch gcc_pcie_6b_pipediv2_clk = { .halt_reg = 0x8d060, - .halt_check = BRANCH_HALT_VOTED, + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52010, .enable_mask = BIT(28), From 837c333f46df8ce6755ba82c53acb91948ec0072 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 9 Oct 2024 14:07:23 +0300 Subject: [PATCH 035/281] arm64: dts: qcom: x1e80100: Fix PCIe 6a lanes description Fix the description and compatible for PCIe 6a, as it is in fact a 4-lanes controller and PHY, but it can also be used in 2-lanes mode. For 4-lanes mode, it uses the lanes provided by PCIe 6b. For 2-lanes mode, PCIe 6a uses 2 lanes and then PCIe 6b uses the other 2 lanes. The number of lanes in which the PHY should be configured depends on a TCSR register value on each individual board. Cc: stable+noautosel@kernel.org # Depends on pcie-qcom 16.0 GT/s support Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Signed-off-by: Abel Vesa Reviewed-by: Konrad Dybcio Reviewed-by: Johan Hovold Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20241009-x1e80100-dts-fixes-pcie6a-v3-1-14a1163e691b@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 27796ba1216f..6a419fb6a982 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2931,7 +2931,7 @@ dma-coherent; linux,pci-domain = <6>; - num-lanes = <2>; + num-lanes = <4>; interrupts = , , @@ -2997,8 +2997,9 @@ }; pcie6a_phy: phy@1bfc000 { - compatible = "qcom,x1e80100-qmp-gen4x2-pcie-phy"; - reg = <0 0x01bfc000 0 0x2000>; + compatible = "qcom,x1e80100-qmp-gen4x4-pcie-phy"; + reg = <0 0x01bfc000 0 0x2000>, + <0 0x01bfe000 0 0x2000>; clocks = <&gcc GCC_PCIE_6A_PHY_AUX_CLK>, <&gcc GCC_PCIE_6A_CFG_AHB_CLK>, @@ -3023,6 +3024,8 @@ power-domains = <&gcc GCC_PCIE_6_PHY_GDSC>; + qcom,4ln-config-sel = <&tcsr 0x1a000 0>; + #clock-cells = <0>; clock-output-names = "pcie6a_pipe_clk"; From 734bf13e0c853de4fe2c05d5c0255e9247ad634c Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 24 Sep 2024 15:12:18 +0800 Subject: [PATCH 036/281] arm64: dts: imx8mp-skov-revb-mi1010ait-1cp1: Assign "media_isp" clock rate Commit 2d39b78e5716 ("arm64: dts: imx8mp: Add DT nodes for the two ISPs") added a new phandle to the "assigned-clocks" property of media_blk_ctrl node just before the phandle for "video_pll1" clock in i.MX8MP SoC device tree so that "media_isp" clock rate is assigned to 500MHz by default. However, it missed updating this relevant board device tree where the relevant "assigned-clock-rates" property is changed to set a new rate for "video_pll1" clock. This causes the "media_isp" clock rate being wrongly set to the "video_pll1" clock rate and the "video_pll1" clock rate being untouched. Fix this by assigning "media_isp" clock rate explicitly to 500MHz in this board device tree. Fixes: 2d39b78e5716 ("arm64: dts: imx8mp: Add DT nodes for the two ISPs") Signed-off-by: Liu Ying Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts b/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts index 3c2efdc59bfa..30962922b361 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-skov-revb-mi1010ait-1cp1.dts @@ -71,6 +71,7 @@ assigned-clock-rates = <500000000>, <200000000>, <0>, /* IMX8MP_CLK_MEDIA_DISP2_PIX = pixelclk of lvds panel */ <68900000>, + <500000000>, /* IMX8MP_VIDEO_PLL1 = IMX8MP_CLK_MEDIA_LDB * 2 */ <964600000>; }; From 83beece5aff75879bdfc6df8ba84ea88fd93050e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 16 Oct 2024 17:35:06 +0100 Subject: [PATCH 037/281] firmware: microchip: auto-update: fix poll_complete() to not report spurious timeout errors fw_upload's poll_complete() is really intended for use with asynchronous write() implementations - or at least those where the write() loop may terminate without the kernel yet being aware of whether or not the firmware upload has succeeded. For auto-update, write() is only ever called once and will only return when uploading has completed, be that by passing or failing. The core fw_upload code only calls poll_complete() after the final call to write() has returned. However, the poll_complete() implementation in the auto-update driver was written to expect poll_complete() to be called from another context, and it waits for a completion signalled from write(). Since poll_complete() is actually called from the same context, after the write() loop has terminated, wait_for_completion() never sees the completion get signalled and always times out, causing programming to always report a failing. Since write() is full synchronous, and its return value will indicate whether or not programming passed or failed, poll_complete() serves no purpose and can be cut down to simply return FW_UPLOAD_ERR_NONE. Cc: stable@vger.kernel.org Fixes: ec5b0f1193ad4 ("firmware: microchip: add PolarFire SoC Auto Update support") Reported-by: Jamie Gibbons Tested-by: Jamie Gibbons Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 42 ++++--------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 9ca5ee58edbd..0f7ec8848202 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -76,14 +76,11 @@ #define AUTO_UPDATE_INFO_SIZE SZ_1M #define AUTO_UPDATE_BITSTREAM_BASE (AUTO_UPDATE_DIRECTORY_SIZE + AUTO_UPDATE_INFO_SIZE) -#define AUTO_UPDATE_TIMEOUT_MS 60000 - struct mpfs_auto_update_priv { struct mpfs_sys_controller *sys_controller; struct device *dev; struct mtd_info *flash; struct fw_upload *fw_uploader; - struct completion programming_complete; size_t size_per_bitstream; bool cancel_request; }; @@ -156,19 +153,6 @@ static void mpfs_auto_update_cancel(struct fw_upload *fw_uploader) static enum fw_upload_err mpfs_auto_update_poll_complete(struct fw_upload *fw_uploader) { - struct mpfs_auto_update_priv *priv = fw_uploader->dd_handle; - int ret; - - /* - * There is no meaningful way to get the status of the programming while - * it is in progress, so attempting anything other than waiting for it - * to complete would be misplaced. - */ - ret = wait_for_completion_timeout(&priv->programming_complete, - msecs_to_jiffies(AUTO_UPDATE_TIMEOUT_MS)); - if (!ret) - return FW_UPLOAD_ERR_TIMEOUT; - return FW_UPLOAD_ERR_NONE; } @@ -349,33 +333,23 @@ static enum fw_upload_err mpfs_auto_update_write(struct fw_upload *fw_uploader, u32 offset, u32 size, u32 *written) { struct mpfs_auto_update_priv *priv = fw_uploader->dd_handle; - enum fw_upload_err err = FW_UPLOAD_ERR_NONE; int ret; - reinit_completion(&priv->programming_complete); - ret = mpfs_auto_update_write_bitstream(fw_uploader, data, offset, size, written); - if (ret) { - err = FW_UPLOAD_ERR_RW_ERROR; - goto out; - } + if (ret) + return FW_UPLOAD_ERR_RW_ERROR; - if (priv->cancel_request) { - err = FW_UPLOAD_ERR_CANCELED; - goto out; - } + if (priv->cancel_request) + return FW_UPLOAD_ERR_CANCELED; if (mpfs_auto_update_is_bitstream_info(data, size)) - goto out; + return FW_UPLOAD_ERR_NONE; ret = mpfs_auto_update_verify_image(fw_uploader); if (ret) - err = FW_UPLOAD_ERR_FW_INVALID; + return FW_UPLOAD_ERR_FW_INVALID; -out: - complete(&priv->programming_complete); - - return err; + return FW_UPLOAD_ERR_NONE; } static const struct fw_upload_ops mpfs_auto_update_ops = { @@ -461,8 +435,6 @@ static int mpfs_auto_update_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "The current bitstream does not support auto-update\n"); - init_completion(&priv->programming_complete); - fw_uploader = firmware_upload_register(THIS_MODULE, dev, "mpfs-auto-update", &mpfs_auto_update_ops, priv); if (IS_ERR(fw_uploader)) From 2e11e78667db90a9e732fbe42820e734d0658fc7 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 16 Oct 2024 21:11:15 +0100 Subject: [PATCH 038/281] riscv: dts: starfive: disable unused csi/camss nodes Aurelien reported probe failures due to the csi node being enabled without having a camera attached to it. A camera was in the initial submissions, but was removed from the dts, as it had not actually been present on the board, but was from an addon board used by the developer of the relevant drivers. The non-camera pipeline nodes were not disabled when this happened and the probe failures are problematic for Debian. Disable them. CC: stable@vger.kernel.org Fixes: 28ecaaa5af192 ("riscv: dts: starfive: jh7110: Add camera subsystem nodes") Closes: https://lore.kernel.org/all/Zw1-vcN4CoVkfLjU@aurel32.net/ Reported-by: Aurelien Jarno Reviewed-by: Emil Renner Berthing Tested-by: Aurelien Jarno Reviewed-by: Aurelien Jarno Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-common.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index c7771b3b6475..d6c55f1cc96a 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -128,7 +128,6 @@ assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>, <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>; assigned-clock-rates = <49500000>, <198000000>; - status = "okay"; ports { #address-cells = <1>; @@ -151,7 +150,6 @@ &csi2rx { assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>; assigned-clock-rates = <297000000>; - status = "okay"; ports { #address-cells = <1>; From 4c76f331a9a173ac8fe1297a9231c2a38f88e368 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 14:23:38 +0200 Subject: [PATCH 039/281] media: v4l2-ctrls-api: fix error handling for v4l2_g_ctrl() As detected by Coverity, the error check logic at get_ctrl() is broken: if ptr_to_user() fails to fill a control due to an error, no errors are returned and v4l2_g_ctrl() returns success on a failed operation, which may cause applications to fail. Add an error check at get_ctrl() and ensure that it will be returned to userspace without filling the control value if get_ctrl() fails. Fixes: 71c689dc2e73 ("media: v4l2-ctrls: split up into four source files") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls-api.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c b/drivers/media/v4l2-core/v4l2-ctrls-api.c index e5a364efd5e6..95a2202879d8 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c @@ -753,9 +753,10 @@ static int get_ctrl(struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c) for (i = 0; i < master->ncontrols; i++) cur_to_new(master->cluster[i]); ret = call_op(master, g_volatile_ctrl); - new_to_user(c, ctrl); + if (!ret) + ret = new_to_user(c, ctrl); } else { - cur_to_user(c, ctrl); + ret = cur_to_user(c, ctrl); } v4l2_ctrl_unlock(master); return ret; @@ -770,7 +771,10 @@ int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *control) if (!ctrl || !ctrl->is_int) return -EINVAL; ret = get_ctrl(ctrl, &c); - control->value = c.value; + + if (!ret) + control->value = c.value; + return ret; } EXPORT_SYMBOL(v4l2_g_ctrl); @@ -811,10 +815,11 @@ static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, int ret; v4l2_ctrl_lock(ctrl); - user_to_new(c, ctrl); - ret = set_ctrl(fh, ctrl, 0); + ret = user_to_new(c, ctrl); if (!ret) - cur_to_user(c, ctrl); + ret = set_ctrl(fh, ctrl, 0); + if (!ret) + ret = cur_to_user(c, ctrl); v4l2_ctrl_unlock(ctrl); return ret; } From e6a3ea83fbe15d4818d01804e904cbb0e64e543b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 16 Oct 2024 11:53:15 +0200 Subject: [PATCH 040/281] media: v4l2-tpg: prevent the risk of a division by zero As reported by Coverity, the logic at tpg_precalculate_line() blindly rescales the buffer even when scaled_witdh is equal to zero. If this ever happens, this will cause a division by zero. Instead, add a WARN_ON_ONCE() to trigger such cases and return without doing any precalculation. Fixes: 63881df94d3e ("[media] vivid: add the Test Pattern Generator") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index 642c48e8c1f5..ded11cd8dbf7 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1795,6 +1795,9 @@ static void tpg_precalculate_line(struct tpg_data *tpg) unsigned p; unsigned x; + if (WARN_ON_ONCE(!tpg->src_width || !tpg->scaled_width)) + return; + switch (tpg->pattern) { case TPG_PAT_GREEN: contrast = TPG_COLOR_100_RED; From 972e63e895abbe8aa1ccbdbb4e6362abda7cd457 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 15:23:01 +0200 Subject: [PATCH 041/281] media: dvbdev: prevent the risk of out of memory access The dvbdev contains a static variable used to store dvb minors. The behavior of it depends if CONFIG_DVB_DYNAMIC_MINORS is set or not. When not set, dvb_register_device() won't check for boundaries, as it will rely that a previous call to dvb_register_adapter() would already be enforcing it. On a similar way, dvb_device_open() uses the assumption that the register functions already did the needed checks. This can be fragile if some device ends using different calls. This also generate warnings on static check analysers like Coverity. So, add explicit guards to prevent potential risk of OOM issues. Fixes: 5dd3f3071070 ("V4L/DVB (9361): Dynamic DVB minor allocation") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvbdev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index b43695bc51e7..14f323fbada7 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -86,10 +86,15 @@ static DECLARE_RWSEM(minor_rwsem); static int dvb_device_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev; + unsigned int minor = iminor(inode); + + if (minor >= MAX_DVB_MINORS) + return -ENODEV; mutex_lock(&dvbdev_mutex); down_read(&minor_rwsem); - dvbdev = dvb_minors[iminor(inode)]; + + dvbdev = dvb_minors[minor]; if (dvbdev && dvbdev->fops) { int err = 0; @@ -525,7 +530,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (!dvb_minors[minor]) break; - if (minor == MAX_DVB_MINORS) { + if (minor >= MAX_DVB_MINORS) { if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); @@ -540,6 +545,14 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } #else minor = nums2minor(adap->num, type, id); + if (minor >= MAX_DVB_MINORS) { + dvb_media_device_free(dvbdev); + list_del(&dvbdev->list_head); + kfree(dvbdev); + *pdvbdev = NULL; + mutex_unlock(&dvbdev_register_lock); + return ret; + } #endif dvbdev->minor = minor; dvb_minors[minor] = dvb_device_get(dvbdev); From 9883a4d41aba7612644e9bb807b971247cea9b9d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 16:05:16 +0200 Subject: [PATCH 042/281] media: dvb_frontend: don't play tricks with underflow values fepriv->auto_sub_step is unsigned. Setting it to -1 is just a trick to avoid calling continue, as reported by Coverity. It relies to have this code just afterwards: if (!ready) fepriv->auto_sub_step++; Simplify the code by simply setting it to zero and use continue to return to the while loop. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 4f78f30b3646..a05aa271a1ba 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -443,8 +443,8 @@ static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wra default: fepriv->auto_step++; - fepriv->auto_sub_step = -1; /* it'll be incremented to 0 in a moment */ - break; + fepriv->auto_sub_step = 0; + continue; } if (!ready) fepriv->auto_sub_step++; From 2aee207e5b3c94ef859316008119ea06d6798d49 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 10:33:10 +0200 Subject: [PATCH 043/281] media: mgb4: protect driver against spectre MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frequency range is set from sysfs via frequency_range_store(), being vulnerable to spectre, as reported by smatch: drivers/media/pci/mgb4/mgb4_cmt.c:231 mgb4_cmt_set_vin_freq_range() warn: potential spectre issue 'cmt_vals_in' [r] drivers/media/pci/mgb4/mgb4_cmt.c:238 mgb4_cmt_set_vin_freq_range() warn: possible spectre second half. 'reg_set' Fix it. Fixes: 0ab13674a9bd ("media: pci: mgb4: Added Digiteq Automotive MGB4 driver") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Martin Tůma --- drivers/media/pci/mgb4/mgb4_cmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/pci/mgb4/mgb4_cmt.c b/drivers/media/pci/mgb4/mgb4_cmt.c index 70dc78ef193c..a25b68403bc6 100644 --- a/drivers/media/pci/mgb4/mgb4_cmt.c +++ b/drivers/media/pci/mgb4/mgb4_cmt.c @@ -227,6 +227,8 @@ void mgb4_cmt_set_vin_freq_range(struct mgb4_vin_dev *vindev, u32 config; size_t i; + freq_range = array_index_nospec(freq_range, ARRAY_SIZE(cmt_vals_in)); + addr = cmt_addrs_in[vindev->config->id]; reg_set = cmt_vals_in[freq_range]; From 458ea1c0be991573ec436aa0afa23baacfae101a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 09:24:24 +0200 Subject: [PATCH 044/281] media: av7110: fix a spectre vulnerability As warned by smatch: drivers/staging/media/av7110/av7110_ca.c:270 dvb_ca_ioctl() warn: potential spectre issue 'av7110->ci_slot' [w] (local cap) There is a spectre-related vulnerability at the code. Fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/av7110/av7110.h | 4 +++- drivers/staging/media/av7110/av7110_ca.c | 25 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/staging/media/av7110/av7110.h b/drivers/staging/media/av7110/av7110.h index ec461fd187af..b584754f4be0 100644 --- a/drivers/staging/media/av7110/av7110.h +++ b/drivers/staging/media/av7110/av7110.h @@ -88,6 +88,8 @@ struct infrared { u32 ir_config; }; +#define MAX_CI_SLOTS 2 + /* place to store all the necessary device information */ struct av7110 { /* devices */ @@ -163,7 +165,7 @@ struct av7110 { /* CA */ - struct ca_slot_info ci_slot[2]; + struct ca_slot_info ci_slot[MAX_CI_SLOTS]; enum av7110_video_mode vidmode; struct dmxdev dmxdev; diff --git a/drivers/staging/media/av7110/av7110_ca.c b/drivers/staging/media/av7110/av7110_ca.c index 6ce212c64e5d..fce4023c9dea 100644 --- a/drivers/staging/media/av7110/av7110_ca.c +++ b/drivers/staging/media/av7110/av7110_ca.c @@ -26,23 +26,28 @@ void CI_handle(struct av7110 *av7110, u8 *data, u16 len) { + unsigned slot_num; + dprintk(8, "av7110:%p\n", av7110); if (len < 3) return; switch (data[0]) { case CI_MSG_CI_INFO: - if (data[2] != 1 && data[2] != 2) + if (data[2] != 1 && data[2] != MAX_CI_SLOTS) break; + + slot_num = array_index_nospec(data[2] - 1, MAX_CI_SLOTS); + switch (data[1]) { case 0: - av7110->ci_slot[data[2] - 1].flags = 0; + av7110->ci_slot[slot_num].flags = 0; break; case 1: - av7110->ci_slot[data[2] - 1].flags |= CA_CI_MODULE_PRESENT; + av7110->ci_slot[slot_num].flags |= CA_CI_MODULE_PRESENT; break; case 2: - av7110->ci_slot[data[2] - 1].flags |= CA_CI_MODULE_READY; + av7110->ci_slot[slot_num].flags |= CA_CI_MODULE_READY; break; } break; @@ -262,15 +267,19 @@ static int dvb_ca_ioctl(struct file *file, unsigned int cmd, void *parg) case CA_GET_SLOT_INFO: { struct ca_slot_info *info = (struct ca_slot_info *)parg; + unsigned int slot_num; if (info->num < 0 || info->num > 1) { mutex_unlock(&av7110->ioctl_mutex); return -EINVAL; } - av7110->ci_slot[info->num].num = info->num; - av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? - CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(struct ca_slot_info)); + slot_num = array_index_nospec(info->num, MAX_CI_SLOTS); + + av7110->ci_slot[slot_num].num = info->num; + av7110->ci_slot[slot_num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? + CA_CI_LINK : CA_CI; + memcpy(info, &av7110->ci_slot[slot_num], + sizeof(struct ca_slot_info)); break; } From 14a22762c3daeac59a5a534e124acbb4d7a79b3a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 11:10:31 +0200 Subject: [PATCH 045/281] media: s5p-jpeg: prevent buffer overflows The current logic allows word to be less than 2. If this happens, there will be buffer overflows, as reported by smatch. Add extra checks to prevent it. While here, remove an unused word = 0 assignment. Fixes: 6c96dbbc2aa9 ("[media] s5p-jpeg: add support for 5433") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jacek Anaszewski --- .../media/platform/samsung/s5p-jpeg/jpeg-core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c index d2c4a0178b3c..1db4609b3557 100644 --- a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c @@ -775,11 +775,14 @@ static void exynos4_jpeg_parse_decode_h_tbl(struct s5p_jpeg_ctx *ctx) (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.sos + 2; jpeg_buffer.curr = 0; - word = 0; - if (get_word_be(&jpeg_buffer, &word)) return; - jpeg_buffer.size = (long)word - 2; + + if (word < 2) + jpeg_buffer.size = 0; + else + jpeg_buffer.size = (long)word - 2; + jpeg_buffer.data += 2; jpeg_buffer.curr = 0; @@ -1058,6 +1061,7 @@ static int get_word_be(struct s5p_jpeg_buffer *buf, unsigned int *word) if (byte == -1) return -1; *word = (unsigned int)byte | temp; + return 0; } @@ -1145,7 +1149,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; sof = jpeg_buffer.curr; /* after 0xffc0 */ sof_len = length; @@ -1176,7 +1180,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dqt >= S5P_JPEG_MAX_MARKER) return false; @@ -1189,7 +1193,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dht >= S5P_JPEG_MAX_MARKER) return false; @@ -1214,6 +1218,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; + /* No need to check underflows as skip() does it */ skip(&jpeg_buffer, length); break; } From 438d3085ba5b8b5bfa5290faa594e577f6ac9aa7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 11:38:10 +0200 Subject: [PATCH 046/281] media: ar0521: don't overflow when checking PLL values The PLL checks are comparing 64 bit integers with 32 bit ones, as reported by Coverity. Depending on the values of the variables, this may underflow. Fix it ensuring that both sides of the expression are u64. Fixes: 852b50aeed15 ("media: On Semi AR0521 sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Acked-by: Sakari Ailus --- drivers/media/i2c/ar0521.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/ar0521.c b/drivers/media/i2c/ar0521.c index fc27238dd4d3..24873149096c 100644 --- a/drivers/media/i2c/ar0521.c +++ b/drivers/media/i2c/ar0521.c @@ -255,10 +255,10 @@ static u32 calc_pll(struct ar0521_dev *sensor, u32 freq, u16 *pre_ptr, u16 *mult continue; /* Minimum value */ if (new_mult > 254) break; /* Maximum, larger pre won't work either */ - if (sensor->extclk_freq * (u64)new_mult < AR0521_PLL_MIN * + if (sensor->extclk_freq * (u64)new_mult < (u64)AR0521_PLL_MIN * new_pre) continue; - if (sensor->extclk_freq * (u64)new_mult > AR0521_PLL_MAX * + if (sensor->extclk_freq * (u64)new_mult > (u64)AR0521_PLL_MAX * new_pre) break; /* Larger pre won't work either */ new_pll = div64_round_up(sensor->extclk_freq * (u64)new_mult, From 576a307a7650bd544fbb24df801b9b7863b85e2f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 12:14:11 +0200 Subject: [PATCH 047/281] media: cx24116: prevent overflows on SNR calculus as reported by Coverity, if reading SNR registers fail, a negative number will be returned, causing an underflow when reading SNR registers. Prevent that. Fixes: 8953db793d5b ("V4L/DVB (9178): cx24116: Add module parameter to return SNR as ESNO.") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/cx24116.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 8b978a9f74a4..f5dd3a81725a 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -741,6 +741,7 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) { struct cx24116_state *state = fe->demodulator_priv; u8 snr_reading; + int ret; static const u32 snr_tab[] = { /* 10 x Table (rounded up) */ 0x00000, 0x0199A, 0x03333, 0x04ccD, 0x06667, 0x08000, 0x0999A, 0x0b333, 0x0cccD, 0x0e667, @@ -749,7 +750,11 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) dprintk("%s()\n", __func__); - snr_reading = cx24116_readreg(state, CX24116_REG_QUALITY0); + ret = cx24116_readreg(state, CX24116_REG_QUALITY0); + if (ret < 0) + return ret; + + snr_reading = ret; if (snr_reading >= 0xa0 /* 100% */) *snr = 0xffff; From 50b9fa751d1aef5d262bde871c70a7f44262f0bc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 12:25:09 +0200 Subject: [PATCH 048/281] media: adv7604: prevent underflow condition when reporting colorspace Currently, adv76xx_log_status() reads some date using io_read() which may return negative values. The current logic doesn't check such errors, causing colorspace to be reported on a wrong way at adv76xx_log_status(), as reported by Coverity. If I/O error happens there, print a different message, instead of reporting bogus messages to userspace. Fixes: 54450f591c99 ("[media] adv7604: driver for the Analog Devices ADV7604 video decoder") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Hans Verkuil --- drivers/media/i2c/adv7604.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 48230d5109f0..272945a878b3 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2519,10 +2519,10 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) const struct adv76xx_chip_info *info = state->info; struct v4l2_dv_timings timings; struct stdi_readback stdi; - u8 reg_io_0x02 = io_read(sd, 0x02); + int ret; + u8 reg_io_0x02; u8 edid_enabled; u8 cable_det; - static const char * const csc_coeff_sel_rb[16] = { "bypassed", "YPbPr601 -> RGB", "reserved", "YPbPr709 -> RGB", "reserved", "RGB -> YPbPr601", "reserved", "RGB -> YPbPr709", @@ -2621,13 +2621,21 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) v4l2_info(sd, "-----Color space-----\n"); v4l2_info(sd, "RGB quantization range ctrl: %s\n", rgb_quantization_range_txt[state->rgb_quantization_range]); - v4l2_info(sd, "Input color space: %s\n", - input_color_space_txt[reg_io_0x02 >> 4]); - v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", - (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", - (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? - "(16-235)" : "(0-255)", - (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + + ret = io_read(sd, 0x02); + if (ret < 0) { + v4l2_info(sd, "Can't read Input/Output color space\n"); + } else { + reg_io_0x02 = ret; + + v4l2_info(sd, "Input color space: %s\n", + input_color_space_txt[reg_io_0x02 >> 4]); + v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", + (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", + (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? + "(16-235)" : "(0-255)", + (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + } v4l2_info(sd, "Color space conversion: %s\n", csc_coeff_sel_rb[cp_read(sd, info->cp_csc) >> 4]); From 2d861977e7314f00bf27d0db17c11ff5e85e609a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 13:29:43 +0200 Subject: [PATCH 049/281] media: stb0899_algo: initialize cfr before using it The loop at stb0899_search_carrier() starts with a random value for cfr, as reported by Coverity. Initialize it to zero, just like stb0899_dvbs_algo() to ensure that carrier search won't bail out. Fixes: 8bd135bab91f ("V4L/DVB (9375): Add STB0899 support") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/stb0899_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/stb0899_algo.c b/drivers/media/dvb-frontends/stb0899_algo.c index df89c33dac23..40537c4ccb0d 100644 --- a/drivers/media/dvb-frontends/stb0899_algo.c +++ b/drivers/media/dvb-frontends/stb0899_algo.c @@ -269,7 +269,7 @@ static enum stb0899_status stb0899_search_carrier(struct stb0899_state *state) short int derot_freq = 0, last_derot_freq = 0, derot_limit, next_loop = 3; int index = 0; - u8 cfr[2]; + u8 cfr[2] = {0}; u8 reg; internal->status = NOCARRIER; From eba6a8619d2b988f9b3a34e6b552a34fa2057d61 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 16 Oct 2024 11:03:26 +0200 Subject: [PATCH 050/281] media: cec: extron-da-hd-4k-plus: don't use -1 as an error code The logic at get_edid_tag_location() returns either an offset or an error condition. However, the error condition uses a non-standard "-1" value. This hits a Coverity bug, as Coverity assumes that positive values are underflow. While this is a false positive, returning error codes as -1 is an issue. So, instead, use -ENOENT to indicate that the tag was not found. Fixes: 056f2821b631 ("media: cec: extron-da-hd-4k-plus: add the Extron DA HD 4K Plus CEC driver") Signed-off-by: Mauro Carvalho Chehab --- .../cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c index 8526f613a40e..cfbfc4c1b2e6 100644 --- a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c +++ b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c @@ -348,12 +348,12 @@ static int get_edid_tag_location(const u8 *edid, unsigned int size, /* Return if not a CTA-861 extension block */ if (size < 256 || edid[0] != 0x02 || edid[1] != 0x03) - return -1; + return -ENOENT; /* search tag */ d = edid[0x02] & 0x7f; if (d <= 4) - return -1; + return -ENOENT; i = 0x04; end = 0x00 + d; @@ -371,7 +371,7 @@ static int get_edid_tag_location(const u8 *edid, unsigned int size, return offset + i; i += len + 1; } while (i < end); - return -1; + return -ENOENT; } static void extron_edid_crc(u8 *edid) From ba9cf6b430433e57bfc8072364e944b7c0eca2a4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 16 Oct 2024 11:24:15 +0200 Subject: [PATCH 051/281] media: pulse8-cec: fix data timestamp at pulse8_setup() As pointed by Coverity, there is a hidden overflow condition there. As date is signed and u8 is unsigned, doing: date = (data[0] << 24) With a value bigger than 07f will make all upper bits of date 0xffffffff. This can be demonstrated with this small code: typedef int64_t time64_t; typedef uint8_t u8; int main(void) { u8 data[] = { 0xde ,0xad , 0xbe, 0xef }; time64_t date; date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Invalid data = 0x%08lx\n", date); date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Expected data = 0x%08lx\n", date); return 0; } Fix it by converting the upper bit calculation to unsigned. Fixes: cea28e7a55e7 ("media: pulse8-cec: reorganize function order") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/usb/pulse8/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c index ba67587bd43e..171366fe3544 100644 --- a/drivers/media/cec/usb/pulse8/pulse8-cec.c +++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c @@ -685,7 +685,7 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 4); if (err) return err; - date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; dev_info(pulse8->dev, "Firmware build date %ptT\n", &date); dev_dbg(pulse8->dev, "Persistent config:\n"); From 404b739e895522838f1abdc340c554654d671dde Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Wed, 16 Oct 2024 18:32:24 +0530 Subject: [PATCH 052/281] staging: vchiq_arm: Use devm_kzalloc() for vchiq_arm_state allocation The struct vchiq_arm_state 'platform_state' is currently allocated dynamically using kzalloc(). Unfortunately, it is never freed and is subjected to memory leaks in the error handling paths of the probe() function. To address the issue, use device resource management helper devm_kzalloc(), to ensure cleanup after its allocation. Fixes: 71bad7f08641 ("staging: add bcm2708 vchiq driver") Cc: stable@vger.kernel.org Signed-off-by: Umang Jain Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20241016130225.61024-2-umang.jain@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 3dbeffc650d3..0d8d5555e8af 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -593,7 +593,7 @@ vchiq_platform_init_state(struct vchiq_state *state) { struct vchiq_arm_state *platform_state; - platform_state = kzalloc(sizeof(*platform_state), GFP_KERNEL); + platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL); if (!platform_state) return -ENOMEM; From 807babf69027b4f1c55e72b06879658e83830880 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Wed, 16 Oct 2024 18:32:25 +0530 Subject: [PATCH 053/281] staging: vchiq_arm: Use devm_kzalloc() for drv_mgmt allocation The struct drv_mgmt 'mgmt' is currently allocated dynamically using kzalloc(). Unfortunately, it is subjected to memory leaks in the error handling paths of the probe() function. To address this issue, use device resource management helper devm_kzalloc(), to ensure cleanup after the allocation. Cc: stable@vger.kernel.org Fixes: 1c9e16b73166 ("staging: vc04_services: vchiq_arm: Split driver static and runtime data") Signed-off-by: Umang Jain Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20241016130225.61024-3-umang.jain@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 0d8d5555e8af..6c488b1e2624 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1731,7 +1731,7 @@ static int vchiq_probe(struct platform_device *pdev) return -ENOENT; } - mgmt = kzalloc(sizeof(*mgmt), GFP_KERNEL); + mgmt = devm_kzalloc(&pdev->dev, sizeof(*mgmt), GFP_KERNEL); if (!mgmt) return -ENOMEM; @@ -1789,8 +1789,6 @@ static void vchiq_remove(struct platform_device *pdev) arm_state = vchiq_platform_get_arm_state(&mgmt->state); kthread_stop(arm_state->ka_thread); - - kfree(mgmt); } static struct platform_driver vchiq_driver = { From eab6ba2aa3bbaf598a66e31f709bf84b7bb7dc8a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sat, 12 Oct 2024 10:52:21 +0800 Subject: [PATCH 054/281] arm64: dts: imx8mp: correct sdhc ipg clk The ipg clk for sdhc sources from IPG_CLK_ROOT per i.MX 8M Plus Applications Processor Reference Manual, Table 5-2. System Clocks. Fixes: 6d9b8d20431f ("arm64: dts: freescale: Add i.MX8MP dtsi support") Signed-off-by: Peng Fan Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index f3531cfb0d79..40e847bc0b7f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1261,7 +1261,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -1275,7 +1275,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -1289,7 +1289,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; From 4fbb73416b10778adfd2c1319e9c5829780d8535 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 17 Oct 2024 05:11:20 +0200 Subject: [PATCH 055/281] arm64: dts: imx8mp-phyboard-pollux: Set Video PLL1 frequency to 506.8 MHz The LVDS panel on this device uses 72.4 MHz pixel clock, set IMX8MP_VIDEO_PLL1 to 72.4 * 7 = 506.8 MHz so the LDB serializer and LCDIFv3 scanout engine can reach accurate pixel clock of exactly 72.4 MHz. Without this patch, the Video PLL1 frequency is the default set in imx8mp.dtsi which is 1039.5 MHz, which divides down to inaccurate pixel clock of 74.25 MHz which works for this particular panel by sheer chance. Stop taking that chance and set correct accurate pixel clock frequency instead. Fixes: 326d86e197fc ("arm64: dts: imx8mp-phyboard-pollux-rdk: add etml panel support") Reported-by: Isaac Scott Signed-off-by: Marek Vasut Reviewed-by: Yannic Moog Tested-by: Yannic Moog Signed-off-by: Shawn Guo --- .../dts/freescale/imx8mp-phyboard-pollux-rdk.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 50debe821c42..9c102acb8052 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -218,6 +218,18 @@ }; }; +&media_blk_ctrl { + /* + * The LVDS panel on this device uses 72.4 MHz pixel clock, + * set IMX8MP_VIDEO_PLL1 to 72.4 * 7 = 506.8 MHz so the LDB + * serializer and LCDIFv3 scanout engine can reach accurate + * pixel clock of exactly 72.4 MHz. + */ + assigned-clock-rates = <500000000>, <200000000>, + <0>, <0>, <500000000>, + <506800000>; +}; + &snvs_pwrkey { status = "okay"; }; From 295416091e44806760ccf753aeafdafc0ae268f3 Mon Sep 17 00:00:00 2001 From: Xinqi Zhang Date: Wed, 16 Oct 2024 14:13:38 +0800 Subject: [PATCH 056/281] firmware: arm_scmi: Fix slab-use-after-free in scmi_bus_notifier() The scmi_dev->name is released prematurely in __scmi_device_destroy(), which causes slab-use-after-free when accessing scmi_dev->name in scmi_bus_notifier(). So move the release of scmi_dev->name to scmi_device_release() to avoid slab-use-after-free. | BUG: KASAN: slab-use-after-free in strncmp+0xe4/0xec | Read of size 1 at addr ffffff80a482bcc0 by task swapper/0/1 | | CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.6.38-debug #1 | Hardware name: Qualcomm Technologies, Inc. SA8775P Ride (DT) | Call trace: | dump_backtrace+0x94/0x114 | show_stack+0x18/0x24 | dump_stack_lvl+0x48/0x60 | print_report+0xf4/0x5b0 | kasan_report+0xa4/0xec | __asan_report_load1_noabort+0x20/0x2c | strncmp+0xe4/0xec | scmi_bus_notifier+0x5c/0x54c | notifier_call_chain+0xb4/0x31c | blocking_notifier_call_chain+0x68/0x9c | bus_notify+0x54/0x78 | device_del+0x1bc/0x840 | device_unregister+0x20/0xb4 | __scmi_device_destroy+0xac/0x280 | scmi_device_destroy+0x94/0xd0 | scmi_chan_setup+0x524/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 | | Allocated by task 1: | kasan_save_stack+0x2c/0x54 | kasan_set_track+0x2c/0x40 | kasan_save_alloc_info+0x24/0x34 | __kasan_kmalloc+0xa0/0xb8 | __kmalloc_node_track_caller+0x6c/0x104 | kstrdup+0x48/0x84 | kstrdup_const+0x34/0x40 | __scmi_device_create.part.0+0x8c/0x408 | scmi_device_create+0x104/0x370 | scmi_chan_setup+0x2a0/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 | | Freed by task 1: | kasan_save_stack+0x2c/0x54 | kasan_set_track+0x2c/0x40 | kasan_save_free_info+0x38/0x5c | __kasan_slab_free+0xe8/0x164 | __kmem_cache_free+0x11c/0x230 | kfree+0x70/0x130 | kfree_const+0x20/0x40 | __scmi_device_destroy+0x70/0x280 | scmi_device_destroy+0x94/0xd0 | scmi_chan_setup+0x524/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 Fixes: ee7a9c9f67c5 ("firmware: arm_scmi: Add support for multiple device per protocol") Signed-off-by: Xinqi Zhang Reviewed-by: Cristian Marussi Reviewed-by: Bjorn Andersson Message-Id: <20241016-fix-arm-scmi-slab-use-after-free-v2-1-1783685ef90d@quicinc.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 96b2e5f9a8ef..157172a5f2b5 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -325,7 +325,10 @@ EXPORT_SYMBOL_GPL(scmi_driver_unregister); static void scmi_device_release(struct device *dev) { - kfree(to_scmi_dev(dev)); + struct scmi_device *scmi_dev = to_scmi_dev(dev); + + kfree_const(scmi_dev->name); + kfree(scmi_dev); } static void __scmi_device_destroy(struct scmi_device *scmi_dev) @@ -338,7 +341,6 @@ static void __scmi_device_destroy(struct scmi_device *scmi_dev) if (scmi_dev->protocol_id == SCMI_PROTOCOL_SYSTEM) atomic_set(&scmi_syspower_registered, 0); - kfree_const(scmi_dev->name); ida_free(&scmi_bus_id, scmi_dev->id); device_unregister(&scmi_dev->dev); } @@ -410,7 +412,6 @@ __scmi_device_create(struct device_node *np, struct device *parent, return scmi_dev; put_dev: - kfree_const(scmi_dev->name); put_device(&scmi_dev->dev); ida_free(&scmi_bus_id, id); return NULL; From a0a18e91eb3a6ef75a6de69dc00f206b913e3848 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 21 Oct 2024 18:15:44 +0100 Subject: [PATCH 057/281] firmware: arm_scmi: Reject clear channel request on A2P The clear channel transport operation is supposed to be called exclusively on the P2A channel from the agent, since it relinquishes the ownership of the channel to the platform, after this latter has initiated some sort of P2A communication. Make sure that, if it is ever called on a A2P, is logged and ignored. Signed-off-by: Cristian Marussi Reviewed-by: Florian Fainelli Message-Id: <20241021171544.2579551-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 2 ++ drivers/firmware/arm_scmi/driver.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 6d9227db473f..c52ed6c64006 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -163,6 +163,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); * used to initialize this channel * @dev: Reference to device in the SCMI hierarchy corresponding to this * channel + * @is_p2a: A flag to identify a channel as P2A (RX) * @rx_timeout_ms: The configured RX timeout in milliseconds. * @handle: Pointer to SCMI entity handle * @no_completion_irq: Flag to indicate that this channel has no completion @@ -174,6 +175,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); struct scmi_chan_info { int id; struct device *dev; + bool is_p2a; unsigned int rx_timeout_ms; struct scmi_handle *handle; bool no_completion_irq; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index a477b5ade38d..5bd4cc68a3e3 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1048,6 +1048,11 @@ static inline void scmi_xfer_command_release(struct scmi_info *info, static inline void scmi_clear_channel(struct scmi_info *info, struct scmi_chan_info *cinfo) { + if (!cinfo->is_p2a) { + dev_warn(cinfo->dev, "Invalid clear on A2P channel !\n"); + return; + } + if (info->desc->ops->clear_channel) info->desc->ops->clear_channel(cinfo); } @@ -2638,6 +2643,7 @@ static int scmi_chan_setup(struct scmi_info *info, struct device_node *of_node, if (!cinfo) return -ENOMEM; + cinfo->is_p2a = !tx; cinfo->rx_timeout_ms = info->desc->max_rx_timeout_ms; /* Create a unique name for this transport device */ From e7f37a7d16310d3c9474825de26a67f00983ebea Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 21 Oct 2024 15:46:25 +0300 Subject: [PATCH 058/281] clk: qcom: gcc-x1e80100: Fix USB MP SS1 PHY GDSC pwrsts flags Allowing these GDSCs to collapse makes the QMP combo PHYs lose their configuration on machine suspend. Currently, the QMP combo PHY driver doesn't reinitialise the HW on resume. Under such conditions, the USB SuperSpeed support is broken. To avoid this, mark the pwrsts flags with RET_ON. This has been already done for USB 0 and 1 SS PHY GDSCs, Do this also for the USB MP SS1 PHY GDSC config. The USB MP SS0 PHY GDSC already has it. Fixes: 161b7c401f4b ("clk: qcom: Add Global Clock controller (GCC) driver for X1E80100") Reviewed-by: Johan Hovold Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20241021-x1e80100-clk-gcc-fix-usb-mp-phy-gdsc-pwrsts-flags-v2-1-0bfd64556238@linaro.org Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/gcc-x1e80100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-x1e80100.c b/drivers/clk/qcom/gcc-x1e80100.c index 81ba5ceab342..8ea25aa25dff 100644 --- a/drivers/clk/qcom/gcc-x1e80100.c +++ b/drivers/clk/qcom/gcc-x1e80100.c @@ -6155,7 +6155,7 @@ static struct gdsc gcc_usb3_mp_ss1_phy_gdsc = { .pd = { .name = "gcc_usb3_mp_ss1_phy_gdsc", }, - .pwrsts = PWRSTS_OFF_ON, + .pwrsts = PWRSTS_RET_ON, .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE, }; From 787ade24cc3af4a8ec9498c9cd8a6d47a4d86485 Mon Sep 17 00:00:00 2001 From: Maya Matuszczyk Date: Sat, 19 Oct 2024 21:02:15 +0200 Subject: [PATCH 059/281] arm64: dts: qcom: x1e80100-crd Rename "Twitter" to "Tweeter" This makes the name consistent with both other x1e80100 devices and the dictionary. A UCM fix was merged already and is required in order for sound to work after this commit. Signed-off-by: Maya Matuszczyk Reviewed-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241019190214.3337-2-maccraft123mc@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 10b28d870f08..004353220dc5 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -177,9 +177,9 @@ compatible = "qcom,x1e80100-sndcard"; model = "X1E80100-CRD"; audio-routing = "WooferLeft IN", "WSA WSA_SPK1 OUT", - "TwitterLeft IN", "WSA WSA_SPK2 OUT", + "TweeterLeft IN", "WSA WSA_SPK2 OUT", "WooferRight IN", "WSA2 WSA_SPK2 OUT", - "TwitterRight IN", "WSA2 WSA_SPK2 OUT", + "TweeterRight IN", "WSA2 WSA_SPK2 OUT", "IN1_HPHL", "HPHL_OUT", "IN2_HPHR", "HPHR_OUT", "AMIC2", "MIC BIAS2", @@ -933,7 +933,7 @@ reg = <0 1>; reset-gpios = <&lpass_tlmm 12 GPIO_ACTIVE_LOW>; #sound-dai-cells = <0>; - sound-name-prefix = "TwitterLeft"; + sound-name-prefix = "TweeterLeft"; vdd-1p8-supply = <&vreg_l15b_1p8>; vdd-io-supply = <&vreg_l12b_1p2>; qcom,port-mapping = <4 5 6 7 11 13>; @@ -986,7 +986,7 @@ reg = <0 1>; reset-gpios = <&lpass_tlmm 13 GPIO_ACTIVE_LOW>; #sound-dai-cells = <0>; - sound-name-prefix = "TwitterRight"; + sound-name-prefix = "TweeterRight"; vdd-1p8-supply = <&vreg_l15b_1p8>; vdd-io-supply = <&vreg_l12b_1p2>; qcom,port-mapping = <4 5 6 7 11 13>; From f489f6c6eb26482010470d77bad3901a3de1b166 Mon Sep 17 00:00:00 2001 From: Qingqing Zhou Date: Wed, 23 Oct 2024 00:51:48 +0530 Subject: [PATCH 060/281] firmware: qcom: scm: Return -EOPNOTSUPP for unsupported SHM bridge enabling When enabling SHM bridge, QTEE returns 0 and sets error 4 in result to qcom_scm for unsupported platforms. Currently, tzmem interprets this as an unknown error rather than recognizing it as an unsupported platform. Error log: [ 0.177224] qcom_scm firmware:scm: error (____ptrval____): Failed to enable the TrustZone memory allocator [ 0.177244] qcom_scm firmware:scm: probe with driver qcom_scm failed with error 4 To address this, modify the function call qcom_scm_shm_bridge_enable() to remap result to indicate an unsupported error. This way, tzmem will correctly identify it as an unsupported platform case instead of reporting it as an error. Fixes: 178e19c0df1b ("firmware: qcom: scm: add support for SHM bridge operations") Signed-off-by: Qingqing Zhou Co-developed-by: Kuldeep Singh Signed-off-by: Kuldeep Singh Reviewed-by: Bartosz Golaszewski Reviewed-by: Mukesh Ojha Link: https://lore.kernel.org/r/20241022192148.1626633-1-quic_kuldsing@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_scm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index f47fdf32d2da..2e4260ba5f79 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -112,6 +112,7 @@ enum qcom_scm_qseecom_tz_cmd_info { }; #define QSEECOM_MAX_APP_NAME_SIZE 64 +#define SHMBRIDGE_RESULT_NOTSUPP 4 /* Each bit configures cold/warm boot address for one of the 4 CPUs */ static const u8 qcom_scm_cpu_cold_bits[QCOM_SCM_BOOT_MAX_CPUS] = { @@ -1361,6 +1362,8 @@ EXPORT_SYMBOL_GPL(qcom_scm_lmh_dcvsh_available); int qcom_scm_shm_bridge_enable(void) { + int ret; + struct qcom_scm_desc desc = { .svc = QCOM_SCM_SVC_MP, .cmd = QCOM_SCM_MP_SHM_BRIDGE_ENABLE, @@ -1373,7 +1376,15 @@ int qcom_scm_shm_bridge_enable(void) QCOM_SCM_MP_SHM_BRIDGE_ENABLE)) return -EOPNOTSUPP; - return qcom_scm_call(__scm->dev, &desc, &res) ?: res.result[0]; + ret = qcom_scm_call(__scm->dev, &desc, &res); + + if (ret) + return ret; + + if (res.result[0] == SHMBRIDGE_RESULT_NOTSUPP) + return -EOPNOTSUPP; + + return res.result[0]; } EXPORT_SYMBOL_GPL(qcom_scm_shm_bridge_enable); From 825bb69228c8ab85637d21cdf4d44207937130b6 Mon Sep 17 00:00:00 2001 From: E Shattow Date: Mon, 21 Oct 2024 23:09:51 -0700 Subject: [PATCH 061/281] riscv: dts: starfive: Update ethernet phy0 delay parameter values for Star64 Improve function of Star64 bottom network port phy0 with updated delay values. Initial upstream patches supporting Star64 use the same vendor board support package parameters known to result in an unreliable bottom network port. Success acquiring DHCP lease and no dropped packets to ping LAN address: rx 900: tx 1500 1650 1800 1950 rx 750: tx 1650 1800 1950 rx 600: tx 1800 1950 rx 1050: tx 1650 1800 1950 rx 1200: tx 1500 1650 1800 1950 rx 1350: tx 1500 1650 1800 1950 rx 1500: tx 1500 1650 1800 1950 rx 1650: tx 1500 1650 1800 1950 rx 1800: tx 1500 1650 1800 1950 rx 1900: tx 1950 rx 1950: tx 1950 Failure acquiring DHCP lease or many dropped packets: rx 450: tx 1500 1800 1950 rx 600: tx 1200 1350 1650 rx 750: tx 1350 1500 rx 900: tx 1200 1350 rx 1050: tx 1050 1200 1350 1500 rx 1200: tx 1350 rx 1350: tx 1350 rx 1500: tx 1200 1350 rx 1650: tx 1050 1200 1350 rx 1800: tx 1050 1200 1350 rx 1900: tx 1500 1650 1800 rx 1950: tx 1200 1350 Non-functional: rx 0: tx 0 150 300 450 600 750 900 1050 1200 1350 1500 1650 1800 1950 rx 150: tx 0 150 300 450 600 750 900 1050 1200 1350 1500 1650 1800 1950 rx 300: tx 0 150 300 450 600 750 900 1050 1200 1350 1500 1650 1800 1950 rx 450: tx 0 150 300 450 600 750 900 1050 1200 1350 1650 rx 600: tx 0 150 300 450 600 750 900 1050 rx 750: tx 0 150 300 450 600 750 900 1050 1200 rx 900: tx 0 150 300 450 600 750 900 1050 rx 1050: tx 0 150 300 450 600 750 900 rx 1200: tx 0 150 300 450 600 750 900 1050 1200 rx 1350: tx 0 150 300 450 600 750 900 1050 1200 rx 1500: tx 0 150 300 450 600 750 900 1050 rx 1650: tx 0 150 300 450 600 750 900 rx 1800: tx 0 150 300 450 600 750 900 rx 1900: tx 0 150 300 450 600 750 900 1050 1200 1350 rx 1950: tx 0 150 300 450 600 750 900 1050 Selecting the median of all working rx delay values 1500 combined with tx delay values 1500, 1650, 1800, and 1950 only the tx delay value of 1950 (default) is reliable as tested in both Linux 6.11.2 and U-Boot v2024.10 Signed-off-by: E Shattow CC: stable@vger.kernel.org Fixes: 2606bf583b962 ("riscv: dts: starfive: add Star64 board devicetree") Acked-by: Emil Renner Berthing Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts index b720cdd15ed6..8e39fdc73ecb 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts @@ -44,8 +44,7 @@ }; &phy0 { - rx-internal-delay-ps = <1900>; - tx-internal-delay-ps = <1500>; + rx-internal-delay-ps = <1500>; motorcomm,rx-clk-drv-microamp = <2910>; motorcomm,rx-data-drv-microamp = <2910>; motorcomm,tx-clk-adj-enabled; From dec19f1406fc5d73512cacdcf612e7bb161c2101 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:07 +0200 Subject: [PATCH 062/281] arm64: dts: qcom: x1e78100-t14s: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: 7d1cbe2f4985 ("arm64: dts: qcom: Add X1E78100 ThinkPad T14s Gen 6") Cc: Konrad Dybcio Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts index 941dfddd6713..fdde988ae01e 100644 --- a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts +++ b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts @@ -139,6 +139,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; vph_pwr: regulator-vph-pwr { From 37f9477ce9d07ed87f6efe9b99de580bc9d27df5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:08 +0200 Subject: [PATCH 063/281] arm64: dts: qcom: x1e80100-crd: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: eb57cbe730d1 ("arm64: dts: qcom: x1e80100: Describe the PCIe 6a resources") Cc: stable@vger.kernel.org # 6.11 Cc: Abel Vesa Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 004353220dc5..c6e0356ed9a2 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -300,6 +300,8 @@ pinctrl-names = "default"; pinctrl-0 = <&nvme_reg_en>; + + regulator-boot-on; }; vreg_wwan: regulator-wwan { From c6d151f61b6703124e14bc0eae98d05206e36e02 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:09 +0200 Subject: [PATCH 064/281] arm64: dts: qcom: x1e80100-vivobook-s15: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: d0e2f8f62dff ("arm64: dts: qcom: Add device tree for ASUS Vivobook S 15") Cc: stable@vger.kernel.org # 6.11 Cc: Xilin Wu Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 20616bd4aa6c..fb4a48a1e2a8 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -134,6 +134,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; From 1badd07e4c0e1ecfb187dcba05357c0f3e70e797 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:10 +0200 Subject: [PATCH 065/281] arm64: dts: qcom: x1e80100-yoga-slim7x: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: 45247fe17db2 ("arm64: dts: qcom: x1e80100: add Lenovo Thinkpad Yoga slim 7x devicetree") Cc: stable@vger.kernel.org # 6.11 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index 3c13331a9ef4..0cdaff9c8cf0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -205,6 +205,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; From 5462190b11aa62a945dc2fd74e1531b9c1bc9952 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:11 +0200 Subject: [PATCH 066/281] arm64: dts: qcom: x1e80100-microsoft-romulus: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: 09d77be56093 ("arm64: dts: qcom: Add support for X1-based Surface Laptop 7 devices") Cc: Konrad Dybcio Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi index 42e02ad6a9c3..cdb401767c42 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi @@ -164,6 +164,8 @@ pinctrl-0 = <&nvme_reg_en>; pinctrl-names = "default"; + + regulator-boot-on; }; }; From 717f0637ffc6a6a59f838df94a7d61e643c98d62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Oct 2024 16:51:12 +0200 Subject: [PATCH 067/281] arm64: dts: qcom: x1e80100-qcp: fix nvme regulator boot glitch The NVMe regulator has been left enabled by the boot firmware. Mark it as such to avoid disabling the regulator temporarily during boot. Fixes: eb57cbe730d1 ("arm64: dts: qcom: x1e80100: Describe the PCIe 6a resources") Cc: stable@vger.kernel.org # 6.11 Cc: Abel Vesa Signed-off-by: Johan Hovold Reviewed-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016145112.24785-7-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 1c3a6a7b3ed6..5ef030c60abe 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -253,6 +253,8 @@ pinctrl-names = "default"; pinctrl-0 = <&nvme_reg_en>; + + regulator-boot-on; }; }; From 128fdbf36cddc2a901c4889ba1c89fa9f2643f2c Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Wed, 16 Oct 2024 20:18:52 +0530 Subject: [PATCH 068/281] soc: qcom: socinfo: fix revision check in qcom_socinfo_probe() In success case, the revision holds a non-null pointer. The current logic incorrectly returns an error for a non-null pointer, whereas it should return an error for a null pointer. The socinfo driver for IPQ9574 and IPQ5332 is currently broken, resulting in the following error message qcom-socinfo qcom-socinfo: probe with driver qcom-socinfo failed with error -12 Add a null check for the revision to ensure it returns an error only in failure case (null pointer). Fixes: e694d2b5c58b ("soc: qcom: Add check devm_kasprintf() returned value") Signed-off-by: Manikanta Mylavarapu Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241016144852.2888679-1-quic_mmanikan@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/socinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index e42d86d5f6f9..c3b877832521 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -786,7 +786,7 @@ static int qcom_socinfo_probe(struct platform_device *pdev) qs->attr.revision = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%u.%u", SOCINFO_MAJOR(le32_to_cpu(info->ver)), SOCINFO_MINOR(le32_to_cpu(info->ver))); - if (!qs->attr.soc_id || qs->attr.revision) + if (!qs->attr.soc_id || !qs->attr.revision) return -ENOMEM; if (offsetof(struct socinfo, serial_num) <= item_size) { From 384f2024e1a100b9b977a697f5e7cb151b00550d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 6 Aug 2024 18:36:21 +0100 Subject: [PATCH 069/281] MAINTAINERS: invert Misc RISC-V SoC Support's pattern There are now more directories that someone else maintains than ones I do, so invert the pattern to cover included, rather than included directories. Ditto for the bindings directory - there's more files there that are the responsibility of others than mine (and I get CCed on all bindings anyway). Remove it from the entry. Signed-off-by: Conor Dooley --- MAINTAINERS | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ad507f49324..bec4f3fd0bf8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19927,12 +19927,10 @@ L: linux-riscv@lists.infradead.org S: Maintained Q: https://patchwork.kernel.org/project/linux-riscv/list/ T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ -F: Documentation/devicetree/bindings/riscv/ -F: arch/riscv/boot/dts/ -X: arch/riscv/boot/dts/allwinner/ -X: arch/riscv/boot/dts/renesas/ -X: arch/riscv/boot/dts/sophgo/ -X: arch/riscv/boot/dts/thead/ +F: arch/riscv/boot/dts/canaan/ +F: arch/riscv/boot/dts/microchip/ +F: arch/riscv/boot/dts/sifive/ +F: arch/riscv/boot/dts/starfive/ RISC-V PMU DRIVERS M: Atish Patra From 7af1418500124150f9fd24e1a5b9c288771df271 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 10 Jul 2024 16:07:23 +0200 Subject: [PATCH 070/281] arm64: dts: qcom: x1e80100: Fix up BAR spaces The 32-bit BAR spaces are reaching outside their assigned register regions. Shrink them to match their actual sizes. This resolves an issue where the regions overlap and one of the controllers won't come up, which can be seen in the log as: qcom-pcie 1c08000.pci: resource collision: [mem 0x7c300000-0x7fffffff] conflicts with 1c00000.pci dbi [mem 0x7e000000-0x7e000f1c] While at it, unify the style. Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Cc: stable@vger.kernel.org Signed-off-by: Konrad Dybcio Reviewed-by: Abel Vesa Tested-by: Abel Vesa Link: https://lore.kernel.org/r/20240710-topic-barman-v1-1-5f63fca8d0fc@linaro.org [bjorn: Added note about overlapping resource regions] Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 6a419fb6a982..c5d033e1b345 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2924,9 +2924,9 @@ "mhi"; #address-cells = <3>; #size-cells = <2>; - ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>, - <0x02000000 0 0x70300000 0 0x70300000 0 0x3d00000>; - bus-range = <0 0xff>; + ranges = <0x01000000 0x0 0x00000000 0x0 0x70200000 0x0 0x100000>, + <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x1d00000>; + bus-range = <0x00 0xff>; dma-coherent; @@ -3173,8 +3173,8 @@ "mhi"; #address-cells = <3>; #size-cells = <2>; - ranges = <0x01000000 0 0x00000000 0 0x7c200000 0 0x100000>, - <0x02000000 0 0x7c300000 0 0x7c300000 0 0x3d00000>; + ranges = <0x01000000 0x0 0x00000000 0x0 0x7c200000 0x0 0x100000>, + <0x02000000 0x0 0x7c300000 0x0 0x7c300000 0x0 0x1d00000>; bus-range = <0x00 0xff>; dma-coherent; From f3bba5eb46ddb8f460fc808a65050a9bf2f7ef23 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Oct 2024 15:10:59 +0200 Subject: [PATCH 071/281] arm64: dts: qcom: x1e80100: fix PCIe4 interconnect The fourth PCIe controller is connected to the PCIe North ANoC. Fix the corresponding interconnect property so that the OS manages the right path. Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Cc: stable@vger.kernel.org # 6.9 Cc: Abel Vesa Cc: Sibi Sankar Cc: Rajendra Nayak Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241024131101.13587-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index c5d033e1b345..512a6a485adc 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3224,7 +3224,7 @@ assigned-clocks = <&gcc GCC_PCIE_4_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_south_anoc MASTER_PCIE_4 QCOM_ICC_TAG_ALWAYS + interconnects = <&pcie_north_anoc MASTER_PCIE_4 QCOM_ICC_TAG_ALWAYS &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS &cnoc_main SLAVE_PCIE_4 QCOM_ICC_TAG_ALWAYS>; From 54376fe116ef69c9e58794589c044abb2555169e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Oct 2024 15:11:00 +0200 Subject: [PATCH 072/281] arm64: dts: qcom: x1e80100: fix PCIe5 interconnect The fifth PCIe controller is connected to the PCIe North ANoC. Fix the corresponding interconnect property so that the OS manages the right path. Fixes: 62ab23e15508 ("arm64: dts: qcom: x1e80100: add PCIe5 nodes") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20241024131101.13587-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 512a6a485adc..0510abc0edf0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3102,7 +3102,7 @@ assigned-clocks = <&gcc GCC_PCIE_5_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_south_anoc MASTER_PCIE_5 QCOM_ICC_TAG_ALWAYS + interconnects = <&pcie_north_anoc MASTER_PCIE_5 QCOM_ICC_TAG_ALWAYS &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS &cnoc_main SLAVE_PCIE_5 QCOM_ICC_TAG_ALWAYS>; From a387e73fedd6307c0e194deaa53c42b153ff0bd6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 23 Oct 2024 17:24:32 +0000 Subject: [PATCH 073/281] rpmsg: glink: Handle rejected intent request better GLINK operates using pre-allocated buffers, aka intents, where incoming messages are aggregated before being passed up the stack. In the case that no suitable intents have been announced by the receiver, the sender can request an intent to be allocated. The initial implementation of the response to such request dealt with two outcomes; granted allocations, and all other cases being considered -ECANCELLED (likely from "cancelling the operation as the remote is going down"). But on some channels intent allocation is not supported, instead the remote will pre-allocate and announce a fixed number of intents for the sender to use. If for such channels an rpmsg_send() is being invoked before any channels have been announced, an intent request will be issued and as this comes back rejected the call fails with -ECANCELED. Given that this is reported in the same way as the remote being shut down, there's no way for the client to differentiate the two cases. In line with the original GLINK design, change the return value to -EAGAIN for the case where the remote rejects an intent allocation request. It's tempting to handle this case in the GLINK core, as we expect intents to show up in this case. But there's no way to distinguish between this case and a rejection for a too big allocation, nor is it possible to predict if a currently used (and seemingly suitable) intent will be returned for reuse or not. As such, returning the error to the client and allow it to react seems to be the only sensible solution. In addition to this, commit 'c05dfce0b89e ("rpmsg: glink: Wait for intent, not just request ack")' changed the logic such that the code always wait for an intent request response and an intent. This works out in most cases, but in the event that an intent request is rejected and no further intent arrives (e.g. client asks for a too big intent), the code will stall for 10 seconds and then return -ETIMEDOUT; instead of a more suitable error. This change also resulted in intent requests racing with the shutdown of the remote would be exposed to this same problem, unless some intent happens to arrive. A patch for this was developed and posted by Sarannya S [1], and has been incorporated here. To summarize, the intent request can end in 4 ways: - Timeout, no response arrived => return -ETIMEDOUT - Abort TX, the edge is going away => return -ECANCELLED - Intent request was rejected => return -EAGAIN - Intent request was accepted, and an intent arrived => return 0 This patch was developed with input from Sarannya S, Deepak Kumar Singh, and Chris Lew. [1] https://lore.kernel.org/all/20240925072328.1163183-1-quic_deesin@quicinc.com/ Fixes: c05dfce0b89e ("rpmsg: glink: Wait for intent, not just request ack") Cc: stable@vger.kernel.org Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Reviewed-by: Chris Lew Link: https://lore.kernel.org/r/20241023-pmic-glink-ecancelled-v2-1-ebc268129407@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/rpmsg/qcom_glink_native.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 0b2f29006908..d3af1dfa3c7d 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1440,14 +1440,18 @@ static int qcom_glink_request_intent(struct qcom_glink *glink, goto unlock; ret = wait_event_timeout(channel->intent_req_wq, - READ_ONCE(channel->intent_req_result) >= 0 && - READ_ONCE(channel->intent_received), + READ_ONCE(channel->intent_req_result) == 0 || + (READ_ONCE(channel->intent_req_result) > 0 && + READ_ONCE(channel->intent_received)) || + glink->abort_tx, 10 * HZ); if (!ret) { dev_err(glink->dev, "intent request timed out\n"); ret = -ETIMEDOUT; + } else if (glink->abort_tx) { + ret = -ECANCELED; } else { - ret = READ_ONCE(channel->intent_req_result) ? 0 : -ECANCELED; + ret = READ_ONCE(channel->intent_req_result) ? 0 : -EAGAIN; } unlock: From f8c879192465d9f328cb0df07208ef077c560bb1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 23 Oct 2024 17:24:33 +0000 Subject: [PATCH 074/281] soc: qcom: pmic_glink: Handle GLINK intent allocation rejections Some versions of the pmic_glink firmware does not allow dynamic GLINK intent allocations, attempting to send a message before the firmware has allocated its receive buffers and announced these intent allocations will fail. When this happens something like this showns up in the log: pmic_glink_altmode.pmic_glink_altmode pmic_glink.altmode.0: failed to send altmode request: 0x10 (-125) pmic_glink_altmode.pmic_glink_altmode pmic_glink.altmode.0: failed to request altmode notifications: -125 ucsi_glink.pmic_glink_ucsi pmic_glink.ucsi.0: failed to send UCSI read request: -125 qcom_battmgr.pmic_glink_power_supply pmic_glink.power-supply.0: failed to request power notifications GLINK has been updated to distinguish between the cases where the remote is going down (-ECANCELED) and the intent allocation being rejected (-EAGAIN). Retry the send until intent buffers becomes available, or an actual error occur. To avoid infinitely waiting for the firmware in the event that this misbehaves and no intents arrive, an arbitrary 5 second timeout is used. This patch was developed with input from Chris Lew. Reported-by: Johan Hovold Closes: https://lore.kernel.org/all/Zqet8iInnDhnxkT9@hovoldconsulting.com/#t Cc: stable@vger.kernel.org # rpmsg: glink: Handle rejected intent request better Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Reviewed-by: Chris Lew Link: https://lore.kernel.org/r/20241023-pmic-glink-ecancelled-v2-2-ebc268129407@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pmic_glink.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index 9606222993fd..baa4ac6704a9 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -4,6 +4,7 @@ * Copyright (c) 2022, Linaro Ltd */ #include +#include #include #include #include @@ -13,6 +14,8 @@ #include #include +#define PMIC_GLINK_SEND_TIMEOUT (5 * HZ) + enum { PMIC_GLINK_CLIENT_BATT = 0, PMIC_GLINK_CLIENT_ALTMODE, @@ -112,13 +115,29 @@ EXPORT_SYMBOL_GPL(pmic_glink_client_register); int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len) { struct pmic_glink *pg = client->pg; + bool timeout_reached = false; + unsigned long start; int ret; mutex_lock(&pg->state_lock); - if (!pg->ept) + if (!pg->ept) { ret = -ECONNRESET; - else - ret = rpmsg_send(pg->ept, data, len); + } else { + start = jiffies; + for (;;) { + ret = rpmsg_send(pg->ept, data, len); + if (ret != -EAGAIN) + break; + + if (timeout_reached) { + ret = -ETIMEDOUT; + break; + } + + usleep_range(1000, 5000); + timeout_reached = time_after(jiffies, start + PMIC_GLINK_SEND_TIMEOUT); + } + } mutex_unlock(&pg->state_lock); return ret; From 3577d5e2bc1ff78808cbe2f233ae1837ee2ce84c Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:36 +0200 Subject: [PATCH 075/281] arm64: dts: rockchip: remove orphaned pinctrl-names from pinephone pro The patch adding display support for the pinephone pro introduced two regulators that contain pinctrl-names props but no pinctrl-assignments. Looks like someone forgot the pinctrl settings, so remove the orphans for now, until that changes. Fixes: 3e987e1f22b9 ("arm64: dts: rockchip: Add internal display support to rk3399-pinephone-pro") Cc: Martijn Braam Cc: Javier Martinez Canillas Cc: Ondrej Jirman Reviewed-by: Ondrej Jirman Reviewed-by: Javier Martinez Canillas Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-11-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts index 1a44582a49fb..09a016ea8c76 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts @@ -166,7 +166,6 @@ regulator-max-microvolt = <1800000>; vin-supply = <&vcc3v3_sys>; gpio = <&gpio3 RK_PA5 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; }; /* MIPI DSI panel 2.8v supply */ @@ -178,7 +177,6 @@ regulator-max-microvolt = <2800000>; vin-supply = <&vcc3v3_sys>; gpio = <&gpio3 RK_PA1 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; }; vibrator { From c7206853cd7d31c52575fb1dc7616b4398f3bc8f Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:37 +0200 Subject: [PATCH 076/281] ARM: dts: rockchip: fix rk3036 acodec node The acodec node is not conformant to the binding. Set the correct nodename, use the correct compatible, add the needed #sound-dai-cells and sort the rockchip,grf below clocks properties as expected. Fixes: faea098e1808 ("ARM: dts: rockchip: add core rk3036 dtsi") Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-12-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3036.dtsi | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk3036.dtsi b/arch/arm/boot/dts/rockchip/rk3036.dtsi index 96279d1e02fe..37369538483f 100644 --- a/arch/arm/boot/dts/rockchip/rk3036.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3036.dtsi @@ -384,12 +384,13 @@ }; }; - acodec: acodec-ana@20030000 { - compatible = "rk3036-codec"; + acodec: audio-codec@20030000 { + compatible = "rockchip,rk3036-codec"; reg = <0x20030000 0x4000>; - rockchip,grf = <&grf>; clock-names = "acodec_pclk"; clocks = <&cru PCLK_ACODEC>; + rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = "disabled"; }; From 1580ccb6ed9dc76b8ff3e2d8912e8215c8b0fa6d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:38 +0200 Subject: [PATCH 077/281] ARM: dts: rockchip: drop grf reference from rk3036 hdmi Neither the binding nor the driver implementation specify/use the grf reference provided in the rk3036. And neither does the newer rk3128 user of the hdmi controller. So drop the rockchip,grf property. Fixes: b7217cf19c63 ("ARM: dts: rockchip: add hdmi device node for rk3036") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-13-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3036.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/rockchip/rk3036.dtsi b/arch/arm/boot/dts/rockchip/rk3036.dtsi index 37369538483f..09371f07d7b4 100644 --- a/arch/arm/boot/dts/rockchip/rk3036.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3036.dtsi @@ -400,7 +400,6 @@ interrupts = ; clocks = <&cru PCLK_HDMI>; clock-names = "pclk"; - rockchip,grf = <&grf>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_ctl>; #sound-dai-cells = <0>; From 8bade1ad1f0821aef31f6a8fb1027ae292566d85 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:39 +0200 Subject: [PATCH 078/281] ARM: dts: rockchip: Fix the spi controller on rk3036 Compatible and clock names did not match the existing binding. So set the correct values and re-order+rename the clocks. It looks like no rk3036 board did use the spi controller so far, so this was never detected on a running device yet. Fixes: f629fcfab2cd ("ARM: dts: rockchip: support the spi for rk3036") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-14-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3036.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk3036.dtsi b/arch/arm/boot/dts/rockchip/rk3036.dtsi index 09371f07d7b4..63b9912be06a 100644 --- a/arch/arm/boot/dts/rockchip/rk3036.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3036.dtsi @@ -553,11 +553,11 @@ }; spi: spi@20074000 { - compatible = "rockchip,rockchip-spi"; + compatible = "rockchip,rk3036-spi"; reg = <0x20074000 0x1000>; interrupts = ; - clocks = <&cru PCLK_SPI>, <&cru SCLK_SPI>; - clock-names = "apb-pclk","spi_pclk"; + clocks = <&cru SCLK_SPI>, <&cru PCLK_SPI>; + clock-names = "spiclk", "apb_pclk"; dmas = <&pdma 8>, <&pdma 9>; dma-names = "tx", "rx"; pinctrl-names = "default"; From 77a9a7f2d3b94d29d13d71b851114d593a2147cf Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:40 +0200 Subject: [PATCH 079/281] ARM: dts: rockchip: Fix the realtek audio codec on rk3036-kylin Both the node name as well as the compatible were not named according to the binding expectations, fix that. Fixes: 47bf3a5c9e2a ("ARM: dts: rockchip: add the sound setup for rk3036-kylin board") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-15-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3036-kylin.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk3036-kylin.dts b/arch/arm/boot/dts/rockchip/rk3036-kylin.dts index e32c73d32f0a..2f84e2805712 100644 --- a/arch/arm/boot/dts/rockchip/rk3036-kylin.dts +++ b/arch/arm/boot/dts/rockchip/rk3036-kylin.dts @@ -325,8 +325,8 @@ &i2c2 { status = "okay"; - rt5616: rt5616@1b { - compatible = "rt5616"; + rt5616: audio-codec@1b { + compatible = "realtek,rt5616"; reg = <0x1b>; clocks = <&cru SCLK_I2S_OUT>; clock-names = "mclk"; From a4dca88c9c3abd2ba73d09fb5b365fdf7d5198a3 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Sat, 19 Oct 2024 03:38:10 +0300 Subject: [PATCH 080/281] arm64: dts: rockchip: Drop invalid clock-names from es8388 codec nodes The binding for Everest ES8328/ES8388 audio CODEC doesn't support the 'clock-names' property: rk3588-orangepi-5-plus.dtb: audio-codec@11: 'clock-names' does not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/sound/everest,es8328.yaml# Since the related audio driver is also not making use of it, drop the invalid property from all es8388 codec nodes. Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20241019-es8328-dt-fixes-v1-1-ca77d5ce21ad@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts | 1 - arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts | 1 - arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts | 1 - arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 7ba1c28f70a9..2f06bfdd70bf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -114,7 +114,6 @@ es8388: es8388@11 { compatible = "everest,es8388"; reg = <0x11>; - clock-names = "mclk"; clocks = <&cru SCLK_I2S_8CH_OUT>; #sound-dai-cells = <0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index c3a6812cc93a..dd4c79bcad87 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -328,7 +328,6 @@ compatible = "everest,es8388"; reg = <0x11>; clocks = <&cru I2S0_8CH_MCLKOUT>; - clock-names = "mclk"; AVDD-supply = <&vcc_1v8_s0>; DVDD-supply = <&vcc_1v8_s0>; HPVDD-supply = <&vcc_3v3_s0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index e4a20cda65ed..b38dab009ccc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -316,7 +316,6 @@ assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; - clock-names = "mclk"; AVDD-supply = <&avcc_1v8_codec_s0>; DVDD-supply = <&avcc_1v8_codec_s0>; HPVDD-supply = <&vcc_3v3_s0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index d8c50fdcca3b..8ba111d9283f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -377,7 +377,6 @@ assigned-clock-rates = <12288000>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; AVDD-supply = <&vcc_3v3_s3>; - clock-names = "mclk"; clocks = <&cru I2S0_8CH_MCLKOUT>; DVDD-supply = <&vcc_1v8_s3>; HPVDD-supply = <&vcc_3v3_s3>; From 08846522d9a7bccf18d4f97c3f39d03c7a193970 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Fri, 18 Oct 2024 16:45:50 +0200 Subject: [PATCH 081/281] arm64: dts: rockchip: Correct GPIO polarity on brcm BT nodes Paragraph "3.4 Power up Timing Sequence" of the AzureWave-CM256SM datasheet mentions the following about the BT_REG_ON pin, which is connected to GPIO0_C4_d: When this pin is low and WL_REG_ON is high, the BT section is in reset. Therefor set that pin to GPIO_ACTIVE_HIGH so that it can be pulled low for a reset. If set to GPIO_ACTIVE_LOW, the following errors are observed: Bluetooth: hci0: command 0x0c03 tx timeout Bluetooth: hci0: BCM: Reset failed (-110) So fix the GPIO polarity by setting it to ACTIVE_HIGH. This also matches what other devices with the same BT device have. Fixes: 2b6a3f857550 ("arm64: dts: rockchip: Fix reset-gpios property on brcm BT nodes") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241018145053.11928-2-didi.debian@cknow.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index a477bd992b40..0131f2cdd312 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -688,7 +688,7 @@ host-wakeup-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; pinctrl-0 = <&bt_enable_h>, <&bt_host_wake_l>, <&bt_wake_h>; pinctrl-names = "default"; - shutdown-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; + shutdown-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; vbat-supply = <&vcc_wl>; vddio-supply = <&vcca_1v8_pmu>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi index e9fa9bee995a..1e36f73840da 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3.dtsi @@ -404,7 +404,7 @@ host-wakeup-gpios = <&gpio2 RK_PB1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_h &bt_reg_on_h &bt_wake_host_h>; - shutdown-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_LOW>; + shutdown-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; vbat-supply = <&vcc_3v3>; vddio-supply = <&vcc_1v8>; }; From cc6a931d1f3b412263d515fd93b21fc0ca5147fe Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Fri, 25 Oct 2024 08:37:00 +0000 Subject: [PATCH 082/281] pwm: imx-tpm: Use correct MODULO value for EPWM mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The modulo register defines the period of the edge-aligned PWM mode (which is the only mode implemented). The reference manual states: "The EPWM period is determined by (MOD + 0001h) ..." So the value that is written to the MOD register must therefore be one less than the calculated period length. Return -EINVAL if the calculated length is already zero. A correct MODULO value is particularly relevant if the PWM has to output a high frequency due to a low period value. Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support") Cc: stable@vger.kernel.org Signed-off-by: Erik Schumacher Link: https://lore.kernel.org/r/1a3890966d68b9f800d457cbf095746627495e18.camel@iris-sensing.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-imx-tpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 96ea343856f0..7ee7b65b9b90 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -106,7 +106,9 @@ static int pwm_imx_tpm_round_state(struct pwm_chip *chip, p->prescale = prescale; period_count = (clock_unit + ((1 << prescale) >> 1)) >> prescale; - p->mod = period_count; + if (period_count == 0) + return -EINVAL; + p->mod = period_count - 1; /* calculate real period HW can support */ tmp = (u64)period_count << prescale; From 517fb4d77c44c7519ae6937329c496894461f416 Mon Sep 17 00:00:00 2001 From: Trevor Gamblin Date: Thu, 17 Oct 2024 13:47:44 -0400 Subject: [PATCH 083/281] MAINTAINERS: add self as reviewer for AXI PWM GENERATOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial author of the driver has moved on, so add the final submitter (myself) as reviewer for the AXI PWM driver. Signed-off-by: Trevor Gamblin Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20241017174744.902454-1-tgamblin@baylibre.com Signed-off-by: Uwe Kleine-König --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..a506fa4f6825 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3758,6 +3758,7 @@ F: drivers/spi/spi-axi-spi-engine.c AXI PWM GENERATOR M: Michael Hennerich M: Nuno Sá +R: Trevor Gamblin L: linux-pwm@vger.kernel.org S: Supported W: https://ez.analog.com/linux-software-drivers From d99913e1b80b51a058020835e7ea1a44397cb4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 22 Oct 2024 11:14:30 +0200 Subject: [PATCH 084/281] riscv: dts: Replace deprecated snps,nr-gpios property for snps,dw-apb-gpio-port devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snps,dw-apb-gpio-port is deprecated since commit ef42a8da3cf3 ("dt-bindings: gpio: dwapb: Add ngpios property support"). The respective driver supports this since commit 7569486d79ae ("gpio: dwapb: Add ngpios DT-property support") which is included in Linux v5.10-rc1. This change was created using git grep -l snps,nr-gpios arch/riscv/boot/dts | xargs perl -p -i -e 's/\bsnps,nr-gpios\b/ngpios/ . Signed-off-by: Uwe Kleine-König Reviewed-by: Inochi Amaoto Reviewed-by: Chen Wang Fixes: a508d794f86e ("riscv: sophgo: dts: add gpio controllers for SG2042 SoC") Link: https://lore.kernel.org/r/20241022091428.477697-8-u.kleine-koenig@baylibre.com Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang --- arch/riscv/boot/dts/sophgo/sg2042.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index 4e5fa6591623..e62ac51ac55a 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -112,7 +112,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -134,7 +134,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -156,7 +156,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; From 2feb023110843acce790e9089e72e9a9503d9fa5 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Fri, 25 Oct 2024 13:59:18 +0800 Subject: [PATCH 085/281] regulator: rtq2208: Fix uninitialized use of regulator_config Fix rtq2208 driver uninitialized use to cause kernel error. Fixes: 85a11f55621a ("regulator: rtq2208: Add Richtek RTQ2208 SubPMIC") Signed-off-by: ChiYuan Huang Link: https://patch.msgid.link/00d691cfcc0eae9ce80a37b62e99851e8fdcffe2.1729829243.git.cy_huang@richtek.com Signed-off-by: Mark Brown --- drivers/regulator/rtq2208-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rtq2208-regulator.c b/drivers/regulator/rtq2208-regulator.c index a5c126afc648..5925fa7a9a06 100644 --- a/drivers/regulator/rtq2208-regulator.c +++ b/drivers/regulator/rtq2208-regulator.c @@ -568,7 +568,7 @@ static int rtq2208_probe(struct i2c_client *i2c) struct regmap *regmap; struct rtq2208_regulator_desc *rdesc[RTQ2208_LDO_MAX]; struct regulator_dev *rdev; - struct regulator_config cfg; + struct regulator_config cfg = {}; struct rtq2208_rdev_map *rdev_map; int i, ret = 0, idx, n_regulator = 0; unsigned int regulator_idx_table[RTQ2208_LDO_MAX], From 3abab905b14f4ba756d413f37f1fb02b708eee93 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 28 Oct 2024 08:28:30 +0900 Subject: [PATCH 086/281] ksmbd: Fix the missing xa_store error check xa_store() can fail, it return xa_err(-EINVAL) if the entry cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation failed, so check error for xa_store() to fix it. Cc: stable@vger.kernel.org Fixes: b685757c7b08 ("ksmbd: Implements sess->rpc_handle_list as xarray") Signed-off-by: Jinjie Ruan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 1e4624e9d434..9756a4bbfe54 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -90,7 +90,7 @@ static int __rpc_method(char *rpc_name) int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { - struct ksmbd_session_rpc *entry; + struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; int method; @@ -106,16 +106,19 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) entry->id = ksmbd_ipc_id_alloc(); if (entry->id < 0) goto free_entry; - xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL); + old = xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL); + if (xa_is_err(old)) + goto free_id; resp = ksmbd_rpc_open(sess, entry->id); if (!resp) - goto free_id; + goto erase_xa; kvfree(resp); return entry->id; -free_id: +erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); +free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); From 96d8569563916fe2f8fe17317e20e43f54f9ba4b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 24 Oct 2024 10:21:30 +0200 Subject: [PATCH 087/281] media: vivid: fix buffer overwrite when using > 32 buffers The maximum number of buffers that can be requested was increased to 64 for the video capture queue. But video capture used a must_blank array that was still sized for 32 (VIDEO_MAX_FRAME). This caused an out-of-bounds write when using buffer indices >= 32. Create a new define MAX_VID_CAP_BUFFERS that is used to access the must_blank array and set max_num_buffers for the video capture queue. This solves a crash reported by: https://bugzilla.kernel.org/show_bug.cgi?id=219258 Signed-off-by: Hans Verkuil Fixes: cea70ed416b4 ("media: test-drivers: vivid: Increase max supported buffers for capture queues") Cc: stable@vger.kernel.org --- drivers/media/test-drivers/vivid/vivid-core.c | 2 +- drivers/media/test-drivers/vivid/vivid-core.h | 4 +++- drivers/media/test-drivers/vivid/vivid-ctrls.c | 2 +- drivers/media/test-drivers/vivid/vivid-vid-cap.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c index 00e0d08af357..4f330f4fc6be 100644 --- a/drivers/media/test-drivers/vivid/vivid-core.c +++ b/drivers/media/test-drivers/vivid/vivid-core.c @@ -910,7 +910,7 @@ static int vivid_create_queue(struct vivid_dev *dev, * videobuf2-core.c to MAX_BUFFER_INDEX. */ if (buf_type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - q->max_num_buffers = 64; + q->max_num_buffers = MAX_VID_CAP_BUFFERS; if (buf_type == V4L2_BUF_TYPE_SDR_CAPTURE) q->max_num_buffers = 1024; if (buf_type == V4L2_BUF_TYPE_VBI_CAPTURE) diff --git a/drivers/media/test-drivers/vivid/vivid-core.h b/drivers/media/test-drivers/vivid/vivid-core.h index cc18a3bc6dc0..d2d52763b119 100644 --- a/drivers/media/test-drivers/vivid/vivid-core.h +++ b/drivers/media/test-drivers/vivid/vivid-core.h @@ -26,6 +26,8 @@ #define MAX_INPUTS 16 /* The maximum number of outputs */ #define MAX_OUTPUTS 16 +/* The maximum number of video capture buffers */ +#define MAX_VID_CAP_BUFFERS 64 /* The maximum up or down scaling factor is 4 */ #define MAX_ZOOM 4 /* The maximum image width/height are set to 4K DMT */ @@ -481,7 +483,7 @@ struct vivid_dev { /* video capture */ struct tpg_data tpg; unsigned ms_vid_cap; - bool must_blank[VIDEO_MAX_FRAME]; + bool must_blank[MAX_VID_CAP_BUFFERS]; const struct vivid_fmt *fmt_cap; struct v4l2_fract timeperframe_vid_cap; diff --git a/drivers/media/test-drivers/vivid/vivid-ctrls.c b/drivers/media/test-drivers/vivid/vivid-ctrls.c index 8bb38bc7b8cc..2b5c8fbcd0a2 100644 --- a/drivers/media/test-drivers/vivid/vivid-ctrls.c +++ b/drivers/media/test-drivers/vivid/vivid-ctrls.c @@ -553,7 +553,7 @@ static int vivid_vid_cap_s_ctrl(struct v4l2_ctrl *ctrl) break; case VIVID_CID_PERCENTAGE_FILL: tpg_s_perc_fill(&dev->tpg, ctrl->val); - for (i = 0; i < VIDEO_MAX_FRAME; i++) + for (i = 0; i < MAX_VID_CAP_BUFFERS; i++) dev->must_blank[i] = ctrl->val < 100; break; case VIVID_CID_INSERT_SAV: diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index 69620e0a35a0..6a790ac8cbe6 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -213,7 +213,7 @@ static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count) dev->vid_cap_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); - for (i = 0; i < VIDEO_MAX_FRAME; i++) + for (i = 0; i < MAX_VID_CAP_BUFFERS; i++) dev->must_blank[i] = tpg_g_perc_fill(&dev->tpg) < 100; if (dev->start_streaming_error) { dev->start_streaming_error = false; From 8c68b5656e55e9324875881f1000eb4ee3603a87 Mon Sep 17 00:00:00 2001 From: Ben Chuang Date: Fri, 25 Oct 2024 14:00:16 +0800 Subject: [PATCH 088/281] mmc: sdhci-pci-gli: GL9767: Fix low power mode on the set clock function On sdhci_gl9767_set_clock(), the vendor header space(VHS) is read-only after calling gl9767_disable_ssc_pll() and gl9767_set_ssc_pll_205mhz(). So the low power negotiation mode cannot be enabled again. Introduce gl9767_set_low_power_negotiation() function to fix it. The explanation process is as below. static void sdhci_gl9767_set_clock() { ... gl9767_vhs_write(); ... value |= PCIE_GLI_9767_CFG_LOW_PWR_OFF; pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); <--- (a) gl9767_disable_ssc_pll(); <--- (b) sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); if (clock == 0) return; <-- (I) ... if (clock == 200000000 && ios->timing == MMC_TIMING_UHS_SDR104) { ... gl9767_set_ssc_pll_205mhz(); <--- (c) } ... value &= ~PCIE_GLI_9767_CFG_LOW_PWR_OFF; pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); <-- (II) gl9767_vhs_read(); } (a) disable low power negotiation mode. When return on (I), the low power mode is disabled. After (b) and (c), VHS is read-only, the low power mode cannot be enabled on (II). Reported-by: Georg Gottleuber Fixes: d2754355512e ("mmc: sdhci-pci-gli: Set SDR104's clock to 205MHz and enable SSC for GL9767") Signed-off-by: Ben Chuang Tested-by: Georg Gottleuber Cc: stable@vger.kernel.org Message-ID: <20241025060017.1663697-1-benchuanggli@gmail.com> Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 35 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 0f81586a19df..22a927ce2c88 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -892,28 +892,40 @@ static void gl9767_disable_ssc_pll(struct pci_dev *pdev) gl9767_vhs_read(pdev); } +static void gl9767_set_low_power_negotiation(struct pci_dev *pdev, bool enable) +{ + u32 value; + + gl9767_vhs_write(pdev); + + pci_read_config_dword(pdev, PCIE_GLI_9767_CFG, &value); + if (enable) + value &= ~PCIE_GLI_9767_CFG_LOW_PWR_OFF; + else + value |= PCIE_GLI_9767_CFG_LOW_PWR_OFF; + pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); + + gl9767_vhs_read(pdev); +} + static void sdhci_gl9767_set_clock(struct sdhci_host *host, unsigned int clock) { struct sdhci_pci_slot *slot = sdhci_priv(host); struct mmc_ios *ios = &host->mmc->ios; struct pci_dev *pdev; - u32 value; u16 clk; pdev = slot->chip->pdev; host->mmc->actual_clock = 0; - gl9767_vhs_write(pdev); - - pci_read_config_dword(pdev, PCIE_GLI_9767_CFG, &value); - value |= PCIE_GLI_9767_CFG_LOW_PWR_OFF; - pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); - + gl9767_set_low_power_negotiation(pdev, false); gl9767_disable_ssc_pll(pdev); sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - if (clock == 0) + if (clock == 0) { + gl9767_set_low_power_negotiation(pdev, true); return; + } clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock); if (clock == 200000000 && ios->timing == MMC_TIMING_UHS_SDR104) { @@ -922,12 +934,7 @@ static void sdhci_gl9767_set_clock(struct sdhci_host *host, unsigned int clock) } sdhci_enable_clk(host, clk); - - pci_read_config_dword(pdev, PCIE_GLI_9767_CFG, &value); - value &= ~PCIE_GLI_9767_CFG_LOW_PWR_OFF; - pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); - - gl9767_vhs_read(pdev); + gl9767_set_low_power_negotiation(pdev, true); } static void gli_set_9767(struct sdhci_host *host) From c4dedaaeb3f78d3718e9c1b1e4d972a6b99073cd Mon Sep 17 00:00:00 2001 From: Ben Chuang Date: Fri, 25 Oct 2024 14:00:17 +0800 Subject: [PATCH 089/281] mmc: sdhci-pci-gli: GL9767: Fix low power mode in the SD Express process When starting the SD Express process, the low power negotiation mode will be disabled, so we need to re-enable it after switching back to SD mode. Fixes: 0e92aec2efa0 ("mmc: sdhci-pci-gli: Add support SD Express card for GL9767") Signed-off-by: Ben Chuang Cc: stable@vger.kernel.org Message-ID: <20241025060017.1663697-2-benchuanggli@gmail.com> Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 22a927ce2c88..68ce4920e01e 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1068,6 +1068,9 @@ static int gl9767_init_sd_express(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_writew(host, value, SDHCI_CLOCK_CONTROL); } + pci_read_config_dword(pdev, PCIE_GLI_9767_CFG, &value); + value &= ~PCIE_GLI_9767_CFG_LOW_PWR_OFF; + pci_write_config_dword(pdev, PCIE_GLI_9767_CFG, value); gl9767_vhs_read(pdev); return 0; From 7bf46ec090b9e6c9ab08d8006b4eefba2cd5a7f5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 28 Oct 2024 12:01:50 +0000 Subject: [PATCH 090/281] dt-bindings: firmware: arm,scmi: Add missing vendor string Recently introduced max-rx-timeout-ms optionao property is missing a vendor prefix. Add the vendor prefix so that it aligns with the new properties that are about to get added soon. Fixes: 3a5e6ab06eab ("dt-bindings: firmware: arm,scmi: Introduce property max-rx-timeout-ms") Signed-off-by: Cristian Marussi Message-Id: <20241028120151.1301177-7-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- Documentation/devicetree/bindings/firmware/arm,scmi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml index 54d7d11bfed4..ff7a6f12cd00 100644 --- a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml +++ b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml @@ -124,7 +124,7 @@ properties: atomic mode of operation, even if requested. default: 0 - max-rx-timeout-ms: + arm,max-rx-timeout-ms: description: An optional time value, expressed in milliseconds, representing the transport maximum timeout value for the receive channel. The value should From 54962707f8b8b53812d3d7fca279a68c6e18faae Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 28 Oct 2024 12:01:51 +0000 Subject: [PATCH 091/281] firmware: arm_scmi: Use vendor string in max-rx-timeout-ms The original optional property was missing a vendor string prefix; this has been rectified. Fix the naming of such optional property in code too. Cc: Peng Fan Fixes: 1780e411ef94 ("firmware: arm_scmi: Use max-rx-timeout-ms from devicetree") Signed-off-by: Cristian Marussi Message-Id: <20241028120151.1301177-8-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 5bd4cc68a3e3..f8934d049d68 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -3048,10 +3048,10 @@ static const struct scmi_desc *scmi_transport_setup(struct device *dev) dev_info(dev, "Using %s\n", dev_driver_string(trans->supplier)); - ret = of_property_read_u32(dev->of_node, "max-rx-timeout-ms", + ret = of_property_read_u32(dev->of_node, "arm,max-rx-timeout-ms", &trans->desc->max_rx_timeout_ms); if (ret && ret != -EINVAL) - dev_err(dev, "Malformed max-rx-timeout-ms DT property.\n"); + dev_err(dev, "Malformed arm,max-rx-timeout-ms DT property.\n"); dev_info(dev, "SCMI max-rx-timeout: %dms\n", trans->desc->max_rx_timeout_ms); From bf791751162ac875a9439426d13f8d4d18151549 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 24 Oct 2024 12:26:53 +0300 Subject: [PATCH 092/281] thunderbolt: Add only on-board retimers when !CONFIG_USB4_DEBUGFS_MARGINING Normally there is no need to enumerate retimers on the other side of the cable. This is only needed in special cases where user wants to run receiver lane margining against the downstream facing port of a retimer. Furthermore this might confuse the userspace tools such as fwupd because it cannot read the information it expects from these retimers. Fix this by changing the retimer enumeration code to add only on-board retimers when CONFIG_USB4_DEBUGFS_MARGINING is not enabled. Reported-by: AceLan Kao Tested-by: AceLan Kao Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219420 Cc: stable@vger.kernel.org Fixes: ff6ab055e070 ("thunderbolt: Add receiver lane margining support for retimers") Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 721319329afa..bdf641489f82 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -531,6 +531,8 @@ int tb_retimer_scan(struct tb_port *port, bool add) max = i; ret = 0; + if (!IS_ENABLED(CONFIG_USB4_DEBUGFS_MARGINING)) + max = min(last_idx, max); /* Add retimers if they do not exist already */ for (i = 1; i <= max; i++) { From 393c74ccbd847bacf18865a01b422586fc7341cf Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Fri, 18 Oct 2024 23:07:06 +0200 Subject: [PATCH 093/281] USB: serial: option: add Fibocom FG132 0x0112 composition Add Fibocom FG132 0x0112 composition: T: Bus=03 Lev=02 Prnt=06 Port=01 Cnt=02 Dev#= 10 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0112 Rev= 5.15 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom Module S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4f18f189f309..4eaab8b843c1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2320,6 +2320,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0xff, 0x30) }, /* Fibocom FG132 Diag */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0xff, 0x40) }, /* Fibocom FG132 AT */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0, 0) }, /* Fibocom FG132 NMEA */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ From 3b05949ba39f305b585452d0e177470607842165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Thu, 24 Oct 2024 17:09:19 +0200 Subject: [PATCH 094/281] USB: serial: option: add Quectel RG650V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Quectel RG650V which is based on Qualcomm SDX65 chip. The composition is DIAG / NMEA / AT / AT / QMI. T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0122 Rev=05.15 S: Manufacturer=Quectel S: Product=RG650V-EU S: SerialNumber=xxxxxxx C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=9ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=9ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=9ms Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4eaab8b843c1..9ba5584061c8 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_RG650V 0x0122 #define QUECTEL_PRODUCT_EM061K_LTA 0x0123 #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 @@ -1273,6 +1274,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG912Y, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG916Q, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 177f25d1292c7e16e1199b39c85480f7f8815552 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 29 Oct 2024 15:44:35 +0100 Subject: [PATCH 095/281] HID: core: zero-initialize the report buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the report buffer is used by all kinds of drivers in various ways, let's zero-initialize it during allocation to make sure that it can't be ever used to leak kernel memory via specially-crafted report. Fixes: 27ce405039bf ("HID: fix data access in implement()") Reported-by: Benoît Sevens Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 30de92d0bf0f..4aee4eae50fd 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1875,7 +1875,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) u32 len = hid_report_len(report) + 7; - return kmalloc(len, flags); + return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); From 37bb5628379295c1254c113a407cab03a0f4d0b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 31 Oct 2024 12:48:30 +0300 Subject: [PATCH 096/281] USB: serial: io_edgeport: fix use after free in debug printk The "dev_dbg(&urb->dev->dev, ..." which happens after usb_free_urb(urb) is a use after free of the "urb" pointer. Store the "dev" pointer at the start of the function to avoid this issue. Fixes: 984f68683298 ("USB: serial: io_edgeport.c: remove dbg() usage") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Johan Hovold --- drivers/usb/serial/io_edgeport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index c7d6b5e3f898..28c71d99e857 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -770,11 +770,12 @@ static void edge_bulk_out_data_callback(struct urb *urb) static void edge_bulk_out_cmd_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; + struct device *dev = &urb->dev->dev; int status = urb->status; atomic_dec(&CmdUrbs); - dev_dbg(&urb->dev->dev, "%s - FREE URB %p (outstanding %d)\n", - __func__, urb, atomic_read(&CmdUrbs)); + dev_dbg(dev, "%s - FREE URB %p (outstanding %d)\n", __func__, urb, + atomic_read(&CmdUrbs)); /* clean up the transfer buffer */ @@ -784,8 +785,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb) usb_free_urb(urb); if (status) { - dev_dbg(&urb->dev->dev, - "%s - nonzero write bulk status received: %d\n", + dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); return; } From c9363bbb0f68dd1ddb8be7bbfe958cdfcd38d851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Janik?= Date: Wed, 30 Oct 2024 18:18:12 +0100 Subject: [PATCH 097/281] Revert "ALSA: hda/conexant: Mute speakers at suspend / shutdown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4f61c8fe3520 ("ALSA: hda/conexant: Mute speakers at suspend / shutdown") mutes speakers on system shutdown or whenever HDA controller is suspended by PM; this however interacts badly with Thinkpad's ACPI firmware behavior which uses beeps to signal various events (enter/leave suspend or hibernation, AC power connect/disconnect, low battery, etc.); now those beeps are either muted altogether (for suspend/hibernate/ shutdown related events) or work more or less randomly (eg. AC plug/unplug is only audible when you are playing music at the moment, because HDA device is likely in suspend mode otherwise). Since the original bug report mentioned in 4f61c8fe3520 complained about Lenovo's Thinkpad laptop - revert this commit altogether. Fixes: 4f61c8fe3520 ("ALSA: hda/conexant: Mute speakers at suspend / shutdown") Signed-off-by: Jarosław Janik Link: https://patch.msgid.link/20241030171813.18941-2-jaroslaw.janik@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index c74f6742c359..b2bcdf76da30 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -205,8 +205,6 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - snd_hda_gen_shutup_speakers(codec); - /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); From badccd49b93bb945bf4e5cc8707db67cdc5e27e5 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 29 Oct 2024 17:04:06 +0800 Subject: [PATCH 098/281] net: enetc: set MAC address to the VF net_device The MAC address of VF can be configured through the mailbox mechanism of ENETC, but the previous implementation forgot to set the MAC address in net_device, resulting in the SMAC of the sent frames still being the old MAC address. Since the MAC address in the hardware has been changed, Rx cannot receive frames with the DMAC address as the new MAC address. The most obvious phenomenon is that after changing the MAC address, we can see that the MAC address of eno0vf0 has not changed through the "ifconfig eno0vf0" command and the IP address cannot be obtained . root@ls1028ardb:~# ifconfig eno0vf0 down root@ls1028ardb:~# ifconfig eno0vf0 hw ether 00:04:9f:3a:4d:56 up root@ls1028ardb:~# ifconfig eno0vf0 eno0vf0: flags=4163 mtu 1500 ether 66:36:2c:3b:87:76 txqueuelen 1000 (Ethernet) RX packets 794 bytes 69239 (69.2 KB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 11 bytes 2226 (2.2 KB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20241029090406.841836-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index dfcaac302e24..b15db70769e5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -78,11 +78,18 @@ static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct sockaddr *saddr = addr; + int err; if (!is_valid_ether_addr(saddr->sa_data)) return -EADDRNOTAVAIL; - return enetc_msg_vsi_set_primary_mac_addr(priv, saddr); + err = enetc_msg_vsi_set_primary_mac_addr(priv, saddr); + if (err) + return err; + + eth_hw_addr_set(ndev, saddr->sa_data); + + return 0; } static int enetc_vf_set_features(struct net_device *ndev, From 0144c06c5890d1ad0eea65df074cffaf4eea5a3c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Oct 2024 18:31:05 +0200 Subject: [PATCH 099/281] net: dpaa_eth: print FD status in CPU endianness in dpaa_eth_fd tracepoint Sparse warns: note: in included file (through ../include/trace/trace_events.h, ../include/trace/define_trace.h, ../drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h): warning: incorrect type in assignment (different base types) expected unsigned int [usertype] fd_status got restricted __be32 const [usertype] status We take struct qm_fd :: status, store it and print it as an u32, though it is a big endian field. We should print the FD status in CPU endianness for ease of debug and consistency between PowerPC and Arm systems. Though it is a not often used debug feature, it is best to treat it as a bug and backport the format change to all supported stable kernels, for consistency. Fixes: eb11ddf36eb8 ("dpaa_eth: add trace points") Signed-off-by: Vladimir Oltean Acked-by: Madalin Bucur Link: https://patch.msgid.link/20241029163105.44135-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h index 6f0e58a2a58a..9e1d44ae92cc 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h @@ -56,7 +56,7 @@ DECLARE_EVENT_CLASS(dpaa_eth_fd, __entry->fd_format = qm_fd_get_format(fd); __entry->fd_offset = qm_fd_get_offset(fd); __entry->fd_length = qm_fd_get_length(fd); - __entry->fd_status = fd->status; + __entry->fd_status = __be32_to_cpu(fd->status); __assign_str(name); ), From e4d32142d1de8bcafd90ea5f4f557104f0969c41 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 30 Oct 2024 10:17:48 -0700 Subject: [PATCH 100/281] tracing: Fix tracefs mount options Commit 78ff64081949 ("vfs: Convert tracefs to use the new mount API") converted tracefs to use the new mount APIs caused mount options (e.g. gid=) to not take effect. The tracefs superblock can be updated from multiple paths: - on fs_initcall() to init_trace_printk_function_export() - from a work queue to initialize eventfs tracer_init_tracefs_work_func() - fsconfig() syscall to mount or remount of tracefs The tracefs superblock root inode gets created early on in init_trace_printk_function_export(). With the new mount API, tracefs effectively uses get_tree_single() instead of the old API mount_single(). Previously, mount_single() ensured that the options are always applied to the superblock root inode: (1) If the root inode didn't exist, call fill_super() to create it and apply the options. (2) If the root inode exists, call reconfigure_single() which effectively calls tracefs_apply_options() to parse and apply options to the subperblock's fs_info and inode and remount eventfs (if necessary) On the other hand, get_tree_single() effectively calls vfs_get_super() which: (3) If the root inode doesn't exists, calls fill_super() to create it and apply the options. (4) If the root inode already exists, updates the fs_context root with the superblock's root inode. (4) above is always the case for tracefs mounts, since the super block's root inode will already be created by init_trace_printk_function_export(). This means that the mount options get ignored: - Since it isn't applied to the superblock's root inode, it doesn't get inherited by the children. - Since eventfs is initialized from a separate work queue and before call to mount with the options, and it doesn't get remounted for mount. Ensure that the mount options are applied to the super block and eventfs is remounted to respect the mount options. To understand this better, if fstab has the following: tracefs /sys/kernel/tracing tracefs nosuid,nodev,noexec,gid=tracing 0 0 On boot up, permissions look like: # ls -l /sys/kernel/tracing/trace -rw-r----- 1 root root 0 Nov 1 08:37 /sys/kernel/tracing/trace When it should look like: # ls -l /sys/kernel/tracing/trace -rw-r----- 1 root tracing 0 Nov 1 08:37 /sys/kernel/tracing/trace Link: https://lore.kernel.org/r/536e99d3-345c-448b-adee-a21389d7ab4b@redhat.com/ Cc: Eric Sandeen Cc: Mathieu Desnoyers Cc: Shuah Khan Cc: Ali Zahraee Cc: Christian Brauner Cc: David Howells Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 78ff64081949 ("vfs: Convert tracefs to use the new mount API") Link: https://lore.kernel.org/20241030171928.4168869-2-kaleshsingh@google.com Signed-off-by: Kalesh Singh Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1748dff58c3b..cfc614c638da 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -392,6 +392,9 @@ static int tracefs_reconfigure(struct fs_context *fc) struct tracefs_fs_info *sb_opts = sb->s_fs_info; struct tracefs_fs_info *new_opts = fc->s_fs_info; + if (!new_opts) + return 0; + sync_filesystem(sb); /* structure copy of new mount options to sb */ *sb_opts = *new_opts; @@ -478,14 +481,17 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_op = &tracefs_super_operations; sb->s_d_op = &tracefs_dentry_operations; - tracefs_apply_options(sb, false); - return 0; } static int tracefs_get_tree(struct fs_context *fc) { - return get_tree_single(fc, tracefs_fill_super); + int err = get_tree_single(fc, tracefs_fill_super); + + if (err) + return err; + + return tracefs_reconfigure(fc); } static void tracefs_free_fc(struct fs_context *fc) From fa17cb4b3b42618aeed1e0bce80cc55106561718 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 30 Oct 2024 10:17:49 -0700 Subject: [PATCH 101/281] tracing: Document tracefs gid mount option Commit ee7f3666995d ("tracefs: Have new files inherit the ownership of their parent") and commit 48b27b6b5191 ("tracefs: Set all files to the same group ownership as the mount option") introduced a new gid mount option that allows specifying a group to apply to all entries in tracefs. Document this in the tracing readme. Cc: Eric Sandeen Cc: Mathieu Desnoyers Cc: Shuah Khan Cc: Ali Zahraee Cc: Christian Brauner Cc: David Howells Cc: Masami Hiramatsu Link: https://lore.kernel.org/20241030171928.4168869-3-kaleshsingh@google.com Signed-off-by: Kalesh Singh Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8f52b6527ca..2b64b3ec67d9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5501,6 +5501,10 @@ static const struct file_operations tracing_iter_fops = { static const char readme_msg[] = "tracing mini-HOWTO:\n\n" + "By default tracefs removes all OTH file permission bits.\n" + "When mounting tracefs an optional group id can be specified\n" + "which adds the group to every directory and file in tracefs:\n\n" + "\t e.g. mount -t tracefs [-o [gid=]] nodev /sys/kernel/tracing\n\n" "# echo 0 > tracing_on : quick way to disable tracing\n" "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" " Important files:\n" From 8b55572e51805184353ee7d587c720a51818fb82 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 30 Oct 2024 10:17:50 -0700 Subject: [PATCH 102/281] tracing/selftests: Add tracefs mount options test Add a selftest to check that the tracefs gid mount option is applied correctly. ./ftracetest test.d/00basic/mount_options.tc Use the new readme string "[gid=] as a requirement and also update test_ownership.tc requirements to use this. Cc: Eric Sandeen Cc: Mathieu Desnoyers Cc: Shuah Khan Cc: Ali Zahraee Cc: Christian Brauner Cc: David Howells Cc: Masami Hiramatsu Link: https://lore.kernel.org/20241030171928.4168869-4-kaleshsingh@google.com Signed-off-by: Kalesh Singh Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/00basic/mount_options.tc | 101 ++++++++++++++++++ .../ftrace/test.d/00basic/test_ownership.tc | 16 +-- .../testing/selftests/ftrace/test.d/functions | 25 +++++ 3 files changed, 129 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc new file mode 100644 index 000000000000..35e8d47d6072 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc @@ -0,0 +1,101 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test tracefs GID mount option +# requires: "[gid=]":README + +fail() { + local msg="$1" + + echo "FAILED: $msg" + exit_fail +} + +find_alternate_gid() { + local original_gid="$1" + tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3 +} + +mount_tracefs_with_options() { + local mount_point="$1" + local options="$2" + + mount -t tracefs -o "$options" nodev "$mount_point" + + setup +} + +unmount_tracefs() { + local mount_point="$1" + + # Need to make sure the mount isn't busy so that we can umount it + (cd $mount_point; finish_ftrace;) + + cleanup +} + +create_instance() { + local mount_point="$1" + local instance="$mount_point/instances/$(mktemp -u test-XXXXXX)" + + mkdir "$instance" + echo "$instance" +} + +remove_instance() { + local instance="$1" + + rmdir "$instance" +} + +check_gid() { + local mount_point="$1" + local expected_gid="$2" + + echo "Checking permission group ..." + + cd "$mount_point" + + for file in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable"; do + local gid=`stat -c "%g" $file` + if [ "$gid" -ne "$expected_gid" ]; then + cd - # Return to the previous working directory (tracefs root) + fail "$(realpath $file): Expected group $expected_gid; Got group $gid" + fi + done + + cd - # Return to the previous working directory (tracefs root) +} + +test_gid_mount_option() { + local mount_point=$(get_mount_point) + local mount_options=$(get_mnt_options "$mount_point") + local original_group=$(stat -c "%g" .) + local other_group=$(find_alternate_gid "$original_group") + + # Set up mount options with new GID for testing + local new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"` + if [ "$new_options" = "$mount_options" ]; then + new_options="$mount_options,gid=$other_group" + mount_options="$mount_options,gid=$original_group" + fi + + # Unmount existing tracefs instance and mount with new GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$new_options" + + check_gid "$mount_point" "$other_group" + + # Check that files created after the mount inherit the GID + local instance=$(create_instance "$mount_point") + check_gid "$instance" "$other_group" + remove_instance "$instance" + + # Unmount and remount with the original GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$mount_options" + check_gid "$mount_point" "$original_group" +} + +test_gid_mount_option + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc index 094419e190c2..e71cc3ad0bdf 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc @@ -1,24 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test file and directory ownership changes for eventfs +# requires: "[gid=]":README original_group=`stat -c "%g" .` original_owner=`stat -c "%u" .` -mount_point=`stat -c '%m' .` +local mount_point=$(get_mount_point) -# If stat -c '%m' does not work (e.g. busybox) or failed, try to use the -# current working directory (which should be a tracefs) as the mount point. -if [ ! -d "$mount_point" ]; then - if mount | grep -qw $PWD ; then - mount_point=$PWD - else - # If PWD doesn't work, that is an environmental problem. - exit_unresolved - fi -fi - -mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'` +mount_options=$(get_mnt_options "$mount_point") # find another owner and group that is not the original other_group=`tac /etc/group | grep -v ":$original_group:" | head -1 | cut -d: -f3` diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 779f3e62ec90..84d6a9c7ad67 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -193,3 +193,28 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file # " Command: " and "^\n" => 13 test $(expr 13 + $pos) -eq $N } + +# Helper to get the tracefs mount point +get_mount_point() { + local mount_point=`stat -c '%m' .` + + # If stat -c '%m' does not work (e.g. busybox) or failed, try to use the + # current working directory (which should be a tracefs) as the mount point. + if [ ! -d "$mount_point" ]; then + if mount | grep -qw "$PWD"; then + mount_point=$PWD + else + # If PWD doesn't work, that is an environmental problem. + exit_unresolved + fi + fi + echo "$mount_point" +} + +# Helper function to retrieve mount options for a given mount point +get_mnt_options() { + local mnt_point="$1" + local opts=$(mount | grep -m1 "$mnt_point" | sed -e 's/.*(\(.*\)).*/\1/') + + echo "$opts" +} \ No newline at end of file From 5e53e4a66bc7430dd2d11c18a86410e3a38d2940 Mon Sep 17 00:00:00 2001 From: Mikhail Rudenko Date: Thu, 17 Oct 2024 21:37:28 +0300 Subject: [PATCH 103/281] regulator: rk808: Add apply_bit for BUCK3 on RK809 Currently, RK809's BUCK3 regulator is modelled in the driver as a configurable regulator with 0.5-2.4V voltage range. But the voltage setting is not actually applied, because when bit 6 of PMIC_POWER_CONFIG register is set to 0 (default), BUCK3 output voltage is determined by the external feedback resistor. Fix this, by setting bit 6 when voltage selection is set. Existing users which do not specify voltage constraints in their device trees will not be affected by this change, since no voltage setting is applied in those cases, and bit 6 is not enabled. Signed-off-by: Mikhail Rudenko Link: https://patch.msgid.link/20241017-rk809-dcdc3-v1-1-e3c3de92f39c@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/rk808-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 14b60abd6afc..01a8d0487918 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -1379,6 +1379,8 @@ static const struct regulator_desc rk809_reg[] = { .n_linear_ranges = ARRAY_SIZE(rk817_buck1_voltage_ranges), .vsel_reg = RK817_BUCK3_ON_VSEL_REG, .vsel_mask = RK817_BUCK_VSEL_MASK, + .apply_reg = RK817_POWER_CONFIG, + .apply_bit = RK817_BUCK3_FB_RES_INTER, .enable_reg = RK817_POWER_EN_REG(0), .enable_mask = ENABLE_MASK(RK817_ID_DCDC3), .enable_val = ENABLE_MASK(RK817_ID_DCDC3), From b2183187c5fd30659b9caccb92f7e5e680301769 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Mon, 28 Oct 2024 14:42:14 +0530 Subject: [PATCH 104/281] dt-bindings: net: xlnx,axi-ethernet: Correct phy-mode property value Correct phy-mode property value to 1000base-x. Fixes: cbb1ca6d5f9a ("dt-bindings: net: xlnx,axi-ethernet: convert bindings document to yaml") Signed-off-by: Suraj Gupta Reviewed-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20241028091214.2078726-1-suraj.gupta2@amd.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml index e95c21628281..fb02e579463c 100644 --- a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml +++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml @@ -61,7 +61,7 @@ properties: - gmii - rgmii - sgmii - - 1000BaseX + - 1000base-x xlnx,phy-type: description: From 7ce3e6107103214d354a16729a472f588be60572 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 30 Oct 2024 12:02:53 +0100 Subject: [PATCH 105/281] scsi: sd_zbc: Use kvzalloc() to allocate REPORT ZONES buffer We have two reports of failed memory allocation in btrfs' code which is calling into report zones. Both of these reports have the following signature coming from __vmalloc_area_node(): kworker/u17:5: vmalloc error: size 0, failed to allocate pages, mode:0x10dc2(GFP_KERNEL|__GFP_HIGHMEM|__GFP_NORETRY|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0 Further debugging showed these where allocations of one sector (512 bytes) and at least one of the reporter's systems where low on memory, so going through the overhead of allocating a vm area failed. Switching the allocation from __vmalloc() to kvzalloc() avoids the overhead of vmalloc() on small allocations and succeeds. Note: the buffer is already freed using kvfree() so there's no need to adjust the free path. Cc: Qu Wenru Cc: Naohiro Aota Link: https://github.com/kdave/btrfs-progs/issues/779 Link: https://github.com/kdave/btrfs-progs/issues/915 Fixes: 23a50861adda ("scsi: sd_zbc: Cleanup sd_zbc_alloc_report_buffer()") Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241030110253.11718-1-jth@kernel.org Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index c8b9654d30f0..a4d17f3da25d 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -188,8 +188,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { - buf = __vmalloc(bufsize, - GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); + buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); if (buf) { *buflen = bufsize; return buf; From 3b557be89fc688dbd9ccf704a70f7600a094f13a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 1 Nov 2024 10:53:16 +0800 Subject: [PATCH 106/281] net: wwan: t7xx: Fix off-by-one error in t7xx_dpmaif_rx_buf_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error path in t7xx_dpmaif_rx_buf_alloc(), free and unmap the already allocated and mapped skb in a loop, but the loop condition terminates when the index reaches zero, which fails to free the first allocated skb at index zero. Check with i-- so that skb at index 0 is freed as well. Cc: stable@vger.kernel.org Fixes: d642b012df70 ("net: wwan: t7xx: Add data path interface") Acked-by: Sergey Ryazanov Signed-off-by: Jinjie Ruan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20241101025316.3234023-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index 210d84c67ef9..7a9c09cd4fdc 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -226,7 +226,7 @@ int t7xx_dpmaif_rx_buf_alloc(struct dpmaif_ctrl *dpmaif_ctrl, return 0; err_unmap_skbs: - while (--i > 0) + while (i--) t7xx_unmap_bat_skb(dpmaif_ctrl->dev, bat_req->bat_skb, i); return ret; From 0ead60804b64f5bd6999eec88e503c6a1a242d41 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Oct 2024 13:46:21 -0400 Subject: [PATCH 107/281] sctp: properly validate chunk size in sctp_sf_ootb() A size validation fix similar to that in Commit 50619dbf8db7 ("sctp: add size validation when walking chunks") is also required in sctp_sf_ootb() to address a crash reported by syzbot: BUG: KMSAN: uninit-value in sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_do_sm+0x181/0x93d0 net/sctp/sm_sideeffect.c:1166 sctp_endpoint_bh_rcv+0xc38/0xf90 net/sctp/endpointola.c:407 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_rcv+0x3831/0x3b20 net/sctp/input.c:243 sctp4_rcv+0x42/0x50 net/sctp/protocol.c:1159 ip_protocol_deliver_rcu+0xb51/0x13d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x336/0x500 net/ipv4/ip_input.c:233 Reported-by: syzbot+f0cbb34d39392f2746ca@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Link: https://patch.msgid.link/a29ebb6d8b9f8affd0f9abb296faafafe10c17d8.1730223981.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7d315a18612b..a0524ba8d787 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3751,7 +3751,7 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, } ch = (struct sctp_chunkhdr *)ch_end; - } while (ch_end < skb_tail_pointer(skb)); + } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb)); if (ootb_shut_ack) return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); From e15c5506dd39885cd047f811a64240e2e8ab401b Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 31 Oct 2024 14:02:46 +0800 Subject: [PATCH 108/281] net: enetc: allocate vf_state during PF probes In the previous implementation, vf_state is allocated memory only when VF is enabled. However, net_device_ops::ndo_set_vf_mac() may be called before VF is enabled to configure the MAC address of VF. If this is the case, enetc_pf_set_vf_mac() will access vf_state, resulting in access to a null pointer. The simplified error log is as follows. root@ls1028ardb:~# ip link set eno0 vf 1 mac 00:0c:e7:66:77:89 [ 173.543315] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 [ 173.637254] pc : enetc_pf_set_vf_mac+0x3c/0x80 Message from sy [ 173.641973] lr : do_setlink+0x4a8/0xec8 [ 173.732292] Call trace: [ 173.734740] enetc_pf_set_vf_mac+0x3c/0x80 [ 173.738847] __rtnl_newlink+0x530/0x89c [ 173.742692] rtnl_newlink+0x50/0x7c [ 173.746189] rtnetlink_rcv_msg+0x128/0x390 [ 173.750298] netlink_rcv_skb+0x60/0x130 [ 173.754145] rtnetlink_rcv+0x18/0x24 [ 173.757731] netlink_unicast+0x318/0x380 [ 173.761665] netlink_sendmsg+0x17c/0x3c8 Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20241031060247.1290941-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/enetc/enetc_pf.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 8f6b0bf48139..c95a7c083b0f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -665,19 +665,11 @@ static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { enetc_msg_psi_free(pf); - kfree(pf->vf_state); pf->num_vfs = 0; pci_disable_sriov(pdev); } else { pf->num_vfs = num_vfs; - pf->vf_state = kcalloc(num_vfs, sizeof(struct enetc_vf_state), - GFP_KERNEL); - if (!pf->vf_state) { - pf->num_vfs = 0; - return -ENOMEM; - } - err = enetc_msg_psi_init(pf); if (err) { dev_err(&pdev->dev, "enetc_msg_psi_init (%d)\n", err); @@ -696,7 +688,6 @@ static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs) err_en_sriov: enetc_msg_psi_free(pf); err_msg_psi: - kfree(pf->vf_state); pf->num_vfs = 0; return err; @@ -1286,6 +1277,12 @@ static int enetc_pf_probe(struct pci_dev *pdev, pf = enetc_si_priv(si); pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); + if (pf->total_vfs) { + pf->vf_state = kcalloc(pf->total_vfs, sizeof(struct enetc_vf_state), + GFP_KERNEL); + if (!pf->vf_state) + goto err_alloc_vf_state; + } err = enetc_setup_mac_addresses(node, pf); if (err) @@ -1363,6 +1360,8 @@ err_alloc_si_res: free_netdev(ndev); err_alloc_netdev: err_setup_mac_addresses: + kfree(pf->vf_state); +err_alloc_vf_state: enetc_psi_destroy(pdev); err_psi_create: return err; @@ -1389,6 +1388,7 @@ static void enetc_pf_remove(struct pci_dev *pdev) enetc_free_si_resources(priv); free_netdev(si->ndev); + kfree(pf->vf_state); enetc_psi_destroy(pdev); } From be31ec5c8efa69f4970e4554c1b760ac8ea3e543 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2024 10:33:29 -0700 Subject: [PATCH 109/281] MAINTAINERS: Remove self from DSA entry Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Acked-by: Vladimir Oltean Link: https://patch.msgid.link/20241031173332.3858162-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 63f53feefa0a..96660c63f5b9 100644 --- a/CREDITS +++ b/CREDITS @@ -1204,6 +1204,10 @@ S: Dreisbachstrasse 24 S: D-57250 Netphen S: Germany +N: Florian Fainelli +E: f.fainelli@gmail.com +D: DSA + N: Rik Faith E: faith@acm.org D: Future Domain TMC-16x0 SCSI driver (author) diff --git a/MAINTAINERS b/MAINTAINERS index a27407950242..8dca600d69fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16028,7 +16028,6 @@ F: drivers/net/wireless/ NETWORKING [DSA] M: Andrew Lunn -M: Florian Fainelli M: Vladimir Oltean S: Maintained F: Documentation/devicetree/bindings/net/dsa/ From 5ccdcdf186aec6b9111845fd37e1757e9b413e2f Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 30 Oct 2024 11:55:32 +0530 Subject: [PATCH 110/281] net: xilinx: axienet: Enqueue Tx packets in dql before dmaengine starts Enqueue packets in dql after dma engine starts causes race condition. Tx transfer starts once dma engine is started and may execute dql dequeue in completion before it gets queued. It results in following kernel crash while running iperf stress test: kernel BUG at lib/dynamic_queue_limits.c:99! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP pc : dql_completed+0x238/0x248 lr : dql_completed+0x3c/0x248 Call trace: dql_completed+0x238/0x248 axienet_dma_tx_cb+0xa0/0x170 xilinx_dma_do_tasklet+0xdc/0x290 tasklet_action_common+0xf8/0x11c tasklet_action+0x30/0x3c handle_softirqs+0xf8/0x230 Start dmaengine after enqueue in dql fixes the crash. Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Suraj Gupta Link: https://patch.msgid.link/20241030062533.2527042-2-suraj.gupta2@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index d940853acc0b..1fcbcaa85ebd 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -924,13 +924,13 @@ axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) skbuf_dma->sg_len = sg_len; dma_tx_desc->callback_param = lp; dma_tx_desc->callback_result = axienet_dma_tx_cb; - dmaengine_submit(dma_tx_desc); - dma_async_issue_pending(lp->tx_chan); txq = skb_get_tx_queue(lp->ndev, skb); netdev_tx_sent_queue(txq, skb->len); netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); + dmaengine_submit(dma_tx_desc); + dma_async_issue_pending(lp->tx_chan); return NETDEV_TX_OK; xmit_error_unmap_sg: From 9265fed6db601ee2ec47577815387458ef4f047a Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 31 Oct 2024 02:16:09 +0200 Subject: [PATCH 111/281] tpm: Lock TPM chip in tpm_pm_suspend() first Setting TPM_CHIP_FLAG_SUSPENDED in the end of tpm_pm_suspend() can be racy according, as this leaves window for tpm_hwrng_read() to be called while the operation is in progress. The recent bug report gives also evidence of this behaviour. Aadress this by locking the TPM chip before checking any chip->flags both in tpm_pm_suspend() and tpm_hwrng_read(). Move TPM_CHIP_FLAG_SUSPENDED check inside tpm_get_random() so that it will be always checked only when the lock is reserved. Cc: stable@vger.kernel.org # v6.4+ Fixes: 99d464506255 ("tpm: Prevent hwrng from activating during resume") Reported-by: Mike Seo Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219383 Reviewed-by: Jerry Snitselaar Tested-by: Mike Seo Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-chip.c | 4 ---- drivers/char/tpm/tpm-interface.c | 32 ++++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 1ff99a7091bb..7df7abaf3e52 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -525,10 +525,6 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng); - /* Give back zero bytes, as TPM chip has not yet fully resumed: */ - if (chip->flags & TPM_CHIP_FLAG_SUSPENDED) - return 0; - return tpm_get_random(chip, data, max); } diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 8134f002b121..b1daa0d7b341 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -370,6 +370,13 @@ int tpm_pm_suspend(struct device *dev) if (!chip) return -ENODEV; + rc = tpm_try_get_ops(chip); + if (rc) { + /* Can be safely set out of locks, as no action cannot race: */ + chip->flags |= TPM_CHIP_FLAG_SUSPENDED; + goto out; + } + if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED) goto suspended; @@ -377,21 +384,19 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - rc = tpm_try_get_ops(chip); - if (!rc) { - if (chip->flags & TPM_CHIP_FLAG_TPM2) { - tpm2_end_auth_session(chip); - tpm2_shutdown(chip, TPM2_SU_STATE); - } else { - rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - } - - tpm_put_ops(chip); + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + tpm2_end_auth_session(chip); + tpm2_shutdown(chip, TPM2_SU_STATE); + goto suspended; } + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + suspended: chip->flags |= TPM_CHIP_FLAG_SUSPENDED; + tpm_put_ops(chip); +out: if (rc) dev_err(dev, "Ignoring error %d while suspending\n", rc); return 0; @@ -440,11 +445,18 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max) if (!chip) return -ENODEV; + /* Give back zero bytes, as TPM chip has not yet fully resumed: */ + if (chip->flags & TPM_CHIP_FLAG_SUSPENDED) { + rc = 0; + goto out; + } + if (chip->flags & TPM_CHIP_FLAG_TPM2) rc = tpm2_get_random(chip, out, max); else rc = tpm1_get_random(chip, out, max); +out: tpm_put_ops(chip); return rc; } From ef7134c7fc48e1441b398e55a862232868a6f0a7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 2 Nov 2024 14:24:38 -0700 Subject: [PATCH 112/281] smb: client: Fix use-after-free of network namespace. Recently, we got a customer report that CIFS triggers oops while reconnecting to a server. [0] The workload runs on Kubernetes, and some pods mount CIFS servers in non-root network namespaces. The problem rarely happened, but it was always while the pod was dying. The root cause is wrong reference counting for network namespace. CIFS uses kernel sockets, which do not hold refcnt of the netns that the socket belongs to. That means CIFS must ensure the socket is always freed before its netns; otherwise, use-after-free happens. The repro steps are roughly: 1. mount CIFS in a non-root netns 2. drop packets from the netns 3. destroy the netns 4. unmount CIFS We can reproduce the issue quickly with the script [1] below and see the splat [2] if CONFIG_NET_NS_REFCNT_TRACKER is enabled. When the socket is TCP, it is hard to guarantee the netns lifetime without holding refcnt due to async timers. Let's hold netns refcnt for each socket as done for SMC in commit 9744d2bf1976 ("smc: Fix use-after-free in tcp_write_timer_handler()."). Note that we need to move put_net() from cifs_put_tcp_session() to clean_demultiplex_info(); otherwise, __sock_create() still could touch a freed netns while cifsd tries to reconnect from cifs_demultiplex_thread(). Also, maybe_get_net() cannot be put just before __sock_create() because the code is not under RCU and there is a small chance that the same address happened to be reallocated to another netns. [0]: CIFS: VFS: \\XXXXXXXXXXX has not responded in 15 seconds. Reconnecting... CIFS: Serverclose failed 4 times, giving up Unable to handle kernel paging request at virtual address 14de99e461f84a07 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 [14de99e461f84a07] address between user and kernel address ranges Internal error: Oops: 0000000096000004 [#1] SMP Modules linked in: cls_bpf sch_ingress nls_utf8 cifs cifs_arc4 cifs_md4 dns_resolver tcp_diag inet_diag veth xt_state xt_connmark nf_conntrack_netlink xt_nat xt_statistic xt_MASQUERADE xt_mark xt_addrtype ipt_REJECT nf_reject_ipv4 nft_chain_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment nft_compat nf_tables nfnetlink overlay nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sm4_ce_cipher sm4 sm3_ce sm3 sha3_ce sha512_ce sha512_arm64 sha1_ce ena button sch_fq_codel loop fuse configfs dmi_sysfs sha2_ce sha256_arm64 dm_mirror dm_region_hash dm_log dm_mod dax efivarfs CPU: 5 PID: 2690970 Comm: cifsd Not tainted 6.1.103-109.184.amzn2023.aarch64 #1 Hardware name: Amazon EC2 r7g.4xlarge/, BIOS 1.0 11/1/2018 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : fib_rules_lookup+0x44/0x238 lr : __fib_lookup+0x64/0xbc sp : ffff8000265db790 x29: ffff8000265db790 x28: 0000000000000000 x27: 000000000000bd01 x26: 0000000000000000 x25: ffff000b4baf8000 x24: ffff00047b5e4580 x23: ffff8000265db7e0 x22: 0000000000000000 x21: ffff00047b5e4500 x20: ffff0010e3f694f8 x19: 14de99e461f849f7 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 3f92800abd010002 x11: 0000000000000001 x10: ffff0010e3f69420 x9 : ffff800008a6f294 x8 : 0000000000000000 x7 : 0000000000000006 x6 : 0000000000000000 x5 : 0000000000000001 x4 : ffff001924354280 x3 : ffff8000265db7e0 x2 : 0000000000000000 x1 : ffff0010e3f694f8 x0 : ffff00047b5e4500 Call trace: fib_rules_lookup+0x44/0x238 __fib_lookup+0x64/0xbc ip_route_output_key_hash_rcu+0x2c4/0x398 ip_route_output_key_hash+0x60/0x8c tcp_v4_connect+0x290/0x488 __inet_stream_connect+0x108/0x3d0 inet_stream_connect+0x50/0x78 kernel_connect+0x6c/0xac generic_ip_connect+0x10c/0x6c8 [cifs] __reconnect_target_unlocked+0xa0/0x214 [cifs] reconnect_dfs_server+0x144/0x460 [cifs] cifs_reconnect+0x88/0x148 [cifs] cifs_readv_from_socket+0x230/0x430 [cifs] cifs_read_from_socket+0x74/0xa8 [cifs] cifs_demultiplex_thread+0xf8/0x704 [cifs] kthread+0xd0/0xd4 Code: aa0003f8 f8480f13 eb18027f 540006c0 (b9401264) [1]: CIFS_CRED="/root/cred.cifs" CIFS_USER="Administrator" CIFS_PASS="Password" CIFS_IP="X.X.X.X" CIFS_PATH="//${CIFS_IP}/Users/Administrator/Desktop/CIFS_TEST" CIFS_MNT="/mnt/smb" DEV="enp0s3" cat < ${CIFS_CRED} username=${CIFS_USER} password=${CIFS_PASS} domain=EXAMPLE.COM EOF unshare -n bash -c " mkdir -p ${CIFS_MNT} ip netns attach root 1 ip link add eth0 type veth peer veth0 netns root ip link set eth0 up ip -n root link set veth0 up ip addr add 192.168.0.2/24 dev eth0 ip -n root addr add 192.168.0.1/24 dev veth0 ip route add default via 192.168.0.1 dev eth0 ip netns exec root sysctl net.ipv4.ip_forward=1 ip netns exec root iptables -t nat -A POSTROUTING -s 192.168.0.2 -o ${DEV} -j MASQUERADE mount -t cifs ${CIFS_PATH} ${CIFS_MNT} -o vers=3.0,sec=ntlmssp,credentials=${CIFS_CRED},rsize=65536,wsize=65536,cache=none,echo_interval=1 touch ${CIFS_MNT}/a.txt ip netns exec root iptables -t nat -D POSTROUTING -s 192.168.0.2 -o ${DEV} -j MASQUERADE " umount ${CIFS_MNT} [2]: ref_tracker: net notrefcnt@000000004bbc008d has 1/1 users at sk_alloc (./include/net/net_namespace.h:339 net/core/sock.c:2227) inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252) __sock_create (net/socket.c:1576) generic_ip_connect (fs/smb/client/connect.c:3075) cifs_get_tcp_session.part.0 (fs/smb/client/connect.c:3160 fs/smb/client/connect.c:1798) cifs_mount_get_session (fs/smb/client/trace.h:959 fs/smb/client/connect.c:3366) dfs_mount_share (fs/smb/client/dfs.c:63 fs/smb/client/dfs.c:285) cifs_mount (fs/smb/client/connect.c:3622) cifs_smb3_do_mount (fs/smb/client/cifsfs.c:949) smb3_get_tree (fs/smb/client/fs_context.c:784 fs/smb/client/fs_context.c:802 fs/smb/client/fs_context.c:794) vfs_get_tree (fs/super.c:1800) path_mount (fs/namespace.c:3508 fs/namespace.c:3834) __x64_sys_mount (fs/namespace.c:3848 fs/namespace.c:4057 fs/namespace.c:4034 fs/namespace.c:4034) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Signed-off-by: Kuniyuki Iwashima Acked-by: Tom Talpey Signed-off-by: Steve French --- fs/smb/client/connect.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 15d94ac4095e..0ce2d704b1f3 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1037,6 +1037,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) */ } + put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); kfree(server); @@ -1635,8 +1636,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) /* srv_count can never go negative */ WARN_ON(server->srv_count < 0); - put_net(cifs_net_ns(server)); - list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3070,13 +3069,22 @@ generic_ip_connect(struct TCP_Server_Info *server) if (server->ssocket) { socket = server->ssocket; } else { - rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM, + struct net *net = cifs_net_ns(server); + struct sock *sk; + + rc = __sock_create(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket, 1); if (rc < 0) { cifs_server_dbg(VFS, "Error %d creating socket\n", rc); return rc; } + sk = server->ssocket->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); socket = server->ssocket; From b0ef514bc6bbdeb8cc7492c0f473e14cb06b14d4 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Fri, 18 Oct 2024 15:41:36 +0000 Subject: [PATCH 113/281] drm/imagination: Add a per-file PVR context list This adds a linked list of VM contexts which is needed for the next patch to be able to correctly track VM contexts for destruction on file close. It is only safe for VM contexts to be removed from the list and destroyed when not in interrupt context. Signed-off-by: Brendan King Signed-off-by: Matt Coster Reviewed-by: Frank Binns Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/e57128ea-f0ce-4e93-a9d4-3f033a8b06fa@imgtec.com --- drivers/gpu/drm/imagination/pvr_context.c | 14 ++++++++++++++ drivers/gpu/drm/imagination/pvr_context.h | 3 +++ drivers/gpu/drm/imagination/pvr_device.h | 10 ++++++++++ drivers/gpu/drm/imagination/pvr_drv.c | 3 +++ 4 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/imagination/pvr_context.c b/drivers/gpu/drm/imagination/pvr_context.c index eded5e955cc0..255c93582734 100644 --- a/drivers/gpu/drm/imagination/pvr_context.c +++ b/drivers/gpu/drm/imagination/pvr_context.c @@ -17,10 +17,14 @@ #include #include + +#include #include #include +#include #include #include +#include #include #include #include @@ -354,6 +358,10 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co return err; } + spin_lock(&pvr_dev->ctx_list_lock); + list_add_tail(&ctx->file_link, &pvr_file->contexts); + spin_unlock(&pvr_dev->ctx_list_lock); + return 0; err_destroy_fw_obj: @@ -380,6 +388,11 @@ pvr_context_release(struct kref *ref_count) container_of(ref_count, struct pvr_context, ref_count); struct pvr_device *pvr_dev = ctx->pvr_dev; + WARN_ON(in_interrupt()); + spin_lock(&pvr_dev->ctx_list_lock); + list_del(&ctx->file_link); + spin_unlock(&pvr_dev->ctx_list_lock); + xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id); pvr_context_destroy_queues(ctx); pvr_fw_object_destroy(ctx->fw_obj); @@ -451,6 +464,7 @@ void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file) void pvr_context_device_init(struct pvr_device *pvr_dev) { xa_init_flags(&pvr_dev->ctx_ids, XA_FLAGS_ALLOC1); + spin_lock_init(&pvr_dev->ctx_list_lock); } /** diff --git a/drivers/gpu/drm/imagination/pvr_context.h b/drivers/gpu/drm/imagination/pvr_context.h index 0c7b97dfa6ba..a5b0a82a54a1 100644 --- a/drivers/gpu/drm/imagination/pvr_context.h +++ b/drivers/gpu/drm/imagination/pvr_context.h @@ -85,6 +85,9 @@ struct pvr_context { /** @compute: Transfer queue. */ struct pvr_queue *transfer; } queues; + + /** @file_link: pvr_file PVR context list link. */ + struct list_head file_link; }; static __always_inline struct pvr_queue * diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h index b574e23d484b..6d0dfacb677b 100644 --- a/drivers/gpu/drm/imagination/pvr_device.h +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -293,6 +294,12 @@ struct pvr_device { /** @sched_wq: Workqueue for schedulers. */ struct workqueue_struct *sched_wq; + + /** + * @ctx_list_lock: Lock to be held when accessing the context list in + * struct pvr_file. + */ + spinlock_t ctx_list_lock; }; /** @@ -344,6 +351,9 @@ struct pvr_file { * This array is used to allocate handles returned to userspace. */ struct xarray vm_ctx_handles; + + /** @contexts: PVR context list. */ + struct list_head contexts; }; /** diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c index 1a0cb7aa9cea..fb17196e05f4 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.c +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1326,6 +1327,8 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file) */ pvr_file->pvr_dev = pvr_dev; + INIT_LIST_HEAD(&pvr_file->contexts); + xa_init_flags(&pvr_file->ctx_handles, XA_FLAGS_ALLOC1); xa_init_flags(&pvr_file->free_list_handles, XA_FLAGS_ALLOC1); xa_init_flags(&pvr_file->hwrt_handles, XA_FLAGS_ALLOC1); From b04ce1e718bd55302b52d05d6873e233cb3ec7a1 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Fri, 18 Oct 2024 15:41:40 +0000 Subject: [PATCH 114/281] drm/imagination: Break an object reference loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When remaining resources are being cleaned up on driver close, outstanding VM mappings may result in resources being leaked, due to an object reference loop, as shown below, with each object (or set of objects) referencing the object below it: PVR GEM Object GPU scheduler "finished" fence GPU scheduler “scheduled” fence PVR driver “done” fence PVR Context PVR VM Context PVR VM Mappings PVR GEM Object The reference that the PVR VM Context has on the VM mappings is a soft one, in the sense that the freeing of outstanding VM mappings is done as part of VM context destruction; no reference counts are involved, as is the case for all the other references in the loop. To break the reference loop during cleanup, free the outstanding VM mappings before destroying the PVR Context associated with the VM context. Signed-off-by: Brendan King Signed-off-by: Matt Coster Reviewed-by: Frank Binns Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/8a25924f-1bb7-4d9a-a346-58e871dfb1d1@imgtec.com --- drivers/gpu/drm/imagination/pvr_context.c | 19 +++++++++++++++++++ drivers/gpu/drm/imagination/pvr_context.h | 18 ++++++++++++++++++ drivers/gpu/drm/imagination/pvr_vm.c | 22 ++++++++++++++++++---- drivers/gpu/drm/imagination/pvr_vm.h | 1 + 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_context.c b/drivers/gpu/drm/imagination/pvr_context.c index 255c93582734..4cb3494c0bb2 100644 --- a/drivers/gpu/drm/imagination/pvr_context.c +++ b/drivers/gpu/drm/imagination/pvr_context.c @@ -450,11 +450,30 @@ pvr_context_destroy(struct pvr_file *pvr_file, u32 handle) */ void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file) { + struct pvr_device *pvr_dev = pvr_file->pvr_dev; struct pvr_context *ctx; unsigned long handle; xa_for_each(&pvr_file->ctx_handles, handle, ctx) pvr_context_destroy(pvr_file, handle); + + spin_lock(&pvr_dev->ctx_list_lock); + ctx = list_first_entry(&pvr_file->contexts, struct pvr_context, file_link); + + while (!list_entry_is_head(ctx, &pvr_file->contexts, file_link)) { + list_del_init(&ctx->file_link); + + if (pvr_context_get_if_referenced(ctx)) { + spin_unlock(&pvr_dev->ctx_list_lock); + + pvr_vm_unmap_all(ctx->vm_ctx); + + pvr_context_put(ctx); + spin_lock(&pvr_dev->ctx_list_lock); + } + ctx = list_first_entry(&pvr_file->contexts, struct pvr_context, file_link); + } + spin_unlock(&pvr_dev->ctx_list_lock); } /** diff --git a/drivers/gpu/drm/imagination/pvr_context.h b/drivers/gpu/drm/imagination/pvr_context.h index a5b0a82a54a1..07afa179cdf4 100644 --- a/drivers/gpu/drm/imagination/pvr_context.h +++ b/drivers/gpu/drm/imagination/pvr_context.h @@ -126,6 +126,24 @@ pvr_context_get(struct pvr_context *ctx) return ctx; } +/** + * pvr_context_get_if_referenced() - Take an additional reference on a still + * referenced context. + * @ctx: Context pointer. + * + * Call pvr_context_put() to release. + * + * Returns: + * * True on success, or + * * false if no context pointer passed, or the context wasn't still + * * referenced. + */ +static __always_inline bool +pvr_context_get_if_referenced(struct pvr_context *ctx) +{ + return ctx != NULL && kref_get_unless_zero(&ctx->ref_count) != 0; +} + /** * pvr_context_lookup() - Lookup context pointer from handle and file. * @pvr_file: Pointer to pvr_file structure. diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 97c0f772ed65..7bd6ba4c6e8a 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -597,12 +598,26 @@ err_free: } /** - * pvr_vm_context_release() - Teardown a VM context. - * @ref_count: Pointer to reference counter of the VM context. + * pvr_vm_unmap_all() - Unmap all mappings associated with a VM context. + * @vm_ctx: Target VM context. * * This function ensures that no mappings are left dangling by unmapping them * all in order of ascending device-virtual address. */ +void +pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx) +{ + WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range)); +} + +/** + * pvr_vm_context_release() - Teardown a VM context. + * @ref_count: Pointer to reference counter of the VM context. + * + * This function also ensures that no mappings are left dangling by calling + * pvr_vm_unmap_all. + */ static void pvr_vm_context_release(struct kref *ref_count) { @@ -612,8 +627,7 @@ pvr_vm_context_release(struct kref *ref_count) if (vm_ctx->fw_mem_ctx_obj) pvr_fw_object_destroy(vm_ctx->fw_mem_ctx_obj); - WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, - vm_ctx->gpuvm_mgr.mm_range)); + pvr_vm_unmap_all(vm_ctx); pvr_mmu_context_destroy(vm_ctx->mmu_ctx); drm_gem_private_object_fini(&vm_ctx->dummy_gem); diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h index f2a6463f2b05..79406243617c 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.h +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -39,6 +39,7 @@ int pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, u64 device_addr, u64 size); int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); +void pvr_vm_unmap_all(struct pvr_vm_context *vm_ctx); dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx); struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx); From 8ca8d07857c698503b2b3bf615238c87c02f064e Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 23 Oct 2024 12:02:41 +0530 Subject: [PATCH 115/281] platform/x86/amd/pmf: Add SMU metrics table support for 1Ah family 60h model Add SMU metrics table support for 1Ah family 60h model. This information will be used by the PMF driver to alter the system thermals. Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241023063245.1404420-2-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/core.c | 1 + drivers/platform/x86/amd/pmf/spc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index d6af0ca036f1..347bb43a5f2b 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -261,6 +261,7 @@ int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer) dev->mtable_size = sizeof(dev->m_table); break; case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: dev->mtable_size = sizeof(dev->m_table_v2); break; default: diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index b5183969f9bf..06226eb0eab3 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -86,6 +86,7 @@ static void amd_pmf_get_smu_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ta ARRAY_SIZE(dev->m_table.avg_core_c0residency), in); break; case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: memcpy(&dev->m_table_v2, dev->buf, dev->mtable_size); in->ev_info.socket_power = dev->m_table_v2.apu_power + dev->m_table_v2.dgpu_power; in->ev_info.skin_temperature = dev->m_table_v2.skin_temp; From bceec87a73804bb4c33b9a6c96e2d27cd893a801 Mon Sep 17 00:00:00 2001 From: Corey Hickey Date: Mon, 28 Oct 2024 11:02:41 -0700 Subject: [PATCH 116/281] platform/x86/amd/pmc: Detect when STB is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loading the amd_pmc module as: amd_pmc enable_stb=1 ...can result in the following messages in the kernel ring buffer: amd_pmc AMDI0009:00: SMU cmd failed. err: 0xff ioremap on RAM at 0x0000000000000000 - 0x0000000000ffffff WARNING: CPU: 10 PID: 2151 at arch/x86/mm/ioremap.c:217 __ioremap_caller+0x2cd/0x340 Further debugging reveals that this occurs when the requests for S2D_PHYS_ADDR_LOW and S2D_PHYS_ADDR_HIGH return a value of 0, indicating that the STB is inaccessible. To prevent the ioremap warning and provide clarity to the user, handle the invalid address and display an error message. Link: https://lore.kernel.org/platform-driver-x86/c588ff5d-3e04-4549-9a86-284b9b4419ba@amd.com Fixes: 3d7d407dfb05 ("platform/x86: amd-pmc: Add support for AMD Spill to DRAM STB feature") Acked-by: Shyam Sundar S K Signed-off-by: Corey Hickey Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20241028180241.1341624-1-bugfood-ml@fatooh.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmc/pmc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index bbb8edb62e00..5669f94c3d06 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -998,6 +998,11 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) amd_pmc_send_cmd(dev, S2D_PHYS_ADDR_LOW, &phys_addr_low, dev->s2d_msg_id, true); amd_pmc_send_cmd(dev, S2D_PHYS_ADDR_HIGH, &phys_addr_hi, dev->s2d_msg_id, true); + if (!phys_addr_hi && !phys_addr_low) { + dev_err(dev->dev, "STB is not enabled on the system; disable enable_stb or contact system vendor\n"); + return -EINVAL; + } + stb_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low); /* Clear msg_port for other SMU operation */ From c2d188e137e77294323132a760a4608321a36a70 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 4 Nov 2024 11:07:34 +0100 Subject: [PATCH 117/281] ALSA: ump: Don't enumeration invalid groups for legacy rawmidi The legacy rawmidi tries to enumerate all possible UMP groups belonging to the UMP endpoint. But currently it shows all 16 ports when the UMP endpoint is configured with static blocks, although most of them may be unused. There was already a fix for the sequencer client side to ignore such groups in the commit 3bfd7c0ba184 ("ALSA: seq: ump: Skip useless ports for static blocks"), and this commit is a similar fix for UMP rawmidi devices; it adds simply the check for the validity of each group that has been already parsed. (Note that the group info was moved to snd_ump_endpoint.groups[] by the commit 0642a3c5cacc0321c755 ("ALSA: ump: Update substream name from assigned FB names")). Link: https://patch.msgid.link/20241104100735.16127-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index cf22a17e38dd..7d59a0a9b037 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1233,7 +1233,7 @@ static int fill_legacy_mapping(struct snd_ump_endpoint *ump) num = 0; for (i = 0; i < SNDRV_UMP_MAX_GROUPS; i++) - if (group_maps & (1U << i)) + if ((group_maps & (1U << i)) && ump->groups[i].valid) ump->legacy_mapping[num++] = i; return num; From 8abbf1f01d6a2ef9f911f793e30f7382154b5a3a Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 1 Nov 2024 21:55:13 +0300 Subject: [PATCH 118/281] ALSA: firewire-lib: fix return value on fail in amdtp_tscm_init() If amdtp_stream_init() fails in amdtp_tscm_init(), the latter returns zero, though it's supposed to return error code, which is checked inside init_stream() in file tascam-stream.c. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 47faeea25ef3 ("ALSA: firewire-tascam: add data block processing layer") Signed-off-by: Murad Masimov Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20241101185517.1819-1-m.masimov@maxima.ru --- sound/firewire/tascam/amdtp-tascam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/amdtp-tascam.c b/sound/firewire/tascam/amdtp-tascam.c index 0b42d6559008..079afa4bd381 100644 --- a/sound/firewire/tascam/amdtp-tascam.c +++ b/sound/firewire/tascam/amdtp-tascam.c @@ -238,7 +238,7 @@ int amdtp_tscm_init(struct amdtp_stream *s, struct fw_unit *unit, err = amdtp_stream_init(s, unit, dir, flags, fmt, process_ctx_payloads, sizeof(struct amdtp_tscm)); if (err < 0) - return 0; + return err; if (dir == AMDTP_OUT_STREAM) { // Use fixed value for FDF field. From a36b8b84ac4327b90ef5a22bc97cc96a92073330 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Thu, 31 Oct 2024 12:40:24 -0300 Subject: [PATCH 119/281] platform/x86: dell-smbios-base: Extends support to Alienware products MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following error: dell_smbios: Unable to run on non-Dell system Which is triggered after dell-wmi driver fails to initialize on Alienware systems, as it depends on dell-smbios. This effectively extends dell-wmi, dell-smbios and dcdbas support to Alienware devices, that might share some features of the SMBIOS intereface calling interface with other Dell products. Tested on an Alienware X15 R1. Signed-off-by: Kurt Borja Reviewed-by: Mario Limonciello Acked-by: Pali Rohár Link: https://lore.kernel.org/r/20241031154023.6149-2-kuurtb@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/dell/dell-smbios-base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index 73e41eb69cb5..01c72b91a50d 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -576,6 +576,7 @@ static int __init dell_smbios_init(void) int ret, wmi, smm; if (!dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Alienware", NULL) && !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "www.dell.com", NULL)) { pr_err("Unable to run on non-Dell system\n"); return -ENODEV; From ec61f0bb4feec3345626a2b93b970b6719743997 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Thu, 31 Oct 2024 12:44:42 -0300 Subject: [PATCH 120/281] platform/x86: dell-wmi-base: Handle META key Lock/Unlock events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Alienware devices have a key that locks/unlocks the Meta key. This key triggers a WMI event that should be ignored by the kernel, as it's handled by internally the firmware. There is no known way of changing this default behavior. The firmware would lock/unlock the Meta key, regardless of how the event is handled. Tested on an Alienware x15 R1. Signed-off-by: Kurt Borja Reviewed-by: Mario Limonciello Acked-by: Pali Rohár Link: https://lore.kernel.org/r/20241031154441.6663-2-kuurtb@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/dell/dell-wmi-base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 24fd7ffadda9..841a5414d28a 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -80,6 +80,12 @@ static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = { static const struct key_entry dell_wmi_keymap_type_0000[] = { { KE_IGNORE, 0x003a, { KEY_CAPSLOCK } }, + /* Meta key lock */ + { KE_IGNORE, 0xe000, { KEY_RIGHTMETA } }, + + /* Meta key unlock */ + { KE_IGNORE, 0xe001, { KEY_RIGHTMETA } }, + /* Key code is followed by brightness level */ { KE_KEY, 0xe005, { KEY_BRIGHTNESSDOWN } }, { KE_KEY, 0xe006, { KEY_BRIGHTNESSUP } }, From 36e66be874a7ea9d28fb9757629899a8449b8748 Mon Sep 17 00:00:00 2001 From: Renato Caldas Date: Sat, 2 Nov 2024 18:31:16 +0000 Subject: [PATCH 121/281] platform/x86: ideapad-laptop: add missing Ideapad Pro 5 fn keys The scancodes for the Mic Mute and Airplane keys on the Ideapad Pro 5 (14AHP9 at least, probably the other variants too) are different and were not being picked up by the driver. This adds them to the keymap. Apart from what is already supported, the remaining fn keys are unfortunately producing windows-specific key-combos. Signed-off-by: Renato Caldas Link: https://lore.kernel.org/r/20241102183116.30142-1-renato@calgera.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index c64dfc56651d..c908f52ed717 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1294,6 +1294,9 @@ static const struct key_entry ideapad_keymap[] = { { KE_KEY, 0x27 | IDEAPAD_WMI_KEY, { KEY_HELP } }, /* Refresh Rate Toggle */ { KE_KEY, 0x0a | IDEAPAD_WMI_KEY, { KEY_REFRESH_RATE_TOGGLE } }, + /* Specific to some newer models */ + { KE_KEY, 0x3e | IDEAPAD_WMI_KEY, { KEY_MICMUTE } }, + { KE_KEY, 0x3f | IDEAPAD_WMI_KEY, { KEY_RFKILL } }, { KE_END }, }; From fe09de2db2365eed8b44b572cff7d421eaf1754a Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Mon, 4 Nov 2024 18:00:55 +0800 Subject: [PATCH 122/281] ASoC: tas2781: Add new driver version for tas2563 & tas2781 qfn chip Add new driver version to support tas2563 & tas2781 qfn chip Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20241104100055.48-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-fmwlib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index ae360c97fe1e..0aeb88abbf52 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -1992,6 +1992,7 @@ static int tasdevice_dspfw_ready(const struct firmware *fmw, break; case 0x202: case 0x400: + case 0x401: tas_priv->fw_parse_variable_header = fw_parse_variable_header_git; tas_priv->fw_parse_program_data = From ebdcba2126a817da4efc085c9d4dce0c51942eba Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 4 Nov 2024 11:53:27 +0530 Subject: [PATCH 123/281] MAINTAINERS: update AMD SPI maintainer 'Sanjay R Mehta' is no longer with AMD, I will take over as the maintainer of the AMD SPI driver moving forward. I request to be added as the new maintainer. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20241104062327.1228521-1-Raju.Rangoju@amd.com Signed-off-by: Mark Brown --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a097afd76ded..3e251b357d54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1181,8 +1181,9 @@ F: Documentation/hid/amd-sfh* F: drivers/hid/amd-sfh-hid/ AMD SPI DRIVER -M: Sanjay R Mehta -S: Maintained +M: Raju Rangoju +L: linux-spi@vger.kernel.org +S: Supported F: drivers/spi/spi-amd.c AMD XGBE DRIVER From f16beaaee248eaa37ad40b5905924fcf70ae02e3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Oct 2024 08:48:39 +0300 Subject: [PATCH 124/281] thermal/drivers/qcom/lmh: Remove false lockdep backtrace Annotate LMH IRQs with lockdep classes so that the lockdep doesn't report possible recursive locking issue between LMH and GIC interrupts. For the reference: CPU0 ---- lock(&irq_desc_lock_class); lock(&irq_desc_lock_class); *** DEADLOCK *** Call trace: dump_backtrace+0x98/0xf0 show_stack+0x18/0x24 dump_stack_lvl+0x90/0xd0 dump_stack+0x18/0x24 print_deadlock_bug+0x258/0x348 __lock_acquire+0x1078/0x1f44 lock_acquire+0x1fc/0x32c _raw_spin_lock_irqsave+0x60/0x88 __irq_get_desc_lock+0x58/0x98 enable_irq+0x38/0xa0 lmh_enable_interrupt+0x2c/0x38 irq_enable+0x40/0x8c __irq_startup+0x78/0xa4 irq_startup+0x78/0x168 __enable_irq+0x70/0x7c enable_irq+0x4c/0xa0 qcom_cpufreq_ready+0x20/0x2c cpufreq_online+0x2a8/0x988 cpufreq_add_dev+0x80/0x98 subsys_interface_register+0x104/0x134 cpufreq_register_driver+0x150/0x234 qcom_cpufreq_hw_driver_probe+0x2a8/0x388 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x20c/0x62c worker_thread+0x1bc/0x36c kthread+0x120/0x124 ret_from_fork+0x10/0x20 Fixes: 53bca371cdf7 ("thermal/drivers/qcom: Add support for LMh driver") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241011-lmh-lockdep-v1-1-495cbbe6fef1@linaro.org Signed-off-by: Daniel Lezcano --- drivers/thermal/qcom/lmh.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c index 5225b3621a56..d2d49264cf83 100644 --- a/drivers/thermal/qcom/lmh.c +++ b/drivers/thermal/qcom/lmh.c @@ -73,7 +73,14 @@ static struct irq_chip lmh_irq_chip = { static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct lmh_hw_data *lmh_data = d->host_data; + static struct lock_class_key lmh_lock_key; + static struct lock_class_key lmh_request_key; + /* + * This lock class tells lockdep that GPIO irqs are in a different + * category than their parents, so it won't report false recursion. + */ + irq_set_lockdep_class(irq, &lmh_lock_key, &lmh_request_key); irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq); irq_set_chip_data(irq, lmh_data); From fcd54cf480c87b96313a97dbf898c644b7bb3a2e Mon Sep 17 00:00:00 2001 From: Emil Dahl Juhl Date: Tue, 15 Oct 2024 19:18:26 +0200 Subject: [PATCH 125/281] tools/lib/thermal: Fix sampling handler context ptr The sampling handler, provided by the user alongside a void* context, was invoked with an internal structure instead of the user context. Correct the invocation of the sampling handler to pass the user context pointer instead. Note that the approach taken is similar to that in events.c, and will reduce the chances of this mistake happening if additional sampling callbacks are added. Fixes: 47c4b0de080a ("tools/lib/thermal: Add a thermal library") Signed-off-by: Emil Dahl Juhl Link: https://lore.kernel.org/r/20241015171826.170154-1-emdj@bang-olufsen.dk Signed-off-by: Daniel Lezcano --- tools/lib/thermal/sampling.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c index 70577423a9f0..f67c1f9ea1d7 100644 --- a/tools/lib/thermal/sampling.c +++ b/tools/lib/thermal/sampling.c @@ -16,6 +16,8 @@ static int handle_thermal_sample(struct nl_msg *n, void *arg) struct thermal_handler_param *thp = arg; struct thermal_handler *th = thp->th; + arg = thp->arg; + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); switch (genlhdr->cmd) { From c5426dcc5a3a064bbd2de383e29035a14fe933e0 Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Thu, 12 Sep 2024 12:50:31 +0800 Subject: [PATCH 126/281] tools/lib/thermal: Remove the thermal.h soft link when doing make clean Run "make -C tools thermal" can create a soft link for thermal.h in tools/include/uapi/linux. Just rm it when make clean. Signed-off-by: zhang jiao Link: https://lore.kernel.org/r/20240912045031.18426-1-zhangjiao2@cmss.chinamobile.com Signed-off-by: Daniel Lezcano --- tools/lib/thermal/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 2d0d255fd0e1..8890fd57b110 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -121,7 +121,9 @@ all: fixdep clean: $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) \ + .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) \ + $(srctree)/tools/$(THERMAL_UAPI) $(LIBTHERMAL_PC): $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ From 725f31f300e300a9d94976bd8f1db6e746f95f63 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 18 Oct 2024 15:31:36 +0800 Subject: [PATCH 127/281] thermal/of: support thermal zones w/o trips subnode Although the current device tree binding of thermal zones require the trips subnode, the binding in kernel v5.15 does not require it, and many device trees shipped with the kernel, for example, allwinner/sun50i-a64.dtsi and mediatek/mt8183-kukui.dtsi in ARM64, still comply to the old binding and contain no trips subnode. Allow the code to successfully register thermal zones w/o trips subnode for DT binding compatibility now. Furtherly, the inconsistency between DTs and bindings should be resolved by either adding empty trips subnode or dropping the trips subnode requirement. Fixes: d0c75fa2c17f ("thermal/of: Initialize trip points separately") Signed-off-by: Icenowy Zheng [wenst@chromium.org: Reworked logic and kernel log messages] Signed-off-by: Chen-Yu Tsai Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20241018073139.1268995-1-wenst@chromium.org Signed-off-by: Daniel Lezcano --- drivers/thermal/thermal_of.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index a4caf7899f8e..07e09897165f 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -99,18 +99,15 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n struct device_node *trips; int ret, count; + *ntrips = 0; + trips = of_get_child_by_name(np, "trips"); - if (!trips) { - pr_err("Failed to find 'trips' node\n"); - return ERR_PTR(-EINVAL); - } + if (!trips) + return NULL; count = of_get_child_count(trips); - if (!count) { - pr_err("No trip point defined\n"); - ret = -EINVAL; - goto out_of_node_put; - } + if (!count) + return NULL; tt = kzalloc(sizeof(*tt) * count, GFP_KERNEL); if (!tt) { @@ -133,7 +130,6 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n out_kfree: kfree(tt); - *ntrips = 0; out_of_node_put: of_node_put(trips); @@ -401,11 +397,14 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { - pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); + pr_err("Failed to parse trip points for %pOFn id=%d\n", sensor, id); ret = PTR_ERR(trips); goto out_of_node_put; } + if (!trips) + pr_info("No trip points found for %pOFn id=%d\n", sensor, id); + ret = thermal_of_monitor_init(np, &delay, &pdelay); if (ret) { pr_err("Failed to initialize monitoring delays from %pOFn\n", np); From 10f0740234f0b157b41bdc7e9c3555a9b86c1599 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Oct 2024 16:28:06 +1100 Subject: [PATCH 128/281] sunrpc: handle -ENOTCONN in xs_tcp_setup_socket() xs_tcp_finish_connecting() can return -ENOTCONN but the switch statement in xs_tcp_setup_socket() treats that as an unhandled error. If we treat it as a known error it would propagate back to call_connect_status() which does handle that error code. This appears to be the intention of the commit (given below) which added -ENOTCONN as a return status for xs_tcp_finish_connecting(). So add -ENOTCONN to the switch statement as an error to pass through to the caller. Link: https://bugzilla.suse.com/show_bug.cgi?id=1231050 Link: https://access.redhat.com/discussions/3434091 Fixes: 01d37c428ae0 ("SUNRPC: xprt_connect() don't abort the task if the transport isn't bound") Signed-off-by: NeilBrown Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0e1691316f42..1326fbf45a34 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2459,6 +2459,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: + case -ENOTCONN: break; default: printk("%s: connect returned unhandled error %d\n", From 6e2a10343ecb71c4457bc16be05758f9c7aae7d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 4 Oct 2024 11:07:23 +1000 Subject: [PATCH 129/281] NFSv3: only use NFS timeout for MOUNT when protocols are compatible If a timeout is specified in the mount options, it currently applies to both the NFS protocol and (with v3) the MOUNT protocol. This is sensible when they both use the same underlying protocol, or those protocols are compatible w.r.t timeouts as RDMA and TCP are. However if, for example, NFS is using TCP and MOUNT is using UDP then using the same timeout doesn't make much sense. If you mount -o vers=3,proto=tcp,mountproto=udp,timeo=600,retrans=5 \ server:/path /mountpoint then the timeo=600 which was intended for the NFS/TCP request will apply to the MOUNT/UDP requests with the result that there will only be one request sent (because UDP has a maximum timeout of 60 seconds). This is not what a reasonable person might expect. This patch disables the sharing of timeout information in cases where the underlying protocols are not compatible. Fixes: c9301cb35b59 ("nfs: hornor timeo and retrans option when mounting NFSv3") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9723b6c53397..ae5c5e39afa0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -885,7 +885,15 @@ static int nfs_request_mount(struct fs_context *fc, * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount(&request, ctx->timeo, ctx->retrans); + if ((request.protocol == XPRT_TRANSPORT_UDP) == + !(ctx->flags & NFS_MOUNT_TCP)) + /* + * NFS protocol and mount protocol are both UDP or neither UDP + * so timeouts are compatible. Use NFS timeouts for MOUNT + */ + status = nfs_mount(&request, ctx->timeo, ctx->retrans); + else + status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS); if (status != 0) { dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", request.hostname, status); From dc270d7159699ad6d11decadfce9633f0f71c1db Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 25 Oct 2024 16:03:27 +0200 Subject: [PATCH 130/281] nfs: Fix KMSAN warning in decode_getfattr_attrs() Fix the following KMSAN warning: CPU: 1 UID: 0 PID: 7651 Comm: cp Tainted: G B Tainted: [B]=BAD_PAGE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) ===================================================== ===================================================== BUG: KMSAN: uninit-value in decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_generic+0x806/0xb00 nfs4_xdr_dec_getattr+0x1de/0x240 rpcauth_unwrap_resp_decode+0xab/0x100 rpcauth_unwrap_resp+0x95/0xc0 call_decode+0x4ff/0xb50 __rpc_execute+0x57b/0x19d0 rpc_execute+0x368/0x5e0 rpc_run_task+0xcfe/0xee0 nfs4_proc_getattr+0x5b5/0x990 __nfs_revalidate_inode+0x477/0xd00 nfs_access_get_cached+0x1021/0x1cc0 nfs_do_access+0x9f/0xae0 nfs_permission+0x1e4/0x8c0 inode_permission+0x356/0x6c0 link_path_walk+0x958/0x1330 path_lookupat+0xce/0x6b0 filename_lookup+0x23e/0x770 vfs_statx+0xe7/0x970 vfs_fstatat+0x1f2/0x2c0 __se_sys_newfstatat+0x67/0x880 __x64_sys_newfstatat+0xbd/0x120 x64_sys_call+0x1826/0x3cf0 do_syscall_64+0xd0/0x1b0 entry_SYSCALL_64_after_hwframe+0x77/0x7f The KMSAN warning is triggered in decode_getfattr_attrs(), when calling decode_attr_mdsthreshold(). It appears that fattr->mdsthreshold is not initialized. Fix the issue by initializing fattr->mdsthreshold to NULL in nfs_fattr_init(). Cc: stable@vger.kernel.org # v3.5.x Fixes: 88034c3d88c2 ("NFSv4.1 mdsthreshold attribute xdr") Signed-off-by: Roberto Sassu Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 542c7d97b235..1e71b029da58 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1633,6 +1633,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); From d054c5eb2890633935c23c371f45fb2d6b3b4b64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Oct 2024 09:35:43 -0400 Subject: [PATCH 131/281] NFS: Fix attribute delegation behaviour on exclusive create When the client does an exclusive create and the server decides to store the verifier in the timestamps, a SETATTR is subsequently sent to fix up those timestamps. When that is the case, suppress the exceptions for attribute delegations in nfs4_bitmap_copy_adjust(). Fixes: 32215c1f893a ("NFSv4: Don't request atime/mtime/size if they are delegated to us") Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cd2fbde2e6d7..9d40319e063d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3452,6 +3452,10 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, adjust_flags |= NFS_INO_INVALID_MODE; if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) adjust_flags |= NFS_INO_INVALID_OTHER; + if (sattr->ia_valid & ATTR_ATIME) + adjust_flags |= NFS_INO_INVALID_ATIME; + if (sattr->ia_valid & ATTR_MTIME) + adjust_flags |= NFS_INO_INVALID_MTIME; do { nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), From 40f45ab3814f2aff1ddada629c910aad982fc8e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Oct 2024 17:05:48 -0400 Subject: [PATCH 132/281] NFS: Further fixes to attribute delegation a/mtime changes When asked to set both an atime and an mtime to the current system time, ensure that the setting is atomic by calling inode_update_timestamps() only once with the appropriate flags. Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1e71b029da58..9e884e1ce5d8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -628,23 +628,35 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) } } +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + void nfs_update_delegated_atime(struct inode *inode) { spin_lock(&inode->i_lock); - if (nfs_have_delegated_atime(inode)) { - inode_update_timestamps(inode, S_ATIME); - NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME; - } + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); spin_unlock(&inode->i_lock); } void nfs_update_delegated_mtime_locked(struct inode *inode) { - if (nfs_have_delegated_mtime(inode)) { - inode_update_timestamps(inode, S_CTIME | S_MTIME); - NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME | - NFS_INO_INVALID_MTIME); - } + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); } void nfs_update_delegated_mtime(struct inode *inode) @@ -682,15 +694,16 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid &= ~ATTR_SIZE; } - if (nfs_have_delegated_mtime(inode)) { - if (attr->ia_valid & ATTR_MTIME) { - nfs_update_delegated_mtime(inode); - attr->ia_valid &= ~ATTR_MTIME; - } - if (attr->ia_valid & ATTR_ATIME) { - nfs_update_delegated_atime(inode); - attr->ia_valid &= ~ATTR_ATIME; - } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + spin_lock(&inode->i_lock); + nfs_update_timestamps(inode, attr->ia_valid); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME); + } else if (nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; } /* Optimization: if the end result is no change, don't RPC */ From bc2940869508b7b956a757a26d3b1ebf9546790e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Oct 2024 16:34:42 -0400 Subject: [PATCH 133/281] nfs_common: fix localio to cope with racing nfs_local_probe() Fix the possibility of racing nfs_local_probe() resulting in: list_add double add: new=ffff8b99707f9f58, prev=ffff8b99707f9f58, next=ffffffffc0f30000. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:35! Add nfs_uuid_init() to properly initialize all nfs_uuid_t members (particularly its list_head). Switch to returning bool from nfs_uuid_begin(), returns false if nfs_uuid_t is already in-use (its list_head is on a list). Update nfs_local_probe() to return early if the nfs_client's cl_uuid (nfs_uuid_t) is in-use. Also, switch nfs_uuid_begin() from using list_add_tail_rcu() to list_add_tail() -- rculist was used in an earlier version of the localio code that had a lockless nfs_uuid_lookup interface. Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 3 +-- fs/nfs/localio.c | 3 ++- fs/nfs_common/nfslocalio.c | 23 ++++++++++++++++++----- include/linux/nfslocalio.h | 3 ++- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 114282398716..03ecc7765615 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -181,8 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) #if IS_ENABLED(CONFIG_NFS_LOCALIO) seqlock_init(&clp->cl_boot_lock); ktime_get_real_ts64(&clp->cl_nfssvc_boot); - clp->cl_uuid.net = NULL; - clp->cl_uuid.dom = NULL; + nfs_uuid_init(&clp->cl_uuid); spin_lock_init(&clp->cl_localio_lock); #endif /* CONFIG_NFS_LOCALIO */ diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index d0aa680ec816..8f0ce82a677e 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -205,7 +205,8 @@ void nfs_local_probe(struct nfs_client *clp) nfs_local_disable(clp); } - nfs_uuid_begin(&clp->cl_uuid); + if (!nfs_uuid_begin(&clp->cl_uuid)) + return; if (nfs_server_uuid_is_local(clp)) nfs_local_enable(clp); nfs_uuid_end(&clp->cl_uuid); diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 5c8ce5066c16..09404d142d1a 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -5,7 +5,7 @@ */ #include -#include +#include #include #include @@ -20,15 +20,27 @@ static DEFINE_SPINLOCK(nfs_uuid_lock); */ static LIST_HEAD(nfs_uuids); -void nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +void nfs_uuid_init(nfs_uuid_t *nfs_uuid) { nfs_uuid->net = NULL; nfs_uuid->dom = NULL; - uuid_gen(&nfs_uuid->uuid); + INIT_LIST_HEAD(&nfs_uuid->list); +} +EXPORT_SYMBOL_GPL(nfs_uuid_init); +bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +{ spin_lock(&nfs_uuid_lock); - list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids); + /* Is this nfs_uuid already in use? */ + if (!list_empty(&nfs_uuid->list)) { + spin_unlock(&nfs_uuid_lock); + return false; + } + uuid_gen(&nfs_uuid->uuid); + list_add_tail(&nfs_uuid->list, &nfs_uuids); spin_unlock(&nfs_uuid_lock); + + return true; } EXPORT_SYMBOL_GPL(nfs_uuid_begin); @@ -36,7 +48,8 @@ void nfs_uuid_end(nfs_uuid_t *nfs_uuid) { if (nfs_uuid->net == NULL) { spin_lock(&nfs_uuid_lock); - list_del_init(&nfs_uuid->list); + if (nfs_uuid->net == NULL) + list_del_init(&nfs_uuid->list); spin_unlock(&nfs_uuid_lock); } } diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index b0dd9b1eef4f..3982fea79919 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -32,7 +32,8 @@ typedef struct { struct auth_domain *dom; /* auth_domain for localio */ } nfs_uuid_t; -void nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_init(nfs_uuid_t *); +bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); void nfs_uuid_is_local(const uuid_t *, struct list_head *, struct net *, struct auth_domain *, struct module *); From 867da60d463bb2a3e28c9235c487e56e96cffa00 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Oct 2024 17:15:41 -0400 Subject: [PATCH 134/281] nfs: avoid i_lock contention in nfs_clear_invalid_mapping Multi-threaded buffered reads to the same file exposed significant inode spinlock contention in nfs_clear_invalid_mapping(). Eliminate this spinlock contention by checking flags without locking, instead using smp_rmb and smp_load_acquire accordingly, but then take spinlock and double-check these inode flags. Also refactor nfs_set_cache_invalid() slightly to use smp_store_release() to pair with nfs_clear_invalid_mapping()'s smp_load_acquire(). While this fix is beneficial for all multi-threaded buffered reads issued by an NFS client, this issue was identified in the context of surprisingly low LOCALIO performance with 4K multi-threaded buffered read IO. This fix dramatically speeds up LOCALIO performance: before: read: IOPS=1583k, BW=6182MiB/s (6482MB/s)(121GiB/20002msec) after: read: IOPS=3046k, BW=11.6GiB/s (12.5GB/s)(232GiB/20001msec) Fixes: 17dfeb911339 ("NFS: Fix races in nfs_revalidate_mapping") Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9e884e1ce5d8..596f35170137 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -205,12 +205,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfs_fscache_invalidate(inode, 0); flags &= ~NFS_INO_REVAL_FORCED; - nfsi->cache_validity |= flags; + flags |= nfsi->cache_validity; + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; - if (inode->i_mapping->nrpages == 0) { - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - nfs_ooo_clear(nfsi); - } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_ooo_clear(nfsi); } trace_nfs_set_cache_invalid(inode, 0); @@ -1421,6 +1424,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); From 7fd3fa006fa56c0ec299c61ecf5c572c723adad5 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 8 Oct 2024 13:06:27 +0530 Subject: [PATCH 135/281] drm/xe: Set mask bits for CCS_MODE register CCS_MODE register requires setting mask bits from Xe2+ platforms. Set the mask bits unconditionally, as those bits are unused for older platforms. Signed-off-by: Balasubramani Vivekanandan Cc: stable@vger.kernel.org # v6.11+ Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241008073628.377433-2-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 23ea2c7572d4735ef66beb1e4feb8ae510b78247) [ Fix conflict with mmio refactors ] Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 00ad34ed73a5..bd604b9f08e4 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -517,7 +517,7 @@ * [4-6] RSVD * [7] Disabled */ -#define CCS_MODE XE_REG(0x14804) +#define CCS_MODE XE_REG(0x14804, XE_REG_OPTION_MASKED) #define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ #define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ #define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index d2e4dc3aaf61..b8d832c8f907 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) } } + /* + * Mask bits need to be set for the register. Though only Xe2+ + * platforms require setting of mask bits, it won't harm for older + * platforms as these bits are unused there. + */ + mode |= CCS_MODE_CSLICE_0_3_MASK << 16; xe_mmio_write32(gt, CCS_MODE, mode); xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", From 4b468a92ddb2985da66823910a1643349fe6447d Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 8 Oct 2024 13:06:28 +0530 Subject: [PATCH 136/281] drm/xe: Use the filelist from drm for ccs_mode change Drop the exclusive client count tracking and use the filelist from the drm to track the active clients. This also ensures the clients created internally by the driver won't block changing the ccs mode. Fixes: ce8c161cbad4 ("drm/xe: Add ref counting for xe_file") Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20241008073628.377433-3-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 1c35f1ed1fe3c649f8c16214d0d3dd828b5265d9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 10 ---------- drivers/gpu/drm/xe/xe_device_types.h | 9 --------- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 9 +++++---- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 10fd4601b9f2..a1987b554a8d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -87,10 +87,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) mutex_init(&xef->exec_queue.lock); xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); - spin_lock(&xe->clients.lock); - xe->clients.count++; - spin_unlock(&xe->clients.lock); - file->driver_priv = xef; kref_init(&xef->refcount); @@ -107,17 +103,12 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) static void xe_file_destroy(struct kref *ref) { struct xe_file *xef = container_of(ref, struct xe_file, refcount); - struct xe_device *xe = xef->xe; xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); - xe_drm_client_put(xef->client); kfree(xef->process_name); kfree(xef); @@ -333,7 +324,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.force_execlist = xe_modparam.force_execlist; spin_lock_init(&xe->irq.lock); - spin_lock_init(&xe->clients.lock); init_waitqueue_head(&xe->ufence_wq); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 09d731a9125c..687f3a9039bb 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -353,15 +353,6 @@ struct xe_device { struct workqueue_struct *wq; } sriov; - /** @clients: drm clients info */ - struct { - /** @clients.lock: Protects drm clients info */ - spinlock_t lock; - - /** @clients.count: number of drm clients */ - u64 count; - } clients; - /** @usm: unified memory state */ struct { /** @usm.asid: convert a ASID to VM */ diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index b8d832c8f907..ffcbd05671fc 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -139,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, } /* CCS mode can only be updated when there are no drm clients */ - spin_lock(&xe->clients.lock); - if (xe->clients.count) { - spin_unlock(&xe->clients.lock); + mutex_lock(&xe->drm.filelist_mutex); + if (!list_empty(&xe->drm.filelist)) { + mutex_unlock(&xe->drm.filelist_mutex); + xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n"); return -EBUSY; } @@ -152,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_reset_async(gt); } - spin_unlock(&xe->clients.lock); + mutex_unlock(&xe->drm.filelist_mutex); return count; } From 55e8a3f37e54eb1c7b914d6d5565a37282ec1978 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:15 +0100 Subject: [PATCH 137/281] drm/xe: Move LNL scheduling WA to xe_device.h Move LNL scheduling WA to xe_device.h so this can be used in other places without needing keep the same comment about removal of this WA in the future. The WA, which flushes work or workqueues, is now wrapped in macros and can be reused wherever needed. Cc: Badal Nilawar Cc: Matthew Auld Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi cc: stable@vger.kernel.org # v6.11+ Suggested-by: John Harrison Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit cbe006a6492c01a0058912ae15d473f4c149896c) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_guc_ct.c | 11 +---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 894f04770454..34620ef855c0 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -178,4 +178,18 @@ void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); void xe_file_put(struct xe_file *xef); +/* + * Occasionally it is seen that the G2H worker starts running after a delay of more than + * a second even after being queued and activated by the Linux workqueue subsystem. This + * leads to G2H timeout error. The root cause of issue lies with scheduling latency of + * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS + * and this is beyond xe kmd. + * + * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. + */ +#define LNL_FLUSH_WORKQUEUE(wq__) \ + flush_workqueue(wq__) +#define LNL_FLUSH_WORK(wrk__) \ + flush_work(wrk__) + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 17986bfd8818..9c505d3517cd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -897,17 +897,8 @@ retry_same_fence: ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); - /* - * Occasionally it is seen that the G2H worker starts running after a delay of more than - * a second even after being queued and activated by the Linux workqueue subsystem. This - * leads to G2H timeout error. The root cause of issue lies with scheduling latency of - * Lunarlake Hybrid CPU. Issue dissappears if we disable Lunarlake atom cores from BIOS - * and this is beyond xe kmd. - * - * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU. - */ if (!ret) { - flush_work(&ct->g2h_worker); + LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) { xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", g2h_fence.seqno, action[0]); From 7d1e2580ed166f36949b468373b468d188880cd3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:16 +0100 Subject: [PATCH 138/281] drm/xe/ufence: Flush xe ordered_wq in case of ufence timeout Flush xe ordered_wq in case of ufence timeout which is observed on LNL and that points to recent scheduling issue with E-cores. This is similar to the recent fix: commit e51527233804 ("drm/xe/guc/ct: Flush g2h worker in case of g2h response timeout") and should be removed once there is a E-core scheduling fix for LNL. v2: Add platform check(Himal) s/__flush_workqueue/flush_workqueue(Jani) v3: Remove gfx platform check as the issue related to cpu platform(John) v4: Use the Common macro(John) and print when the flush resolves timeout(Matt B) Cc: Badal Nilawar Cc: Matthew Auld Cc: John Harrison Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: stable@vger.kernel.org # v6.11+ Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2754 Suggested-by: Matthew Brost Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 38c4c8722bd74452280951edc44c23de47612001) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index f5deb81eba01..5b4264ea38bd 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, } if (!timeout) { + LNL_FLUSH_WORKQUEUE(xe->ordered_wq); + err = do_compare(addr, args->value, args->mask, + args->op); + if (err <= 0) { + drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n"); + break; + } err = -ETIME; break; } From 1491efb39acee3848b61fcb3e5cc4be8de304352 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Oct 2024 13:01:17 +0100 Subject: [PATCH 139/281] drm/xe/guc/tlb: Flush g2h worker in case of tlb timeout Flush the g2h worker explicitly if TLB timeout happens which is observed on LNL and that points to the recent scheduling issue with E-cores on LNL. This is similar to the recent fix: commit e51527233804 ("drm/xe/guc/ct: Flush g2h worker in case of g2h response timeout") and should be removed once there is E core scheduling fix. v2: Add platform check(Himal) v3: Remove gfx platform check as the issue related to cpu platform(John) Use the common WA macro(John) and print when the flush resolves timeout(Matt B) v4: Remove the resolves log and do the flush before taking pending_lock(Matt A) Cc: Badal Nilawar Cc: Matthew Brost Cc: Matthew Auld Cc: John Harrison Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: stable@vger.kernel.org # v6.11+ Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2687 Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20241029120117.449694-3-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit e1f6fa55664a0eeb0a641f497e1adfcf6672e995) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index bbb9e411d21f..9d82ea30f4df 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -72,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) struct xe_device *xe = gt_to_xe(gt); struct xe_gt_tlb_invalidation_fence *fence, *next; + LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); + spin_lock_irq(>->tlb_invalidation.pending_lock); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { From 5a4510c762fc04c74cff264cd4d9e9f5bf364bae Mon Sep 17 00:00:00 2001 From: Zichen Xie Date: Mon, 21 Oct 2024 14:54:45 -0500 Subject: [PATCH 140/281] dm-unstriped: cast an operand to sector_t to prevent potential uint32_t overflow This was found by a static analyzer. There may be a potential integer overflow issue in unstripe_ctr(). uc->unstripe_offset and uc->unstripe_width are defined as "sector_t"(uint64_t), while uc->unstripe, uc->chunk_size and uc->stripes are all defined as "uint32_t". The result of the calculation will be limited to "uint32_t" without correct casting. So, we recommend adding an extra cast to prevent potential integer overflow. Fixes: 18a5bf270532 ("dm: add unstriped target") Signed-off-by: Zichen Xie Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-unstripe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 48587c16c445..e8a9432057dc 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -85,8 +85,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } uc->physical_start = start; - uc->unstripe_offset = uc->unstripe * uc->chunk_size; - uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size; + uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size; + uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size; uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0; tmp_len = ti->len; From a674d0cd56f47628e8057232833cd0654c85d50b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 29 Oct 2024 12:17:13 +0100 Subject: [PATCH 141/281] dm-verity: don't crash if panic_on_corruption is not selected If the user sets panic_on_error and doesn't set panic_on_corruption, dm-verity should not panic on data mismatch. But, currently it panics, because it treats data mismatch as I/O error. This commit fixes the logic so that if there is data mismatch and panic_on_corruption or restart_on_corruption is not selected, the system won't restart or panic. Signed-off-by: Mikulas Patocka Reviewed-by: Sami Tolvanen Fixes: f811b83879fb ("dm-verity: introduce the options restart_on_error and panic_on_error") --- drivers/md/dm-verity-target.c | 9 ++++++--- drivers/md/dm-verity.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 7d4d90b4395a..c142ec5458b7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -356,9 +356,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { - struct bio *bio = - dm_bio_from_per_bio_data(io, - v->ti->per_io_data_size); + struct bio *bio; + io->had_mismatch = true; + bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); dm_audit_log_bio(DM_MSG_PREFIX, "verify-metadata", bio, block, 0); r = -EIO; @@ -482,6 +482,7 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, return -EIO; /* Error correction failed; Just return error */ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) { + io->had_mismatch = true; dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0); return -EIO; } @@ -606,6 +607,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (unlikely(status != BLK_STS_OK) && unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !io->had_mismatch && !verity_is_system_shutting_down()) { if (v->error_mode == DM_VERITY_MODE_PANIC) { panic("dm-verity device has I/O error"); @@ -779,6 +781,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) io->orig_bi_end_io = bio->bi_end_io; io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; + io->had_mismatch = false; bio->bi_end_io = verity_end_io; bio->bi_private = io; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 6b75159bf835..c996140bda94 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -92,6 +92,7 @@ struct dm_verity_io { sector_t block; unsigned int n_blocks; bool in_bh; + bool had_mismatch; struct work_struct work; struct work_struct bh_work; From 235d2e739fcbe964c9ce179b4c991025662dcdb6 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:22 +0800 Subject: [PATCH 142/281] dm cache: correct the number of origin blocks to match the target length When creating a cache device, the actual size of the cache origin might be greater than the specified cache target length. In such case, the number of origin blocks should match the cache target length, not the full size of the origin device, since access beyond the cache target is not possible. This issue occurs when reducing the origin device size using lvm, as lvreduce preloads the new cache table before resuming the cache origin, which can result in incorrect sizes for the discard bitset and smq hotspot blocks. Reproduce steps: 1. create a cache device consists of 4096 origin blocks dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. reduce the cache origin to 2048 oblocks, in lvreduce's approach dmsetup reload corig --table "0 262144 linear /dev/sdc 262144" dmsetup reload cache --table "0 262144 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup suspend cache dmsetup suspend corig dmsetup suspend cdata dmsetup suspend cmeta dmsetup resume corig dmsetup resume cdata dmsetup resume cmeta dmsetup resume cache 3. shutdown the cache, and check the number of discard blocks in superblock. The value is expected to be 2048, but actually is 4096. dmsetup remove cache corig cdata cmeta dd if=/dev/sdc bs=1c count=8 skip=224 2>/dev/null | hexdump -e '1/8 "%u\n"' Fix by correcting the origin_blocks initialization in cache_create and removing the unused origin_sectors from struct cache_args accordingly. Signed-off-by: Ming-Hung Tsai Fixes: c6b4fcbad044 ("dm: add cache target") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index aaeeabfab09b..90772b42c234 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2003,7 +2003,6 @@ struct cache_args { sector_t cache_sectors; struct dm_dev *origin_dev; - sector_t origin_sectors; uint32_t block_size; @@ -2084,6 +2083,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, char **error) { + sector_t origin_sectors; int r; if (!at_least_one_arg(as, error)) @@ -2096,8 +2096,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, return r; } - ca->origin_sectors = get_dev_size(ca->origin_dev); - if (ca->ti->len > ca->origin_sectors) { + origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > origin_sectors) { *error = "Device size larger than cached device"; return -EINVAL; } @@ -2407,7 +2407,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; - origin_blocks = cache->origin_sectors = ca->origin_sectors; + origin_blocks = cache->origin_sectors = ti->len; origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); From 135496c208ba26fd68cdef10b64ed7a91ac9a7ff Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:49 +0800 Subject: [PATCH 143/281] dm cache: fix flushing uninitialized delayed_work on cache_ctr error An unexpected WARN_ON from flush_work() may occur when cache creation fails, caused by destroying the uninitialized delayed_work waker in the error path of cache_create(). For example, the warning appears on the superblock checksum error. Reproduce steps: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/urandom of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" Kernel logs: (snip) WARNING: CPU: 0 PID: 84 at kernel/workqueue.c:4178 __flush_work+0x5d4/0x890 Fix by pulling out the cancel_delayed_work_sync() from the constructor's error path. This patch doesn't affect the use-after-free fix for concurrent dm_resume and dm_destroy (commit 6a459d8edbdb ("dm cache: Fix UAF in destroy()")) as cache_dtr is not changed. Signed-off-by: Ming-Hung Tsai Fixes: 6a459d8edbdb ("dm cache: Fix UAF in destroy()") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 90772b42c234..68e9a1abd303 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1905,16 +1905,13 @@ static void check_migrations(struct work_struct *ws) * This function gets called on the error paths of the constructor, so we * have to cope with a partially initialised struct. */ -static void destroy(struct cache *cache) +static void __destroy(struct cache *cache) { - unsigned int i; - mempool_exit(&cache->migration_pool); if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); - cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); @@ -1942,13 +1939,22 @@ static void destroy(struct cache *cache) if (cache->policy) dm_cache_policy_destroy(cache->policy); + bioset_exit(&cache->bs); + + kfree(cache); +} + +static void destroy(struct cache *cache) +{ + unsigned int i; + + cancel_delayed_work_sync(&cache->waker); + for (i = 0; i < cache->nr_ctr_args ; i++) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); - bioset_exit(&cache->bs); - - kfree(cache); + __destroy(cache); } static void cache_dtr(struct dm_target *ti) @@ -2561,7 +2567,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) *result = cache; return 0; bad: - destroy(cache); + __destroy(cache); return r; } @@ -2612,7 +2618,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv) r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { - destroy(cache); + __destroy(cache); goto out; } From 792227719725497ce10a8039803bec13f89f8910 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:16 +0800 Subject: [PATCH 144/281] dm cache: fix out-of-bounds access to the dirty bitset when resizing dm-cache checks the dirty bits of the cache blocks to be dropped when shrinking the fast device, but an index bug in bitset iteration causes out-of-bounds access. Reproduce steps: 1. create a cache device of 1024 cache blocks (128 bytes dirty bitset) dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 131072 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. shrink the fast device to 512 cache blocks, triggering out-of-bounds access to the dirty bitset (offset 0x80) dmsetup suspend cache dmsetup reload cdata --table "0 65536 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in cache_preresume+0x269/0x7b0 Read of size 8 at addr ffffc900000f3080 by task dmsetup/131 (...snip...) The buggy address belongs to the virtual mapping at [ffffc900000f3000, ffffc900000f5000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc900000f2f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffc900000f3080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc900000f3100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3180: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by making the index post-incremented. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 68e9a1abd303..1fcd8c5c220e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2912,13 +2912,13 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) * We can't drop a dirty block when shrinking the cache. */ while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - new_size = to_cblock(from_cblock(new_size) + 1); if (is_dirty(cache, new_size)) { DMERR("%s: unable to shrink cache; cache block %llu is dirty", cache_device_name(cache), (unsigned long long) from_cblock(new_size)); return false; } + new_size = to_cblock(from_cblock(new_size) + 1); } return true; From f484697e619a83ecc370443a34746379ad99d204 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:39 +0800 Subject: [PATCH 145/281] dm cache: optimize dirty bit checking with find_next_bit when resizing When shrinking the fast device, dm-cache iteratively searches for a dirty bit among the cache blocks to be dropped, which is less efficient. Use find_next_bit instead, as it is twice as fast as the iterative approach with test_bit. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1fcd8c5c220e..fa8ef2c32af8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2911,14 +2911,14 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) /* * We can't drop a dirty block when shrinking the cache. */ - while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - if (is_dirty(cache, new_size)) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; - } - new_size = to_cblock(from_cblock(new_size) + 1); + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; } return true; From c0ade5d98979585d4f5a93e4514c2e9a65afa08d Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:54 +0800 Subject: [PATCH 146/281] dm cache: fix potential out-of-bounds access on the first resume Out-of-bounds access occurs if the fast device is expanded unexpectedly before the first-time resume of the cache table. This happens because expanding the fast device requires reloading the cache table for cache_create to allocate new in-core data structures that fit the new size, and the check in cache_preresume is not performed during the first resume, leading to the issue. Reproduce steps: 1. prepare component devices: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct 2. load a cache table of 512 cache blocks, and deliberately expand the fast device before resuming the cache, making the in-core data structures inadequate. dmsetup create cache --notable dmsetup reload cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup reload cdata --table "0 131072 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache 3. suspend the cache to write out the in-core dirty bitset and hint array, leading to out-of-bounds access to the dirty bitset at offset 0x40: dmsetup suspend cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in is_dirty_callback+0x2b/0x80 Read of size 8 at addr ffffc90000085040 by task dmsetup/90 (...snip...) The buggy address belongs to the virtual mapping at [ffffc90000085000, ffffc90000087000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc90000084f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000084f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >ffffc90000085000: 00 00 00 00 00 00 00 00 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc90000085080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000085100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by checking the size change on the first resume. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 37 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index fa8ef2c32af8..40709310e327 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2901,24 +2901,24 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { if (from_cblock(new_size) > from_cblock(cache->cache_size)) { - if (cache->sized) { - DMERR("%s: unable to extend cache due to missing cache table reload", - cache_device_name(cache)); - return false; - } + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; } /* * We can't drop a dirty block when shrinking the cache. */ - new_size = to_cblock(find_next_bit(cache->dirty_bitset, - from_cblock(cache->cache_size), - from_cblock(new_size))); - if (new_size != cache->cache_size) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; + if (cache->loaded_mappings) { + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; + } } return true; @@ -2949,20 +2949,15 @@ static int cache_preresume(struct dm_target *ti) /* * Check to see if the cache has resized. */ - if (!cache->sized) { - r = resize_cache_dev(cache, csize); - if (r) - return r; - - cache->sized = true; - - } else if (csize != cache->cache_size) { + if (!cache->sized || csize != cache->cache_size) { if (!can_resize(cache, csize)) return -EINVAL; r = resize_cache_dev(cache, csize); if (r) return r; + + cache->sized = true; } if (!cache->loaded_mappings) { From b6ec62e01aa4229bc9d3861d1073806767ea7838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=B6lzl?= Date: Wed, 23 Oct 2024 16:52:57 +0200 Subject: [PATCH 147/281] can: j1939: fix error in J1939 documentation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The description of PDU1 format usage mistakenly referred to PDU2 format. Signed-off-by: Alexander Hölzl Acked-by: Oleksij Rempel Acked-by: Vincent Mailhol Link: https://patch.msgid.link/20241023145257.82709-1-alexander.hoelzl@gmx.net Signed-off-by: Marc Kleine-Budde --- Documentation/networking/j1939.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst index e4bd7aa1f5aa..544bad175aae 100644 --- a/Documentation/networking/j1939.rst +++ b/Documentation/networking/j1939.rst @@ -121,7 +121,7 @@ format, the Group Extension is set in the PS-field. On the other hand, when using PDU1 format, the PS-field contains a so-called Destination Address, which is _not_ part of the PGN. When communicating a PGN -from user space to kernel (or vice versa) and PDU2 format is used, the PS-field +from user space to kernel (or vice versa) and PDU1 format is used, the PS-field of the PGN shall be set to zero. The Destination Address shall be set elsewhere. From 7b22846f8af5ab2f267de9eb209fb1835ee9978c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BChlbacher?= Date: Thu, 19 Sep 2024 17:35:22 +0000 Subject: [PATCH 148/281] can: {cc770,sja1000}_isa: allow building on x86_64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ISA variable is only defined if X86_32 is also defined. However, these drivers are still useful and in use on at least some modern 64-bit x86 industrial systems as well. With the correct module parameters, they work as long as IO port communication is possible, despite their name having ISA in them. Fixes: a29689e60ed3 ("net: handle HAS_IOPORT dependencies") Signed-off-by: Thomas Mühlbacher Link: https://patch.msgid.link/20240919174151.15473-2-tmuehlbacher@posteo.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/Kconfig | 2 +- drivers/net/can/sja1000/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/cc770/Kconfig b/drivers/net/can/cc770/Kconfig index 467ef19de1c1..aae25c2f849e 100644 --- a/drivers/net/can/cc770/Kconfig +++ b/drivers/net/can/cc770/Kconfig @@ -7,7 +7,7 @@ if CAN_CC770 config CAN_CC770_ISA tristate "ISA Bus based legacy CC770 driver" - depends on ISA + depends on HAS_IOPORT help This driver adds legacy support for CC770 and AN82527 chips connected to the ISA bus using I/O port, memory mapped or diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig index 01168db4c106..2f516cc6d22c 100644 --- a/drivers/net/can/sja1000/Kconfig +++ b/drivers/net/can/sja1000/Kconfig @@ -87,7 +87,7 @@ config CAN_PLX_PCI config CAN_SJA1000_ISA tristate "ISA Bus based legacy SJA1000 driver" - depends on ISA + depends on HAS_IOPORT help This driver adds legacy support for SJA1000 chips connected to the ISA bus using I/O port, memory mapped or indirect access. From e4de81f9e134c78ff7c75a00e43bd819643530d0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 30 Sep 2024 19:02:30 +0200 Subject: [PATCH 149/281] can: m_can: m_can_close(): don't call free_irq() for IRQ-less devices In commit b382380c0d2d ("can: m_can: Add hrtimer to generate software interrupt") support for IRQ-less devices was added. Instead of an interrupt, the interrupt routine is called by a hrtimer-based polling loop. That patch forgot to change free_irq() to be only called for devices with IRQs. Fix this, by calling free_irq() conditionally only if an IRQ is available for the device (and thus has been requested previously). Fixes: b382380c0d2d ("can: m_can: Add hrtimer to generate software interrupt") Reviewed-by: Simon Horman Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20240930-m_can-cleanups-v1-1-001c579cdee4@pengutronix.de Cc: # v6.6+ Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index a978b960f1f1..16e9e7d7527d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1765,7 +1765,8 @@ static int m_can_close(struct net_device *dev) netif_stop_queue(dev); m_can_stop(dev); - free_irq(dev->irq, dev); + if (dev->irq) + free_irq(dev->irq, dev); m_can_clean(dev); From 4d6d26537940f3b3e17138987ed9e4a334780bf7 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 14 Oct 2024 15:53:13 +0200 Subject: [PATCH 150/281] can: c_can: fix {rx,tx}_errors statistics The c_can_handle_bus_err() function was incorrectly incrementing only the receive error counter, even in cases of bit or acknowledgment errors that occur during transmission. The patch fixes the issue by incrementing the appropriate counter based on the type of error. Fixes: 881ff67ad450 ("can: c_can: Added support for Bosch C_CAN controller") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241014135319.2009782-1-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index c63f7fc1e691..511615dc3341 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -1011,7 +1011,6 @@ static int c_can_handle_bus_err(struct net_device *dev, /* common for all type of bus errors */ priv->can.can_stats.bus_error++; - stats->rx_errors++; /* propagate the error condition to the CAN stack */ skb = alloc_can_err_skb(dev, &cf); @@ -1027,26 +1026,32 @@ static int c_can_handle_bus_err(struct net_device *dev, case LEC_STUFF_ERROR: netdev_dbg(dev, "stuff error\n"); cf->data[2] |= CAN_ERR_PROT_STUFF; + stats->rx_errors++; break; case LEC_FORM_ERROR: netdev_dbg(dev, "form error\n"); cf->data[2] |= CAN_ERR_PROT_FORM; + stats->rx_errors++; break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); cf->data[3] = CAN_ERR_PROT_LOC_ACK; + stats->tx_errors++; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); cf->data[2] |= CAN_ERR_PROT_BIT1; + stats->tx_errors++; break; case LEC_BIT0_ERROR: netdev_dbg(dev, "bit0 error\n"); cf->data[2] |= CAN_ERR_PROT_BIT0; + stats->tx_errors++; break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + stats->rx_errors++; break; default: break; From 4384b8b6ec4643aa73487bd1dc458e236c320564 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Sep 2024 11:15:31 +0200 Subject: [PATCH 151/281] can: rockchip_canfd: CAN_ROCKCHIP_CANFD should depend on ARCH_ROCKCHIP The Rockchip CAN-FD controller is only present on Rockchip SoCs. Hence add a dependency on ARCH_ROCKCHIP, to prevent asking the user about this driver when configuring a kernel without Rockchip platform support. Fixes: ff60bfbaf67f219c ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Signed-off-by: Geert Uytterhoeven Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/a4b3c8c1cca9515e67adac83af5ba1b1fab2fcbc.1727169288.git.geert+renesas@glider.be Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/rockchip/Kconfig b/drivers/net/can/rockchip/Kconfig index e029e2a3ca4b..fd8d9f5eeaa4 100644 --- a/drivers/net/can/rockchip/Kconfig +++ b/drivers/net/can/rockchip/Kconfig @@ -3,6 +3,7 @@ config CAN_ROCKCHIP_CANFD tristate "Rockchip CAN-FD controller" depends on OF || COMPILE_TEST + depends on ARCH_ROCKCHIP || COMPILE_TEST select CAN_RX_OFFLOAD help Say Y here if you want to use CAN-FD controller found on From 51e102ec23b25e6ca45ed45c3b9be42cb48d63dd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 22 Oct 2024 13:04:39 +0200 Subject: [PATCH 152/281] can: rockchip_canfd: Drop obsolete dependency on COMPILE_TEST Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), OF can be enabled on all architectures. Therefore depending on COMPILE_TEST as an alternative is no longer needed. Signed-off-by: Jean Delvare Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20241022130439.70d016e9@endymion.delvare Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/Kconfig b/drivers/net/can/rockchip/Kconfig index fd8d9f5eeaa4..d203c530551f 100644 --- a/drivers/net/can/rockchip/Kconfig +++ b/drivers/net/can/rockchip/Kconfig @@ -2,7 +2,7 @@ config CAN_ROCKCHIP_CANFD tristate "Rockchip CAN-FD controller" - depends on OF || COMPILE_TEST + depends on OF depends on ARCH_ROCKCHIP || COMPILE_TEST select CAN_RX_OFFLOAD help From eb9a839b3d8a989be5970035a5cf29bcd6ffd24d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 25 Oct 2024 14:34:40 +0200 Subject: [PATCH 153/281] can: mcp251xfd: mcp251xfd_ring_alloc(): fix coalescing configuration when switching CAN modes Since commit 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode"), the current ring and coalescing configuration is passed to can_ram_get_layout(). That fixed the issue when switching between CAN-CC and CAN-FD mode with configured ring (rx, tx) and/or coalescing parameters (rx-frames-irq, tx-frames-irq). However 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode"), introduced a regression when switching CAN modes with disabled coalescing configuration: Even if the previous CAN mode has no coalescing configured, the new mode is configured with active coalescing. This leads to delayed receiving of CAN-FD frames. This comes from the fact, that ethtool uses usecs = 0 and max_frames = 1 to disable coalescing, however the driver uses internally priv->{rx,tx}_obj_num_coalesce_irq = 0 to indicate disabled coalescing. Fix the regression by assigning struct ethtool_coalesce ec->{rx,tx}_max_coalesced_frames_irq = 1 if coalescing is disabled in the driver as can_ram_get_layout() expects this. Reported-by: https://github.com/vdh-robothania Closes: https://github.com/raspberrypi/linux/issues/6407 Fixes: 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode") Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241025-mcp251xfd-fix-coalesing-v1-1-9d11416de1df@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index e684991fa391..7209a831f0f2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -2,7 +2,7 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Copyright (c) 2019, 2020, 2021, 2024 Pengutronix, // Marc Kleine-Budde // // Based on: @@ -483,9 +483,11 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) }; const struct ethtool_coalesce ec = { .rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq, - .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq, + .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq == 0 ? + 1 : priv->rx_obj_num_coalesce_irq, .tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq, - .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq, + .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq == 0 ? + 1 : priv->tx_obj_num_coalesce_irq, }; struct can_ram_layout layout; From 3c1c18551e6ac1b988d0a05c5650e3f6c95a1b8a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 1 Oct 2024 16:56:22 +0200 Subject: [PATCH 154/281] can: mcp251xfd: mcp251xfd_get_tef_len(): fix length calculation Commit b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") introduced mcp251xfd_get_tef_len() to get the number of unhandled transmit events from the Transmit Event FIFO (TEF). As the TEF has no head pointer, the driver uses the TX FIFO's tail pointer instead, assuming that send frames are completed. However the check for the TEF being full was not correct. This leads to the driver stop working if the TEF is full. Fix the TEF full check by assuming that if, from the driver's point of view, there are no free TX buffers in the chip and the TX FIFO is empty, all messages must have been sent and the TEF must therefore be full. Reported-by: Sven Schuchmann Closes: https://patch.msgid.link/FR3P281MB155216711EFF900AD9791B7ED9692@FR3P281MB1552.DEUP281.PROD.OUTLOOK.COM Fixes: b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") Tested-by: Sven Schuchmann Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20241104-mcp251xfd-fix-length-calculation-v3-1-608b6e7e2197@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c index f732556d233a..d3ac865933fd 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c @@ -16,9 +16,9 @@ #include "mcp251xfd.h" -static inline bool mcp251xfd_tx_fifo_sta_full(u32 fifo_sta) +static inline bool mcp251xfd_tx_fifo_sta_empty(u32 fifo_sta) { - return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF); + return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF; } static inline int @@ -122,7 +122,11 @@ mcp251xfd_get_tef_len(struct mcp251xfd_priv *priv, u8 *len_p) if (err) return err; - if (mcp251xfd_tx_fifo_sta_full(fifo_sta)) { + /* If the chip says the TX-FIFO is empty, but there are no TX + * buffers free in the ring, we assume all have been sent. + */ + if (mcp251xfd_tx_fifo_sta_empty(fifo_sta) && + mcp251xfd_get_tx_free(tx_ring) == 0) { *len_p = tx_ring->obj_num; return 0; } From 4f26c95ffc21a91281429ed60180619bae19ae92 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 9 Oct 2024 17:09:38 +0800 Subject: [PATCH 155/281] drm/amd/display: Fix brightness level not retained over reboot [Why] During boot up and resume the DC layer will reset the panel brightness to fix a flicker issue. It will cause the dm->actual_brightness is not the current panel brightness level. (the dm->brightness is the correct panel level) [How] Set the backlight level after do the set mode. Cc: Mario Limonciello Cc: Alex Deucher Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3655 Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 7875afafba84817b791be6d2282b836695146060) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 13421a58210d..07e9ce99694f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9429,6 +9429,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; + bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -9548,6 +9549,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; + set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -9599,6 +9601,19 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->otg_inst = status->primary_otg_inst; } } + + /* During boot up and resume the DC layer will reset the panel brightness + * to fix a flicker issue. + * It will cause the dm->actual_brightness is not the current panel brightness + * level. (the dm->brightness is the correct panel level) + * So we set the backlight level with dm->brightness value after set mode + */ + if (set_backlight_level) { + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + } } static void dm_set_writeback(struct amdgpu_display_manager *dm, From 694c79769cb384bca8b1ec1d1e84156e726bd106 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 18 Oct 2024 10:52:16 -0400 Subject: [PATCH 156/281] drm/amd/display: parse umc_info or vram_info based on ASIC An upstream bug report suggests that there are production dGPUs that are older than DCN401 but still have a umc_info in VBIOS tables with the same version as expected for a DCN401 product. Hence, reading this tables should be guarded with a version check. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3678 Reviewed-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 2551b4a321a68134360b860113dd460133e856e5) Fixes: 00c391102abc ("drm/amd/display: Add misc DC changes for DCN401") Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 0d8498ab9b23..be8fbb04ad98 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3127,7 +3127,9 @@ static enum bp_result bios_parser_get_vram_info( struct atom_data_revision revision; // vram info moved to umc_info for DCN4x - if (info && DATA_TABLES(umc_info)) { + if (dcb->ctx->dce_version >= DCN_VERSION_4_01 && + dcb->ctx->dce_version < DCN_VERSION_MAX && + info && DATA_TABLES(umc_info)) { header = GET_IMAGE(struct atom_common_table_header, DATA_TABLES(umc_info)); From a6dd15981c03f2cdc9a351a278f09b5479d53d2e Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 31 Oct 2024 16:28:48 +0100 Subject: [PATCH 157/281] drm/amdgpu: prevent NULL pointer dereference if ATIF is not supported acpi_evaluate_object() may return AE_NOT_FOUND (failure), which would result in dereferencing buffer.pointer (obj) while being NULL. Although this case may be unrealistic for the current code, it is still better to protect against possible bugs. Bail out also when status is AE_NOT_FOUND. This fixes 1 FORWARD_NULL issue reported by Coverity Report: CID 1600951: Null pointer dereferences (FORWARD_NULL) Signed-off-by: Antonio Quartulli Fixes: c9b7c809b89f ("drm/amd: Guard against bad data for ATIF ACPI method") Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241031152848.4716-1-antonio@mandelbit.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 91c9e221fe2553edf2db71627d8453f083de87a1) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 1f5a296f5ed2..7dd55ed57c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -172,8 +172,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, &buffer); obj = (union acpi_object *)buffer.pointer; - /* Fail if calling the method fails and ATIF is supported */ - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* Fail if calling the method fails */ + if (ACPI_FAILURE(status)) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); kfree(obj); From 1356bfc54c8d4c8e7c9fb8553dc8c28e9714b07b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 1 Nov 2024 11:55:25 +0800 Subject: [PATCH 158/281] drm/amd/pm: always pick the pptable from IFWI always pick the pptable from IFWI on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 136ce12bd5907388cb4e9aa63ee5c9c8c441640b) Cc: stable@vger.kernel.org # 6.11.x --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 65 +------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e83ea2bc7f9c..1e16a281f2dc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -367,54 +367,6 @@ static int smu_v14_0_2_store_powerplay_table(struct smu_context *smu) return 0; } -#ifndef atom_smc_dpm_info_table_14_0_0 -struct atom_smc_dpm_info_table_14_0_0 { - struct atom_common_table_header table_header; - BoardTable_t BoardTable; -}; -#endif - -static int smu_v14_0_2_append_powerplay_table(struct smu_context *smu) -{ - struct smu_table_context *table_context = &smu->smu_table; - PPTable_t *smc_pptable = table_context->driver_pptable; - struct atom_smc_dpm_info_table_14_0_0 *smc_dpm_table; - BoardTable_t *BoardTable = &smc_pptable->BoardTable; - int index, ret; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - smc_dpm_info); - - ret = amdgpu_atombios_get_data_table(smu->adev, index, NULL, NULL, NULL, - (uint8_t **)&smc_dpm_table); - if (ret) - return ret; - - memcpy(BoardTable, &smc_dpm_table->BoardTable, sizeof(BoardTable_t)); - - return 0; -} - -#if 0 -static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, - void **table, - uint32_t *size) -{ - struct smu_table_context *smu_table = &smu->smu_table; - void *combo_pptable = smu_table->combo_pptable; - int ret = 0; - - ret = smu_cmn_get_combo_pptable(smu); - if (ret) - return ret; - - *table = combo_pptable; - *size = sizeof(struct smu_14_0_powerplay_table); - - return 0; -} -#endif - static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, void **table, uint32_t *size) @@ -436,16 +388,12 @@ static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, static int smu_v14_0_2_setup_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - struct amdgpu_device *adev = smu->adev; int ret = 0; if (amdgpu_sriov_vf(smu->adev)) return 0; - if (!adev->scpm_enabled) - ret = smu_v14_0_setup_pptable(smu); - else - ret = smu_v14_0_2_get_pptable_from_pmfw(smu, + ret = smu_v14_0_2_get_pptable_from_pmfw(smu, &smu_table->power_play_table, &smu_table->power_play_table_size); if (ret) @@ -455,16 +403,6 @@ static int smu_v14_0_2_setup_pptable(struct smu_context *smu) if (ret) return ret; - /* - * With SCPM enabled, the operation below will be handled - * by PSP. Driver involvment is unnecessary and useless. - */ - if (!adev->scpm_enabled) { - ret = smu_v14_0_2_append_powerplay_table(smu); - if (ret) - return ret; - } - ret = smu_v14_0_2_check_powerplay_table(smu); if (ret) return ret; @@ -2799,7 +2737,6 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .setup_pptable = smu_v14_0_2_setup_pptable, .check_fw_version = smu_v14_0_check_fw_version, - .write_pptable = smu_cmn_write_pptable, .set_driver_table_location = smu_v14_0_set_driver_table_location, .system_features_control = smu_v14_0_system_features_control, .set_allowed_mask = smu_v14_0_set_allowed_mask, From 74e1006430a5377228e49310f6d915628609929e Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 30 Oct 2024 13:22:44 +0800 Subject: [PATCH 159/281] drm/amd/pm: correct the workload setting Correct the workload setting in order not to mix the setting with the end user. Update the workload mask accordingly. v2: changes as below: 1. the end user can not erase the workload from driver except default workload. 2. always shows the real highest priority workoad to the end user. 3. the real workload mask is combined with driver workload mask and end user workload mask. v3: apply this to the other ASICs as well. v4: simplify the code v5: refine the code based on the review comments. Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 8cc438be5d49b8326b2fcade0bdb7e6a97df9e0b) Cc: stable@vger.kernel.org # 6.11.x --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 49 +++++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 20 ++++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 5 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 ++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 + 12 files changed, 84 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 80e60ea2d11e..ee1bcfaae3e3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1259,26 +1259,33 @@ static int smu_sw_init(void *handle) smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->user_dpm_profile.user_workload_mask = 0; atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; - smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; - smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; - smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; - smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; - smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; + smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; + smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; - else - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + } else { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; @@ -2348,17 +2355,20 @@ static int smu_switch_power_profile(void *handle, return -EINVAL; if (!en) { - smu->workload_mask &= ~(1 << smu->workload_prority[type]); + smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } else { - smu->workload_mask |= (1 << smu->workload_prority[type]); + smu->driver_workload_mask |= (1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); @@ -3049,12 +3059,23 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; + int ret; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !smu->ppt_funcs->set_power_profile_mode) return -EOPNOTSUPP; - return smu_bump_power_profile_mode(smu, param, param_size); + if (smu->user_dpm_profile.user_workload_mask & + (1 << smu->workload_priority[param[param_size]])) + return 0; + + smu->user_dpm_profile.user_workload_mask = + (1 << smu->workload_priority[param[param_size]]); + smu->workload_mask = smu->user_dpm_profile.user_workload_mask | + smu->driver_workload_mask; + ret = smu_bump_power_profile_mode(smu, param, param_size); + + return ret; } static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b44a185d07e8..d60d9a12a47e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -240,6 +240,7 @@ struct smu_user_dpm_profile { /* user clock state information */ uint32_t clk_mask[SMU_CLK_COUNT]; uint32_t clk_dependency; + uint32_t user_workload_mask; }; #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ @@ -557,7 +558,8 @@ struct smu_context { bool disable_uclk_switch; uint32_t workload_mask; - uint32_t workload_prority[WORKLOAD_POLICY_MAX]; + uint32_t driver_workload_mask; + uint32_t workload_priority[WORKLOAD_POLICY_MAX]; uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index c0f6b59369b7..31fe512028f4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1455,7 +1455,6 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, return -EINVAL; } - if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && (smu->smc_fw_version >= 0x360d00)) { if (size != 10) @@ -1523,14 +1522,14 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 16af1a329621..12223f507977 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2081,10 +2081,13 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9c3c48297cba..3b7b2ec8319a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1786,10 +1786,13 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 1fe020f1f4db..952ee22cbc90 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1079,7 +1079,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", @@ -1087,7 +1087,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index cc0504b063fa..62316a6707ef 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -890,14 +890,14 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index d53e162dcd8d..5dd7ceca64fe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask, selected_workload_mask; + u32 workload_mask; smu->power_profile_mode = input[size]; @@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - selected_workload_mask = workload_mask = 1 << workload_type; + workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2567,12 +2567,22 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask |= 1 << workload_type; } + smu->workload_mask |= workload_mask; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - workload_mask, + smu->workload_mask, NULL); - if (!ret) - smu->workload_mask = selected_workload_mask; + if (!ret) { + smu_cmn_assign_power_profile(smu); + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_FULLSCREEN3D); + smu->power_profile_mode = smu->workload_mask & (1 << workload_type) + ? PP_SMC_POWER_PROFILE_FULLSCREEN3D + : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + } + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index b891a5e0a396..9d0b19419de0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2499,13 +2499,14 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); else - smu->workload_mask = (1 << workload_type); + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 1e16a281f2dc..1aa13d32ceb2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1807,12 +1807,11 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - 1 << workload_type, - NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + if (!ret) - smu->workload_mask = 1 << workload_type; + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 91ad434bcdae..bdfc5e617333 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1138,6 +1138,14 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } +void smu_cmn_assign_power_profile(struct smu_context *smu) +{ + uint32_t index; + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + smu->power_profile_mode = smu->workload_setting[index]; +} + bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) { struct pci_dev *p = NULL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1de685defe85..8a801e389659 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -130,6 +130,8 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); +void smu_cmn_assign_power_profile(struct smu_context *smu); + /* * Helper function to make sysfs_emit_at() happy. Align buf to * the current page boundary and record the offset. From 4a74da044ec9ec8679e6beccc4306b936b62873f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 12:46:39 +0000 Subject: [PATCH 160/281] security/keys: fix slab-out-of-bounds in key_task_permission KASAN reports an out of bounds read: BUG: KASAN: slab-out-of-bounds in __kuid_val include/linux/uidgid.h:36 BUG: KASAN: slab-out-of-bounds in uid_eq include/linux/uidgid.h:63 [inline] BUG: KASAN: slab-out-of-bounds in key_task_permission+0x394/0x410 security/keys/permission.c:54 Read of size 4 at addr ffff88813c3ab618 by task stress-ng/4362 CPU: 2 PID: 4362 Comm: stress-ng Not tainted 5.10.0-14930-gafbffd6c3ede #15 Call Trace: __dump_stack lib/dump_stack.c:82 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:123 print_address_description.constprop.0+0x19/0x170 mm/kasan/report.c:400 __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560 kasan_report+0x3a/0x50 mm/kasan/report.c:585 __kuid_val include/linux/uidgid.h:36 [inline] uid_eq include/linux/uidgid.h:63 [inline] key_task_permission+0x394/0x410 security/keys/permission.c:54 search_nested_keyrings+0x90e/0xe90 security/keys/keyring.c:793 This issue was also reported by syzbot. It can be reproduced by following these steps(more details [1]): 1. Obtain more than 32 inputs that have similar hashes, which ends with the pattern '0xxxxxxxe6'. 2. Reboot and add the keys obtained in step 1. The reproducer demonstrates how this issue happened: 1. In the search_nested_keyrings function, when it iterates through the slots in a node(below tag ascend_to_node), if the slot pointer is meta and node->back_pointer != NULL(it means a root), it will proceed to descend_to_node. However, there is an exception. If node is the root, and one of the slots points to a shortcut, it will be treated as a keyring. 2. Whether the ptr is keyring decided by keyring_ptr_is_keyring function. However, KEYRING_PTR_SUBTYPE is 0x2UL, the same as ASSOC_ARRAY_PTR_SUBTYPE_MASK. 3. When 32 keys with the similar hashes are added to the tree, the ROOT has keys with hashes that are not similar (e.g. slot 0) and it splits NODE A without using a shortcut. When NODE A is filled with keys that all hashes are xxe6, the keys are similar, NODE A will split with a shortcut. Finally, it forms the tree as shown below, where slot 6 points to a shortcut. NODE A +------>+---+ ROOT | | 0 | xxe6 +---+ | +---+ xxxx | 0 | shortcut : : xxe6 +---+ | +---+ xxe6 : : | | | xxe6 +---+ | +---+ | 6 |---+ : : xxe6 +---+ +---+ xxe6 : : | f | xxe6 +---+ +---+ xxe6 | f | +---+ 4. As mentioned above, If a slot(slot 6) of the root points to a shortcut, it may be mistakenly transferred to a key*, leading to a read out-of-bounds read. To fix this issue, one should jump to descend_to_node if the ptr is a shortcut, regardless of whether the node is root or not. [1] https://lore.kernel.org/linux-kernel/1cfa878e-8c7b-4570-8606-21daf5e13ce7@huaweicloud.com/ [jarkko: tweaked the commit message a bit to have an appropriate closes tag.] Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring") Reported-by: syzbot+5b415c07907a2990d1a3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000cbb7860611f61147@google.com/T/ Signed-off-by: Chen Ridong Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/keyring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 4448758f643a..f331725d5a37 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -772,8 +772,11 @@ ascend_to_node: for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); - if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) - goto descend_to_node; + if (assoc_array_ptr_is_meta(ptr)) { + if (node->back_pointer || + assoc_array_ptr_is_shortcut(ptr)) + goto descend_to_node; + } if (!keyring_ptr_is_keyring(ptr)) continue; From 04de7589e0a95167d803ecadd115235ba2c14997 Mon Sep 17 00:00:00 2001 From: David Gstir Date: Tue, 29 Oct 2024 12:34:01 +0100 Subject: [PATCH 161/281] KEYS: trusted: dcp: fix NULL dereference in AEAD crypto operation When sealing or unsealing a key blob we currently do not wait for the AEAD cipher operation to finish and simply return after submitting the request. If there is some load on the system we can exit before the cipher operation is done and the buffer we read from/write to is already removed from the stack. This will e.g. result in NULL pointer dereference errors in the DCP driver during blob creation. Fix this by waiting for the AEAD cipher operation to finish before resuming the seal and unseal calls. Cc: stable@vger.kernel.org # v6.10+ Fixes: 0e28bf61a5f9 ("KEYS: trusted: dcp: fix leak of blob encryption key") Reported-by: Parthiban N Closes: https://lore.kernel.org/keyrings/254d3bb1-6dbc-48b4-9c08-77df04baee2f@linumiz.com/ Signed-off-by: David Gstir Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/trusted-keys/trusted_dcp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index 4edc5bbbcda3..e908c53a803c 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -133,6 +133,7 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, struct scatterlist src_sg, dst_sg; struct crypto_aead *aead; int ret; + DECLARE_CRYPTO_WAIT(wait); aead = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(aead)) { @@ -163,8 +164,8 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, } aead_request_set_crypt(aead_req, &src_sg, &dst_sg, len, nonce); - aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, - NULL); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, &wait); aead_request_set_ad(aead_req, 0); if (crypto_aead_setkey(aead, key, AES_KEYSIZE_128)) { @@ -174,9 +175,9 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, } if (do_encrypt) - ret = crypto_aead_encrypt(aead_req); + ret = crypto_wait_req(crypto_aead_encrypt(aead_req), &wait); else - ret = crypto_aead_decrypt(aead_req); + ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &wait); free_req: aead_request_free(aead_req); From e9942bfe493108bceb64a91c2a832412524e8b78 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Wed, 9 Oct 2024 17:18:35 +0200 Subject: [PATCH 162/281] ice: Fix use after free during unload with ports in bridge Unloading the ice driver while switchdev port representors are added to a bridge can lead to kernel panic. Reproducer: modprobe ice devlink dev eswitch set $PF1_PCI mode switchdev ip link add $BR type bridge ip link set $BR up echo 2 > /sys/class/net/$PF1/device/sriov_numvfs sleep 2 ip link set $PF1 master $BR ip link set $VF1_PR master $BR ip link set $VF2_PR master $BR ip link set $PF1 up ip link set $VF1_PR up ip link set $VF2_PR up ip link set $VF1 up rmmod irdma ice When unloading the driver, ice_eswitch_detach() is eventually called as part of VF freeing. First, it removes a port representor from xarray, then unregister_netdev() is called (via repr->ops.rem()), finally representor is deallocated. The problem comes from the bridge doing its own deinit at the same time. unregister_netdev() triggers a notifier chain, resulting in ice_eswitch_br_port_deinit() being called. It should set repr->br_port = NULL, but this does not happen since repr has already been removed from xarray and is not found. Regardless, it finishes up deallocating br_port. At this point, repr is still not freed and an fdb event can happen, in which ice_eswitch_br_fdb_event_work() takes repr->br_port and tries to use it, which causes a panic (use after free). Note that this only happens with 2 or more port representors added to the bridge, since with only one representor port, the bridge deinit is slightly different (ice_eswitch_br_port_deinit() is called via ice_eswitch_br_ports_flush(), not ice_eswitch_br_port_unlink()). Trace: Oops: general protection fault, probably for non-canonical address 0xf129010fd1a93284: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: maybe wild-memory-access in range [0x8948287e8d499420-0x8948287e8d499427] (...) Workqueue: ice_bridge_wq ice_eswitch_br_fdb_event_work [ice] RIP: 0010:__rht_bucket_nested+0xb4/0x180 (...) Call Trace: (...) ice_eswitch_br_fdb_find+0x3fa/0x550 [ice] ? __pfx_ice_eswitch_br_fdb_find+0x10/0x10 [ice] ice_eswitch_br_fdb_event_work+0x2de/0x1e60 [ice] ? __schedule+0xf60/0x5210 ? mutex_lock+0x91/0xe0 ? __pfx_ice_eswitch_br_fdb_event_work+0x10/0x10 [ice] ? ice_eswitch_br_update_work+0x1f4/0x310 [ice] (...) A workaround is available: brctl setageing $BR 0, which stops the bridge from adding fdb entries altogether. Change the order of operations in ice_eswitch_detach(): move the call to unregister_netdev() before removing repr from xarray. This way repr->br_port will be correctly set to NULL in ice_eswitch_br_port_deinit(), preventing a panic. Fixes: fff292b47ac1 ("ice: add VF representors one by one") Reviewed-by: Michal Swiatkowski Reviewed-by: Paul Menzel Signed-off-by: Marcin Szycik Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index c0b3e70a7ea3..fb527434b58b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -552,13 +552,14 @@ int ice_eswitch_attach_sf(struct ice_pf *pf, struct ice_dynamic_port *sf) static void ice_eswitch_detach(struct ice_pf *pf, struct ice_repr *repr) { ice_eswitch_stop_reprs(pf); + repr->ops.rem(repr); + xa_erase(&pf->eswitch.reprs, repr->id); if (xa_empty(&pf->eswitch.reprs)) ice_eswitch_disable_switchdev(pf); ice_eswitch_release_repr(pf, repr); - repr->ops.rem(repr); ice_repr_destroy(repr); if (xa_empty(&pf->eswitch.reprs)) { From 64502dac974a5d9951d16015fa2e16a14e5f2bb2 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Mon, 28 Oct 2024 12:59:22 -0400 Subject: [PATCH 163/281] ice: change q_index variable type to s16 to store -1 value Fix Flow Director not allowing to re-map traffic to 0th queue when action is configured to drop (and vice versa). The current implementation of ethtool callback in the ice driver forbids change Flow Director action from 0 to -1 and from -1 to 0 with an error, e.g: # ethtool -U eth2 flow-type tcp4 src-ip 1.1.1.1 loc 1 action 0 # ethtool -U eth2 flow-type tcp4 src-ip 1.1.1.1 loc 1 action -1 rmgr: Cannot insert RX class rule: Invalid argument We set the value of `u16 q_index = 0` at the beginning of the function ice_set_fdir_input_set(). In case of "drop traffic" action (which is equal to -1 in ethtool) we store the 0 value. Later, when want to change traffic rule to redirect to queue with index 0 it returns an error caused by duplicate found. Fix this behaviour by change of the type of field `q_index` from u16 to s16 in `struct ice_fdir_fltr`. This allows to store -1 in the field in case of "drop traffic" action. What is more, change the variable type in the function ice_set_fdir_input_set() and assign at the beginning the new `#define ICE_FDIR_NO_QUEUE_IDX` which is -1. Later, if the action is set to another value (point specific queue index) the variable value is overwritten in the function. Fixes: cac2a27cd9ab ("ice: Support IPv4 Flow Director filters") Reviewed-by: Przemek Kitszel Signed-off-by: Mateusz Polchlopek Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 3 ++- drivers/net/ethernet/intel/ice/ice_fdir.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 5412eff8ef23..ee9862ddfe15 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1830,11 +1830,12 @@ static int ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, struct ice_fdir_fltr *input) { - u16 dest_vsi, q_index = 0; + s16 q_index = ICE_FDIR_NO_QUEUE_IDX; u16 orig_q_index = 0; struct ice_pf *pf; struct ice_hw *hw; int flow_type; + u16 dest_vsi; u8 dest_ctl; if (!vsi || !fsp || !input) diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index ab5b118daa2d..820023c0271f 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -53,6 +53,8 @@ */ #define ICE_FDIR_IPV4_PKT_FLAG_MF 0x20 +#define ICE_FDIR_NO_QUEUE_IDX -1 + enum ice_fltr_prgm_desc_dest { ICE_FLTR_PRGM_DESC_DEST_DROP_PKT, ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX, @@ -186,7 +188,7 @@ struct ice_fdir_fltr { u16 flex_fltr; /* filter control */ - u16 q_index; + s16 q_index; u16 orig_q_index; u16 dest_vsi; u8 dest_ctl; From 81d2fb4c7c18a3b36ba3e00b9d5b753107472d75 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Fri, 25 Oct 2024 11:38:42 -0700 Subject: [PATCH 164/281] idpf: avoid vport access in idpf_get_link_ksettings When the device control plane is removed or the platform running device control plane is rebooted, a reset is detected on the driver. On driver reset, it releases the resources and waits for the reset to complete. If the reset fails, it takes the error path and releases the vport lock. At this time if the monitoring tools tries to access link settings, it call traces for accessing released vport pointer. To avoid it, move link_speed_mbps to netdev_priv structure which removes the dependency on vport pointer and the vport lock in idpf_get_link_ksettings. Also use netif_carrier_ok() to check the link status and adjust the offsetof to use link_up instead of link_speed_mbps. Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Cc: stable@vger.kernel.org # 6.7+ Reviewed-by: Tarun K Singh Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 4 ++-- drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 11 +++-------- drivers/net/ethernet/intel/idpf/idpf_lib.c | 4 ++-- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 2c31ad87587a..66544faab710 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -141,6 +141,7 @@ enum idpf_vport_state { * @adapter: Adapter back pointer * @vport: Vport back pointer * @vport_id: Vport identifier + * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index * @state: See enum idpf_vport_state * @netstats: Packet and byte stats @@ -150,6 +151,7 @@ struct idpf_netdev_priv { struct idpf_adapter *adapter; struct idpf_vport *vport; u32 vport_id; + u32 link_speed_mbps; u16 vport_idx; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; @@ -287,7 +289,6 @@ struct idpf_port_stats { * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation * @port_stats: per port csum, header split, and other offload stats * @link_up: True if link is up - * @link_speed_mbps: Link speed in mbps * @sw_marker_wq: workqueue for marker packets */ struct idpf_vport { @@ -331,7 +332,6 @@ struct idpf_vport { struct idpf_port_stats port_stats; bool link_up; - u32 link_speed_mbps; wait_queue_head_t sw_marker_wq; }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 3806ddd3ce4a..59b1a1a09996 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -1296,24 +1296,19 @@ static void idpf_set_msglevel(struct net_device *netdev, u32 data) static int idpf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct idpf_vport *vport; - - idpf_vport_ctrl_lock(netdev); - vport = idpf_netdev_to_vport(netdev); + struct idpf_netdev_priv *np = netdev_priv(netdev); ethtool_link_ksettings_zero_link_mode(cmd, supported); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = PORT_NONE; - if (vport->link_up) { + if (netif_carrier_ok(netdev)) { cmd->base.duplex = DUPLEX_FULL; - cmd->base.speed = vport->link_speed_mbps; + cmd->base.speed = np->link_speed_mbps; } else { cmd->base.duplex = DUPLEX_UNKNOWN; cmd->base.speed = SPEED_UNKNOWN; } - idpf_vport_ctrl_unlock(netdev); - return 0; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 4f20343e49a9..c3848e10e7db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1860,7 +1860,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, * mess with. Nothing below should use those variables from new_vport * and should instead always refer to them in vport if they need to. */ - memcpy(new_vport, vport, offsetof(struct idpf_vport, link_speed_mbps)); + memcpy(new_vport, vport, offsetof(struct idpf_vport, link_up)); /* Adjust resource parameters prior to reallocating resources */ switch (reset_cause) { @@ -1906,7 +1906,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, /* Same comment as above regarding avoiding copying the wait_queues and * mutexes applies here. We do not want to mess with those if possible. */ - memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_up)); if (reset_cause == IDPF_SR_Q_CHANGE) idpf_vport_alloc_vec_indexes(vport); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 15c00a01f1c0..ce217e274506 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -141,7 +141,7 @@ static void idpf_handle_event_link(struct idpf_adapter *adapter, } np = netdev_priv(vport->netdev); - vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); + np->link_speed_mbps = le32_to_cpu(v2e->link_speed); if (vport->link_up == v2e->link_status) return; From 9b58031ff96b84a38d7b73b23c7ecfb2e0557f43 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Fri, 25 Oct 2024 11:38:43 -0700 Subject: [PATCH 165/281] idpf: fix idpf_vc_core_init error path In an event where the platform running the device control plane is rebooted, reset is detected on the driver. It releases all the resources and waits for the reset to complete. Once the reset is done, it tries to build the resources back. At this time if the device control plane is not yet started, then the driver timeouts on the virtchnl message and retries to establish the mailbox again. In the retry flow, mailbox is deinitialized but the mailbox workqueue is still alive and polling for the mailbox message. This results in accessing the released control queue leading to null-ptr-deref. Fix it by unrolling the work queue cancellation and mailbox deinitialization in the reverse order which they got initialized. Fixes: 4930fbf419a7 ("idpf: add core init and interrupt request") Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org # 6.9+ Reviewed-by: Tarun K Singh Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 1 + drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index c3848e10e7db..b4fbb99bfad2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1786,6 +1786,7 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) */ err = idpf_vc_core_init(adapter); if (err) { + cancel_delayed_work_sync(&adapter->mbx_task); idpf_deinit_dflt_mbx(adapter); goto unlock_mutex; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index ce217e274506..d46c95f91b0d 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3063,7 +3063,6 @@ init_failed: adapter->state = __IDPF_VER_CHECK; if (adapter->vcxn_mngr) idpf_vc_xn_shutdown(adapter->vcxn_mngr); - idpf_deinit_dflt_mbx(adapter); set_bit(IDPF_HR_DRV_LOAD, adapter->flags); queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, msecs_to_jiffies(task_delay)); From f30490e9695ef7da3d0899c6a0293cc7cd373567 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 16 Oct 2024 11:30:11 +0200 Subject: [PATCH 166/281] i40e: fix race condition by adding filter's intermediate sync state Fix a race condition in the i40e driver that leads to MAC/VLAN filters becoming corrupted and leaking. Address the issue that occurs under heavy load when multiple threads are concurrently modifying MAC/VLAN filters by setting mac and port VLAN. 1. Thread T0 allocates a filter in i40e_add_filter() within i40e_ndo_set_vf_port_vlan(). 2. Thread T1 concurrently frees the filter in __i40e_del_filter() within i40e_ndo_set_vf_mac(). 3. Subsequently, i40e_service_task() calls i40e_sync_vsi_filters(), which refers to the already freed filter memory, causing corruption. Reproduction steps: 1. Spawn multiple VFs. 2. Apply a concurrent heavy load by running parallel operations to change MAC addresses on the VFs and change port VLANs on the host. 3. Observe errors in dmesg: "Error I40E_AQ_RC_ENOSPC adding RX filters on VF XX, please set promiscuous on manually for VF XX". Exact code for stable reproduction Intel can't open-source now. The fix involves implementing a new intermediate filter state, I40E_FILTER_NEW_SYNC, for the time when a filter is on a tmp_add_list. These filters cannot be deleted from the hash list directly but must be removed using the full process. Fixes: 278e7d0b9d68 ("i40e: store MAC/VLAN filters in a hash with the MAC Address as key") Signed-off-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Michal Schmidt Tested-by: Michal Schmidt Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2089a0e172bf..d4255c2706fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -755,6 +755,7 @@ enum i40e_filter_state { I40E_FILTER_ACTIVE, /* Added to switch by FW */ I40E_FILTER_FAILED, /* Rejected by FW */ I40E_FILTER_REMOVE, /* To be removed */ + I40E_FILTER_NEW_SYNC, /* New, not sent yet, is in i40e_sync_vsi_filters() */ /* There is no 'removed' state; the filter struct is freed */ }; struct i40e_mac_filter { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index abf624d770e6..208c2f0857b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -89,6 +89,7 @@ static char *i40e_filter_state_string[] = { "ACTIVE", "FAILED", "REMOVE", + "NEW_SYNC", }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 25295ae370b2..55fb362eb508 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1255,6 +1255,7 @@ int i40e_count_filters(struct i40e_vsi *vsi) hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC || f->state == I40E_FILTER_ACTIVE) ++cnt; } @@ -1441,6 +1442,8 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, new->f = add_head; new->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new->hlist, tmp_add_list); @@ -1550,6 +1553,8 @@ static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, return -ENOMEM; new_mac->f = add_head; new_mac->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new_mac->hlist, tmp_add_list); @@ -2437,7 +2442,8 @@ static int i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_mac_filter *f) { - bool enable = f->state == I40E_FILTER_NEW; + bool enable = f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC; struct i40e_hw *hw = &vsi->back->hw; int aq_ret; @@ -2611,6 +2617,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* Add it to the hash list */ hlist_add_head(&new->hlist, &tmp_add_list); + f->state = I40E_FILTER_NEW_SYNC; } /* Count the number of active (current and new) VLAN @@ -2762,7 +2769,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) spin_lock_bh(&vsi->mac_filter_hash_lock); hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ - if (new->f->state == I40E_FILTER_NEW) + if (new->f->state == I40E_FILTER_NEW || + new->f->state == I40E_FILTER_NEW_SYNC) new->f->state = new->state; hlist_del(&new->hlist); netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); From b8473723272e346e22aa487b9046fd324b73a0a5 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Tue, 1 Oct 2024 20:08:48 +0300 Subject: [PATCH 167/281] e1000e: Remove Meteor Lake SMBUS workarounds This is a partial revert to commit 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow"). That commit fixed a sporadic PHY access issue but introduced a regression in runtime suspend flows. The original issue on Meteor Lake systems was rare in terms of the reproduction rate and the number of the systems affected. After the integration of commit 0a6ad4d9e169 ("e1000e: avoid failing the system during pm_suspend"), PHY access loss can no longer cause a system-level suspend failure. As it only occurs when the LAN cable is disconnected, and is recovered during system resume flow. Therefore, its functional impact is low, and the priority is given to stabilizing runtime suspend. Fixes: 76a0a3f9cc2f ("e1000e: fix force smbus during suspend flow") Signed-off-by: Vitaly Lifshits Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ce227b56cf72..2f9655cf5dd9 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1205,12 +1205,10 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; - if (hw->mac.type != e1000_pch_mtp) { - ret_val = e1000e_force_smbus(hw); - if (ret_val) { - e_dbg("Failed to force SMBUS: %d\n", ret_val); - goto release; - } + ret_val = e1000e_force_smbus(hw); + if (ret_val) { + e_dbg("Failed to force SMBUS: %d\n", ret_val); + goto release; } /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable @@ -1273,13 +1271,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) } release: - if (hw->mac.type == e1000_pch_mtp) { - ret_val = e1000e_force_smbus(hw); - if (ret_val) - e_dbg("Failed to force SMBUS over MTL system: %d\n", - ret_val); - } - hw->phy.ops.release(hw); out: if (ret_val) From 6d1c69945ce63a9fba22a4abf646cf960d878782 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 4 Nov 2024 04:24:40 -0800 Subject: [PATCH 168/281] nvme/host: Fix RCU list traversal to use SRCU primitive The code currently uses list_for_each_entry_rcu() while holding an SRCU lock, triggering false positive warnings with CONFIG_PROVE_RCU=y enabled: drivers/nvme/host/core.c:3770 RCU-list traversed in non-reader section!! While the list is properly protected by SRCU lock, the code uses the wrong list traversal primitive. Replace list_for_each_entry_rcu() with list_for_each_entry_srcu() to correctly indicate SRCU-based protection and eliminate the false warning. Fixes: be647e2c76b2 ("nvme: use srcu for iterating namespace list") Signed-off-by: Breno Leitao Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce20b916301a..4a83fd120064 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3795,7 +3795,8 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) { if (ns->head->ns_id == nsid) { if (!nvme_get_ns(ns)) continue; @@ -4879,7 +4880,8 @@ void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mark_disk_dead(ns->disk); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4891,7 +4893,8 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mq_unfreeze_queue(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); clear_bit(NVME_CTRL_FROZEN, &ctrl->flags); @@ -4904,7 +4907,8 @@ int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) { timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); if (timeout <= 0) break; @@ -4920,7 +4924,8 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mq_freeze_queue_wait(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4933,7 +4938,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) set_bit(NVME_CTRL_FROZEN, &ctrl->flags); srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_freeze_queue_start(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4981,7 +4987,8 @@ void nvme_sync_io_queues(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_sync_queue(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } From a97e293e077a3e8f41e8972e593b34d0052b9e25 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Nov 2024 19:51:28 +0100 Subject: [PATCH 169/281] cpufreq: intel_pstate: Clear hybrid_max_perf_cpu before driver registration Modify intel_pstate_register_driver() to clear hybrid_max_perf_cpu before calling cpufreq_register_driver(), so that asymmetric CPU capacity scaling is not updated until hybrid_init_cpu_capacity_scaling() runs down the road. This is done in preparation for a subsequent change adding asymmetric CPU capacity computation to the CPU init path to handle CPUs that are initially offline. The information on whether or not hybrid_max_perf_cpu was NULL before it has been cleared is passed to hybrid_init_cpu_capacity_scaling(), so full initialization of CPU capacity scaling can be skipped if it has been carried out already. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/4616631.LvFx2qVVIh@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b0018f371ea3..4e816f0857f2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1034,7 +1034,7 @@ static void __hybrid_init_cpu_capacity_scaling(void) hybrid_update_cpu_capacity_scaling(); } -static void hybrid_init_cpu_capacity_scaling(void) +static void hybrid_init_cpu_capacity_scaling(bool refresh) { bool disable_itmt = false; @@ -1045,7 +1045,7 @@ static void hybrid_init_cpu_capacity_scaling(void) * scaling has been enabled already and the driver is just changing the * operation mode. */ - if (hybrid_max_perf_cpu) { + if (refresh) { __hybrid_init_cpu_capacity_scaling(); goto unlock; } @@ -1071,6 +1071,18 @@ unlock: sched_clear_itmt_support(); } +static bool hybrid_clear_max_perf_cpu(void) +{ + bool ret; + + guard(mutex)(&hybrid_capacity_lock); + + ret = !!hybrid_max_perf_cpu; + hybrid_max_perf_cpu = NULL; + + return ret; +} + static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; @@ -3352,6 +3364,7 @@ static void intel_pstate_driver_cleanup(void) static int intel_pstate_register_driver(struct cpufreq_driver *driver) { + bool refresh_cpu_cap_scaling; int ret; if (driver == &intel_pstate) @@ -3364,6 +3377,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) arch_set_max_freq_ratio(global.turbo_disabled); + refresh_cpu_cap_scaling = hybrid_clear_max_perf_cpu(); + intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { @@ -3373,7 +3388,7 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) global.min_perf_pct = min_perf_pct_min(); - hybrid_init_cpu_capacity_scaling(); + hybrid_init_cpu_capacity_scaling(refresh_cpu_cap_scaling); return 0; } From 92447aa5f6e7fbad9427a3fd1bb9e0679c403206 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Nov 2024 19:53:53 +0100 Subject: [PATCH 170/281] cpufreq: intel_pstate: Update asym capacity for CPUs that were offline initially Commit 929ebc93ccaa ("cpufreq: intel_pstate: Set asymmetric CPU capacity on hybrid systems") overlooked a corner case in which some CPUs may be offline to start with and brought back online later, after the intel_pstate driver has been registered, so their asymmetric capacity will not be set. Address this by calling hybrid_update_capacity() in the CPU initialization path that is executed instead of the online path for those CPUs. Note that this asymmetric capacity update will be skipped during driver initialization and mode switches because hybrid_max_perf_cpu is NULL in those cases. Fixes: 929ebc93ccaa ("cpufreq: intel_pstate: Set asymmetric CPU capacity on hybrid systems") Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/1913414.tdWV9SEqCh@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4e816f0857f2..cd2ac1ba53d2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2275,6 +2275,11 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) } else { cpu->pstate.scaling = perf_ctl_scaling; } + /* + * If the CPU is going online for the first time and it was + * offline initially, asym capacity scaling needs to be updated. + */ + hybrid_update_capacity(cpu); } else { cpu->pstate.scaling = perf_ctl_scaling; cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu); From 0a77715db22611df50b178374c51e2ba0d58866e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 2 Nov 2024 18:46:38 +0900 Subject: [PATCH 171/281] ksmbd: fix slab-use-after-free in ksmbd_smb2_session_create There is a race condition between ksmbd_smb2_session_create and ksmbd_expire_session. This patch add missing sessions_table_lock while adding/deleting session from global session table. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 9756a4bbfe54..ad02fe555fda 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -178,6 +178,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) unsigned long id; struct ksmbd_session *sess; + down_write(&sessions_table_lock); down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { if (atomic_read(&sess->refcnt) == 0 && @@ -191,6 +192,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) } } up_write(&conn->session_lock); + up_write(&sessions_table_lock); } int ksmbd_session_register(struct ksmbd_conn *conn, @@ -232,7 +234,6 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } } - up_write(&sessions_table_lock); down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { @@ -252,6 +253,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } up_write(&conn->session_lock); + up_write(&sessions_table_lock); } struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, From b8fc56fbca7482c1e5c0e3351c6ae78982e25ada Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Nov 2024 13:40:41 +0900 Subject: [PATCH 172/281] ksmbd: fix slab-use-after-free in smb3_preauth_hash_rsp ksmbd_user_session_put should be called under smb3_preauth_hash_rsp(). It will avoid freeing session before calling smb3_preauth_hash_rsp(). Cc: stable@vger.kernel.org # v5.15+ Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 9670c97f14b3..e7f14f8df943 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -238,11 +238,11 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, } while (is_chained == true); send: - if (work->sess) - ksmbd_user_session_put(work->sess); if (work->tcon) ksmbd_tree_connect_put(work->tcon); smb3_preauth_hash_rsp(work); + if (work->sess) + ksmbd_user_session_put(work->sess); if (work->sess && work->sess->enc && work->encrypted && conn->ops->encrypt_resp) { rc = conn->ops->encrypt_resp(work); From 0a77d947f599b1f39065015bec99390d0c0022ee Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Nov 2024 13:43:06 +0900 Subject: [PATCH 173/281] ksmbd: check outstanding simultaneous SMB operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If Client send simultaneous SMB operations to ksmbd, It exhausts too much memory through the "ksmbd_work_cache”. It will cause OOM issue. ksmbd has a credit mechanism but it can't handle this problem. This patch add the check if it exceeds max credits to prevent this problem by assuming that one smb request consumes at least one credit. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 1 + fs/smb/server/connection.h | 1 + fs/smb/server/server.c | 16 ++++++++++------ fs/smb/server/smb_common.c | 10 +++++++--- fs/smb/server/smb_common.h | 2 +- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index aa2a37a7ce84..e6a72f75ab94 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -70,6 +70,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); atomic_set(&conn->refcnt, 1); + atomic_set(&conn->mux_smb_requests, 0); conn->total_credits = 1; conn->outstanding_credits = 0; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index b379ae4fdcdf..8ddd5a3c7baf 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -107,6 +107,7 @@ struct ksmbd_conn { __le16 signing_algorithm; bool binding; atomic_t refcnt; + atomic_t mux_smb_requests; }; struct ksmbd_conn_ops { diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index e7f14f8df943..e6cfedba9992 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -270,6 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk) ksmbd_conn_try_dequeue_request(work); ksmbd_free_work_struct(work); + atomic_dec(&conn->mux_smb_requests); /* * Checking waitqueue to dropping pending requests on * disconnection. waitqueue_active is safe because it @@ -291,6 +292,15 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) struct ksmbd_work *work; int err; + err = ksmbd_init_smb_server(conn); + if (err) + return 0; + + if (atomic_inc_return(&conn->mux_smb_requests) >= conn->vals->max_credits) { + atomic_dec_return(&conn->mux_smb_requests); + return -ENOSPC; + } + work = ksmbd_alloc_work_struct(); if (!work) { pr_err("allocation for work failed\n"); @@ -301,12 +311,6 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) work->request_buf = conn->request_buf; conn->request_buf = NULL; - err = ksmbd_init_smb_server(work); - if (err) { - ksmbd_free_work_struct(work); - return 0; - } - ksmbd_conn_enqueue_request(work); atomic_inc(&conn->r_count); /* update activity on connection */ diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index a2ebbe604c8c..75b4eb856d32 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -388,6 +388,10 @@ static struct smb_version_ops smb1_server_ops = { .set_rsp_status = set_smb1_rsp_status, }; +static struct smb_version_values smb1_server_values = { + .max_credits = SMB2_MAX_CREDITS, +}; + static int smb1_negotiate(struct ksmbd_work *work) { return ksmbd_smb_negotiate_common(work, SMB_COM_NEGOTIATE); @@ -399,18 +403,18 @@ static struct smb_version_cmds smb1_server_cmds[1] = { static int init_smb1_server(struct ksmbd_conn *conn) { + conn->vals = &smb1_server_values; conn->ops = &smb1_server_ops; conn->cmds = smb1_server_cmds; conn->max_cmds = ARRAY_SIZE(smb1_server_cmds); return 0; } -int ksmbd_init_smb_server(struct ksmbd_work *work) +int ksmbd_init_smb_server(struct ksmbd_conn *conn) { - struct ksmbd_conn *conn = work->conn; __le32 proto; - proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol; + proto = *(__le32 *)((struct smb_hdr *)conn->request_buf)->Protocol; if (conn->need_neg == false) { if (proto == SMB1_PROTO_NUMBER) return -EINVAL; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index cc1d6dfe29d5..a3d8a905b07e 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn); int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count); -int ksmbd_init_smb_server(struct ksmbd_work *work); +int ksmbd_init_smb_server(struct ksmbd_conn *conn); struct ksmbd_kstat; int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, From 54c814c8b23bc7617be3d46abdb896937695dbfa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 31 Oct 2024 14:26:24 -0700 Subject: [PATCH 174/281] scsi: ufs: core: Start the RTC update work later The RTC update work involves runtime resuming the UFS controller. Hence, only start the RTC update work after runtime power management in the UFS driver has been fully initialized. This patch fixes the following kernel crash: Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Workqueue: events ufshcd_rtc_work Call trace: _raw_spin_lock_irqsave+0x34/0x8c (P) pm_runtime_get_if_active+0x24/0x9c (L) pm_runtime_get_if_active+0x24/0x9c ufshcd_rtc_work+0x138/0x1b4 process_one_work+0x148/0x288 worker_thread+0x2cc/0x3d4 kthread+0x110/0x114 ret_from_fork+0x10/0x20 Reported-by: Neil Armstrong Closes: https://lore.kernel.org/linux-scsi/0c0bc528-fdc2-4106-bc99-f23ae377f6f5@linaro.org/ Fixes: 6bf999e0eb41 ("scsi: ufs: core: Add UFS RTC support") Cc: Bean Huo Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241031212632.2799127-1-bvanassche@acm.org Reviewed-by: Peter Wang Reviewed-by: Bean Huo Tested-by: Neil Armstrong # on SM8650-HDK Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0e22bbb78239..3b32803b7a68 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8636,6 +8636,14 @@ static int ufshcd_add_lus(struct ufs_hba *hba) ufshcd_init_clk_scaling_sysfs(hba); } + /* + * The RTC update code accesses the hba->ufs_device_wlun->sdev_gendev + * pointer and hence must only be started after the WLUN pointer has + * been initialized by ufshcd_scsi_add_wlus(). + */ + schedule_delayed_work(&hba->ufs_rtc_update_work, + msecs_to_jiffies(UFS_RTC_UPDATE_INTERVAL_MS)); + ufs_bsg_probe(hba); scsi_scan_host(hba->host); @@ -8795,8 +8803,6 @@ static int ufshcd_device_init(struct ufs_hba *hba, bool init_dev_params) ufshcd_force_reset_auto_bkops(hba); ufshcd_set_timestamp_attr(hba); - schedule_delayed_work(&hba->ufs_rtc_update_work, - msecs_to_jiffies(UFS_RTC_UPDATE_INTERVAL_MS)); /* Gear up to HS gear if supported */ if (hba->max_pwr_info.is_valid) { From 249cfa318fb1b77eb726c2ff4f74c9685f04e568 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Nov 2024 18:03:52 -0800 Subject: [PATCH 175/281] Revert "Merge branch 'there-are-some-bugfix-for-the-hns3-ethernet-driver'" This reverts commit d80a3091308491455b6501b1c4b68698c4a7cd24, reversing changes made to 637f41476384c76d3cd7dcf5947caf2c8b8d7a9b: 2cf246143519 ("net: hns3: fix kernel crash when 1588 is sent on HIP08 devices") 3e22b7de34cb ("net: hns3: fixed hclge_fetch_pf_reg accesses bar space out of bounds issue") d1c2e2961ab4 ("net: hns3: initialize reset_timer before hclgevf_misc_irq_init()") 5f62009ff108 ("net: hns3: don't auto enable misc vector") 2758f18a83ef ("net: hns3: Resolved the issue that the debugfs query result is inconsistent.") 662ecfc46690 ("net: hns3: fix missing features due to dev->features configuration too early") 3e0f7cc887b7 ("net: hns3: fixed reset failure issues caused by the incorrect reset type") f2c14899caba ("net: hns3: add sync command to sync io-pgtable") e6ab19443b36 ("net: hns3: default enable tx bounce buffer when smmu enabled") The series is making the driver poke into IOMMU internals instead of implementing appropriate IOMMU workarounds. Link: https://lore.kernel.org/069c9838-b781-4012-934a-d2626fa78212@arm.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns3/hns3_debugfs.c | 4 +- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 59 +------------------ .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 - .../ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ----------- .../hisilicon/hns3/hns3pf/hclge_main.c | 45 +++----------- .../hisilicon/hns3/hns3pf/hclge_ptp.c | 3 - .../hisilicon/hns3/hns3pf/hclge_regs.c | 9 ++- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 40 +++---------- .../hisilicon/hns3/hns3vf/hclgevf_regs.c | 9 ++- 9 files changed, 26 insertions(+), 178 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 841e5af7b2be..807eb3bbb11c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1293,10 +1293,8 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, /* save the buffer addr until the last read operation */ *save_buf = read_buf; - } - /* get data ready for the first time to read */ - if (!*ppos) { + /* get data ready for the first time to read */ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd, read_buf, hns3_dbg_cmd[index].buf_len); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b09f0cca34dc..4cbc4d069a1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -381,24 +380,6 @@ static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { #define HNS3_INVALID_PTYPE \ ARRAY_SIZE(hns3_rx_ptype_tbl) -static void hns3_dma_map_sync(struct device *dev, unsigned long iova) -{ - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_iotlb_gather iotlb_gather; - size_t granule; - - if (!domain || !iommu_is_dma_domain(domain)) - return; - - granule = 1 << __ffs(domain->pgsize_bitmap); - iova = ALIGN_DOWN(iova, granule); - iotlb_gather.start = iova; - iotlb_gather.end = iova + granule - 1; - iotlb_gather.pgsize = granule; - - iommu_iotlb_sync(domain, &iotlb_gather); -} - static irqreturn_t hns3_irq_handle(int irq, void *vector) { struct hns3_enet_tqp_vector *tqp_vector = vector; @@ -1051,8 +1032,6 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) { u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; - struct net_device *netdev = ring_to_netdev(ring); - struct hns3_nic_priv *priv = netdev_priv(netdev); struct hns3_tx_spare *tx_spare; struct page *page; dma_addr_t dma; @@ -1094,7 +1073,6 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) tx_spare->buf = page_address(page); tx_spare->len = PAGE_SIZE << order; ring->tx_spare = tx_spare; - ring->tx_copybreak = priv->tx_copybreak; return; dma_mapping_error: @@ -1746,9 +1724,7 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv, unsigned int type) { struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; - struct hnae3_handle *handle = ring->tqp->handle; struct device *dev = ring_to_dev(ring); - struct hnae3_ae_dev *ae_dev; unsigned int size; dma_addr_t dma; @@ -1780,13 +1756,6 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv, return -ENOMEM; } - /* Add a SYNC command to sync io-pgtale to avoid errors in pgtable - * prefetch - */ - ae_dev = hns3_get_ae_dev(handle); - if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) - hns3_dma_map_sync(dev, dma); - desc_cb->priv = priv; desc_cb->length = size; desc_cb->dma = dma; @@ -2483,6 +2452,7 @@ static int hns3_nic_set_features(struct net_device *netdev, return ret; } + netdev->features = features; return 0; } @@ -4898,30 +4868,6 @@ static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) devm_kfree(&pdev->dev, priv->tqp_vector); } -static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv) -{ -#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024) -#define HNS3_MAX_PACKET_SIZE (64 * 1024) - - struct iommu_domain *domain = iommu_get_domain_for_dev(priv->dev); - struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(priv->ae_handle); - struct hnae3_handle *handle = priv->ae_handle; - - if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) - return; - - if (!(domain && iommu_is_dma_domain(domain))) - return; - - priv->min_tx_copybreak = HNS3_MAX_PACKET_SIZE; - priv->min_tx_spare_buf_size = HNS3_MIN_SPARE_BUF_SIZE; - - if (priv->tx_copybreak < priv->min_tx_copybreak) - priv->tx_copybreak = priv->min_tx_copybreak; - if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size) - handle->kinfo.tx_spare_buf_size = priv->min_tx_spare_buf_size; -} - static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, unsigned int ring_type) { @@ -5155,7 +5101,6 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) int i, j; int ret; - hns3_update_tx_spare_buf_config(priv); for (i = 0; i < ring_num; i++) { ret = hns3_alloc_ring_memory(&priv->ring[i]); if (ret) { @@ -5360,8 +5305,6 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->ae_handle = handle; priv->tx_timeout_count = 0; priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num; - priv->min_tx_copybreak = 0; - priv->min_tx_spare_buf_size = 0; set_bit(HNS3_NIC_STATE_DOWN, &priv->state); handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index caf7a4df8585..d36c4ed16d8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -596,8 +596,6 @@ struct hns3_nic_priv { struct hns3_enet_coalesce rx_coal; u32 tx_copybreak; u32 rx_copybreak; - u32 min_tx_copybreak; - u32 min_tx_spare_buf_size; }; union l3_hdr_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 97eaeec1952b..b1e988347347 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1933,31 +1933,6 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev, return ret; } -static int hns3_check_tx_copybreak(struct net_device *netdev, u32 copybreak) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - - if (copybreak < priv->min_tx_copybreak) { - netdev_err(netdev, "tx copybreak %u should be no less than %u!\n", - copybreak, priv->min_tx_copybreak); - return -EINVAL; - } - return 0; -} - -static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf_size) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - - if (buf_size < priv->min_tx_spare_buf_size) { - netdev_err(netdev, - "tx spare buf size %u should be no less than %u!\n", - buf_size, priv->min_tx_spare_buf_size); - return -EINVAL; - } - return 0; -} - static int hns3_set_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, const void *data) @@ -1974,10 +1949,6 @@ static int hns3_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: - ret = hns3_check_tx_copybreak(netdev, *(u32 *)data); - if (ret) - return ret; - priv->tx_copybreak = *(u32 *)data; for (i = 0; i < h->kinfo.num_tqps; i++) @@ -1992,10 +1963,6 @@ static int hns3_set_tunable(struct net_device *netdev, break; case ETHTOOL_TX_COPYBREAK_BUF_SIZE: - ret = hns3_check_tx_spare_buf_size(netdev, *(u32 *)data); - if (ret) - return ret; - old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size = *(u32 *)data; netdev_info(netdev, "request to set tx spare buf size from %u to %u\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 728f4777e51f..bd86efd92a5a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -3585,17 +3584,6 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, return ret; } -static void hclge_set_reset_pending(struct hclge_dev *hdev, - enum hnae3_reset_type reset_type) -{ - /* When an incorrect reset type is executed, the get_reset_level - * function generates the HNAE3_NONE_RESET flag. As a result, this - * type do not need to pending. - */ - if (reset_type != HNAE3_NONE_RESET) - set_bit(reset_type, &hdev->reset_pending); -} - static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) { u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg; @@ -3616,7 +3604,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) */ if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & msix_src_reg) { dev_info(&hdev->pdev->dev, "IMP reset interrupt\n"); - hclge_set_reset_pending(hdev, HNAE3_IMP_RESET); + set_bit(HNAE3_IMP_RESET, &hdev->reset_pending); set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); *clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B); hdev->rst_stats.imp_rst_cnt++; @@ -3626,7 +3614,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & msix_src_reg) { dev_info(&hdev->pdev->dev, "global reset interrupt\n"); set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); - hclge_set_reset_pending(hdev, HNAE3_GLOBAL_RESET); + set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending); *clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B); hdev->rst_stats.global_rst_cnt++; return HCLGE_VECTOR0_EVENT_RST; @@ -3781,7 +3769,7 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev) snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s", HCLGE_NAME, pci_name(hdev->pdev)); ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle, - IRQ_NOAUTOEN, hdev->misc_vector.name, hdev); + 0, hdev->misc_vector.name, hdev); if (ret) { hclge_free_vector(hdev, 0); dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n", @@ -4074,7 +4062,7 @@ static void hclge_do_reset(struct hclge_dev *hdev) case HNAE3_FUNC_RESET: dev_info(&pdev->dev, "PF reset requested\n"); /* schedule again to check later */ - hclge_set_reset_pending(hdev, HNAE3_FUNC_RESET); + set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending); hclge_reset_task_schedule(hdev); break; default: @@ -4108,8 +4096,6 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, clear_bit(HNAE3_FLR_RESET, addr); } - clear_bit(HNAE3_NONE_RESET, addr); - if (hdev->reset_type != HNAE3_NONE_RESET && rst_level < hdev->reset_type) return HNAE3_NONE_RESET; @@ -4251,7 +4237,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) return false; } else if (hdev->rst_stats.reset_fail_cnt < MAX_RESET_FAIL_CNT) { hdev->rst_stats.reset_fail_cnt++; - hclge_set_reset_pending(hdev, hdev->reset_type); + set_bit(hdev->reset_type, &hdev->reset_pending); dev_info(&hdev->pdev->dev, "re-schedule reset task(%u)\n", hdev->rst_stats.reset_fail_cnt); @@ -4494,20 +4480,8 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) static void hclge_set_def_reset_request(struct hnae3_ae_dev *ae_dev, enum hnae3_reset_type rst_type) { -#define HCLGE_SUPPORT_RESET_TYPE \ - (BIT(HNAE3_FLR_RESET) | BIT(HNAE3_FUNC_RESET) | \ - BIT(HNAE3_GLOBAL_RESET) | BIT(HNAE3_IMP_RESET)) - struct hclge_dev *hdev = ae_dev->priv; - if (!(BIT(rst_type) & HCLGE_SUPPORT_RESET_TYPE)) { - /* To prevent reset triggered by hclge_reset_event */ - set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request); - dev_warn(&hdev->pdev->dev, "unsupported reset type %d\n", - rst_type); - return; - } - set_bit(rst_type, &hdev->default_reset_request); } @@ -11917,6 +11891,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_init_rxd_adv_layout(hdev); + /* Enable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, true); + ret = hclge_init_wol(hdev); if (ret) dev_warn(&pdev->dev, @@ -11929,10 +11906,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_state_init(hdev); hdev->last_reset_time = jiffies; - /* Enable MISC vector(vector0) */ - enable_irq(hdev->misc_vector.vector_irq); - hclge_enable_vector(&hdev->misc_vector, true); - dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n", HCLGE_DRIVER_NAME); @@ -12338,7 +12311,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) /* Disable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, false); - disable_irq(hdev->misc_vector.vector_irq); + synchronize_irq(hdev->misc_vector.vector_irq); /* Disable all hw interrupts */ hclge_config_mac_tnl_int(hdev, false); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index bab16c2191b2..5505caea88e9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -58,9 +58,6 @@ bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb) struct hclge_dev *hdev = vport->back; struct hclge_ptp *ptp = hdev->ptp; - if (!ptp) - return false; - if (!test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) || test_and_set_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state)) { ptp->tx_skipped++; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c index 8c057192aae6..43c1c18fa81f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c @@ -510,9 +510,9 @@ out: static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data, struct hnae3_knic_private_info *kinfo) { +#define HCLGE_RING_REG_OFFSET 0x200 #define HCLGE_RING_INT_REG_OFFSET 0x4 - struct hnae3_queue *tqp; int i, j, reg_num; int data_num_sum; u32 *reg = data; @@ -533,11 +533,10 @@ static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data, reg_num = ARRAY_SIZE(ring_reg_addr_list); for (j = 0; j < kinfo->num_tqps; j++) { reg += hclge_reg_get_tlv(HCLGE_REG_TAG_RING, reg_num, reg); - tqp = kinfo->tqp[j]; for (i = 0; i < reg_num; i++) - *reg++ = readl_relaxed(tqp->io_base - - HCLGE_TQP_REG_OFFSET + - ring_reg_addr_list[i]); + *reg++ = hclge_read_dev(&hdev->hw, + ring_reg_addr_list[i] + + HCLGE_RING_REG_OFFSET * j); } data_num_sum += (reg_num + HCLGE_REG_TLV_SPACE) * kinfo->num_tqps; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 896f1eb172d3..094a7c7b5592 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1395,17 +1395,6 @@ static int hclgevf_notify_roce_client(struct hclgevf_dev *hdev, return ret; } -static void hclgevf_set_reset_pending(struct hclgevf_dev *hdev, - enum hnae3_reset_type reset_type) -{ - /* When an incorrect reset type is executed, the get_reset_level - * function generates the HNAE3_NONE_RESET flag. As a result, this - * type do not need to pending. - */ - if (reset_type != HNAE3_NONE_RESET) - set_bit(reset_type, &hdev->reset_pending); -} - static int hclgevf_reset_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_WAIT_US 20000 @@ -1555,7 +1544,7 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) hdev->rst_stats.rst_fail_cnt); if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT) - hclgevf_set_reset_pending(hdev, hdev->reset_type); + set_bit(hdev->reset_type, &hdev->reset_pending); if (hclgevf_is_reset_pending(hdev)) { set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); @@ -1675,8 +1664,6 @@ static enum hnae3_reset_type hclgevf_get_reset_level(unsigned long *addr) clear_bit(HNAE3_FLR_RESET, addr); } - clear_bit(HNAE3_NONE_RESET, addr); - return rst_level; } @@ -1686,15 +1673,14 @@ static void hclgevf_reset_event(struct pci_dev *pdev, struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); struct hclgevf_dev *hdev = ae_dev->priv; + dev_info(&hdev->pdev->dev, "received reset request from VF enet\n"); + if (hdev->default_reset_request) hdev->reset_level = hclgevf_get_reset_level(&hdev->default_reset_request); else hdev->reset_level = HNAE3_VF_FUNC_RESET; - dev_info(&hdev->pdev->dev, "received reset request from VF enet, reset level is %d\n", - hdev->reset_level); - /* reset of this VF requested */ set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state); hclgevf_reset_task_schedule(hdev); @@ -1705,20 +1691,8 @@ static void hclgevf_reset_event(struct pci_dev *pdev, static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev, enum hnae3_reset_type rst_type) { -#define HCLGEVF_SUPPORT_RESET_TYPE \ - (BIT(HNAE3_VF_RESET) | BIT(HNAE3_VF_FUNC_RESET) | \ - BIT(HNAE3_VF_PF_FUNC_RESET) | BIT(HNAE3_VF_FULL_RESET) | \ - BIT(HNAE3_FLR_RESET) | BIT(HNAE3_VF_EXP_RESET)) - struct hclgevf_dev *hdev = ae_dev->priv; - if (!(BIT(rst_type) & HCLGEVF_SUPPORT_RESET_TYPE)) { - /* To prevent reset triggered by hclge_reset_event */ - set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request); - dev_info(&hdev->pdev->dev, "unsupported reset type %d\n", - rst_type); - return; - } set_bit(rst_type, &hdev->default_reset_request); } @@ -1875,14 +1849,14 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) */ if (hdev->reset_attempts > HCLGEVF_MAX_RESET_ATTEMPTS_CNT) { /* prepare for full reset of stack + pcie interface */ - hclgevf_set_reset_pending(hdev, HNAE3_VF_FULL_RESET); + set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending); /* "defer" schedule the reset task again */ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); } else { hdev->reset_attempts++; - hclgevf_set_reset_pending(hdev, hdev->reset_level); + set_bit(hdev->reset_level, &hdev->reset_pending); set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); } hclgevf_reset_task_schedule(hdev); @@ -2005,7 +1979,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); dev_info(&hdev->pdev->dev, "receive reset interrupt 0x%x!\n", rst_ing_reg); - hclgevf_set_reset_pending(hdev, HNAE3_VF_RESET); + set_bit(HNAE3_VF_RESET, &hdev->reset_pending); set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); *clearval = ~(1U << HCLGEVF_VECTOR0_RST_INT_B); @@ -2315,7 +2289,6 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev) clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task); - timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0); mutex_init(&hdev->mbx_resp.mbx_mutex); sema_init(&hdev->reset_sem, 1); @@ -3015,6 +2988,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) HCLGEVF_DRIVER_NAME); hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c index 7d9d9dbc7560..6db415d8b917 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c @@ -123,10 +123,10 @@ int hclgevf_get_regs_len(struct hnae3_handle *handle) void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, void *data) { +#define HCLGEVF_RING_REG_OFFSET 0x200 #define HCLGEVF_RING_INT_REG_OFFSET 0x4 struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - struct hnae3_queue *tqp; int i, j, reg_um; u32 *reg = data; @@ -147,11 +147,10 @@ void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version, reg_um = ARRAY_SIZE(ring_reg_addr_list); for (j = 0; j < hdev->num_tqps; j++) { reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_RING, reg_um, reg); - tqp = &hdev->htqp[j].q; for (i = 0; i < reg_um; i++) - *reg++ = readl_relaxed(tqp->io_base - - HCLGEVF_TQP_REG_OFFSET + - ring_reg_addr_list[i]); + *reg++ = hclgevf_read_dev(&hdev->hw, + ring_reg_addr_list[i] + + HCLGEVF_RING_REG_OFFSET * j); } reg_um = ARRAY_SIZE(tqp_intr_reg_addr_list); From dabc44c28f118910dea96244d903f0c270225669 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Nov 2024 13:02:17 +0100 Subject: [PATCH 176/281] ALSA: usb-audio: Add quirk for HP 320 FHD Webcam HP 320 FHD Webcam (03f0:654a) seems to have flaky firmware like other webcam devices that don't like the frequency inquiries. Also, Mic Capture Volume has an invalid resolution, hence fix it to be 16 (as a blind shot). Link: https://bugzilla.suse.com/show_bug.cgi?id=1232768 Cc: Link: https://patch.msgid.link/20241105120220.5740-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 1 + sound/usb/quirks.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9945ae55b0d0..bd67027c7677 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1205,6 +1205,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */ + case USB_ID(0x03f0, 0x654a): /* HP 320 FHD Webcam */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set resolution quirk: cval->res = 16\n"); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e6278a245795..c5fd180357d1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2114,6 +2114,8 @@ struct usb_audio_quirk_flags_table { static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { /* Device matches */ + DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x041e, 0x3000, /* Creative SB Extigy */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x041e, 0x4080, /* Creative Live Cam VF0610 */ From 498dbd9aea205db9da674994b74c7bf8e18448bd Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 29 Oct 2024 23:13:38 +0800 Subject: [PATCH 177/281] usb: musb: sunxi: Fix accessing an released usb phy Commit 6ed05c68cbca ("usb: musb: sunxi: Explicitly release USB PHY on exit") will cause that usb phy @glue->xceiv is accessed after released. 1) register platform driver @sunxi_musb_driver // get the usb phy @glue->xceiv sunxi_musb_probe() -> devm_usb_get_phy(). 2) register and unregister platform driver @musb_driver musb_probe() -> sunxi_musb_init() use the phy here //the phy is released here musb_remove() -> sunxi_musb_exit() -> devm_usb_put_phy() 3) register @musb_driver again musb_probe() -> sunxi_musb_init() use the phy here but the phy has been released at 2). ... Fixed by reverting the commit, namely, removing devm_usb_put_phy() from sunxi_musb_exit(). Fixes: 6ed05c68cbca ("usb: musb: sunxi: Explicitly release USB PHY on exit") Cc: stable@vger.kernel.org Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20241029-sunxi_fix-v1-1-9431ed2ab826@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index d54283fd026b..05b6e7e52e02 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -293,8 +293,6 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); - devm_usb_put_phy(glue->dev, glue->xceiv); - return 0; } From df3dff8ab6d79edc942464999d06fbaedf8cdd18 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Fri, 1 Nov 2024 17:15:07 +0800 Subject: [PATCH 178/281] net: hns3: fix kernel crash when uninstalling driver When the driver is uninstalled and the VF is disabled concurrently, a kernel crash occurs. The reason is that the two actions call function pci_disable_sriov(). The num_VFs is checked to determine whether to release the corresponding resources. During the second calling, num_VFs is not 0 and the resource release function is called. However, the corresponding resource has been released during the first invoking. Therefore, the problem occurs: [15277.839633][T50670] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [15278.131557][T50670] Call trace: [15278.134686][T50670] klist_put+0x28/0x12c [15278.138682][T50670] klist_del+0x14/0x20 [15278.142592][T50670] device_del+0xbc/0x3c0 [15278.146676][T50670] pci_remove_bus_device+0x84/0x120 [15278.151714][T50670] pci_stop_and_remove_bus_device+0x6c/0x80 [15278.157447][T50670] pci_iov_remove_virtfn+0xb4/0x12c [15278.162485][T50670] sriov_disable+0x50/0x11c [15278.166829][T50670] pci_disable_sriov+0x24/0x30 [15278.171433][T50670] hnae3_unregister_ae_algo_prepare+0x60/0x90 [hnae3] [15278.178039][T50670] hclge_exit+0x28/0xd0 [hclge] [15278.182730][T50670] __se_sys_delete_module.isra.0+0x164/0x230 [15278.188550][T50670] __arm64_sys_delete_module+0x1c/0x30 [15278.193848][T50670] invoke_syscall+0x50/0x11c [15278.198278][T50670] el0_svc_common.constprop.0+0x158/0x164 [15278.203837][T50670] do_el0_svc+0x34/0xcc [15278.207834][T50670] el0_svc+0x20/0x30 For details, see the following figure. rmmod hclge disable VFs ---------------------------------------------------- hclge_exit() sriov_numvfs_store() ... device_lock() pci_disable_sriov() hns3_pci_sriov_configure() pci_disable_sriov() sriov_disable() sriov_disable() if !num_VFs : if !num_VFs : return; return; sriov_del_vfs() sriov_del_vfs() ... ... klist_put() klist_put() ... ... num_VFs = 0; num_VFs = 0; device_unlock(); In this patch, when driver is removing, we get the device_lock() to protect num_VFs, just like sriov_numvfs_store(). Fixes: 0dd8a25f355b ("net: hns3: disable sriov before unload hclge layer") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241101091507.3644584-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 67b0bf310daa..9a63fbc69408 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -25,8 +25,11 @@ void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo) pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev); if (!pci_id) continue; - if (IS_ENABLED(CONFIG_PCI_IOV)) + if (IS_ENABLED(CONFIG_PCI_IOV)) { + device_lock(&ae_dev->pdev->dev); pci_disable_sriov(ae_dev->pdev); + device_unlock(&ae_dev->pdev->dev); + } } } EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); From 029778a4fd2c90c2e76a902b797c2348a722f1b8 Mon Sep 17 00:00:00 2001 From: Rex Nie Date: Wed, 30 Oct 2024 21:36:32 +0800 Subject: [PATCH 179/281] usb: typec: qcom-pmic: init value of hdr_len/txbuf_len earlier If the read of USB_PDPHY_RX_ACKNOWLEDGE_REG failed, then hdr_len and txbuf_len are uninitialized. This commit stops to print uninitialized value and misleading/false data. Cc: stable@vger.kernel.org Fixes: a4422ff22142 (" usb: typec: qcom: Add Qualcomm PMIC Type-C driver") Signed-off-by: Rex Nie Reviewed-by: Heikki Krogerus Reviewed-by: Bjorn Andersson Acked-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20241030133632.2116-1-rex.nie@jaguarmicro.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c index 5b7f52b74a40..726423684bae 100644 --- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c +++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c @@ -227,6 +227,10 @@ qcom_pmic_typec_pdphy_pd_transmit_payload(struct pmic_typec_pdphy *pmic_typec_pd spin_lock_irqsave(&pmic_typec_pdphy->lock, flags); + hdr_len = sizeof(msg->header); + txbuf_len = pd_header_cnt_le(msg->header) * 4; + txsize_len = hdr_len + txbuf_len - 1; + ret = regmap_read(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_RX_ACKNOWLEDGE_REG, &val); @@ -244,10 +248,6 @@ qcom_pmic_typec_pdphy_pd_transmit_payload(struct pmic_typec_pdphy *pmic_typec_pd if (ret) goto done; - hdr_len = sizeof(msg->header); - txbuf_len = pd_header_cnt_le(msg->header) * 4; - txsize_len = hdr_len + txbuf_len - 1; - /* Write message header sizeof(u16) to USB_PDPHY_TX_BUFFER_HDR_REG */ ret = regmap_bulk_write(pmic_typec_pdphy->regmap, pmic_typec_pdphy->base + USB_PDPHY_TX_BUFFER_HDR_REG, From 9cfb31e4c89d200d8ab7cb1e0bb9e6e8d621ca0b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 4 Nov 2024 16:00:11 +0200 Subject: [PATCH 180/281] usb: dwc3: fix fault at system suspend if device was already runtime suspended If the device was already runtime suspended then during system suspend we cannot access the device registers else it will crash. Also we cannot access any registers after dwc3_core_exit() on some platforms so move the dwc3_enable_susphy() call to the top. Cc: stable@vger.kernel.org # v5.15+ Reported-by: William McVicker Closes: https://lore.kernel.org/all/ZyVfcUuPq56R2m1Y@google.com Fixes: 705e3ce37bcc ("usb: dwc3: core: Fix system suspend on TI AM62 platforms") Signed-off-by: Roger Quadros Acked-by: Thinh Nguyen Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241104-am62-lpm-usb-fix-v1-1-e93df73a4f0d@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 427e5660f87c..98114c2827c0 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2342,10 +2342,18 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) u32 reg; int i; - dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & - DWC3_GUSB2PHYCFG_SUSPHY) || - (dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)) & - DWC3_GUSB3PIPECTL_SUSPHY); + if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) { + dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & + DWC3_GUSB2PHYCFG_SUSPHY) || + (dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)) & + DWC3_GUSB3PIPECTL_SUSPHY); + /* + * TI AM62 platform requires SUSPHY to be + * enabled for system suspend to work. + */ + if (!dwc->susphy_state) + dwc3_enable_susphy(dwc, true); + } switch (dwc->current_dr_role) { case DWC3_GCTL_PRTCAP_DEVICE: @@ -2398,15 +2406,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; } - if (!PMSG_IS_AUTO(msg)) { - /* - * TI AM62 platform requires SUSPHY to be - * enabled for system suspend to work. - */ - if (!dwc->susphy_state) - dwc3_enable_susphy(dwc, true); - } - return 0; } From 7dd08a0b4193087976db6b3ee7807de7e8316f96 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Nov 2024 20:16:42 +0300 Subject: [PATCH 181/281] usb: typec: fix potential out of bounds in ucsi_ccg_update_set_new_cam_cmd() The "*cmd" variable can be controlled by the user via debugfs. That means "new_cam" can be as high as 255 while the size of the uc->updated[] array is UCSI_MAX_ALTMODES (30). The call tree is: ucsi_cmd() // val comes from simple_attr_write_xsigned() -> ucsi_send_command() -> ucsi_send_command_common() -> ucsi_run_command() // calls ucsi->ops->sync_control() -> ucsi_ccg_sync_control() Fixes: 170a6726d0e2 ("usb: typec: ucsi: add support for separate DP altmode devices") Cc: stable Signed-off-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/325102b3-eaa8-4918-a947-22aca1146586@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index ba58d11907bc..bccfc03b5986 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -482,6 +482,8 @@ static void ucsi_ccg_update_set_new_cam_cmd(struct ucsi_ccg *uc, port = uc->orig; new_cam = UCSI_SET_NEW_CAM_GET_AM(*cmd); + if (new_cam >= ARRAY_SIZE(uc->updated)) + return; new_port = &uc->updated[new_cam]; cam = new_port->linked_idx; enter_new_mode = UCSI_SET_NEW_CAM_ENTER(*cmd); From 08a3b241adfd90361c16c3e92f5275b816a73f04 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Nov 2024 01:00:00 +0000 Subject: [PATCH 182/281] MAINTAINERS: Generic Sound Card section ALSA SoC Sound has Generic Sound Card (Simple-Card, Audio-Graph-Card, Audio-Graph-Card2). Adds its Maintainer. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87ikt2a41c.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9d6272c00fbd..388626c3df1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21697,6 +21697,15 @@ S: Supported W: https://github.com/thesofproject/linux/ F: sound/soc/sof/ +SOUND - GENERIC SOUND CARD (Simple-Audio-Card, Audio-Graph-Card) +M: Kuninori Morimoto +S: Supported +L: linux-sound@vger.kernel.org +F: sound/soc/generic/ +F: include/sound/simple_card* +F: Documentation/devicetree/bindings/sound/simple-card.yaml +F: Documentation/devicetree/bindings/sound/audio-graph*.yaml + SOUNDWIRE SUBSYSTEM M: Vinod Koul M: Bard Liao From ab2e5c8ff253ff612f7c6ef9441d2ff6558e5449 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 26 Oct 2024 11:09:42 +0800 Subject: [PATCH 183/281] i2c: muxes: Fix return value check in mule_i2c_mux_probe() If dev_get_regmap() fails, it returns NULL pointer not ERR_PTR(), replace IS_ERR() with NULL pointer check, and return -ENODEV. Fixes: d0f8e97866bf ("i2c: muxes: add support for tsd,mule-i2c multiplexer") Signed-off-by: Yang Yingliang Signed-off-by: Andi Shyti --- drivers/i2c/muxes/i2c-mux-mule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-mule.c b/drivers/i2c/muxes/i2c-mux-mule.c index 8e942470b35f..284ff4afeeac 100644 --- a/drivers/i2c/muxes/i2c-mux-mule.c +++ b/drivers/i2c/muxes/i2c-mux-mule.c @@ -66,8 +66,8 @@ static int mule_i2c_mux_probe(struct platform_device *pdev) priv = i2c_mux_priv(muxc); priv->regmap = dev_get_regmap(mux_dev->parent, NULL); - if (IS_ERR(priv->regmap)) - return dev_err_probe(mux_dev, PTR_ERR(priv->regmap), + if (!priv->regmap) + return dev_err_probe(mux_dev, -ENODEV, "No parent i2c register map\n"); platform_set_drvdata(pdev, muxc); From bd646c768a934d28e574ee940d6759c7954a024d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 5 Nov 2024 09:19:02 +0200 Subject: [PATCH 184/281] thunderbolt: Fix connection issue with Pluggable UD-4VPD dock Rick reported that his Pluggable USB4 dock does not work anymore after upgrading to v6.10 kernel. It looks like commit c6ca1ac9f472 ("thunderbolt: Increase sideband access polling delay") makes the device router enumeration happen later than what might be expected by the dock (although there is no such limit in the USB4 spec) which probably makes it assume there is something wrong with the high-speed link and reset it. After the link is reset the same issue happens again and again. For this reason lower the sideband access delay from 5ms to 1ms. This seems to work fine according to Rick's testing. Reported-by: Rick Lahaye Closes: https://lore.kernel.org/linux-usb/000f01db247b$d10e1520$732a3f60$@581238.xyz/ Tested-by: Rick Lahaye Fixes: c6ca1ac9f472 ("thunderbolt: Increase sideband access polling delay") Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/usb4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 0a9b4aeb3fa1..402fdf8b1cde 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -48,7 +48,7 @@ enum usb4_ba_index { /* Delays in us used with usb4_port_wait_for_bit() */ #define USB4_PORT_DELAY 50 -#define USB4_PORT_SB_DELAY 5000 +#define USB4_PORT_SB_DELAY 1000 static int usb4_native_switch_op(struct tb_switch *sw, u16 opcode, u32 *metadata, u8 *status, From de794169cf1711a98e1e4856c76388e6dadd73a1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 1 Nov 2024 12:18:50 +0200 Subject: [PATCH 185/281] net: ethernet: ti: am65-cpsw: Fix multi queue Rx on J7 On J7 platforms, setting up multiple RX flows was failing as the RX free descriptor ring 0 is shared among all flows and we did not allocate enough elements in the RX free descriptor ring 0 to accommodate for all RX flows. This issue is not present on AM62 as separate pair of rings are used for free and completion rings for each flow. Fix this by allocating enough elements for RX free descriptor ring 0. However, we can no longer rely on desc_idx (descriptor based offsets) to identify the pages in the respective flows as free descriptor ring includes elements for all flows. To solve this, introduce a new swdata data structure to store flow_id and page. This can be used to identify which flow (page_pool) and page the descriptor belonged to when popped out of the RX rings. Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx") Signed-off-by: Roger Quadros Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 73 ++++++++++-------------- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 6 +- 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 0520e9f4bea7..70aea654c79f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -337,9 +337,9 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct cppi5_host_desc_t *desc_rx; struct device *dev = common->dev; + struct am65_cpsw_swdata *swdata; dma_addr_t desc_dma; dma_addr_t buf_dma; - void *swdata; desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); if (!desc_rx) { @@ -363,7 +363,8 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, cppi5_hdesc_attach_buf(desc_rx, buf_dma, AM65_CPSW_MAX_PACKET_SIZE, buf_dma, AM65_CPSW_MAX_PACKET_SIZE); swdata = cppi5_hdesc_get_swdata(desc_rx); - *((void **)swdata) = page_address(page); + swdata->page = page; + swdata->flow_id = flow_idx; return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_idx, desc_rx, desc_dma); @@ -519,36 +520,31 @@ static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_ch static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow, struct page *page, - bool allow_direct, - int desc_idx) + bool allow_direct) { page_pool_put_full_page(flow->page_pool, page, allow_direct); - flow->pages[desc_idx] = NULL; } static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) { - struct am65_cpsw_rx_flow *flow = data; + struct am65_cpsw_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; - struct am65_cpsw_rx_chn *rx_chn; + struct am65_cpsw_swdata *swdata; dma_addr_t buf_dma; + struct page *page; u32 buf_dma_len; - void *page_addr; - void **swdata; - int desc_idx; + u32 flow_id; - rx_chn = &flow->common->rx_chns; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - page_addr = *swdata; + page = swdata->page; + flow_id = swdata->flow_id; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx, - rx_chn->dsize_log2); - am65_cpsw_put_page(flow, virt_to_page(page_addr), false, desc_idx); + am65_cpsw_put_page(&rx_chn->flows[flow_id], page, false); } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, @@ -703,14 +699,13 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) ret = -ENOMEM; goto fail_rx; } - flow->pages[i] = page; ret = am65_cpsw_nuss_rx_push(common, page, flow_idx); if (ret < 0) { dev_err(common->dev, "cannot submit page to rx channel flow %d, error %d\n", flow_idx, ret); - am65_cpsw_put_page(flow, page, false, i); + am65_cpsw_put_page(flow, page, false); goto fail_rx; } } @@ -764,8 +759,8 @@ fail_tx: fail_rx: for (i = 0; i < common->rx_ch_num_flows; i++) - k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i], - am65_cpsw_nuss_rx_cleanup, 0); + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn, + am65_cpsw_nuss_rx_cleanup, !!i); am65_cpsw_destroy_xdp_rxqs(common); @@ -817,11 +812,11 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) dev_err(common->dev, "rx teardown timeout\n"); } - for (i = 0; i < common->rx_ch_num_flows; i++) { + for (i = common->rx_ch_num_flows - 1; i >= 0; i--) { napi_disable(&rx_chn->flows[i].napi_rx); hrtimer_cancel(&rx_chn->flows[i].rx_hrtimer); - k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i], - am65_cpsw_nuss_rx_cleanup, 0); + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn, + am65_cpsw_nuss_rx_cleanup, !!i); } k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); @@ -1028,7 +1023,7 @@ pool_free: static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct am65_cpsw_port *port, struct xdp_buff *xdp, - int desc_idx, int cpu, int *len) + int cpu, int *len) { struct am65_cpsw_common *common = flow->common; struct am65_cpsw_ndev_priv *ndev_priv; @@ -1101,7 +1096,7 @@ drop: } page = virt_to_head_page(xdp->data); - am65_cpsw_put_page(flow, page, true, desc_idx); + am65_cpsw_put_page(flow, page, true); out: return ret; @@ -1150,16 +1145,16 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, struct am65_cpsw_ndev_stats *stats; struct cppi5_host_desc_t *desc_rx; struct device *dev = common->dev; + struct am65_cpsw_swdata *swdata; struct page *page, *new_page; dma_addr_t desc_dma, buf_dma; struct am65_cpsw_port *port; - int headroom, desc_idx, ret; struct net_device *ndev; u32 flow_idx = flow->id; struct sk_buff *skb; struct xdp_buff xdp; + int headroom, ret; void *page_addr; - void **swdata; u32 *psdata; *xdp_state = AM65_CPSW_XDP_PASS; @@ -1182,8 +1177,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, __func__, flow_idx, &desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - page_addr = *swdata; - page = virt_to_page(page_addr); + page = swdata->page; + page_addr = page_address(page); cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); pkt_len = cppi5_hdesc_get_pktlen(desc_rx); @@ -1199,9 +1194,6 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx, - rx_chn->dsize_log2); - skb = am65_cpsw_build_skb(page_addr, ndev, AM65_CPSW_MAX_PACKET_SIZE); if (unlikely(!skb)) { @@ -1213,7 +1205,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); - *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, desc_idx, + *xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, cpu, &pkt_len); if (*xdp_state != AM65_CPSW_XDP_PASS) goto allocate; @@ -1247,10 +1239,8 @@ allocate: return -ENOMEM; } - flow->pages[desc_idx] = new_page; - if (netif_dormant(ndev)) { - am65_cpsw_put_page(flow, new_page, true, desc_idx); + am65_cpsw_put_page(flow, new_page, true); ndev->stats.rx_dropped++; return 0; } @@ -1258,7 +1248,7 @@ allocate: requeue: ret = am65_cpsw_nuss_rx_push(common, new_page, flow_idx); if (WARN_ON(ret < 0)) { - am65_cpsw_put_page(flow, new_page, true, desc_idx); + am65_cpsw_put_page(flow, new_page, true); ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } @@ -2402,10 +2392,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) for (i = 0; i < common->rx_ch_num_flows; i++) { flow = &rx_chn->flows[i]; flow->page_pool = NULL; - flow->pages = devm_kcalloc(dev, AM65_CPSW_MAX_RX_DESC, - sizeof(*flow->pages), GFP_KERNEL); - if (!flow->pages) - return -ENOMEM; } rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg); @@ -2458,7 +2444,8 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) rx_flow_cfg.ring_rxfdq0_id = fdqring_id; rx_flow_cfg.rx_cfg.size = max_desc_num; - rx_flow_cfg.rxfdq_cfg.size = max_desc_num; + /* share same FDQ for all flows */ + rx_flow_cfg.rxfdq_cfg.size = max_desc_num * rx_cfg.flow_id_num; rx_flow_cfg.rxfdq_cfg.mode = common->pdata.fdqring_mode; ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn, @@ -3349,8 +3336,8 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) for (i = 0; i < common->rx_ch_num_flows; i++) k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, - &rx_chan->flows[i], - am65_cpsw_nuss_rx_cleanup, 0); + rx_chan, + am65_cpsw_nuss_rx_cleanup, !!i); k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index dc8d544230dc..92a27ba4c601 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -101,10 +101,14 @@ struct am65_cpsw_rx_flow { struct hrtimer rx_hrtimer; unsigned long rx_pace_timeout; struct page_pool *page_pool; - struct page **pages; char name[32]; }; +struct am65_cpsw_swdata { + u32 flow_id; + struct page *page; +}; + struct am65_cpsw_rx_chn { struct device *dev; struct device *dma_dev; From ba3b7ac4f7143568ed6480180a847dc752780ece Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 1 Nov 2024 12:18:51 +0200 Subject: [PATCH 186/281] net: ethernet: ti: am65-cpsw: fix warning in am65_cpsw_nuss_remove_rx_chns() flow->irq is initialized to 0 which is a valid IRQ. Set it to -EINVAL in error path of am65_cpsw_nuss_init_rx_chns() so we do not try to free an unallocated IRQ in am65_cpsw_nuss_remove_rx_chns(). If user tried to change number of RX queues and am65_cpsw_nuss_init_rx_chns() failed due to any reason, the warning will happen if user tries to change the number of RX queues after the error condition. root@am62xx-evm:~# ethtool -L eth0 rx 3 [ 40.385293] am65-cpsw-nuss 8000000.ethernet: set new flow-id-base 19 [ 40.393211] am65-cpsw-nuss 8000000.ethernet: Failed to init rx flow2 netlink error: Invalid argument root@am62xx-evm:~# ethtool -L eth0 rx 2 [ 82.306427] ------------[ cut here ]------------ [ 82.311075] WARNING: CPU: 0 PID: 378 at kernel/irq/devres.c:144 devm_free_irq+0x84/0x90 [ 82.469770] Call trace: [ 82.472208] devm_free_irq+0x84/0x90 [ 82.475777] am65_cpsw_nuss_remove_rx_chns+0x6c/0xac [ti_am65_cpsw_nuss] [ 82.482487] am65_cpsw_nuss_update_tx_rx_chns+0x2c/0x9c [ti_am65_cpsw_nuss] [ 82.489442] am65_cpsw_set_channels+0x30/0x4c [ti_am65_cpsw_nuss] [ 82.495531] ethnl_set_channels+0x224/0x2dc [ 82.499713] ethnl_default_set_doit+0xb8/0x1b8 [ 82.504149] genl_family_rcv_msg_doit+0xc0/0x124 [ 82.508757] genl_rcv_msg+0x1f0/0x284 [ 82.512409] netlink_rcv_skb+0x58/0x130 [ 82.516239] genl_rcv+0x38/0x50 [ 82.519374] netlink_unicast+0x1d0/0x2b0 [ 82.523289] netlink_sendmsg+0x180/0x3c4 [ 82.527205] __sys_sendto+0xe4/0x158 [ 82.530779] __arm64_sys_sendto+0x28/0x38 [ 82.534782] invoke_syscall+0x44/0x100 [ 82.538526] el0_svc_common.constprop.0+0xc0/0xe0 [ 82.543221] do_el0_svc+0x1c/0x28 [ 82.546528] el0_svc+0x28/0x98 [ 82.549578] el0t_64_sync_handler+0xc0/0xc4 [ 82.553752] el0t_64_sync+0x190/0x194 [ 82.557407] ---[ end trace 0000000000000000 ]--- Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx") Signed-off-by: Roger Quadros Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 70aea654c79f..ba6db61dd227 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2441,6 +2441,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) flow = &rx_chn->flows[i]; flow->id = i; flow->common = common; + flow->irq = -EINVAL; rx_flow_cfg.ring_rxfdq0_id = fdqring_id; rx_flow_cfg.rx_cfg.size = max_desc_num; @@ -2483,6 +2484,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) if (ret) { dev_err(dev, "failure requesting rx %d irq %u, %d\n", i, flow->irq, ret); + flow->irq = -EINVAL; goto err; } } From 3ce3f85787352fa48fc02ef6cbd7a5e5aba93347 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 4 Nov 2024 10:36:13 +0530 Subject: [PATCH 187/281] drm/amdgpu: Fix DPX valid mode check on GC 9.4.3 For DPX mode, the number of memory partitions supported should be less than or equal to 2. Fixes: 1589c82a1085 ("drm/amdgpu: Check memory ranges for valid xcp mode") Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 990c4f580742de7bb78fa57420ffd182fc3ab4cd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 5e8833e4fed2..ccfd2a4b4acc 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -482,7 +482,7 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, case AMDGPU_SPX_PARTITION_MODE: return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0; + return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: return (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 3) && From b46dadf7e3cfe26d0b109c9c3d81b278d6c75361 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:37:52 -0400 Subject: [PATCH 188/281] drm/amdgpu: Adjust debugfs register access permissions Regular users shouldn't have read access. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit c0cfd2e652553d607b910be47d0cc5a7f3a78641) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index cbef720de779..29b45e370885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1648,7 +1648,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | 0444, root, + S_IFREG | 0400, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); From f790a2c494c4ef587eeeb9fca20124de76a1646f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:39:36 -0400 Subject: [PATCH 189/281] drm/amdgpu: Adjust debugfs eviction and IB access permissions Users should not be able to run these. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 7ba9395430f611cfc101b1c2687732baafa239d5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 29b45e370885..eac92130c0be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2100,11 +2100,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); From 4d75b9468021c73108b4439794d69e892b1d24e3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:52:08 -0400 Subject: [PATCH 190/281] drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read() Avoid a possible buffer overflow if size is larger than 4K. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit f5d873f5825b40d886d03bd2aede91d4cf002434) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index eac92130c0be..9da4414de617 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz int r; uint32_t *data, x; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); From 9bb4af400c386374ab1047df44c508512c08c31f Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 5 Nov 2024 15:02:42 +0100 Subject: [PATCH 191/281] ASoC: stm32: spdifrx: fix dma channel release in stm32_spdifrx_remove In case of error when requesting ctrl_chan DMA channel, ctrl_chan is not null. So the release of the dma channel leads to the following issue: [ 4.879000] st,stm32-spdifrx 500d0000.audio-controller: dma_request_slave_channel error -19 [ 4.888975] Unable to handle kernel NULL pointer dereference at virtual address 000000000000003d [...] [ 5.096577] Call trace: [ 5.099099] dma_release_channel+0x24/0x100 [ 5.103235] stm32_spdifrx_remove+0x24/0x60 [snd_soc_stm32_spdifrx] [ 5.109494] stm32_spdifrx_probe+0x320/0x4c4 [snd_soc_stm32_spdifrx] To avoid this issue, release channel only if the pointer is valid. Fixes: 794df9448edb ("ASoC: stm32: spdifrx: manage rebind issue") Signed-off-by: Amelie Delaunay Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20241105140242.527279-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_spdifrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index d1b32ba1e1a2..9e30852de93c 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -939,7 +939,7 @@ static void stm32_spdifrx_remove(struct platform_device *pdev) { struct stm32_spdifrx_data *spdifrx = platform_get_drvdata(pdev); - if (spdifrx->ctrl_chan) + if (!IS_ERR(spdifrx->ctrl_chan)) dma_release_channel(spdifrx->ctrl_chan); if (spdifrx->dmab) From 9c9201afebea1efc7ea4b8f721ee18a05bb8aca1 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Tue, 5 Nov 2024 11:27:47 +0900 Subject: [PATCH 192/281] mm/slab: fix warning caused by duplicate kmem_cache creation in kmem_buckets_create Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], resulting in kmem_buckets_create() attempting to create a kmem_cache for size 16 twice. This duplication triggers warnings on boot: [ 2.325108] ------------[ cut here ]------------ [ 2.325135] kmem_cache of name 'memdup_user-16' already exists [ 2.325783] WARNING: CPU: 0 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 [ 2.327957] Modules linked in: [ 2.328550] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5mm-unstable-arm64+ #12 [ 2.328683] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 [ 2.328790] pstate: 61000009 (nZCv daif -PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 2.328911] pc : __kmem_cache_create_args+0xb8/0x3b0 [ 2.328930] lr : __kmem_cache_create_args+0xb8/0x3b0 [ 2.328942] sp : ffff800083d6fc50 [ 2.328961] x29: ffff800083d6fc50 x28: f2ff0000c1674410 x27: ffff8000820b0598 [ 2.329061] x26: 000000007fffffff x25: 0000000000000010 x24: 0000000000002000 [ 2.329101] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 [ 2.329118] x20: f2ff0000c1674410 x19: f5ff0000c16364c0 x18: ffff800083d80030 [ 2.329135] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 2.329152] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 [ 2.329169] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 [ 2.329194] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 [ 2.329210] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 [ 2.329226] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 [ 2.329291] Call trace: [ 2.329407] __kmem_cache_create_args+0xb8/0x3b0 [ 2.329499] kmem_buckets_create+0xfc/0x320 [ 2.329526] init_user_buckets+0x34/0x78 [ 2.329540] do_one_initcall+0x64/0x3c8 [ 2.329550] kernel_init_freeable+0x26c/0x578 [ 2.329562] kernel_init+0x3c/0x258 [ 2.329574] ret_from_fork+0x10/0x20 [ 2.329698] ---[ end trace 0000000000000000 ]--- [ 2.403704] ------------[ cut here ]------------ [ 2.404716] kmem_cache of name 'msg_msg-16' already exists [ 2.404801] WARNING: CPU: 2 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 [ 2.404842] Modules linked in: [ 2.404971] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.0-rc5mm-unstable-arm64+ #12 [ 2.405026] Tainted: [W]=WARN [ 2.405043] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 [ 2.405057] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.405079] pc : __kmem_cache_create_args+0xb8/0x3b0 [ 2.405100] lr : __kmem_cache_create_args+0xb8/0x3b0 [ 2.405111] sp : ffff800083d6fc50 [ 2.405115] x29: ffff800083d6fc50 x28: fbff0000c1674410 x27: ffff8000820b0598 [ 2.405135] x26: 000000000000ffd0 x25: 0000000000000010 x24: 0000000000006000 [ 2.405153] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 [ 2.405169] x20: fbff0000c1674410 x19: fdff0000c163d6c0 x18: ffff800083d80030 [ 2.405185] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 2.405201] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 [ 2.405217] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 [ 2.405233] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 [ 2.405248] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 [ 2.405271] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 [ 2.405287] Call trace: [ 2.405293] __kmem_cache_create_args+0xb8/0x3b0 [ 2.405305] kmem_buckets_create+0xfc/0x320 [ 2.405315] init_msg_buckets+0x34/0x78 [ 2.405326] do_one_initcall+0x64/0x3c8 [ 2.405337] kernel_init_freeable+0x26c/0x578 [ 2.405348] kernel_init+0x3c/0x258 [ 2.405360] ret_from_fork+0x10/0x20 [ 2.405370] ---[ end trace 0000000000000000 ]--- To address this, alias kmem_cache for sizes smaller than min alignment to the aligned sized kmem_cache, as done with the default system kmalloc bucket. Fixes: b32801d1255b ("mm/slab: Introduce kmem_buckets_create() and family") Cc: # v6.11+ Signed-off-by: Koichiro Den Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 552b92dfdac7..893d32059915 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -380,8 +380,11 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, unsigned int usersize, void (*ctor)(void *)) { + unsigned long mask = 0; + unsigned int idx; kmem_buckets *b; - int idx; + + BUILD_BUG_ON(ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]) > BITS_PER_LONG); /* * When the separate buckets API is not built in, just return @@ -403,7 +406,7 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) { char *short_size, *cache_name; unsigned int cache_useroffset, cache_usersize; - unsigned int size; + unsigned int size, aligned_idx; if (!kmalloc_caches[KMALLOC_NORMAL][idx]) continue; @@ -416,10 +419,6 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, if (WARN_ON(!short_size)) goto fail; - cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); - if (WARN_ON(!cache_name)) - goto fail; - if (useroffset >= size) { cache_useroffset = 0; cache_usersize = 0; @@ -427,18 +426,28 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, cache_useroffset = useroffset; cache_usersize = min(size - cache_useroffset, usersize); } - (*b)[idx] = kmem_cache_create_usercopy(cache_name, size, + + aligned_idx = __kmalloc_index(size, false); + if (!(*b)[aligned_idx]) { + cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); + if (WARN_ON(!cache_name)) + goto fail; + (*b)[aligned_idx] = kmem_cache_create_usercopy(cache_name, size, 0, flags, cache_useroffset, cache_usersize, ctor); - kfree(cache_name); - if (WARN_ON(!(*b)[idx])) - goto fail; + kfree(cache_name); + if (WARN_ON(!(*b)[aligned_idx])) + goto fail; + set_bit(aligned_idx, &mask); + } + if (idx != aligned_idx) + (*b)[idx] = (*b)[aligned_idx]; } return b; fail: - for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) + for_each_set_bit(idx, &mask, ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL])) kmem_cache_destroy((*b)[idx]); kmem_cache_free(kmem_buckets_cache, b); From af797b831d8975cb4610f396dcb7f03f4b9908e7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 4 Nov 2024 20:35:23 -0800 Subject: [PATCH 193/281] drm/xe: Fix possible exec queue leak in exec IOCTL In a couple of places after an exec queue is looked up the exec IOCTL returns on input errors without dropping the exec queue ref. Fix this ensuring the exec queue ref is dropped on input error. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Signed-off-by: Matthew Brost Reviewed-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241105043524.4062774-2-matthew.brost@intel.com (cherry picked from commit 07064a200b40ac2195cb6b7b779897d9377e5e6f) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f23ac1e2ed88..6de12f91b865 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -132,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, args->num_batch_buffer && - q->width != args->num_batch_buffer)) - return -EINVAL; + q->width != args->num_batch_buffer)) { + err = -EINVAL; + goto err_exec_queue; + } if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { err = -ECANCELED; From 64a2b6ed4bfd890a0e91955dd8ef8422a3944ed9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 4 Nov 2024 20:35:24 -0800 Subject: [PATCH 194/281] drm/xe: Drop VM dma-resv lock on xe_sync_in_fence_get failure in exec IOCTL Upon failure all locks need to be dropped before returning to the user. Fixes: 58480c1c912f ("drm/xe: Skip VMAs pin when requesting signal to the last XE_EXEC") Cc: Signed-off-by: Matthew Brost Reviewed-by: Tejas Upadhyay Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20241105043524.4062774-3-matthew.brost@intel.com (cherry picked from commit 7d1a4258e602ffdce529f56686925034c1b3b095) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 6de12f91b865..756b492f13b0 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -224,6 +224,7 @@ retry: fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); if (IS_ERR(fence)) { err = PTR_ERR(fence); + xe_vm_unlock(vm); goto err_unlock_list; } for (i = 0; i < num_syncs; i++) From a353c78459f4d116216393cc29032ef5fe1472d2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 4 Nov 2024 15:49:01 +0100 Subject: [PATCH 195/281] drm/xe/pf: Fix potential GGTT allocation leak In unlikely event that we fail during sending the new VF GGTT configuration to the GuC, we will free only the GGTT node data struct but will miss to release the actual GGTT allocation. This will later lead to list corruption, GGTT space leak and finally risking crash when unloading the driver: [ ] ... [drm] GT0: PF: Failed to provision VF1 with 1073741824 (1.00 GiB) GGTT (-EIO) [ ] ... [drm] GT0: PF: VF1 provisioning remains at 0 (0 B) GGTT [ ] list_add corruption. next->prev should be prev (ffff88813cfcd628), but was 0000000000000000. (next=ffff88813cfe2028). [ ] RIP: 0010:__list_add_valid_or_report+0x6b/0xb0 [ ] Call Trace: [ ] drm_mm_insert_node_in_range+0x2c0/0x4e0 [ ] xe_ggtt_node_insert+0x46/0x70 [xe] [ ] pf_provision_vf_ggtt+0x7f5/0xa70 [xe] [ ] xe_gt_sriov_pf_config_set_ggtt+0x5e/0x770 [xe] [ ] ggtt_set+0x4b/0x70 [xe] [ ] simple_attr_write_xsigned.constprop.0.isra.0+0xb0/0x110 [ ] ... [drm] GT0: PF: Failed to provision VF1 with 1073741824 (1.00 GiB) GGTT (-ENOSPC) [ ] ... [drm] GT0: PF: VF1 provisioning remains at 0 (0 B) GGTT [ ] Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b7b: 0000 [#1] PREEMPT SMP NOPTI [ ] RIP: 0010:drm_mm_remove_node+0x1b7/0x390 [ ] Call Trace: [ ] [ ] ? die_addr+0x2e/0x80 [ ] ? exc_general_protection+0x1a1/0x3e0 [ ] ? asm_exc_general_protection+0x22/0x30 [ ] ? drm_mm_remove_node+0x1b7/0x390 [ ] ggtt_node_remove+0xa5/0xf0 [xe] [ ] xe_ggtt_node_remove+0x35/0x70 [xe] [ ] xe_ttm_bo_destroy+0x123/0x220 [xe] [ ] intel_user_framebuffer_destroy+0x44/0x70 [xe] [ ] intel_plane_destroy_state+0x3b/0xc0 [xe] [ ] drm_atomic_state_default_clear+0x1cd/0x2f0 [ ] intel_atomic_state_clear+0x9/0x20 [xe] [ ] __drm_atomic_state_free+0x1d/0xb0 Fix that by using pf_release_ggtt() on the error path, which now works regardless if the node has GGTT allocation or not. Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241104144901.1903-1-michal.wajdeczko@intel.com (cherry picked from commit 43b1dd2b550f0861ce80fbfffd5881b1b26272b1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 8250ef71e685..afdb477ecf83 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -387,6 +387,8 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ xe_ggtt_node_remove(node, false); + } else { + xe_ggtt_node_fini(node); } } @@ -442,7 +444,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) config->ggtt_region = node; return 0; err: - xe_ggtt_node_fini(node); + pf_release_ggtt(tile, node); return err; } From 514447a1219021298329ce586536598c3b4b2dc0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Nov 2024 06:38:12 -0800 Subject: [PATCH 196/281] drm/xe: Stop accumulating LRC timestamp on job_free The exec queue timestamp is only really useful when it's being queried through the fdinfo. There's no need to update it so often, on every job_free. Tracing a simple app like vkcube running shows an update rate of ~ 120Hz. In case of discrete, the BO is on vram, creating a lot of pcie transactions. The update on job_free() is used to cover a gap: if exec queue is created and destroyed rapidly, before a new query, the timestamp still needs to be accumulated and accounted for in the xef. Initial implementation in commit 6109f24f87d7 ("drm/xe: Add helper to accumulate exec queue runtime") couldn't do it on the exec_queue_fini since the xef could be gone at that point. However since commit ce8c161cbad4 ("drm/xe: Add ref counting for xe_file") the xef is refcounted and the exec queue always holds a reference, making this safe now. Improve the fix in commit 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") by reducing the frequency in which the update is needed. Fixes: 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") Reviewed-by: Nirmoy Das Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241104143815.2112272-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 83db047d9425d9a649f01573797558eff0f632e1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 6 ++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d098d2dd1b2d..fd0f3b3c9101 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -260,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) { int i; + /* + * Before releasing our ref to lrc and xef, accumulate our run ticks + */ + xe_exec_queue_update_run_ticks(q); + for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); + __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index f903b0772722..4f5d00aea716 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -745,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); - xe_exec_queue_update_run_ticks(job->q); - trace_xe_sched_job_free(job); xe_sched_job_put(job); } From e66f3185fa04ccb807c6fbf0ea066574f4308831 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Oct 2024 12:59:34 -0700 Subject: [PATCH 197/281] mm/thp: fix deferred split queue not partially_mapped Recent changes are putting more pressure on THP deferred split queues: under load revealing long-standing races, causing list_del corruptions, "Bad page state"s and worse (I keep BUGs in both of those, so usually don't get to see how badly they end up without). The relevant recent changes being 6.8's mTHP, 6.10's mTHP swapout, and 6.12's mTHP swapin, improved swap allocation, and underused THP splitting. The new unlocked list_del_init() in deferred_split_scan() is buggy. I gave bad advice, it looks plausible since that's a local on-stack list, but the fact is that it can race with a third party freeing or migrating the preceding folio (properly unqueueing it with refcount 0 while holding split_queue_lock), thereby corrupting the list linkage. The obvious answer would be to take split_queue_lock there: but it has a long history of contention, so I'm reluctant to add to that. Instead, make sure that there is always one safe (raised refcount) folio before, by delaying its folio_put(). (And of course I was wrong to suggest updating split_queue_len without the lock: leave that until the splice.) And remove two over-eager partially_mapped checks, restoring those tests to how they were before: if uncharge_folio() or free_tail_page_prepare() finds _deferred_list non-empty, it's in trouble whether or not that folio is partially_mapped (and the flag was already cleared in the latter case). Link: https://lkml.kernel.org/r/81e34a8b-113a-0701-740e-2135c97eb1d7@google.com Fixes: dafff3f4c850 ("mm: split underused THPs") Signed-off-by: Hugh Dickins Acked-by: Usama Arif Reviewed-by: David Hildenbrand Reviewed-by: Baolin Wang Acked-by: Zi Yan Cc: Barry Song Cc: Chris Li Cc: Johannes Weiner Cc: Kefeng Wang Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Ryan Roberts Cc: Shakeel Butt Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton --- mm/huge_memory.c | 21 +++++++++++++++++---- mm/memcontrol.c | 3 +-- mm/page_alloc.c | 5 ++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2fb328880b50..a1d345f1680c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3718,8 +3718,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, struct deferred_split *ds_queue = &pgdata->deferred_split_queue; unsigned long flags; LIST_HEAD(list); - struct folio *folio, *next; - int split = 0; + struct folio *folio, *next, *prev = NULL; + int split = 0, removed = 0; #ifdef CONFIG_MEMCG if (sc->memcg) @@ -3775,15 +3775,28 @@ next: */ if (!did_split && !folio_test_partially_mapped(folio)) { list_del_init(&folio->_deferred_list); - ds_queue->split_queue_len--; + removed++; + } else { + /* + * That unlocked list_del_init() above would be unsafe, + * unless its folio is separated from any earlier folios + * left on the list (which may be concurrently unqueued) + * by one safe folio with refcount still raised. + */ + swap(folio, prev); } - folio_put(folio); + if (folio) + folio_put(folio); } spin_lock_irqsave(&ds_queue->split_queue_lock, flags); list_splice_tail(&list, &ds_queue->split_queue); + ds_queue->split_queue_len -= removed; spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); + if (prev) + folio_put(prev); + /* * Stop shrinker if we didn't split any page, but the queue is empty. * This can happen if pages were freed under us. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7845c64a2c57..2703227cce88 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4631,8 +4631,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON_FOLIO(folio_order(folio) > 1 && !folio_test_hugetlb(folio) && - !list_empty(&folio->_deferred_list) && - folio_test_partially_mapped(folio), folio); + !list_empty(&folio->_deferred_list), folio); /* * Nobody should be changing or seriously looking at diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 94a2ffe28008..5e108ae755cc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -961,9 +961,8 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) break; case 2: /* the second tail page: deferred_list overlaps ->mapping */ - if (unlikely(!list_empty(&folio->_deferred_list) && - folio_test_partially_mapped(folio))) { - bad_page(page, "partially mapped folio on deferred list"); + if (unlikely(!list_empty(&folio->_deferred_list))) { + bad_page(page, "on deferred list"); goto out; } break; From f8f931bba0f92052cf842b7e30917b1afcc77d5a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Oct 2024 13:02:13 -0700 Subject: [PATCH 198/281] mm/thp: fix deferred split unqueue naming and locking Recent changes are putting more pressure on THP deferred split queues: under load revealing long-standing races, causing list_del corruptions, "Bad page state"s and worse (I keep BUGs in both of those, so usually don't get to see how badly they end up without). The relevant recent changes being 6.8's mTHP, 6.10's mTHP swapout, and 6.12's mTHP swapin, improved swap allocation, and underused THP splitting. Before fixing locking: rename misleading folio_undo_large_rmappable(), which does not undo large_rmappable, to folio_unqueue_deferred_split(), which is what it does. But that and its out-of-line __callee are mm internals of very limited usability: add comment and WARN_ON_ONCEs to check usage; and return a bool to say if a deferred split was unqueued, which can then be used in WARN_ON_ONCEs around safety checks (sparing callers the arcane conditionals in __folio_unqueue_deferred_split()). Just omit the folio_unqueue_deferred_split() from free_unref_folios(), all of whose callers now call it beforehand (and if any forget then bad_page() will tell) - except for its caller put_pages_list(), which itself no longer has any callers (and will be deleted separately). Swapout: mem_cgroup_swapout() has been resetting folio->memcg_data 0 without checking and unqueueing a THP folio from deferred split list; which is unfortunate, since the split_queue_lock depends on the memcg (when memcg is enabled); so swapout has been unqueueing such THPs later, when freeing the folio, using the pgdat's lock instead: potentially corrupting the memcg's list. __remove_mapping() has frozen refcount to 0 here, so no problem with calling folio_unqueue_deferred_split() before resetting memcg_data. That goes back to 5.4 commit 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware"): which included a check on swapcache before adding to deferred queue, but no check on deferred queue before adding THP to swapcache. That worked fine with the usual sequence of events in reclaim (though there were a couple of rare ways in which a THP on deferred queue could have been swapped out), but 6.12 commit dafff3f4c850 ("mm: split underused THPs") avoids splitting underused THPs in reclaim, which makes swapcache THPs on deferred queue commonplace. Keep the check on swapcache before adding to deferred queue? Yes: it is no longer essential, but preserves the existing behaviour, and is likely to be a worthwhile optimization (vmstat showed much more traffic on the queue under swapping load if the check was removed); update its comment. Memcg-v1 move (deprecated): mem_cgroup_move_account() has been changing folio->memcg_data without checking and unqueueing a THP folio from the deferred list, sometimes corrupting "from" memcg's list, like swapout. Refcount is non-zero here, so folio_unqueue_deferred_split() can only be used in a WARN_ON_ONCE to validate the fix, which must be done earlier: mem_cgroup_move_charge_pte_range() first try to split the THP (splitting of course unqueues), or skip it if that fails. Not ideal, but moving charge has been requested, and khugepaged should repair the THP later: nobody wants new custom unqueueing code just for this deprecated case. The 87eaceb3faa5 commit did have the code to move from one deferred list to another (but was not conscious of its unsafety while refcount non-0); but that was removed by 5.6 commit fac0516b5534 ("mm: thp: don't need care deferred split queue in memcg charge move path"), which argued that the existence of a PMD mapping guarantees that the THP cannot be on a deferred list. As above, false in rare cases, and now commonly false. Backport to 6.11 should be straightforward. Earlier backports must take care that other _deferred_list fixes and dependencies are included. There is not a strong case for backports, but they can fix cornercases. Link: https://lkml.kernel.org/r/8dc111ae-f6db-2da7-b25c-7a20b1effe3b@google.com Fixes: 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware") Fixes: dafff3f4c850 ("mm: split underused THPs") Signed-off-by: Hugh Dickins Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: Johannes Weiner Cc: Kefeng Wang Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Ryan Roberts Cc: Shakeel Butt Cc: Usama Arif Cc: Wei Yang Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 35 ++++++++++++++++++++++++++--------- mm/internal.h | 10 +++++----- mm/memcontrol-v1.c | 25 +++++++++++++++++++++++++ mm/memcontrol.c | 8 +++++--- mm/migrate.c | 4 ++-- mm/page_alloc.c | 1 - mm/swap.c | 4 ++-- mm/vmscan.c | 4 ++-- 8 files changed, 67 insertions(+), 24 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a1d345f1680c..03fd4bc39ea1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3588,10 +3588,27 @@ int split_folio_to_list(struct folio *folio, struct list_head *list) return split_huge_page_to_list_to_order(&folio->page, list, ret); } -void __folio_undo_large_rmappable(struct folio *folio) +/* + * __folio_unqueue_deferred_split() is not to be called directly: + * the folio_unqueue_deferred_split() inline wrapper in mm/internal.h + * limits its calls to those folios which may have a _deferred_list for + * queueing THP splits, and that list is (racily observed to be) non-empty. + * + * It is unsafe to call folio_unqueue_deferred_split() until folio refcount is + * zero: because even when split_queue_lock is held, a non-empty _deferred_list + * might be in use on deferred_split_scan()'s unlocked on-stack list. + * + * If memory cgroups are enabled, split_queue_lock is in the mem_cgroup: it is + * therefore important to unqueue deferred split before changing folio memcg. + */ +bool __folio_unqueue_deferred_split(struct folio *folio) { struct deferred_split *ds_queue; unsigned long flags; + bool unqueued = false; + + WARN_ON_ONCE(folio_ref_count(folio)); + WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio)); ds_queue = get_deferred_split_queue(folio); spin_lock_irqsave(&ds_queue->split_queue_lock, flags); @@ -3603,8 +3620,11 @@ void __folio_undo_large_rmappable(struct folio *folio) MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1); } list_del_init(&folio->_deferred_list); + unqueued = true; } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); + + return unqueued; /* useful for debug warnings */ } /* partially_mapped=false won't clear PG_partially_mapped folio flag */ @@ -3627,14 +3647,11 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped) return; /* - * The try_to_unmap() in page reclaim path might reach here too, - * this may cause a race condition to corrupt deferred split queue. - * And, if page reclaim is already handling the same folio, it is - * unnecessary to handle it again in shrinker. - * - * Check the swapcache flag to determine if the folio is being - * handled by page reclaim since THP swap would add the folio into - * swap cache before calling try_to_unmap(). + * Exclude swapcache: originally to avoid a corrupt deferred split + * queue. Nowadays that is fully prevented by mem_cgroup_swapout(); + * but if page reclaim is already handling the same folio, it is + * unnecessary to handle it again in the shrinker, so excluding + * swapcache here may still be a useful optimization. */ if (folio_test_swapcache(folio)) return; diff --git a/mm/internal.h b/mm/internal.h index 93083bbeeefa..16c1f3cd599e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -639,11 +639,11 @@ static inline void folio_set_order(struct folio *folio, unsigned int order) #endif } -void __folio_undo_large_rmappable(struct folio *folio); -static inline void folio_undo_large_rmappable(struct folio *folio) +bool __folio_unqueue_deferred_split(struct folio *folio); +static inline bool folio_unqueue_deferred_split(struct folio *folio) { if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) - return; + return false; /* * At this point, there is no one trying to add the folio to @@ -651,9 +651,9 @@ static inline void folio_undo_large_rmappable(struct folio *folio) * to check without acquiring the split_queue_lock. */ if (data_race(list_empty(&folio->_deferred_list))) - return; + return false; - __folio_undo_large_rmappable(folio); + return __folio_unqueue_deferred_split(folio); } static inline struct folio *page_rmappable_folio(struct page *page) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 81d8819f13cd..f8744f5630bb 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -848,6 +848,8 @@ static int mem_cgroup_move_account(struct folio *folio, css_get(&to->css); css_put(&from->css); + /* Warning should never happen, so don't worry about refcount non-0 */ + WARN_ON_ONCE(folio_unqueue_deferred_split(folio)); folio->memcg_data = (unsigned long)to; __folio_memcg_unlock(from); @@ -1217,7 +1219,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, enum mc_target_type target_type; union mc_target target; struct folio *folio; + bool tried_split_before = false; +retry_pmd: ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (mc.precharge < HPAGE_PMD_NR) { @@ -1227,6 +1231,27 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); if (target_type == MC_TARGET_PAGE) { folio = target.folio; + /* + * Deferred split queue locking depends on memcg, + * and unqueue is unsafe unless folio refcount is 0: + * split or skip if on the queue? first try to split. + */ + if (!list_empty(&folio->_deferred_list)) { + spin_unlock(ptl); + if (!tried_split_before) + split_folio(folio); + folio_unlock(folio); + folio_put(folio); + if (tried_split_before) + return 0; + tried_split_before = true; + goto retry_pmd; + } + /* + * So long as that pmd lock is held, the folio cannot + * be racily added to the _deferred_list, because + * __folio_remove_rmap() will find !partially_mapped. + */ if (folio_isolate_lru(folio)) { if (!mem_cgroup_move_account(folio, true, mc.from, mc.to)) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2703227cce88..06df2af97415 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4629,9 +4629,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) struct obj_cgroup *objcg; VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON_FOLIO(folio_order(folio) > 1 && - !folio_test_hugetlb(folio) && - !list_empty(&folio->_deferred_list), folio); /* * Nobody should be changing or seriously looking at @@ -4678,6 +4675,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) ug->nr_memory += nr_pages; ug->pgpgout++; + WARN_ON_ONCE(folio_unqueue_deferred_split(folio)); folio->memcg_data = 0; } @@ -4789,6 +4787,9 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new) /* Transfer the charge and the css ref */ commit_charge(new, memcg); + + /* Warning should never happen, so don't worry about refcount non-0 */ + WARN_ON_ONCE(folio_unqueue_deferred_split(old)); old->memcg_data = 0; } @@ -4975,6 +4976,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) VM_BUG_ON_FOLIO(oldid, folio); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + folio_unqueue_deferred_split(folio); folio->memcg_data = 0; if (!mem_cgroup_is_root(memcg)) diff --git a/mm/migrate.c b/mm/migrate.c index fab84a776088..dfa24e41e8f9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -490,7 +490,7 @@ static int __folio_migrate_mapping(struct address_space *mapping, folio_test_large_rmappable(folio)) { if (!folio_ref_freeze(folio, expected_count)) return -EAGAIN; - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); folio_ref_unfreeze(folio, expected_count); } @@ -515,7 +515,7 @@ static int __folio_migrate_mapping(struct address_space *mapping, } /* Take off deferred split queue while frozen and memcg set */ - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); /* * Now we know that no one else is looking at the folio: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5e108ae755cc..8ad38cd5e574 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2681,7 +2681,6 @@ void free_unref_folios(struct folio_batch *folios) unsigned long pfn = folio_pfn(folio); unsigned int order = folio_order(folio); - folio_undo_large_rmappable(folio); if (!free_pages_prepare(&folio->page, order)) continue; /* diff --git a/mm/swap.c b/mm/swap.c index 835bdf324b76..b8e3259ea2c4 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -121,7 +121,7 @@ void __folio_put(struct folio *folio) } page_cache_release(folio); - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); mem_cgroup_uncharge(folio); free_unref_page(&folio->page, folio_order(folio)); } @@ -988,7 +988,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) free_huge_folio(folio); continue; } - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); __page_cache_release(folio, &lruvec, &flags); if (j != i) diff --git a/mm/vmscan.c b/mm/vmscan.c index ddaaff67642e..28ba2b06fc7d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1476,7 +1476,7 @@ free_it: */ nr_reclaimed += nr_pages; - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); if (folio_batch_add(&free_folios, folio) == 0) { mem_cgroup_uncharge_folios(&free_folios); try_to_unmap_flush(); @@ -1864,7 +1864,7 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec, if (unlikely(folio_put_testzero(folio))) { __folio_clear_lru_flags(folio); - folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); if (folio_batch_add(&free_folios, folio) == 0) { spin_unlock_irq(&lruvec->lru_lock); mem_cgroup_uncharge_folios(&free_folios); From 3dd6ed34ce1f2356a77fb88edafb5ec96784e3cf Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Oct 2024 18:11:44 +0000 Subject: [PATCH 199/281] mm: avoid unsafe VMA hook invocation when error arises on mmap hook Patch series "fix error handling in mmap_region() and refactor (hotfixes)", v4. mmap_region() is somewhat terrifying, with spaghetti-like control flow and numerous means by which issues can arise and incomplete state, memory leaks and other unpleasantness can occur. A large amount of the complexity arises from trying to handle errors late in the process of mapping a VMA, which forms the basis of recently observed issues with resource leaks and observable inconsistent state. This series goes to great lengths to simplify how mmap_region() works and to avoid unwinding errors late on in the process of setting up the VMA for the new mapping, and equally avoids such operations occurring while the VMA is in an inconsistent state. The patches in this series comprise the minimal changes required to resolve existing issues in mmap_region() error handling, in order that they can be hotfixed and backported. There is additionally a follow up series which goes further, separated out from the v1 series and sent and updated separately. This patch (of 5): After an attempted mmap() fails, we are no longer in a situation where we can safely interact with VMA hooks. This is currently not enforced, meaning that we need complicated handling to ensure we do not incorrectly call these hooks. We can avoid the whole issue by treating the VMA as suspect the moment that the file->f_ops->mmap() function reports an error by replacing whatever VMA operations were installed with a dummy empty set of VMA operations. We do so through a new helper function internal to mm - mmap_file() - which is both more logically named than the existing call_mmap() function and correctly isolates handling of the vm_op reassignment to mm. All the existing invocations of call_mmap() outside of mm are ultimately nested within the call_mmap() from mm, which we now replace. It is therefore safe to leave call_mmap() in place as a convenience function (and to avoid churn). The invokers are: ovl_file_operations -> mmap -> ovl_mmap() -> backing_file_mmap() coda_file_operations -> mmap -> coda_file_mmap() shm_file_operations -> shm_mmap() shm_file_operations_huge -> shm_mmap() dma_buf_fops -> dma_buf_mmap_internal -> i915_dmabuf_ops -> i915_gem_dmabuf_mmap() None of these callers interact with vm_ops or mappings in a problematic way on error, quickly exiting out. Link: https://lkml.kernel.org/r/cover.1730224667.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/d41fd763496fd0048a962f3fd9407dc72dd4fd86.1730224667.git.lorenzo.stoakes@oracle.com Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Reviewed-by: Jann Horn Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. Miller Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Xu Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 27 +++++++++++++++++++++++++++ mm/mmap.c | 6 +++--- mm/nommu.c | 4 ++-- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 16c1f3cd599e..4eab2961e69c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -108,6 +108,33 @@ static inline void *folio_raw_mapping(const struct folio *folio) return (void *)(mapping & ~PAGE_MAPPING_FLAGS); } +/* + * This is a file-backed mapping, and is about to be memory mapped - invoke its + * mmap hook and safely handle error conditions. On error, VMA hooks will be + * mutated. + * + * @file: File which backs the mapping. + * @vma: VMA which we are mapping. + * + * Returns: 0 if success, error otherwise. + */ +static inline int mmap_file(struct file *file, struct vm_area_struct *vma) +{ + int err = call_mmap(file, vma); + + if (likely(!err)) + return 0; + + /* + * OK, we tried to call the file hook for mmap(), but an error + * arose. The mapping is in an inconsistent state and we most not invoke + * any further hooks on it. + */ + vma->vm_ops = &vma_dummy_vm_ops; + + return err; +} + #ifdef CONFIG_MMU /* Flags for folio_pte_batch(). */ diff --git a/mm/mmap.c b/mm/mmap.c index 9841b41e3c76..6e3b25f7728f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1422,7 +1422,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* * clear PTEs while the vma is still in the tree so that rmap * cannot race with the freeing later in the truncate scenario. - * This is also needed for call_mmap(), which is why vm_ops + * This is also needed for mmap_file(), which is why vm_ops * close function is called. */ vms_clean_up_area(&vms, &mas_detach); @@ -1447,7 +1447,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (file) { vma->vm_file = get_file(file); - error = call_mmap(file, vma); + error = mmap_file(file, vma); if (error) goto unmap_and_free_vma; @@ -1470,7 +1470,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_iter_config(&vmi, addr, end); /* - * If vm_flags changed after call_mmap(), we should try merge + * If vm_flags changed after mmap_file(), we should try merge * vma again as we may succeed this time. */ if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) { diff --git a/mm/nommu.c b/mm/nommu.c index 385b0c15add8..f9ccc02458ec 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -885,7 +885,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = call_mmap(vma->vm_file, vma); + ret = mmap_file(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -918,7 +918,7 @@ static int do_mmap_private(struct vm_area_struct *vma, * happy. */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = call_mmap(vma->vm_file, vma); + ret = mmap_file(vma->vm_file, vma); /* shouldn't return success if we're not sharing */ if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags))) ret = -ENOSYS; From 4080ef1579b2413435413988d14ac8c68e4d42c8 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Oct 2024 18:11:45 +0000 Subject: [PATCH 200/281] mm: unconditionally close VMAs on error Incorrect invocation of VMA callbacks when the VMA is no longer in a consistent state is bug prone and risky to perform. With regards to the important vm_ops->close() callback We have gone to great lengths to try to track whether or not we ought to close VMAs. Rather than doing so and risking making a mistake somewhere, instead unconditionally close and reset vma->vm_ops to an empty dummy operations set with a NULL .close operator. We introduce a new function to do so - vma_close() - and simplify existing vms logic which tracked whether we needed to close or not. This simplifies the logic, avoids incorrect double-calling of the .close() callback and allows us to update error paths to simply call vma_close() unconditionally - making VMA closure idempotent. Link: https://lkml.kernel.org/r/28e89dda96f68c505cb6f8e9fc9b57c3e9f74b42.1730224667.git.lorenzo.stoakes@oracle.com Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Reviewed-by: Jann Horn Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. Miller Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Xu Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 18 ++++++++++++++++++ mm/mmap.c | 5 ++--- mm/nommu.c | 3 +-- mm/vma.c | 14 +++++--------- mm/vma.h | 4 +--- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 4eab2961e69c..64c2eb0b160e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -135,6 +135,24 @@ static inline int mmap_file(struct file *file, struct vm_area_struct *vma) return err; } +/* + * If the VMA has a close hook then close it, and since closing it might leave + * it in an inconsistent state which makes the use of any hooks suspect, clear + * them down by installing dummy empty hooks. + */ +static inline void vma_close(struct vm_area_struct *vma) +{ + if (vma->vm_ops && vma->vm_ops->close) { + vma->vm_ops->close(vma); + + /* + * The mapping is in an inconsistent state, and no further hooks + * may be invoked upon it. + */ + vma->vm_ops = &vma_dummy_vm_ops; + } +} + #ifdef CONFIG_MMU /* Flags for folio_pte_batch(). */ diff --git a/mm/mmap.c b/mm/mmap.c index 6e3b25f7728f..ac0604f146f6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1573,8 +1573,7 @@ expanded: return addr; close_and_free_vma: - if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); + vma_close(vma); if (file || vma->vm_file) { unmap_and_free_vma: @@ -1934,7 +1933,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); - remove_vma(vma, /* unreachable = */ true, /* closed = */ false); + remove_vma(vma, /* unreachable = */ true); count++; cond_resched(); vma = vma_next(&vmi); diff --git a/mm/nommu.c b/mm/nommu.c index f9ccc02458ec..635d028d647b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -589,8 +589,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma) */ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) { - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); + vma_close(vma); if (vma->vm_file) fput(vma->vm_file); put_nommu_region(vma->vm_region); diff --git a/mm/vma.c b/mm/vma.c index b21ffec33f8e..7621384d64cf 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -323,11 +323,10 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg, /* * Close a vm structure and free it. */ -void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed) +void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); - if (!closed && vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); + vma_close(vma); if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); @@ -1115,9 +1114,7 @@ void vms_clean_up_area(struct vma_munmap_struct *vms, vms_clear_ptes(vms, mas_detach, true); mas_set(mas_detach, 0); mas_for_each(mas_detach, vma, ULONG_MAX) - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); - vms->closed_vm_ops = true; + vma_close(vma); } /* @@ -1160,7 +1157,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, /* Remove and clean up vmas */ mas_set(mas_detach, 0); mas_for_each(mas_detach, vma, ULONG_MAX) - remove_vma(vma, /* = */ false, vms->closed_vm_ops); + remove_vma(vma, /* unreachable = */ false); vm_unacct_memory(vms->nr_accounted); validate_mm(mm); @@ -1684,8 +1681,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_vma_link: - if (new_vma->vm_ops && new_vma->vm_ops->close) - new_vma->vm_ops->close(new_vma); + vma_close(new_vma); if (new_vma->vm_file) fput(new_vma->vm_file); diff --git a/mm/vma.h b/mm/vma.h index 55457cb68200..75558b5e9c8c 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -42,7 +42,6 @@ struct vma_munmap_struct { int vma_count; /* Number of vmas that will be removed */ bool unlock; /* Unlock after the munmap */ bool clear_ptes; /* If there are outstanding PTE to be cleared */ - bool closed_vm_ops; /* call_mmap() was encountered, so vmas may be closed */ /* 1 byte hole */ unsigned long nr_pages; /* Number of pages being removed */ unsigned long locked_vm; /* Number of locked pages */ @@ -198,7 +197,6 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms, vms->unmap_start = FIRST_USER_ADDRESS; vms->unmap_end = USER_PGTABLES_CEILING; vms->clear_ptes = false; - vms->closed_vm_ops = false; } #endif @@ -269,7 +267,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); -void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed); +void remove_vma(struct vm_area_struct *vma, bool unreachable); void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, struct vm_area_struct *next); From 0fb4a7ad270b3b209e510eb9dc5b07bf02b7edaf Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Oct 2024 18:11:46 +0000 Subject: [PATCH 201/281] mm: refactor map_deny_write_exec() Refactor the map_deny_write_exec() to not unnecessarily require a VMA parameter but rather to accept VMA flags parameters, which allows us to use this function early in mmap_region() in a subsequent commit. While we're here, we refactor the function to be more readable and add some additional documentation. Link: https://lkml.kernel.org/r/6be8bb59cd7c68006ebb006eb9d8dc27104b1f70.1730224667.git.lorenzo.stoakes@oracle.com Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Reviewed-by: Jann Horn Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. Miller Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Xu Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 21 ++++++++++++++++++--- mm/mmap.c | 2 +- mm/mprotect.c | 2 +- mm/vma.h | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/linux/mman.h b/include/linux/mman.h index bcb201ab7a41..8ddca62d6460 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -188,16 +188,31 @@ static inline bool arch_memory_deny_write_exec_supported(void) * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) + * + * This is only applicable if the user has set the Memory-Deny-Write-Execute + * (MDWE) protection mask for the current process. + * + * @old specifies the VMA flags the VMA originally possessed, and @new the ones + * we propose to set. + * + * Return: false if proposed change is OK, true if not ok and should be denied. */ -static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) +static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { + /* If MDWE is disabled, we have nothing to deny. */ if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) return false; - if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) + /* If the new VMA is not executable, we have nothing to deny. */ + if (!(new & VM_EXEC)) + return false; + + /* Under MDWE we do not accept newly writably executable VMAs... */ + if (new & VM_WRITE) return true; - if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) + /* ...nor previously non-executable VMAs becoming executable. */ + if (!(old & VM_EXEC)) return true; return false; diff --git a/mm/mmap.c b/mm/mmap.c index ac0604f146f6..ab71d4c3464c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1505,7 +1505,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_set_anonymous(vma); } - if (map_deny_write_exec(vma, vma->vm_flags)) { + if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) { error = -EACCES; goto close_and_free_vma; } diff --git a/mm/mprotect.c b/mm/mprotect.c index 0c5d6d06107d..6f450af3252e 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -810,7 +810,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, break; } - if (map_deny_write_exec(vma, newflags)) { + if (map_deny_write_exec(vma->vm_flags, newflags)) { error = -EACCES; break; } diff --git a/mm/vma.h b/mm/vma.h index 75558b5e9c8c..d58068c0ff2e 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -42,7 +42,7 @@ struct vma_munmap_struct { int vma_count; /* Number of vmas that will be removed */ bool unlock; /* Unlock after the munmap */ bool clear_ptes; /* If there are outstanding PTE to be cleared */ - /* 1 byte hole */ + /* 2 byte hole */ unsigned long nr_pages; /* Number of pages being removed */ unsigned long locked_vm; /* Number of locked pages */ unsigned long nr_accounted; /* Number of VM_ACCOUNT pages */ From 5baf8b037debf4ec60108ccfeccb8636d1dbad81 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Oct 2024 18:11:47 +0000 Subject: [PATCH 202/281] mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling Currently MTE is permitted in two circumstances (desiring to use MTE having been specified by the VM_MTE flag) - where MAP_ANONYMOUS is specified, as checked by arch_calc_vm_flag_bits() and actualised by setting the VM_MTE_ALLOWED flag, or if the file backing the mapping is shmem, in which case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap hook is activated in mmap_region(). The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also set is the arm64 implementation of arch_validate_flags(). Unfortunately, we intend to refactor mmap_region() to perform this check earlier, meaning that in the case of a shmem backing we will not have invoked shmem_mmap() yet, causing the mapping to fail spuriously. It is inappropriate to set this architecture-specific flag in general mm code anyway, so a sensible resolution of this issue is to instead move the check somewhere else. We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via the arch_calc_vm_flag_bits() call. This is an appropriate place to do this as we already check for the MAP_ANONYMOUS case here, and the shmem file case is simply a variant of the same idea - we permit RAM-backed memory. This requires a modification to the arch_calc_vm_flag_bits() signature to pass in a pointer to the struct file associated with the mapping, however this is not too egregious as this is only used by two architectures anyway - arm64 and parisc. So this patch performs this adjustment and removes the unnecessary assignment of VM_MTE_ALLOWED in shmem_mmap(). [akpm@linux-foundation.org: fix whitespace, per Catalin] Link: https://lkml.kernel.org/r/ec251b20ba1964fb64cf1607d2ad80c47f3873df.1730224667.git.lorenzo.stoakes@oracle.com Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") Signed-off-by: Lorenzo Stoakes Suggested-by: Catalin Marinas Reported-by: Jann Horn Reviewed-by: Catalin Marinas Reviewed-by: Vlastimil Babka Cc: Andreas Larsson Cc: David S. Miller Cc: Helge Deller Cc: James E.J. Bottomley Cc: Liam R. Howlett Cc: Linus Torvalds Cc: Mark Brown Cc: Peter Xu Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/arm64/include/asm/mman.h | 10 +++++++--- arch/parisc/include/asm/mman.h | 5 +++-- include/linux/mman.h | 7 ++++--- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 3 --- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 9e39217b4afb..798d965760d4 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -6,6 +6,8 @@ #ifndef BUILD_VDSO #include +#include +#include #include static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, @@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline unsigned long arch_calc_vm_flag_bits(struct file *file, + unsigned long flags) { /* * Only allow MTE on anonymous mappings as these are guaranteed to be * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). */ - if (system_supports_mte() && (flags & MAP_ANONYMOUS)) + if (system_supports_mte() && + ((flags & MAP_ANONYMOUS) || shmem_file(file))) return VM_MTE_ALLOWED; return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) static inline bool arch_validate_prot(unsigned long prot, unsigned long addr __always_unused) diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index 89b6beeda0b8..663f587dc789 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -2,6 +2,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include #include /* PARISC cannot allow mdwe as it needs writable stacks */ @@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags) { /* * The stack on parisc grows upwards, so if userspace requests memory @@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) #endif /* __ASM_MMAN_H__ */ diff --git a/include/linux/mman.h b/include/linux/mman.h index 8ddca62d6460..a842783ffa62 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H +#include #include #include @@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages) #endif #ifndef arch_calc_vm_flag_bits -#define arch_calc_vm_flag_bits(flags) 0 +#define arch_calc_vm_flag_bits(file, flags) 0 #endif #ifndef arch_validate_prot @@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long -calc_vm_flag_bits(unsigned long flags) +calc_vm_flag_bits(struct file *file, unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | - arch_calc_vm_flag_bits(flags); + arch_calc_vm_flag_bits(file, flags); } unsigned long vm_commit_limit(void); diff --git a/mm/mmap.c b/mm/mmap.c index ab71d4c3464c..aee5fa08ae5d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -344,7 +344,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | + vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; /* Obtain the address to map to. we verify (or select) it and ensure diff --git a/mm/nommu.c b/mm/nommu.c index 635d028d647b..e9b5f527ab5b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -842,7 +842,7 @@ static unsigned long determine_vm_flags(struct file *file, { unsigned long vm_flags; - vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); + vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags); if (!file) { /* diff --git a/mm/shmem.c b/mm/shmem.c index 4ba1d00fabda..e87f5d6799a7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2733,9 +2733,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - /* arm64 - allow memory tagging on RAM-based files */ - vm_flags_set(vma, VM_MTE_ALLOWED); - file_accessed(file); /* This is anonymous shared memory if it is unlinked at the time of mmap */ if (inode->i_nlink) From 5de195060b2e251a835f622759550e6202167641 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 29 Oct 2024 18:11:48 +0000 Subject: [PATCH 203/281] mm: resolve faulty mmap_region() error path behaviour The mmap_region() function is somewhat terrifying, with spaghetti-like control flow and numerous means by which issues can arise and incomplete state, memory leaks and other unpleasantness can occur. A large amount of the complexity arises from trying to handle errors late in the process of mapping a VMA, which forms the basis of recently observed issues with resource leaks and observable inconsistent state. Taking advantage of previous patches in this series we move a number of checks earlier in the code, simplifying things by moving the core of the logic into a static internal function __mmap_region(). Doing this allows us to perform a number of checks up front before we do any real work, and allows us to unwind the writable unmap check unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE validation unconditionally also. We move a number of things here: 1. We preallocate memory for the iterator before we call the file-backed memory hook, allowing us to exit early and avoid having to perform complicated and error-prone close/free logic. We carefully free iterator state on both success and error paths. 2. The enclosing mmap_region() function handles the mapping_map_writable() logic early. Previously the logic had the mapping_map_writable() at the point of mapping a newly allocated file-backed VMA, and a matching mapping_unmap_writable() on success and error paths. We now do this unconditionally if this is a file-backed, shared writable mapping. If a driver changes the flags to eliminate VM_MAYWRITE, however doing so does not invalidate the seal check we just performed, and we in any case always decrement the counter in the wrapper. We perform a debug assert to ensure a driver does not attempt to do the opposite. 3. We also move arch_validate_flags() up into the mmap_region() function. This is only relevant on arm64 and sparc64, and the check is only meaningful for SPARC with ADI enabled. We explicitly add a warning for this arch if a driver invalidates this check, though the code ought eventually to be fixed to eliminate the need for this. With all of these measures in place, we no longer need to explicitly close the VMA on error paths, as we place all checks which might fail prior to a call to any driver mmap hook. This eliminates an entire class of errors, makes the code easier to reason about and more robust. Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails") Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Tested-by: Mark Brown Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. Miller Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Peter Xu Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 119 +++++++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 54 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index aee5fa08ae5d..79d541f1502b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1358,20 +1358,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, return do_vmi_munmap(&vmi, mm, start, len, uf, false); } -unsigned long mmap_region(struct file *file, unsigned long addr, +static unsigned long __mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; pgoff_t pglen = PHYS_PFN(len); - struct vm_area_struct *merge; unsigned long charged = 0; struct vma_munmap_struct vms; struct ma_state mas_detach; struct maple_tree mt_detach; unsigned long end = addr + len; - bool writable_file_mapping = false; int error; VMA_ITERATOR(vmi, mm, addr); VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff); @@ -1445,28 +1443,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vm_flags_init(vma, vm_flags); vma->vm_page_prot = vm_get_page_prot(vm_flags); + if (vma_iter_prealloc(&vmi, vma)) { + error = -ENOMEM; + goto free_vma; + } + if (file) { vma->vm_file = get_file(file); error = mmap_file(file, vma); if (error) - goto unmap_and_free_vma; - - if (vma_is_shared_maywrite(vma)) { - error = mapping_map_writable(file->f_mapping); - if (error) - goto close_and_free_vma; - - writable_file_mapping = true; - } + goto unmap_and_free_file_vma; + /* Drivers cannot alter the address of the VMA. */ + WARN_ON_ONCE(addr != vma->vm_start); /* - * Expansion is handled above, merging is handled below. - * Drivers should not alter the address of the VMA. + * Drivers should not permit writability when previously it was + * disallowed. */ - if (WARN_ON((addr != vma->vm_start))) { - error = -EINVAL; - goto close_and_free_vma; - } + VM_WARN_ON_ONCE(vm_flags != vma->vm_flags && + !(vm_flags & VM_MAYWRITE) && + (vma->vm_flags & VM_MAYWRITE)); vma_iter_config(&vmi, addr, end); /* @@ -1474,6 +1470,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * vma again as we may succeed this time. */ if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) { + struct vm_area_struct *merge; + vmg.flags = vma->vm_flags; /* If this fails, state is reset ready for a reattempt. */ merge = vma_merge_new_range(&vmg); @@ -1491,7 +1489,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma = merge; /* Update vm_flags to pick up the change. */ vm_flags = vma->vm_flags; - goto unmap_writable; + goto file_expanded; } vma_iter_config(&vmi, addr, end); } @@ -1500,26 +1498,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) - goto free_vma; + goto free_iter_vma; } else { vma_set_anonymous(vma); } - if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) { - error = -EACCES; - goto close_and_free_vma; - } - - /* Allow architectures to sanity-check the vm_flags */ - if (!arch_validate_flags(vma->vm_flags)) { - error = -EINVAL; - goto close_and_free_vma; - } - - if (vma_iter_prealloc(&vmi, vma)) { - error = -ENOMEM; - goto close_and_free_vma; - } +#ifdef CONFIG_SPARC64 + /* TODO: Fix SPARC ADI! */ + WARN_ON_ONCE(!arch_validate_flags(vm_flags)); +#endif /* Lock the VMA since it is modified after insertion into VMA tree */ vma_start_write(vma); @@ -1533,10 +1520,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ khugepaged_enter_vma(vma, vma->vm_flags); - /* Once vma denies write, undo our temporary denial count */ -unmap_writable: - if (writable_file_mapping) - mapping_unmap_writable(file->f_mapping); +file_expanded: file = vma->vm_file; ksm_add_vma(vma); expanded: @@ -1569,23 +1553,17 @@ expanded: vma_set_page_prot(vma); - validate_mm(mm); return addr; -close_and_free_vma: - vma_close(vma); +unmap_and_free_file_vma: + fput(vma->vm_file); + vma->vm_file = NULL; - if (file || vma->vm_file) { -unmap_and_free_vma: - fput(vma->vm_file); - vma->vm_file = NULL; - - vma_iter_set(&vmi, vma->vm_end); - /* Undo any partial mapping done by a device driver. */ - unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); - } - if (writable_file_mapping) - mapping_unmap_writable(file->f_mapping); + vma_iter_set(&vmi, vma->vm_end); + /* Undo any partial mapping done by a device driver. */ + unmap_region(&vmi.mas, vma, vmg.prev, vmg.next); +free_iter_vma: + vma_iter_free(&vmi); free_vma: vm_area_free(vma); unacct_error: @@ -1595,10 +1573,43 @@ unacct_error: abort_munmap: vms_abort_munmap_vmas(&vms, &mas_detach); gather_failed: - validate_mm(mm); return error; } +unsigned long mmap_region(struct file *file, unsigned long addr, + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf) +{ + unsigned long ret; + bool writable_file_mapping = false; + + /* Check to see if MDWE is applicable. */ + if (map_deny_write_exec(vm_flags, vm_flags)) + return -EACCES; + + /* Allow architectures to sanity-check the vm_flags. */ + if (!arch_validate_flags(vm_flags)) + return -EINVAL; + + /* Map writable and ensure this isn't a sealed memfd. */ + if (file && is_shared_maywrite(vm_flags)) { + int error = mapping_map_writable(file->f_mapping); + + if (error) + return error; + writable_file_mapping = true; + } + + ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf); + + /* Clear our write mapping regardless of error. */ + if (writable_file_mapping) + mapping_unmap_writable(file->f_mapping); + + validate_mm(current->mm); + return ret; +} + static int __vm_munmap(unsigned long start, size_t len, bool unlock) { int ret; From 256748d5480bb3c4b731236c6d6fc86a8e2815d8 Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Sat, 2 Nov 2024 16:15:05 +0100 Subject: [PATCH 204/281] net: phy: ti: add PHY_RST_AFTER_CLK_EN flag DP83848 datasheet (section 4.7.2) indicates that the reset pin should be toggled after the clocks are running. Add the PHY_RST_AFTER_CLK_EN to make sure that this indication is respected. In my experience not having this flag enabled would lead to, on some boots, the wrong MII mode being selected if the PHY was initialized on the bootloader and was receiving data during Linux boot. Signed-off-by: Diogo Silva Reviewed-by: Andrew Lunn Fixes: 34e45ad9378c ("net: phy: dp83848: Add TI DP83848 Ethernet PHY") Link: https://patch.msgid.link/20241102151504.811306-1-paissilva@ld-100007.ds1.internal Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83848.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 937061acfc61..351411f0aa6f 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -147,6 +147,8 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl); /* IRQ related */ \ .config_intr = dp83848_config_intr, \ .handle_interrupt = dp83848_handle_interrupt, \ + \ + .flags = PHY_RST_AFTER_CLK_EN, \ } static struct phy_driver dp83848_driver[] = { From cfbbd4859882a5469f6f4945937a074ee78c4b46 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 4 Nov 2024 13:31:41 +0100 Subject: [PATCH 205/281] mptcp: no admin perm to list endpoints During the switch to YNL, the command to list all endpoints has been accidentally restricted to users with admin permissions. It looks like there are no reasons to have this restriction which makes it harder for a user to quickly check if the endpoint list has been correctly populated by an automated tool. Best to go back to the previous behaviour then. mptcp_pm_gen.c has been modified using ynl-gen-c.py: $ ./tools/net/ynl/ynl-gen-c.py --mode kernel \ --spec Documentation/netlink/specs/mptcp_pm.yaml --source \ -o net/mptcp/mptcp_pm_gen.c The header file doesn't need to be regenerated. Fixes: 1d0507f46843 ("net: mptcp: convert netlink from small_ops to ops") Cc: stable@vger.kernel.org Reviewed-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241104-net-mptcp-misc-6-12-v1-1-c13f2ff1656f@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/mptcp_pm.yaml | 1 - net/mptcp/mptcp_pm_gen.c | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index 30d8342cacc8..dc190bf838fe 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -293,7 +293,6 @@ operations: doc: Get endpoint information attribute-set: attr dont-validate: [ strict ] - flags: [ uns-admin-perm ] do: &get-addr-attrs request: attributes: diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index c30a2a90a192..bfb37c5a88c4 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -112,7 +112,6 @@ const struct genl_ops mptcp_pm_nl_ops[11] = { .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_get_addr_nl_policy, .maxattr = MPTCP_PM_ATTR_TOKEN, - .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, From 99635c91fb8b860a6404b9bc8b769df7bdaa2ae3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 4 Nov 2024 13:31:42 +0100 Subject: [PATCH 206/281] mptcp: use sock_kfree_s instead of kfree The local address entries on userspace_pm_local_addr_list are allocated by sock_kmalloc(). It's then required to use sock_kfree_s() instead of kfree() to free these entries in order to adjust the allocated size on the sk side. Fixes: 24430f8bf516 ("mptcp: add address into userspace pm list") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241104-net-mptcp-misc-6-12-v1-2-c13f2ff1656f@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2cceded3a83a..56dfea9862b7 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -91,6 +91,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *addr) { struct mptcp_pm_addr_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { @@ -98,7 +99,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, * be used multiple times (e.g. fullmesh mode). */ list_del_rcu(&entry->list); - kfree(entry); + sock_kfree_s(sk, entry, sizeof(*entry)); msk->pm.local_addr_used--; return 0; } From a373830f96db288a3eb43a8692b6bcd0bd88dfe1 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Mon, 28 Oct 2024 14:34:09 +0530 Subject: [PATCH 207/281] KVM: PPC: Book3S HV: Mask off LPCR_MER for a vCPU before running it to avoid spurious interrupts Running a L2 vCPU (see [1] for terminology) with LPCR_MER bit set and no pending interrupts results in that L2 vCPU getting an infinite flood of spurious interrupts. The 'if check' in kvmhv_run_single_vcpu() sets the LPCR_MER bit if there are pending interrupts. The spurious flood problem can be observed in 2 cases: 1. Crashing the guest while interrupt heavy workload is running a. Start a L2 guest and run an interrupt heavy workload (eg: ipistorm) b. While the workload is running, crash the guest (make sure kdump is configured) c. Any one of the vCPUs of the guest will start getting an infinite flood of spurious interrupts. 2. Running LTP stress tests in multiple guests at the same time a. Start 4 L2 guests. b. Start running LTP stress tests on all 4 guests at same time. c. In some time, any one/more of the vCPUs of any of the guests will start getting an infinite flood of spurious interrupts. The root cause of both the above issues is the same: 1. A NMI is sent to a running vCPU that has LPCR_MER bit set. 2. In the NMI path, all registers are refreshed, i.e, H_GUEST_GET_STATE is called for all the registers. 3. When H_GUEST_GET_STATE is called for LPCR, the vcpu->arch.vcore->lpcr of that vCPU at L1 level gets updated with LPCR_MER set to 1, and this new value is always used whenever that vCPU runs, regardless of whether there was a pending interrupt. 4. Since LPCR_MER is set, the vCPU in L2 always jumps to the external interrupt handler, and this cycle never ends. Fix the spurious flood by masking off the LPCR_MER bit before running a L2 vCPU to ensure that it is not set if there are no pending interrupts. [1] Terminology: 1. L0 : PAPR hypervisor running in HV mode 2. L1 : Linux guest (logical partition) running on top of L0 3. L2 : KVM guest running on top of L1 Fixes: ec0f6639fa88 ("KVM: PPC: Book3S HV nestedv2: Ensure LPCR_MER bit is passed to the L0") Cc: stable@vger.kernel.org # v6.8+ Signed-off-by: Gautam Menghani Signed-off-by: Madhavan Srinivasan --- arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ba0492f9de65..ad8dc4ccdaab 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4898,6 +4898,18 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, BOOK3S_INTERRUPT_EXTERNAL, 0); else lpcr |= LPCR_MER; + } else { + /* + * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit + * unexpectedly set - for e.g. during NMI handling when all register + * states are synchronized from L0 to L1. L1 needs to inform L0 about + * MER=1 only when there are pending external interrupts. + * In the above if check, MER bit is set if there are pending + * external interrupts. Hence, explicity mask off MER bit + * here as otherwise it may generate spurious interrupts in L2 KVM + * causing an endless loop, which results in L2 guest getting hung. + */ + lpcr &= ~LPCR_MER; } } else if (vcpu->arch.pending_exceptions || vcpu->arch.doorbell_request || From 1be765b292577c752e0b87bf8c0e92aff6699d8e Mon Sep 17 00:00:00 2001 From: Vishnu Sankar Date: Wed, 6 Nov 2024 08:55:05 +0900 Subject: [PATCH 208/281] platform/x86: thinkpad_acpi: Fix for ThinkPad's with ECFW showing incorrect fan speed Fix for Thinkpad's with ECFW showing incorrect fan speed. Some models use decimal instead of hexadecimal for the speed stored in the EC registers. For example the rpm register will have 0x4200 instead of 0x1068, here the actual RPM is "4200" in decimal. Add a quirk to handle this. Signed-off-by: Vishnu Sankar Suggested-by: Mark Pearson Link: https://lore.kernel.org/r/20241105235505.8493-1-vishnuocv@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 4c1b0553f872..6371a9f765c1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7936,6 +7936,7 @@ static u8 fan_control_resume_level; static int fan_watchdog_maxinterval; static bool fan_with_ns_addr; +static bool ecfw_with_fan_dec_rpm; static struct mutex fan_mutex; @@ -8682,7 +8683,11 @@ static ssize_t fan_fan1_input_show(struct device *dev, if (res < 0) return res; - return sysfs_emit(buf, "%u\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + return sysfs_emit(buf, "%u\n", speed); + else + return sysfs_emit(buf, "%x\n", speed); } static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL); @@ -8699,7 +8704,11 @@ static ssize_t fan_fan2_input_show(struct device *dev, if (res < 0) return res; - return sysfs_emit(buf, "%u\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + return sysfs_emit(buf, "%u\n", speed); + else + return sysfs_emit(buf, "%x\n", speed); } static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL); @@ -8775,6 +8784,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ +#define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8803,6 +8813,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('R', '1', 'D', TPACPI_FAN_NS), /* 11e Gen5 GL-R */ TPACPI_Q_LNV3('R', '0', 'V', TPACPI_FAN_NS), /* 11e Gen5 KL-Y */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ + TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8847,6 +8858,13 @@ static int __init fan_init(struct ibm_init_struct *iibm) tp_features.fan_ctrl_status_undef = 1; } + /* Check for the EC/BIOS with RPM reported in decimal*/ + if (quirks & TPACPI_FAN_DECRPM) { + pr_info("ECFW with fan RPM as decimal in EC register\n"); + ecfw_with_fan_dec_rpm = 1; + tp_features.fan_ctrl_status_undef = 1; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; @@ -9067,7 +9085,11 @@ static int fan_read(struct seq_file *m) if (rc < 0) return rc; - seq_printf(m, "speed:\t\t%d\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + seq_printf(m, "speed:\t\t%d\n", speed); + else + seq_printf(m, "speed:\t\t%x\n", speed); if (fan_status_access_mode == TPACPI_FAN_RD_TPEC_NS) { /* From 751ecf6afd6568adc98f2a6052315552c0483d18 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Oct 2024 20:23:50 +0000 Subject: [PATCH 209/281] arm64/sve: Discard stale CPU state when handling SVE traps The logic for handling SVE traps manipulates saved FPSIMD/SVE state incorrectly, and a race with preemption can result in a task having TIF_SVE set and TIF_FOREIGN_FPSTATE clear even though the live CPU state is stale (e.g. with SVE traps enabled). This has been observed to result in warnings from do_sve_acc() where SVE traps are not expected while TIF_SVE is set: | if (test_and_set_thread_flag(TIF_SVE)) | WARN_ON(1); /* SVE access shouldn't have trapped */ Warnings of this form have been reported intermittently, e.g. https://lore.kernel.org/linux-arm-kernel/CA+G9fYtEGe_DhY2Ms7+L7NKsLYUomGsgqpdBj+QwDLeSg=JhGg@mail.gmail.com/ https://lore.kernel.org/linux-arm-kernel/000000000000511e9a060ce5a45c@google.com/ The race can occur when the SVE trap handler is preempted before and after manipulating the saved FPSIMD/SVE state, starting and ending on the same CPU, e.g. | void do_sve_acc(unsigned long esr, struct pt_regs *regs) | { | // Trap on CPU 0 with TIF_SVE clear, SVE traps enabled | // task->fpsimd_cpu is 0. | // per_cpu_ptr(&fpsimd_last_state, 0) is task. | | ... | | // Preempted; migrated from CPU 0 to CPU 1. | // TIF_FOREIGN_FPSTATE is set. | | get_cpu_fpsimd_context(); | | if (test_and_set_thread_flag(TIF_SVE)) | WARN_ON(1); /* SVE access shouldn't have trapped */ | | sve_init_regs() { | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { | ... | } else { | fpsimd_to_sve(current); | current->thread.fp_type = FP_STATE_SVE; | } | } | | put_cpu_fpsimd_context(); | | // Preempted; migrated from CPU 1 to CPU 0. | // task->fpsimd_cpu is still 0 | // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then: | // - Stale HW state is reused (with SVE traps enabled) | // - TIF_FOREIGN_FPSTATE is cleared | // - A return to userspace skips HW state restore | } Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set by calling fpsimd_flush_task_state() to detach from the saved CPU state. This ensures that a subsequent context switch will not reuse the stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the new state to be reloaded from memory prior to a return to userspace. Fixes: cccb78ce89c4 ("arm64/sve: Rework SVE access trap to convert state in registers") Reported-by: Mark Rutland Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20241030-arm64-fpsimd-foreign-flush-v1-1-bd7bd66905a2@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 77006df20a75..6d21971ae559 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1367,6 +1367,7 @@ static void sve_init_regs(void) } else { fpsimd_to_sve(current); current->thread.fp_type = FP_STATE_SVE; + fpsimd_flush_task_state(current); } } From 25eb47eed52979c2f5eee3f37e6c67714e02c49c Mon Sep 17 00:00:00 2001 From: Jack Wu Date: Wed, 6 Nov 2024 18:50:29 +0800 Subject: [PATCH 210/281] USB: serial: qcserial: add support for Sierra Wireless EM86xx Add support for Sierra Wireless EM86xx with USB-id 0x1199:0x90e5 and 0x1199:0x90e4. 0x1199:0x90e5 T: Bus=03 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 14 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=90e5 Rev= 5.15 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Semtech EM8695 Mobile Broadband Adapter S: SerialNumber=004403161882339 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#=12 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=qcserial E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=qcserial E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#=12 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#=13 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#=13 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1199:0x90e4 T: Bus=03 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=90e4 Rev= 0.00 S: Manufacturer=Sierra Wireless, Incorporated S: SerialNumber=004403161882339 C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=10 Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jack Wu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index c7de9585feb2..13c664317a05 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ + {DEVICE_SWI(0x1199, 0x90e4)}, /* Sierra Wireless EM86xx QDL*/ + {DEVICE_SWI(0x1199, 0x90e5)}, /* Sierra Wireless EM86xx */ {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */ {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ From de156f3cf70e17dc6ff4c3c364bb97a6db961ffd Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Wed, 6 Nov 2024 10:40:50 +0800 Subject: [PATCH 211/281] ASoC: amd: yc: fix internal mic on Xiaomi Book Pro 14 2022 Xiaomi Book Pro 14 2022 (MIA2210-AD) requires a quirk entry for its internal microphone to be enabled. This is likely due to similar reasons as seen previously on Redmi Book 14/15 Pro 2022 models (since they likely came with similar firmware): - commit dcff8b7ca92d ("ASoC: amd: yc: Add Xiaomi Redmi Book Pro 15 2022 into DMI table") - commit c1dd6bf61997 ("ASoC: amd: yc: Add Xiaomi Redmi Book Pro 14 2022 into DMI table") A quirk would likely be needed for Xiaomi Book Pro 15 2022 models, too. However, I do not have such device on hand so I will leave it for now. Signed-off-by: Mingcong Bai Link: https://patch.msgid.link/20241106024052.15748-1-jeffbai@aosc.io Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 438865d5e376..dc476bfb6da4 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -395,6 +395,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "TIMI"), + DMI_MATCH(DMI_PRODUCT_NAME, "Xiaomi Book Pro 14 2022"), + } + }, { .driver_data = &acp6x_card, .matches = { From b79276dcac9124a79c8cf7cc8fbdd3d4c3c9a7c7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 4 Nov 2024 16:28:55 -0600 Subject: [PATCH 212/281] ACPI: processor: Move arch_init_invariance_cppc() call later arch_init_invariance_cppc() is called at the end of acpi_cppc_processor_probe() in order to configure frequency invariance based upon the values from _CPC. This however doesn't work on AMD CPPC shared memory designs that have AMD preferred cores enabled because _CPC needs to be analyzed from all cores to judge if preferred cores are enabled. This issue manifests to users as a warning since commit 21fb59ab4b97 ("ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn"): ``` Could not retrieve highest performance (-19) ``` However the warning isn't the cause of this, it was actually commit 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") which exposed the issue. To fix this problem, change arch_init_invariance_cppc() into a new weak symbol that is called at the end of acpi_processor_driver_init(). Each architecture that supports it can declare the symbol to override the weak one. Define it for x86, in arch/x86/kernel/acpi/cppc.c, and for all of the architectures using the generic arch_topology.c code. Fixes: 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") Reported-by: Ivan Shapovalov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219431 Tested-by: Oleksandr Natalenko Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20241104222855.3959267-1-superm1@kernel.org [ rjw: Changelog edit ] Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/topology.h | 4 ---- arch/x86/include/asm/topology.h | 5 ----- arch/x86/kernel/acpi/cppc.c | 7 ++++++- drivers/acpi/cppc_acpi.c | 6 ------ drivers/acpi/processor_driver.c | 9 +++++++++ drivers/base/arch_topology.c | 6 +++++- include/acpi/processor.h | 2 ++ include/linux/arch_topology.h | 4 ---- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 5fc3af9f8f29..341174bf9106 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -26,10 +26,6 @@ void update_freq_counters_refs(void); #define arch_scale_freq_invariant topology_scale_freq_invariant #define arch_scale_freq_ref topology_get_freq_ref -#ifdef CONFIG_ACPI_CPPC_LIB -#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc -#endif - /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index aef70336d624..92f3664dd933 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -305,9 +305,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick -#ifdef CONFIG_ACPI_CPPC_LIB -void init_freq_invariance_cppc(void); -#define arch_init_invariance_cppc init_freq_invariance_cppc -#endif - #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 956984054bf3..aab9d0570841 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -110,7 +110,7 @@ static void amd_set_max_freq_ratio(void) static DEFINE_MUTEX(freq_invariance_lock); -void init_freq_invariance_cppc(void) +static inline void init_freq_invariance_cppc(void) { static bool init_done; @@ -127,6 +127,11 @@ void init_freq_invariance_cppc(void) mutex_unlock(&freq_invariance_lock); } +void acpi_processor_init_invariance_cppc(void) +{ + init_freq_invariance_cppc(); +} + /* * Get the highest performance register value. * @cpu: CPU from which to get highest performance. diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 1a40f0514eaa..5c0cc7aae872 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -671,10 +671,6 @@ static int pcc_data_alloc(int pcc_ss_id) * ) */ -#ifndef arch_init_invariance_cppc -static inline void arch_init_invariance_cppc(void) { } -#endif - /** * acpi_cppc_processor_probe - Search for per CPU _CPC objects. * @pr: Ptr to acpi_processor containing this CPU's logical ID. @@ -905,8 +901,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; } - arch_init_invariance_cppc(); - kfree(output.pointer); return 0; diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index cb52dd000b95..3b281bc1e73c 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -237,6 +237,9 @@ static struct notifier_block acpi_processor_notifier_block = { .notifier_call = acpi_processor_notifier, }; +void __weak acpi_processor_init_invariance_cppc(void) +{ } + /* * We keep the driver loaded even when ACPI is not running. * This is needed for the powernow-k8 driver, that works even without @@ -270,6 +273,12 @@ static int __init acpi_processor_driver_init(void) NULL, acpi_soft_cpu_dead); acpi_processor_throttling_init(); + + /* + * Frequency invariance calculations on AMD platforms can't be run until + * after acpi_cppc_processor_probe() has been called for all online CPUs + */ + acpi_processor_init_invariance_cppc(); return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 75fcb75d5515..3ebe77566788 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -366,7 +366,7 @@ void __weak freq_inv_set_max_ratio(int cpu, u64 max_rate) #ifdef CONFIG_ACPI_CPPC_LIB #include -void topology_init_cpu_capacity_cppc(void) +static inline void topology_init_cpu_capacity_cppc(void) { u64 capacity, capacity_scale = 0; struct cppc_perf_caps perf_caps; @@ -417,6 +417,10 @@ void topology_init_cpu_capacity_cppc(void) exit: free_raw_capacity(); } +void acpi_processor_init_invariance_cppc(void) +{ + topology_init_cpu_capacity_cppc(); +} #endif #ifdef CONFIG_CPU_FREQ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index e6f6074eadbf..a17e97e634a6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -465,4 +465,6 @@ extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif +void acpi_processor_init_invariance_cppc(void); + #endif diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index b721f360d759..4a952c4885ed 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -11,10 +11,6 @@ void topology_normalize_cpu_scale(void); int topology_update_cpu_topology(void); -#ifdef CONFIG_ACPI_CPPC_LIB -void topology_init_cpu_capacity_cppc(void); -#endif - struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); From 94debe5eaa0adaa24a6de4a8e5f138be7381eb9e Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Wed, 6 Nov 2024 19:56:57 +0530 Subject: [PATCH 213/281] ASoC: SOF: amd: Fix for incorrect DMA ch status register offset DMA ch status register offset change in acp7.0 platform Incorrect DMA channel status register offset check lead to firmware boot failure. [ 14.432497] snd_sof_amd_acp70 0000:c4:00.5: ------------[ DSP dump start ]------------ [ 14.432533] snd_sof_amd_acp70 0000:c4:00.5: Firmware boot failure due to timeout [ 14.432549] snd_sof_amd_acp70 0000:c4:00.5: fw_state: SOF_FW_BOOT_IN_PROGRESS (3) [ 14.432610] snd_sof_amd_acp70 0000:c4:00.5: invalid header size 0x71c41000. FW oops is bogus [ 14.432626] snd_sof_amd_acp70 0000:c4:00.5: unexpected fault 0x71c40000 trace 0x71c40000 [ 14.432642] snd_sof_amd_acp70 0000:c4:00.5: ------------[ DSP dump end ]------------ [ 14.432657] snd_sof_amd_acp70 0000:c4:00.5: error: failed to boot DSP firmware -5 [ 14.432672] snd_sof_amd_acp70 0000:c4:00.5: fw_state change: 3 -> 4 [ 14.433260] dmic-codec dmic-codec: ASoC: Unregistered DAI 'dmic-hifi' [ 14.433319] snd_sof_amd_acp70 0000:c4:00.5: fw_state change: 4 -> 0 [ 14.433358] snd_sof_amd_acp70 0000:c4:00.5: error: sof_probe_work failed err: -5 Update correct register offset for DMA ch status register. Fixes: 490be7ba2a01 ("ASoC: SOF: amd: add support for acp7.0 based platform") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20241106142658.1240929-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index de3001f5b9bb..95d4762c9d93 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -342,11 +342,19 @@ int acp_dma_status(struct acp_dev_data *adata, unsigned char ch) { struct snd_sof_dev *sdev = adata->dev; unsigned int val; + unsigned int acp_dma_ch_sts; int ret = 0; + switch (adata->pci_rev) { + case ACP70_PCI_ID: + acp_dma_ch_sts = ACP70_DMA_CH_STS; + break; + default: + acp_dma_ch_sts = ACP_DMA_CH_STS; + } val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_DMA_CNTL_0 + ch * sizeof(u32)); if (val & ACP_DMA_CH_RUN) { - ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, ACP_DMA_CH_STS, val, !val, + ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, acp_dma_ch_sts, val, !val, ACP_REG_POLL_INTERVAL, ACP_DMA_COMPLETE_TIMEOUT_US); if (ret < 0) From a4aebaf6e6efff548b01a3dc49b4b9074751c15b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 6 Nov 2024 21:50:55 +0100 Subject: [PATCH 214/281] media: dvbdev: fix the logic when DVB_DYNAMIC_MINORS is not set When CONFIG_DVB_DYNAMIC_MINORS, ret is not initialized, and a semaphore is left at the wrong state, in case of errors. Make the code simpler and avoid mistakes by having just one error check logic used weather DVB_DYNAMIC_MINORS is used or not. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202410201717.ULWWdJv8-lkp@intel.com/ Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9e067488d8935b8cf00959764a1fa5de85d65725.1730926254.git.mchehab+huawei@kernel.org --- drivers/media/dvb-core/dvbdev.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 14f323fbada7..9df7c213716a 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -530,6 +530,9 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (!dvb_minors[minor]) break; +#else + minor = nums2minor(adap->num, type, id); +#endif if (minor >= MAX_DVB_MINORS) { if (new_node) { list_del(&new_node->list_head); @@ -543,17 +546,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, mutex_unlock(&dvbdev_register_lock); return -EINVAL; } -#else - minor = nums2minor(adap->num, type, id); - if (minor >= MAX_DVB_MINORS) { - dvb_media_device_free(dvbdev); - list_del(&dvbdev->list_head); - kfree(dvbdev); - *pdvbdev = NULL; - mutex_unlock(&dvbdev_register_lock); - return ret; - } -#endif + dvbdev->minor = minor; dvb_minors[minor] = dvb_device_get(dvbdev); up_write(&minor_rwsem); From 464cb98f1c07298c4c10e714ae0c36338d18d316 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Nov 2024 08:44:18 +0000 Subject: [PATCH 215/281] irqchip/gic-v3: Force propagation of the active state with a read-back Christoffer reports that on some implementations, writing to GICR_ISACTIVER0 (and similar GICD registers) can race badly with a guest issuing a deactivation of that interrupt via the system register interface. There are multiple reasons to this: - this uses an early write-acknoledgement memory type (nGnRE), meaning that the write may only have made it as far as some interconnect by the time the store is considered "done" - the GIC itself is allowed to buffer the write until it decides to take it into account (as long as it is in finite time) The effects are that the activation may not have taken effect by the time the kernel enters the guest, forcing an immediate exit, or that a guest deactivation occurs before the interrupt is active, doing nothing. In order to guarantee that the write to the ISACTIVER register has taken effect, read back from it, forcing the interconnect to propagate the write, and the GIC to process the write before returning the read. Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Acked-by: Christoffer Dall Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241106084418.3794612-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index ce87205e3e82..8b6159f4cdaf 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -524,6 +524,13 @@ static int gic_irq_set_irqchip_state(struct irq_data *d, } gic_poke_irq(d, reg); + + /* + * Force read-back to guarantee that the active state has taken + * effect, and won't race with a guest-driven deactivation. + */ + if (reg == GICD_ISACTIVER) + gic_peek_irq(d, reg); return 0; } From cda7163d4e3d99db93aa38f0e825b8433c7a8452 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 30 Oct 2024 11:25:47 +1030 Subject: [PATCH 216/281] btrfs: fix per-subvolume RO/RW flags with new mount API [BUG] With util-linux 2.40.2, the 'mount' utility is already utilizing the new mount API. e.g: # strace mount -o subvol=subv1,ro /dev/test/scratch1 /mnt/test/ ... fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/mapper/test-scratch1", 0) = 0 fsconfig(3, FSCONFIG_SET_STRING, "subvol", "subv1", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 4 mount_setattr(4, "", AT_EMPTY_PATH, {attr_set=MOUNT_ATTR_RDONLY, attr_clr=0, propagation=0 /* MS_??? */, userns_fd=0}, 32) = 0 move_mount(4, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 But this leads to a new problem, that per-subvolume RO/RW mount no longer works, if the initial mount is RO: # mount -o subvol=subv1,ro /dev/test/scratch1 /mnt/test # mount -o rw,subvol=subv2 /dev/test/scratch1 /mnt/scratch # mount | grep mnt /dev/mapper/test-scratch1 on /mnt/test type btrfs (ro,relatime,discard=async,space_cache=v2,subvolid=256,subvol=/subv1) /dev/mapper/test-scratch1 on /mnt/scratch type btrfs (ro,relatime,discard=async,space_cache=v2,subvolid=257,subvol=/subv2) # touch /mnt/scratch/foobar touch: cannot touch '/mnt/scratch/foobar': Read-only file system This is a common use cases on distros. [CAUSE] We have a workaround for remount to handle the RO->RW change, but if the mount is using the new mount API, we do not do that, and rely on the mount tool NOT to set the ro flag. But that's not how the mount tool is doing for the new API: fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/mapper/test-scratch1", 0) = 0 fsconfig(3, FSCONFIG_SET_STRING, "subvol", "subv1", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 <<<< Setting RO flag for super block fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 4 mount_setattr(4, "", AT_EMPTY_PATH, {attr_set=MOUNT_ATTR_RDONLY, attr_clr=0, propagation=0 /* MS_??? */, userns_fd=0}, 32) = 0 move_mount(4, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 This means we will set the super block RO at the first mount. Later RW mount will not try to reconfigure the fs to RW because the mount tool is already using the new API. This totally breaks the per-subvolume RO/RW mount behavior. [FIX] Do not skip the reconfiguration even if using the new API. The old comments are just expecting any mount tool to properly skip the RO flag set even if we specify "ro", which is not the reality. Update the comments regarding the backward compatibility on the kernel level so it works with old and new mount utilities. CC: stable@vger.kernel.org # 6.8+ Fixes: f044b318675f ("btrfs: handle the ro->rw transition for mounting different subvolumes") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 926d7a9ed99d..c64d07134122 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1979,25 +1979,10 @@ error: * fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem * in fc->sb_flags. * - * This disambiguation has rather positive consequences. Mounting a subvolume - * ro will not also turn the superblock ro. Only the mount for the subvolume - * will become ro. - * - * So, if the superblock creation request comes from the new mount API the - * caller must have explicitly done: - * - * fsconfig(FSCONFIG_SET_FLAG, "ro") - * fsmount/mount_setattr(MOUNT_ATTR_RDONLY) - * - * IOW, at some point the caller must have explicitly turned the whole - * superblock ro and we shouldn't just undo it like we did for the old mount - * API. In any case, it lets us avoid the hack in the new mount API. - * - * Consequently, the remounting hack must only be used for requests originating - * from the old mount API and should be marked for full deprecation so it can be - * turned off in a couple of years. - * - * The new mount API has no reason to support this hack. + * But, currently the util-linux mount command already utilizes the new mount + * API and is still setting fsconfig(FSCONFIG_SET_FLAG, "ro") no matter if it's + * btrfs or not, setting the whole super block RO. To make per-subvolume mounting + * work with different options work we need to keep backward compatibility. */ static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc) { @@ -2019,7 +2004,7 @@ static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc) if (IS_ERR(mnt)) return mnt; - if (!fc->oldapi || !ro2rw) + if (!ro2rw) return mnt; /* We need to convert to rw, call reconfigure. */ From c9a75ec45f1111ef530ab186c2a7684d0a0c9245 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Nov 2024 12:11:15 +0000 Subject: [PATCH 217/281] btrfs: reinitialize delayed ref list after deleting it from the list At insert_delayed_ref() if we need to update the action of an existing ref to BTRFS_DROP_DELAYED_REF, we delete the ref from its ref head's ref_add_list using list_del(), which leaves the ref's add_list member not reinitialized, as list_del() sets the next and prev members of the list to LIST_POISON1 and LIST_POISON2, respectively. If later we end up calling drop_delayed_ref() against the ref, which can happen during merging or when destroying delayed refs due to a transaction abort, we can trigger a crash since at drop_delayed_ref() we call list_empty() against the ref's add_list, which returns false since the list was not reinitialized after the list_del() and as a consequence we call list_del() again at drop_delayed_ref(). This results in an invalid list access since the next and prev members are set to poison pointers, resulting in a splat if CONFIG_LIST_HARDENED and CONFIG_DEBUG_LIST are set or invalid poison pointer dereferences otherwise. So fix this by deleting from the list with list_del_init() instead. Fixes: 1d57ee941692 ("btrfs: improve delayed refs iterations") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 115b90d29b1d..65d841d7142c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -649,7 +649,7 @@ static bool insert_delayed_ref(struct btrfs_trans_handle *trans, &href->ref_add_list); else if (ref->action == BTRFS_DROP_DELAYED_REF) { ASSERT(!list_empty(&exist->add_list)); - list_del(&exist->add_list); + list_del_init(&exist->add_list); } else { ASSERT(0); } From 2b084d8205949dd804e279df8e68531da78be1e8 Mon Sep 17 00:00:00 2001 From: Haisu Wang Date: Fri, 25 Oct 2024 14:54:40 +0800 Subject: [PATCH 218/281] btrfs: fix the length of reserved qgroup to free The dealloc flag may be cleared and the extent won't reach the disk in cow_file_range when errors path. The reserved qgroup space is freed in commit 30479f31d44d ("btrfs: fix qgroup reserve leaks in cow_file_range"). However, the length of untouched region to free needs to be adjusted with the correct remaining region size. Fixes: 30479f31d44d ("btrfs: fix qgroup reserve leaks in cow_file_range") CC: stable@vger.kernel.org # 6.11+ Reviewed-by: Qu Wenruo Reviewed-by: Boris Burkov Signed-off-by: Haisu Wang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6d257fb40af..a97dfc72a80a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1618,7 +1618,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_folio, &cached, clear_bits, page_ops); - btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); + btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL); } return ret; } From 1f26339b2ed63d1e8e18a18674fb73a392f3660e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 5 Nov 2024 17:31:01 +0100 Subject: [PATCH 219/281] net: vertexcom: mse102x: Fix possible double free of TX skb The scope of the TX skb is wider than just mse102x_tx_frame_spi(), so in case the TX skb room needs to be expanded, we should free the the temporary skb instead of the original skb. Otherwise the original TX skb pointer would be freed again in mse102x_tx_work(), which leads to crashes: Internal error: Oops: 0000000096000004 [#2] PREEMPT SMP CPU: 0 PID: 712 Comm: kworker/0:1 Tainted: G D 6.6.23 Hardware name: chargebyte Charge SOM DC-ONE (DT) Workqueue: events mse102x_tx_work [mse102x] pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_release_data+0xb8/0x1d8 lr : skb_release_data+0x1ac/0x1d8 sp : ffff8000819a3cc0 x29: ffff8000819a3cc0 x28: ffff0000046daa60 x27: ffff0000057f2dc0 x26: ffff000005386c00 x25: 0000000000000002 x24: 00000000ffffffff x23: 0000000000000000 x22: 0000000000000001 x21: ffff0000057f2e50 x20: 0000000000000006 x19: 0000000000000000 x18: ffff00003fdacfcc x17: e69ad452d0c49def x16: 84a005feff870102 x15: 0000000000000000 x14: 000000000000024a x13: 0000000000000002 x12: 0000000000000000 x11: 0000000000000400 x10: 0000000000000930 x9 : ffff00003fd913e8 x8 : fffffc00001bc008 x7 : 0000000000000000 x6 : 0000000000000008 x5 : ffff00003fd91340 x4 : 0000000000000000 x3 : 0000000000000009 x2 : 00000000fffffffe x1 : 0000000000000000 x0 : 0000000000000000 Call trace: skb_release_data+0xb8/0x1d8 kfree_skb_reason+0x48/0xb0 mse102x_tx_work+0x164/0x35c [mse102x] process_one_work+0x138/0x260 worker_thread+0x32c/0x438 kthread+0x118/0x11c ret_from_fork+0x10/0x20 Code: aa1303e0 97fffab6 72001c1f 54000141 (f9400660) Cc: stable@vger.kernel.org Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20241105163101.33216-1-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index a04d4073def9..2c37957478fb 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -222,7 +222,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; struct spi_message *msg = &mses->spi_msg; - struct sk_buff *tskb; + struct sk_buff *tskb = NULL; int ret; netif_dbg(mse, tx_queued, mse->ndev, "%s: skb %p, %d@%p\n", @@ -235,7 +235,6 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, if (!tskb) return -ENOMEM; - dev_kfree_skb(txp); txp = tskb; } @@ -257,6 +256,8 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, mse->stats.xfer_err++; } + dev_kfree_skb(tskb); + return ret; } From 25d70702142ac2115e75e01a0a985c6ea1d78033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Fri, 1 Nov 2024 17:17:29 -0400 Subject: [PATCH 220/281] net: stmmac: Fix unbalanced IRQ wake disable warning on single irq case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a23aa0404218 ("net: stmmac: ethtool: Fixed calltrace caused by unbalanced disable_irq_wake calls") introduced checks to prevent unbalanced enable and disable IRQ wake calls. However it only initialized the auxiliary variable on one of the paths, stmmac_request_irq_multi_msi(), missing the other, stmmac_request_irq_single(). Add the same initialization on stmmac_request_irq_single() to prevent "Unbalanced IRQ wake disable" warnings from being printed the first time disable_irq_wake() is called on platforms that run on that code path. Fixes: a23aa0404218 ("net: stmmac: ethtool: Fixed calltrace caused by unbalanced disable_irq_wake calls") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241101-stmmac-unbalanced-wake-single-fix-v1-1-5952524c97f0@collabora.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 208dbc68aaf9..7bf275f127c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3780,6 +3780,7 @@ static int stmmac_request_irq_single(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ + priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { ret = request_irq(priv->wol_irq, stmmac_interrupt, IRQF_SHARED, dev->name, dev); From 8c462d56487e3abdbf8a61cedfe7c795a54f4a78 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 6 Nov 2024 16:04:48 +0000 Subject: [PATCH 221/281] arm64: smccc: Remove broken support for SMCCCv1.3 SVE discard hint SMCCCv1.3 added a hint bit which callers can set in an SMCCC function ID (AKA "FID") to indicate that it is acceptable for the SMCCC implementation to discard SVE and/or SME state over a specific SMCCC call. The kernel support for using this hint is broken and SMCCC calls may clobber the SVE and/or SME state of arbitrary tasks, though FPSIMD state is unaffected. The kernel support is intended to use the hint when there is no SVE or SME state to save, and to do this it checks whether TIF_FOREIGN_FPSTATE is set or TIF_SVE is clear in assembly code: | ldr , [, #TSK_TI_FLAGS] | tbnz , #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? | tbnz , #TIF_SVE, 2f // Does that state include SVE? | | 1: orr , , ARM_SMCCC_1_3_SVE_HINT | 2: | << SMCCC call using FID >> This is not safe as-is: (1) SMCCC calls can be made in a preemptible context and preemption can result in TIF_FOREIGN_FPSTATE being set or cleared at arbitrary points in time. Thus checking for TIF_FOREIGN_FPSTATE provides no guarantee. (2) TIF_FOREIGN_FPSTATE only indicates that the live FP/SVE/SME state in the CPU does not belong to the current task, and does not indicate that clobbering this state is acceptable. When the live CPU state is clobbered it is necessary to update fpsimd_last_state.st to ensure that a subsequent context switch will reload FP/SVE/SME state from memory rather than consuming the clobbered state. This and the SMCCC call itself must happen in a critical section with preemption disabled to avoid races. (3) Live SVE/SME state can exist with TIF_SVE clear (e.g. with only TIF_SME set), and checking TIF_SVE alone is insufficient. Remove the broken support for the SMCCCv1.3 SVE saving hint. This is effectively a revert of commits: * cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") * a7c3acca5380 ("arm64: smccc: Save lr before calling __arm_smccc_sve_check()") ... leaving behind the ARM_SMCCC_VERSION_1_3 and ARM_SMCCC_1_3_SVE_HINT definitions, since these are simply definitions from the SMCCC specification, and the latter is used in KVM via ARM_SMCCC_CALL_HINTS. If we want to bring this back in future, we'll probably want to handle this logic in C where we can use all the usual FPSIMD/SVE/SME helper functions, and that'll likely require some rework of the SMCCC code and/or its callers. Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Cc: stable@vger.kernel.org Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241106160448.2712997-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/smccc-call.S | 35 +++------------------------------- drivers/firmware/smccc/smccc.c | 4 ---- include/linux/arm-smccc.h | 32 +++---------------------------- 3 files changed, 6 insertions(+), 65 deletions(-) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index 487381164ff6..2def9d0dd3dd 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -7,48 +7,19 @@ #include #include -#include - -/* - * If we have SMCCC v1.3 and (as is likely) no SVE state in - * the registers then set the SMCCC hint bit to say there's no - * need to preserve it. Do this by directly adjusting the SMCCC - * function value which is already stored in x0 ready to be called. - */ -SYM_FUNC_START(__arm_smccc_sve_check) - - ldr_l x16, smccc_has_sve_hint - cbz x16, 2f - - get_current_task x16 - ldr x16, [x16, #TSK_TI_FLAGS] - tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? - tbnz x16, #TIF_SVE, 2f // Does that state include SVE? - -1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT - -2: ret -SYM_FUNC_END(__arm_smccc_sve_check) -EXPORT_SYMBOL(__arm_smccc_sve_check) .macro SMCCC instr - stp x29, x30, [sp, #-16]! - mov x29, sp -alternative_if ARM64_SVE - bl __arm_smccc_sve_check -alternative_else_nop_endif \instr #0 - ldr x4, [sp, #16] + ldr x4, [sp] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #24] + ldr x4, [sp, #8] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ldp x29, x30, [sp], #16 - ret +1: ret .endm /* diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c index d670635914ec..a74600d9f2d7 100644 --- a/drivers/firmware/smccc/smccc.c +++ b/drivers/firmware/smccc/smccc.c @@ -16,7 +16,6 @@ static u32 smccc_version = ARM_SMCCC_VERSION_1_0; static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE; bool __ro_after_init smccc_trng_available = false; -u64 __ro_after_init smccc_has_sve_hint = false; s32 __ro_after_init smccc_soc_id_version = SMCCC_RET_NOT_SUPPORTED; s32 __ro_after_init smccc_soc_id_revision = SMCCC_RET_NOT_SUPPORTED; @@ -28,9 +27,6 @@ void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit) smccc_conduit = conduit; smccc_trng_available = smccc_probe_trng(); - if (IS_ENABLED(CONFIG_ARM64_SVE) && - smccc_version >= ARM_SMCCC_VERSION_1_3) - smccc_has_sve_hint = true; if ((smccc_version >= ARM_SMCCC_VERSION_1_2) && (smccc_conduit != SMCCC_CONDUIT_NONE)) { diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index f59099a213d0..67f6fdf2e7cd 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -315,8 +315,6 @@ u32 arm_smccc_get_version(void); void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); -extern u64 smccc_has_sve_hint; - /** * arm_smccc_get_soc_id_version() * @@ -414,15 +412,6 @@ struct arm_smccc_quirk { } state; }; -/** - * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls - * - * Sets the SMCCC hint bit to indicate if there is live state in the SVE - * registers, this modifies x0 in place and should never be called from C - * code. - */ -asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); - /** * __arm_smccc_smc() - make SMC calls * @a0-a7: arguments passed in registers 0 to 7 @@ -490,20 +479,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #endif -/* nVHE hypervisor doesn't have a current thread so needs separate checks */ -#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__) - -#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \ - ARM64_SVE) -#define smccc_sve_clobbers "x16", "x30", "cc", - -#else - -#define SMCCC_SVE_CHECK -#define smccc_sve_clobbers - -#endif - #define __constraint_read_2 "r" (arg0) #define __constraint_read_3 __constraint_read_2, "r" (arg1) #define __constraint_read_4 __constraint_read_3, "r" (arg2) @@ -574,12 +549,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r3 asm("r3"); \ CONCATENATE(__declare_arg_, \ COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ - asm volatile(SMCCC_SVE_CHECK \ - inst "\n" : \ + asm volatile(inst "\n" : \ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ : CONCATENATE(__constraint_read_, \ COUNT_ARGS(__VA_ARGS__)) \ - : smccc_sve_clobbers "memory"); \ + : "memory"); \ if (___res) \ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ } while (0) @@ -628,7 +602,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, asm ("" : \ : CONCATENATE(__constraint_read_, \ COUNT_ARGS(__VA_ARGS__)) \ - : smccc_sve_clobbers "memory"); \ + : "memory"); \ if (___res) \ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ } while (0) From 81235ae0c846e1fb46a2c6fe9283fe2b2b24f7dc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 6 Nov 2024 16:42:20 +0000 Subject: [PATCH 222/281] arm64: Kconfig: Make SME depend on BROKEN for now Although support for SME was merged in v5.19, we've since uncovered a number of issues with the implementation, including issues which might corrupt the FPSIMD/SVE/SME state of arbitrary tasks. While there are patches to address some of these issues, ongoing review has highlighted additional functional problems, and more time is necessary to analyse and fix these. For now, mark SME as BROKEN in the hope that we can fix things properly in the near future. As SME is an OPTIONAL part of ARMv9.2+, and there is very little extant hardware, this should not adversely affect the vast majority of users. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Cc: stable@vger.kernel.org # 5.19 Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20241106164220.2789279-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd9df6dcc593..70d7f4f20225 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2214,6 +2214,7 @@ config ARM64_SME bool "ARM Scalable Matrix Extension support" default y depends on ARM64_SVE + depends on BROKEN help The Scalable Matrix Extension (SME) is an extension to the AArch64 execution state which utilises a substantial subset of the SVE From c03d278fdf35e73dd0ec543b9b556876b9d9a8dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Nov 2024 12:07:22 +0100 Subject: [PATCH 223/281] netfilter: nf_tables: wait for rcu grace period on net_device removal 8c873e219970 ("netfilter: core: free hooks with call_rcu") removed synchronize_net() call when unregistering basechain hook, however, net_device removal event handler for the NFPROTO_NETDEV was not updated to wait for RCU grace period. Note that 835b803377f5 ("netfilter: nf_tables_netdev: unregister hooks on net_device removal") does not remove basechain rules on device removal, I was hinted to remove rules on net_device removal later, see 5ebe0b0eec9d ("netfilter: nf_tables: destroy basechain and rules on netdevice removal"). Although NETDEV_UNREGISTER event is guaranteed to be handled after synchronize_net() call, this path needs to wait for rcu grace period via rcu callback to release basechain hooks if netns is alive because an ongoing netlink dump could be in progress (sockets hold a reference on the netns). Note that nf_tables_pre_exit_net() unregisters and releases basechain hooks but it is possible to see NETDEV_UNREGISTER at a later stage in the netns exit path, eg. veth peer device in another netns: cleanup_net() default_device_exit_batch() unregister_netdevice_many_notify() notifier_call_chain() nf_tables_netdev_event() __nft_release_basechain() In this particular case, same rule of thumb applies: if netns is alive, then wait for rcu grace period because netlink dump in the other netns could be in progress. Otherwise, if the other netns is going away then no netlink dump can be in progress and basechain hooks can be released inmediately. While at it, turn WARN_ON() into WARN_ON_ONCE() for the basechain validation, which should not ever happen. Fixes: 835b803377f5 ("netfilter: nf_tables_netdev: unregister hooks on net_device removal") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++ net/netfilter/nf_tables_api.c | 41 +++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 91ae20cb7648..066a3ea33b12 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1103,6 +1103,7 @@ struct nft_rule_blob { * @name: name of the chain * @udlen: user data length * @udata: user data in the chain + * @rcu_head: rcu head for deferred release * @blob_next: rule blob pointer to the next in the chain */ struct nft_chain { @@ -1120,6 +1121,7 @@ struct nft_chain { char *name; u16 udlen; u8 *udata; + struct rcu_head rcu_head; /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; @@ -1263,6 +1265,7 @@ static inline void nft_use_inc_restore(u32 *use) * @sets: sets in the table * @objects: stateful objects in the table * @flowtables: flow tables in the table + * @net: netnamespace this table belongs to * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table @@ -1282,6 +1285,7 @@ struct nft_table { struct list_head sets; struct list_head objects; struct list_head flowtables; + possible_net_t net; u64 hgenerator; u64 handle; u32 use; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a24fe62650a7..588a2757986c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1495,6 +1495,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->flowtables); + write_pnet(&table->net, net); table->family = family; table->flags = flags; table->handle = ++nft_net->table_handle; @@ -11430,22 +11431,48 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -int __nft_release_basechain(struct nft_ctx *ctx) +static void __nft_release_basechain_now(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; - if (WARN_ON(!nft_is_base_chain(ctx->chain))) + list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { + list_del(&rule->list); + nf_tables_rule_release(ctx, rule); + } + nf_tables_chain_destroy(ctx->chain); +} + +static void nft_release_basechain_rcu(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + struct nft_ctx ctx = { + .family = chain->table->family, + .chain = chain, + .net = read_pnet(&chain->table->net), + }; + + __nft_release_basechain_now(&ctx); + put_net(ctx.net); +} + +int __nft_release_basechain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) return 0; nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); - list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { - list_del(&rule->list); + list_for_each_entry(rule, &ctx->chain->rules, list) nft_use_dec(&ctx->chain->use); - nf_tables_rule_release(ctx, rule); - } + nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx->chain); + + if (maybe_get_net(ctx->net)) + call_rcu(&ctx->chain->rcu_head, nft_release_basechain_rcu); + else + __nft_release_basechain_now(ctx); return 0; } From 86a48a00efdf61197b6658e52c6140463eb313dc Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Mon, 4 Nov 2024 16:57:03 +0800 Subject: [PATCH 224/281] virtio_net: Support dynamic rss indirection table size When reading/writing virtio_net_ctrl_rss, we get the indirection table size from vi->rss_indir_table_size, which is initialized in virtnet_probe(). However, the actual size of indirection_table was set as VIRTIO_NET_RSS_MAX_TABLE_LEN=128. This collision may cause issues if the vi->rss_indir_table_size exceeds 128. This patch instead uses dynamic indirection table, allocated with vi->rss after vi->rss_indir_table_size initialized. And free it in virtnet_remove(). In virtnet_commit_rss_command(), sgs for rss is initialized differently with hash_report. So indirection_table is not used if !vi->has_rss, and then we don't need to alloc indirection_table for hash_report only uses. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Philo Lu Signed-off-by: Xuan Zhuo Acked-by: Joe Damato Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 792e9eadbfc3..4b507007d242 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -368,15 +368,16 @@ struct receive_queue { * because table sizes may be differ according to the device configuration. */ #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 -#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 struct virtio_net_ctrl_rss { u32 hash_types; u16 indirection_table_mask; u16 unclassified_queue; - u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; + u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ u16 max_tx_vq; u8 hash_key_length; u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + + u16 *indirection_table; }; /* Control VQ buffers: protected by the rtnl lock */ @@ -512,6 +513,25 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, struct page *page, void *buf, int len, int truesize); +static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size) +{ + if (!indir_table_size) { + rss->indirection_table = NULL; + return 0; + } + + rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL); + if (!rss->indirection_table) + return -ENOMEM; + + return 0; +} + +static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss) +{ + kfree(rss->indirection_table); +} + static bool is_xdp_frame(void *ptr) { return (unsigned long)ptr & VIRTIO_XDP_FLAG; @@ -3828,11 +3848,15 @@ static bool virtnet_commit_rss_command(struct virtnet_info *vi) /* prepare sgs */ sg_init_table(sgs, 4); - sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); + sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved); sg_set_buf(&sgs[0], &vi->rss, sg_buf_size); - sg_buf_size = sizeof(uint16_t) * (vi->rss.indirection_table_mask + 1); - sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); + if (vi->has_rss) { + sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size; + sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size); + } else { + sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t)); + } sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); @@ -6420,6 +6444,9 @@ static int virtnet_probe(struct virtio_device *vdev) virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); } + err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size); + if (err) + goto free; if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = @@ -6674,6 +6701,8 @@ static void virtnet_remove(struct virtio_device *vdev) remove_vq_common(vi); + rss_indirection_table_free(&vi->rss); + free_netdev(vi->dev); } From 3f7d9c1964fcd16d02a8a9d4fd6f6cb60c4cc530 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Mon, 4 Nov 2024 16:57:04 +0800 Subject: [PATCH 225/281] virtio_net: Add hash_key_length check Add hash_key_length check in virtnet_probe() to avoid possible out of bound errors when setting/reading the hash key. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Philo Lu Signed-off-by: Xuan Zhuo Acked-by: Joe Damato Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4b507007d242..545dda8ec077 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -6451,6 +6451,12 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { + dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", + vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); + err = -EINVAL; + goto free; + } vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); From dc749b7b06082ccaacc602e724445da19cd03e9f Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Mon, 4 Nov 2024 16:57:05 +0800 Subject: [PATCH 226/281] virtio_net: Sync rss config to device when virtnet_probe During virtnet_probe, default rss configuration is initialized, but was not committed to the device. This patch fix this by sending rss command after device ready in virtnet_probe. Otherwise, the actual rss configuration used by device can be different with that read by user from driver, which may confuse the user. If the command committing fails, driver rss will be disabled. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Philo Lu Signed-off-by: Xuan Zhuo Acked-by: Joe Damato Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 545dda8ec077..b3232b8baa25 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -6584,6 +6584,15 @@ static int virtnet_probe(struct virtio_device *vdev) virtio_device_ready(vdev); + if (vi->has_rss || vi->has_rss_hash_report) { + if (!virtnet_commit_rss_command(vi)) { + dev_warn(&vdev->dev, "RSS disabled because committing failed.\n"); + dev->hw_features &= ~NETIF_F_RXHASH; + vi->has_rss_hash_report = false; + vi->has_rss = false; + } + } + virtnet_set_queues(vi, vi->curr_queue_pairs); /* a random MAC address has been assigned, notify the device. From 50bfcaedd78e53135ec0504302269b3b65bf1eff Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Mon, 4 Nov 2024 16:57:06 +0800 Subject: [PATCH 227/281] virtio_net: Update rss when set queue RSS configuration should be updated with queue number. In particular, it should be updated when (1) rss enabled and (2) default rss configuration is used without user modification. During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. Also remove the `vi->has_rss ?` check when setting vi->rss.max_tx_vq, because this is not used in the other hash_report case. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Philo Lu Signed-off-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 65 +++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b3232b8baa25..53a038fcbe99 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3394,15 +3394,59 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); } +static bool virtnet_commit_rss_command(struct virtnet_info *vi); + +static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pairs) +{ + u32 indir_val = 0; + int i = 0; + + for (; i < vi->rss_indir_table_size; ++i) { + indir_val = ethtool_rxfh_indir_default(i, queue_pairs); + vi->rss.indirection_table[i] = indir_val; + } + vi->rss.max_tx_vq = queue_pairs; +} + static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; - struct scatterlist sg; + struct virtio_net_ctrl_rss old_rss; struct net_device *dev = vi->dev; + struct scatterlist sg; if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; + /* Firstly check if we need update rss. Do updating if both (1) rss enabled and + * (2) no user configuration. + * + * During rss command processing, device updates queue_pairs using rss.max_tx_vq. That is, + * the device updates queue_pairs together with rss, so we can skip the sperate queue_pairs + * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. + */ + if (vi->has_rss && !netif_is_rxfh_configured(dev)) { + memcpy(&old_rss, &vi->rss, sizeof(old_rss)); + if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) { + vi->rss.indirection_table = old_rss.indirection_table; + return -ENOMEM; + } + + virtnet_rss_update_by_qpairs(vi, queue_pairs); + + if (!virtnet_commit_rss_command(vi)) { + /* restore ctrl_rss if commit_rss_command failed */ + rss_indirection_table_free(&vi->rss); + memcpy(&vi->rss, &old_rss, sizeof(old_rss)); + + dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n", + queue_pairs); + return -EINVAL; + } + rss_indirection_table_free(&old_rss); + goto succ; + } + mq = kzalloc(sizeof(*mq), GFP_KERNEL); if (!mq) return -ENOMEM; @@ -3415,12 +3459,12 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", queue_pairs); return -EINVAL; - } else { - vi->curr_queue_pairs = queue_pairs; - /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) - schedule_delayed_work(&vi->refill, 0); } +succ: + vi->curr_queue_pairs = queue_pairs; + /* virtnet_open() will refill when device is going to up. */ + if (dev->flags & IFF_UP) + schedule_delayed_work(&vi->refill, 0); return 0; } @@ -3880,21 +3924,14 @@ err: static void virtnet_init_default_rss(struct virtnet_info *vi) { - u32 indir_val = 0; - int i = 0; - vi->rss.hash_types = vi->rss_hash_types_supported; vi->rss_hash_types_saved = vi->rss_hash_types_supported; vi->rss.indirection_table_mask = vi->rss_indir_table_size ? vi->rss_indir_table_size - 1 : 0; vi->rss.unclassified_queue = 0; - for (; i < vi->rss_indir_table_size; ++i) { - indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); - vi->rss.indirection_table[i] = indir_val; - } + virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); - vi->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; vi->rss.hash_key_length = vi->rss_key_size; netdev_rss_key_fill(vi->rss.key, vi->rss_key_size); From 702a47ce6dde72f6e247b3c3c00a0fc521f9b1c6 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 6 Nov 2024 12:18:02 +0000 Subject: [PATCH 228/281] media: videobuf2-core: copy vb planes unconditionally Copy the relevant data from userspace to the vb->planes unconditionally as it's possible some of the fields may have changed after the buffer has been validated. Keep the dma_buf_put(planes[plane].dbuf) calls in the first `if (!reacquired)` case, in order to be close to the plane validation code where the buffers were got in the first place. Cc: stable@vger.kernel.org Fixes: 95af7c00f35b ("media: videobuf2-core: release all planes first in __prepare_dmabuf()") Signed-off-by: Tudor Ambarus Tested-by: Will McVicker Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil --- .../media/common/videobuf2/videobuf2-core.c | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 29a8d876e6c2..b0523fc23506 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1482,18 +1482,23 @@ static int __prepare_dmabuf(struct vb2_buffer *vb) } vb->planes[plane].dbuf_mapped = 1; } + } else { + for (plane = 0; plane < vb->num_planes; ++plane) + dma_buf_put(planes[plane].dbuf); + } - /* - * Now that everything is in order, copy relevant information - * provided by userspace. - */ - for (plane = 0; plane < vb->num_planes; ++plane) { - vb->planes[plane].bytesused = planes[plane].bytesused; - vb->planes[plane].length = planes[plane].length; - vb->planes[plane].m.fd = planes[plane].m.fd; - vb->planes[plane].data_offset = planes[plane].data_offset; - } + /* + * Now that everything is in order, copy relevant information + * provided by userspace. + */ + for (plane = 0; plane < vb->num_planes; ++plane) { + vb->planes[plane].bytesused = planes[plane].bytesused; + vb->planes[plane].length = planes[plane].length; + vb->planes[plane].m.fd = planes[plane].m.fd; + vb->planes[plane].data_offset = planes[plane].data_offset; + } + if (reacquired) { /* * Call driver-specific initialization on the newly acquired buffer, * if provided. @@ -1503,9 +1508,6 @@ static int __prepare_dmabuf(struct vb2_buffer *vb) dprintk(q, 1, "buffer initialization failed\n"); goto err_put_vb2_buf; } - } else { - for (plane = 0; plane < vb->num_planes; ++plane) - dma_buf_put(planes[plane].dbuf); } ret = call_vb_qop(vb, buf_prepare, vb); From 71803c1dfa29e0d13b99e48fda11107cc8caebc7 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 4 Nov 2024 21:01:38 +0800 Subject: [PATCH 229/281] net: arc: fix the device for dma_map_single/dma_unmap_single The ndev->dev and pdev->dev aren't the same device, use ndev->dev.parent which has dma_mask, ndev->dev.parent is just pdev->dev. Or it would cause the following issue: [ 39.933526] ------------[ cut here ]------------ [ 39.938414] WARNING: CPU: 1 PID: 501 at kernel/dma/mapping.c:149 dma_map_page_attrs+0x90/0x1f8 Fixes: f959dcd6ddfd ("dma-direct: Fix potential NULL pointer dereference") Signed-off-by: David Wu Signed-off-by: Johan Jonker Signed-off-by: Andy Yan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/arc/emac_main.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 31ee477dd131..8283aeee35fb 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -111,6 +111,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < TX_BD_NUM; i++) { @@ -140,7 +141,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) stats->tx_bytes += skb->len; } - dma_unmap_single(&ndev->dev, dma_unmap_addr(tx_buff, addr), + dma_unmap_single(dev, dma_unmap_addr(tx_buff, addr), dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); /* return the sk_buff to system */ @@ -174,6 +175,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) static int arc_emac_rx(struct net_device *ndev, int budget) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int work_done; for (work_done = 0; work_done < budget; work_done++) { @@ -223,9 +225,9 @@ static int arc_emac_rx(struct net_device *ndev, int budget) continue; } - addr = dma_map_single(&ndev->dev, (void *)skb->data, + addr = dma_map_single(dev, (void *)skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, addr)) { + if (dma_mapping_error(dev, addr)) { if (net_ratelimit()) netdev_err(ndev, "cannot map dma buffer\n"); dev_kfree_skb(skb); @@ -237,7 +239,7 @@ static int arc_emac_rx(struct net_device *ndev, int budget) } /* unmap previosly mapped skb */ - dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), + dma_unmap_single(dev, dma_unmap_addr(rx_buff, addr), dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); pktlen = info & LEN_MASK; @@ -423,6 +425,7 @@ static int arc_emac_open(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = ndev->phydev; + struct device *dev = ndev->dev.parent; int i; phy_dev->autoneg = AUTONEG_ENABLE; @@ -445,9 +448,9 @@ static int arc_emac_open(struct net_device *ndev) if (unlikely(!rx_buff->skb)) return -ENOMEM; - addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data, + addr = dma_map_single(dev, (void *)rx_buff->skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, addr)) { + if (dma_mapping_error(dev, addr)) { netdev_err(ndev, "cannot dma map\n"); dev_kfree_skb(rx_buff->skb); return -ENOMEM; @@ -548,6 +551,7 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) static void arc_free_tx_queue(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < TX_BD_NUM; i++) { @@ -555,7 +559,7 @@ static void arc_free_tx_queue(struct net_device *ndev) struct buffer_state *tx_buff = &priv->tx_buff[i]; if (tx_buff->skb) { - dma_unmap_single(&ndev->dev, + dma_unmap_single(dev, dma_unmap_addr(tx_buff, addr), dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); @@ -579,6 +583,7 @@ static void arc_free_tx_queue(struct net_device *ndev) static void arc_free_rx_queue(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < RX_BD_NUM; i++) { @@ -586,7 +591,7 @@ static void arc_free_rx_queue(struct net_device *ndev) struct buffer_state *rx_buff = &priv->rx_buff[i]; if (rx_buff->skb) { - dma_unmap_single(&ndev->dev, + dma_unmap_single(dev, dma_unmap_addr(rx_buff, addr), dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); @@ -679,6 +684,7 @@ static netdev_tx_t arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) unsigned int len, *txbd_curr = &priv->txbd_curr; struct net_device_stats *stats = &ndev->stats; __le32 *info = &priv->txbd[*txbd_curr].info; + struct device *dev = ndev->dev.parent; dma_addr_t addr; if (skb_padto(skb, ETH_ZLEN)) @@ -692,10 +698,9 @@ static netdev_tx_t arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } - addr = dma_map_single(&ndev->dev, (void *)skb->data, len, - DMA_TO_DEVICE); + addr = dma_map_single(dev, (void *)skb->data, len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&ndev->dev, addr))) { + if (unlikely(dma_mapping_error(dev, addr))) { stats->tx_dropped++; stats->tx_errors++; dev_kfree_skb_any(skb); From 0a1c7a7b0adbf595ce7f218609db53749e966573 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 4 Nov 2024 21:01:39 +0800 Subject: [PATCH 230/281] net: arc: rockchip: fix emac mdio node support The binding emac_rockchip.txt is converted to YAML. Changed against the original binding is an added MDIO subnode. This make the driver failed to find the PHY, and given the 'mdio has invalid PHY address' it is probably looking in the wrong node. Fix emac_mdio.c so that it can handle both old and new device trees. Fixes: 1dabb74971b3 ("ARM: dts: rockchip: restyle emac nodes") Signed-off-by: Johan Jonker Tested-by: Andy Yan Link: https://lore.kernel.org/r/20220603163539.537-3-jbx6244@gmail.com Signed-off-by: Andy Yan Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/arc/emac_mdio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c index 87f40c2ba904..078b1a72c161 100644 --- a/drivers/net/ethernet/arc/emac_mdio.c +++ b/drivers/net/ethernet/arc/emac_mdio.c @@ -133,6 +133,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv) struct arc_emac_mdio_bus_data *data = &priv->bus_data; struct device_node *np = priv->dev->of_node; const char *name = "Synopsys MII Bus"; + struct device_node *mdio_node; struct mii_bus *bus; int error; @@ -164,7 +165,13 @@ int arc_mdio_probe(struct arc_emac_priv *priv) snprintf(bus->id, MII_BUS_ID_SIZE, "%s", bus->name); - error = of_mdiobus_register(bus, priv->dev->of_node); + /* Backwards compatibility for EMAC nodes without MDIO subnode. */ + mdio_node = of_get_child_by_name(np, "mdio"); + if (!mdio_node) + mdio_node = of_node_get(np); + + error = of_mdiobus_register(bus, mdio_node); + of_node_put(mdio_node); if (error) { mdiobus_free(bus); return dev_err_probe(priv->dev, error, From 8c21e40e1e481f7fef6e570089e317068b972c45 Mon Sep 17 00:00:00 2001 From: Markus Petri Date: Thu, 7 Nov 2024 10:40:20 +0100 Subject: [PATCH 231/281] ASoC: amd: yc: Support dmic on another model of Lenovo Thinkpad E14 Gen 6 Another model of Thinkpad E14 Gen 6 (21M4) needs a quirk entry for the dmic to be detected. Signed-off-by: Markus Petri Link: https://patch.msgid.link/20241107094020.1050935-1-mp@localhost Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index dc476bfb6da4..2436e8deb2be 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -227,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21M3"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M4"), + } + }, { .driver_data = &acp6x_card, .matches = { From 63c1c87993e0e5bb11bced3d8224446a2bc62338 Mon Sep 17 00:00:00 2001 From: Luo Yifan Date: Wed, 6 Nov 2024 09:46:54 +0800 Subject: [PATCH 232/281] ASoC: stm: Prevent potential division by zero in stm32_sai_mclk_round_rate() This patch checks if div is less than or equal to zero (div <= 0). If div is zero or negative, the function returns -EINVAL, ensuring the division operation (*prate / div) is safe to perform. Signed-off-by: Luo Yifan Link: https://patch.msgid.link/20241106014654.206860-1-luoyifan@cmss.chinamobile.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 7bc4a96b7503..2570daa3e225 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -378,8 +378,8 @@ static long stm32_sai_mclk_round_rate(struct clk_hw *hw, unsigned long rate, int div; div = stm32_sai_get_clk_div(sai, *prate, rate); - if (div < 0) - return div; + if (div <= 0) + return -EINVAL; mclk->freq = *prate / div; From 23569c8b314925bdb70dd1a7b63cfe6100868315 Mon Sep 17 00:00:00 2001 From: Luo Yifan Date: Thu, 7 Nov 2024 09:59:36 +0800 Subject: [PATCH 233/281] ASoC: stm: Prevent potential division by zero in stm32_sai_get_clk_div() This patch checks if div is less than or equal to zero (div <= 0). If div is zero or negative, the function returns -EINVAL, ensuring the division operation is safe to perform. Signed-off-by: Luo Yifan Reviewed-by: Olivier Moysan Link: https://patch.msgid.link/20241107015936.211902-1-luoyifan@cmss.chinamobile.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 2570daa3e225..5828f9dd866e 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -317,7 +317,7 @@ static int stm32_sai_get_clk_div(struct stm32_sai_sub_data *sai, int div; div = DIV_ROUND_CLOSEST(input_rate, output_rate); - if (div > SAI_XCR1_MCKDIV_MAX(version)) { + if (div > SAI_XCR1_MCKDIV_MAX(version) || div <= 0) { dev_err(&sai->pdev->dev, "Divider %d out of range\n", div); return -EINVAL; } From bb1fb40f8beb45a3733118780a3da24fb071a2e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Nov 2024 16:55:05 -0500 Subject: [PATCH 234/281] NFSD: Fix READDIR on NFSv3 mounts of ext4 exports I noticed that recently, simple operations like "make" started failing on NFSv3 mounts of ext4 exports. Network capture shows that READDIRPLUS operated correctly but READDIR failed with NFS3ERR_INVAL. The vfs_llseek() call returned EINVAL when it is passed a non-zero starting directory cookie. I bisected to commit c689bdd3bffa ("nfsd: further centralize protocol version checks."). Turns out that nfsd3_proc_readdir() does not call fh_verify() before it calls nfsd_readdir(), so the new fhp->fh_64bit_cookies boolean is not set properly. This leaves the NFSD_MAY_64BIT_COOKIE unset when the directory is opened. For ext4, this causes the wrong "max file size" value to be used when sanity checking the incoming directory cookie (which is a seek offset value). The fhp->fh_64bit_cookies boolean is /always/ properly initialized after nfsd_open() returns. There doesn't seem to be a reason for the generic NFSD open helper to handle the f_mode fix-up for directories, so just move that to the one caller that tries to open an S_IFDIR with NFSD_MAY_64BIT_COOKIE. Suggested-by: NeilBrown Fixes: c689bdd3bffa ("nfsd: further centralize protocol version checks.") Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 22325b590e17..d6d4f2a0e898 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -903,11 +903,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, goto out; } - if (may_flags & NFSD_MAY_64BIT_COOKIE) - file->f_mode |= FMODE_64BITHASH; - else - file->f_mode |= FMODE_32BITHASH; - *filp = file; out: return host_err; @@ -2174,13 +2169,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, loff_t offset = *offsetp; int may_flags = NFSD_MAY_READ; - if (fhp->fh_64bit_cookies) - may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; + if (fhp->fh_64bit_cookies) + file->f_mode |= FMODE_64BITHASH; + else + file->f_mode |= FMODE_32BITHASH; + offset = vfs_llseek(file, offset, SEEK_SET); if (offset < 0) { err = nfserrno((int)offset); From 052ef642bd6c108a24f375f9ad174b97b425a50b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 25 Aug 2024 15:21:31 +0200 Subject: [PATCH 235/281] drm: panel-orientation-quirks: Make Lenovo Yoga Tab 3 X90F DMI match less strict There are 2G and 4G RAM versions of the Lenovo Yoga Tab 3 X90F and it turns out that the 2G version has a DMI product name of "CHERRYVIEW D1 PLATFORM" where as the 4G version has "CHERRYVIEW C0 PLATFORM". The sys-vendor + product-version check are unique enough that the product-name check is not necessary. Drop the product-name check so that the existing DMI match for the 4G RAM version also matches the 2G RAM version. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240825132131.6643-1-hdegoede@redhat.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 0830cae9a4d0..2d84d7ea1ab7 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -403,7 +403,6 @@ static const struct dmi_system_id orientation_data[] = { }, { /* Lenovo Yoga Tab 3 X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, From 444fa5b100e5c90550d6bccfe4476efb0391b3ca Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Wed, 6 Nov 2024 18:58:06 +0000 Subject: [PATCH 236/281] drm/panthor: Lock XArray when getting entries for the VM Similar to commit cac075706f29 ("drm/panthor: Fix race when converting group handle to group object") we need to use the XArray's internal locking when retrieving a vm pointer from there. v2: Removed part of the patch that was trying to protect fetching the heap pointer from XArray, as that operation is protected by the @pool->lock. Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") Reported-by: Jann Horn Cc: stable@vger.kernel.org Signed-off-by: Liviu Dudau Reviewed-by: Boris Brezillon Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20241106185806.389089-1-liviu.dudau@arm.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 5d5e25b1be95..7db2edb3374c 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) { struct panthor_vm *vm; + xa_lock(&pool->xa); vm = panthor_vm_get(xa_load(&pool->xa, handle)); + xa_unlock(&pool->xa); return vm; } From 48b86532c10128cf50c854a90c2d5b1410f4012d Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Nov 2024 15:28:40 +0200 Subject: [PATCH 237/281] ASoC: SOF: sof-client-probes-ipc4: Set param_size extension bits Write the size of the optional payload of SOF_IPC4_MOD_INIT_INSTANCE message to extension param_size-bits. The previous IPC4 version does not set these bits that should indicate the size of the optional payload (struct sof_ipc4_probe_cfg). The old firmware side component code works well without these bits, but when the probes are converted to use the generic module API, this does not work anymore. Fixes: f5623593060f ("ASoC: SOF: IPC4: probes: Implement IPC4 ops for probes client device") Signed-off-by: Jyri Sarha Reviewed-by: Ranjani Sridharan Reviewed-by: Liam Girdwood Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20241107132840.17386-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-client-probes-ipc4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/sof-client-probes-ipc4.c b/sound/soc/sof/sof-client-probes-ipc4.c index 796eac0a2e74..603aed222480 100644 --- a/sound/soc/sof/sof-client-probes-ipc4.c +++ b/sound/soc/sof/sof-client-probes-ipc4.c @@ -125,6 +125,7 @@ static int ipc4_probes_init(struct sof_client_dev *cdev, u32 stream_tag, msg.primary |= SOF_IPC4_MSG_TARGET(SOF_IPC4_MODULE_MSG); msg.extension = SOF_IPC4_MOD_EXT_DST_MOD_INSTANCE(INVALID_PIPELINE_ID); msg.extension |= SOF_IPC4_MOD_EXT_CORE_ID(0); + msg.extension |= SOF_IPC4_MOD_EXT_PARAM_SIZE(sizeof(cfg) / sizeof(uint32_t)); msg.data_size = sizeof(cfg); msg.data_ptr = &cfg; From f432a1621f049bb207e78363d9d0e3c6fa2da5db Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 5 Nov 2024 00:17:13 +0100 Subject: [PATCH 238/281] drm/panthor: Be stricter about IO mapping flags The current panthor_device_mmap_io() implementation has two issues: 1. For mapping DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET, panthor_device_mmap_io() bails if VM_WRITE is set, but does not clear VM_MAYWRITE. That means userspace can use mprotect() to make the mapping writable later on. This is a classic Linux driver gotcha. I don't think this actually has any impact in practice: When the GPU is powered, writes to the FLUSH_ID seem to be ignored; and when the GPU is not powered, the dummy_latest_flush page provided by the driver is deliberately designed to not do any flushes, so the only thing writing to the dummy_latest_flush could achieve would be to make *more* flushes happen. 2. panthor_device_mmap_io() does not block MAP_PRIVATE mappings (which are mappings without the VM_SHARED flag). MAP_PRIVATE in combination with VM_MAYWRITE indicates that the VMA has copy-on-write semantics, which for VM_PFNMAP are semi-supported but fairly cursed. In particular, in such a mapping, the driver can only install PTEs during mmap() by calling remap_pfn_range() (because remap_pfn_range() wants to **store the physical address of the mapped physical memory into the vm_pgoff of the VMA**); installing PTEs later on with a fault handler (as panthor does) is not supported in private mappings, and so if you try to fault in such a mapping, vmf_insert_pfn_prot() splats when it hits a BUG() check. Fix it by clearing the VM_MAYWRITE flag (userspace writing to the FLUSH_ID doesn't make sense) and requiring VM_SHARED (copy-on-write semantics for the FLUSH_ID don't make sense). Reproducers for both scenarios are in the notes of my patch on the mailing list; I tested that these bugs exist on a Rock 5B machine. Note that I only compile-tested the patch, I haven't tested it; I don't have a working kernel build setup for the test machine yet. Please test it before applying it. Cc: stable@vger.kernel.org Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") Signed-off-by: Jann Horn Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20241105-panthor-flush-page-fixes-v1-1-829aaf37db93@google.com --- drivers/gpu/drm/panthor/panthor_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 4082c8f2951d..6fbff516c1c1 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -390,11 +390,15 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct * { u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + switch (offset) { case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: if (vma->vm_end - vma->vm_start != PAGE_SIZE || (vma->vm_flags & (VM_WRITE | VM_EXEC))) return -EINVAL; + vm_flags_clear(vma, VM_MAYWRITE); break; From 84b9749a3a704dcc824a88aa8267247c801d51e4 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Wed, 6 Nov 2024 10:12:28 +0800 Subject: [PATCH 239/281] proc/softirqs: replace seq_printf with seq_put_decimal_ull_width seq_printf is costy, on a system with n CPUs, reading /proc/softirqs would yield 10*n decimal values, and the extra cost parsing format string grows linearly with number of cpus. Replace seq_printf with seq_put_decimal_ull_width have significant performance improvement. On an 8CPUs system, reading /proc/softirqs show ~40% performance gain with this patch. Signed-off-by: David Wang <00107082@163.com> Signed-off-by: Linus Torvalds --- fs/proc/softirqs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index f4616083faef..04bb29721419 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v) for (i = 0; i < NR_SOFTIRQS; i++) { seq_printf(p, "%12s:", softirq_to_name[i]); for_each_possible_cpu(j) - seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); + seq_put_decimal_ull_width(p, " ", kstat_softirqs_cpu(i, j), 10); seq_putc(p, '\n'); } return 0; From de88df01796b309903b70888fbdf2b89607e3a6a Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Wed, 6 Nov 2024 09:26:12 +0100 Subject: [PATCH 240/281] net/smc: Fix lookup of netdev by using ib_device_get_netdev() The SMC-R variant of the SMC protocol used direct call to function ib_device_ops.get_netdev() to lookup netdev. As we used mlx5 device driver to run SMC-R, it failed to find a device, because in mlx5_ib the internal net device management for retrieving net devices was replaced by a common interface ib_device_get_netdev() in commit 8d159eb2117b ("RDMA/mlx5: Use IB set_netdev and get_netdev functions"). Since such direct accesses to the internal net device management is not recommended at all, update the SMC-R code to use proper API ib_device_get_netdev(). Fixes: 54903572c23c ("net/smc: allow pnetid-less configuration") Reported-by: Aswin K Reviewed-by: Gerd Bayer Reviewed-by: Halil Pasic Reviewed-by: Simon Horman Reviewed-by: Dust Li Reviewed-by: Wen Gu Reviewed-by: Zhu Yanjun Reviewed-by: D. Wythe Signed-off-by: Wenjia Zhang Reviewed-by: Leon Romanovsky Link: https://patch.msgid.link/20241106082612.57803-1-wenjia@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_ib.c | 8 ++------ net/smc/smc_pnet.c | 4 +--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9297dc20bfe2..9c563cdbea90 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -899,9 +899,7 @@ static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) struct ib_device *ibdev = smcibdev->ibdev; struct net_device *ndev; - if (!ibdev->ops.get_netdev) - return; - ndev = ibdev->ops.get_netdev(ibdev, port + 1); + ndev = ib_device_get_netdev(ibdev, port + 1); if (ndev) { smcibdev->ndev_ifidx[port] = ndev->ifindex; dev_put(ndev); @@ -921,9 +919,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) port_cnt = smcibdev->ibdev->phys_port_cnt; for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { libdev = smcibdev->ibdev; - if (!libdev->ops.get_netdev) - continue; - lndev = libdev->ops.get_netdev(libdev, i + 1); + lndev = ib_device_get_netdev(libdev, i + 1); dev_put(lndev); if (lndev != ndev) continue; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index a04aa0e882f8..716808f374a8 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1054,9 +1054,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; - if (!ibdev->ibdev->ops.get_netdev) - continue; - ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); + ndev = ib_device_get_netdev(ibdev->ibdev, i); if (!ndev) continue; dev_put(ndev); From fc9de52de38f656399d2ce40f7349a6b5f86e787 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Nov 2024 13:03:22 +0000 Subject: [PATCH 241/281] rxrpc: Fix missing locking causing hanging calls If a call gets aborted (e.g. because kafs saw a signal) between it being queued for connection and the I/O thread picking up the call, the abort will be prioritised over the connection and it will be removed from local->new_client_calls by rxrpc_disconnect_client_call() without a lock being held. This may cause other calls on the list to disappear if a race occurs. Fix this by taking the client_call_lock when removing a call from whatever list its ->wait_link happens to be on. Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Reported-by: Marc Dionne Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Link: https://patch.msgid.link/726660.1730898202@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/conn_client.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a1b126a6b0d7..cc22596c7250 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -287,6 +287,7 @@ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ + EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ E_(rxrpc_call_see_zap, "SEE zap ") #define rxrpc_txqueue_traces \ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index d25bf1cf3670..bb11e8289d6d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -516,6 +516,7 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local) spin_lock(&local->client_call_lock); list_move_tail(&call->wait_link, &bundle->waiting_calls); + rxrpc_see_call(call, rxrpc_call_see_waiting_call); spin_unlock(&local->client_call_lock); if (rxrpc_bundle_has_space(bundle)) @@ -586,7 +587,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + /* May still be on ->new_client_calls. */ + spin_lock(&local->client_call_lock); list_del_init(&call->wait_link); + spin_unlock(&local->client_call_lock); return; } From d293958a8595ba566fb90b99da4d6263e14fee15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2024 22:19:22 +0000 Subject: [PATCH 242/281] net/smc: do not leave a dangling sk pointer in __smc_create() Thanks to commit 4bbd360a5084 ("socket: Print pf->create() when it does not clear sock->sk on failure."), syzbot found an issue with AF_SMC: smc_create must clear sock->sk on failure, family: 43, type: 1, protocol: 0 WARNING: CPU: 0 PID: 5827 at net/socket.c:1565 __sock_create+0x96f/0xa30 net/socket.c:1563 Modules linked in: CPU: 0 UID: 0 PID: 5827 Comm: syz-executor259 Not tainted 6.12.0-rc6-next-20241106-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__sock_create+0x96f/0xa30 net/socket.c:1563 Code: 03 00 74 08 4c 89 e7 e8 4f 3b 85 f8 49 8b 34 24 48 c7 c7 40 89 0c 8d 8b 54 24 04 8b 4c 24 0c 44 8b 44 24 08 e8 32 78 db f7 90 <0f> 0b 90 90 e9 d3 fd ff ff 89 e9 80 e1 07 fe c1 38 c1 0f 8c ee f7 RSP: 0018:ffffc90003e4fda0 EFLAGS: 00010246 RAX: 099c6f938c7f4700 RBX: 1ffffffff1a595fd RCX: ffff888034823c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 00000000ffffffe9 R08: ffffffff81567052 R09: 1ffff920007c9f50 R10: dffffc0000000000 R11: fffff520007c9f51 R12: ffffffff8d2cafe8 R13: 1ffffffff1a595fe R14: ffffffff9a789c40 R15: ffff8880764298c0 FS: 000055557b518380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa62ff43225 CR3: 0000000031628000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sock_create net/socket.c:1616 [inline] __sys_socket_create net/socket.c:1653 [inline] __sys_socket+0x150/0x3c0 net/socket.c:1700 __do_sys_socket net/socket.c:1714 [inline] __se_sys_socket net/socket.c:1712 [inline] For reference, see commit 2d859aff775d ("Merge branch 'do-not-leave-dangling-sk-pointers-in-pf-create-functions'") Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Eric Dumazet Cc: Ignat Korchagin Cc: D. Wythe Cc: Dust Li Reviewed-by: Kuniyuki Iwashima Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20241106221922.1544045-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0316217b7687..9d76e902fd77 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3359,8 +3359,10 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, else rc = smc_create_clcsk(net, sk, family); - if (rc) + if (rc) { sk_common_release(sk); + sock->sk = NULL; + } out: return rc; } From 71712cf519faeed529549a79559c06c7fc250a15 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 7 Nov 2024 10:17:56 +0800 Subject: [PATCH 243/281] drivers: net: ionic: add missed debugfs cleanup to ionic_probe() error path The ionic_setup_one() creates a debugfs entry for ionic upon successful execution. However, the ionic_probe() does not release the dentry before returning, resulting in a memory leak. To fix this bug, we add the ionic_debugfs_del_dev() to release the resources in a timely manner before returning. Fixes: 0de38d9f1dba ("ionic: extract common bits from ionic_probe") Signed-off-by: Wentao Liang Acked-by: Shannon Nelson Link: https://patch.msgid.link/20241107021756.1677-1-liangwentao@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index b93791d6b593..f5dc876eb500 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -394,6 +394,7 @@ err_out_free_irqs: err_out_pci: ionic_dev_teardown(ionic); ionic_clear_pci(ionic); + ionic_debugfs_del_dev(ionic); err_out: mutex_destroy(&ionic->dev_cmd_lock); ionic_devlink_free(ionic); From fd00045f383f51b66a7a46084a0e92b8de563157 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 27 Oct 2024 20:40:20 -0400 Subject: [PATCH 244/281] bcachefs: Fix null ptr deref in bucket_gen_get() bucket_gen() checks if we're lookup up a valid bucket and returns NULL otherwise, but bucket_gen_get() was failing to check; other callers were correct. Also do a bit of cleanup on callers. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 2 +- fs/bcachefs/buckets.h | 19 +++++++++++-------- fs/bcachefs/io_read.c | 7 +++---- fs/bcachefs/io_write.c | 7 ++----- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index a7aedb134e9f..30131c3bdd97 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -186,7 +186,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr, .ptrs[0].offset = offset, .ptrs[0].dev = ca->dev_idx, - .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)), + .ptrs[0].gen = bucket_gen_get(ca, sector_to_bucket(ca, offset)), }; rcu_read_unlock(); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index fd5e6ccad45e..ccc78bfe2fd4 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -103,12 +103,18 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) return gens->b + b; } -static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b) +static inline int bucket_gen_get_rcu(struct bch_dev *ca, size_t b) +{ + u8 *gen = bucket_gen(ca, b); + return gen ? *gen : -1; +} + +static inline int bucket_gen_get(struct bch_dev *ca, size_t b) { rcu_read_lock(); - u8 gen = *bucket_gen(ca, b); + int ret = bucket_gen_get_rcu(ca, b); rcu_read_unlock(); - return gen; + return ret; } static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, @@ -169,10 +175,8 @@ static inline int gen_after(u8 a, u8 b) static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { - u8 *gen = bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)); - if (!gen) - return -1; - return gen_after(*gen, ptr->gen); + int gen = bucket_gen_get_rcu(ca, PTR_BUCKET_NR(ca, ptr)); + return gen < 0 ? gen : gen_after(gen, ptr->gen); } /** @@ -184,7 +188,6 @@ static inline int dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr rcu_read_lock(); int ret = dev_ptr_stale_rcu(ca, ptr); rcu_read_unlock(); - return ret; } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fc246f342820..ac6a6fcc2bb8 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -802,16 +802,15 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, PTR_BUCKET_POS(ca, &ptr), BTREE_ITER_cached); - u8 *gen = bucket_gen(ca, iter.pos.offset); - if (gen) { - + int gen = bucket_gen_get(ca, iter.pos.offset); + if (gen >= 0) { prt_printf(&buf, "Attempting to read from stale dirty pointer:\n"); printbuf_indent_add(&buf, 2); bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); - prt_printf(&buf, "memory gen: %u", *gen); + prt_printf(&buf, "memory gen: %u", gen); ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); if (!ret) { diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 8609e25e450f..96720adcfee0 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1300,11 +1300,8 @@ retry: bucket_to_u64(i->b), BUCKET_NOCOW_LOCK_UPDATE); - rcu_read_lock(); - u8 *gen = bucket_gen(ca, i->b.offset); - stale = !gen ? -1 : gen_after(*gen, i->gen); - rcu_read_unlock(); - + int gen = bucket_gen_get(ca, i->b.offset); + stale = gen < 0 ? gen : gen_after(gen, i->gen); if (unlikely(stale)) { stale_at = i; goto err_bucket_stale; From 72acab3a7c5aee76451fa6054e9608026476a971 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 27 Oct 2024 18:25:30 -0400 Subject: [PATCH 245/281] bcachefs: Fix error handling in bch2_btree_node_prefetch() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6e4afb2b5441..6f6225703eef 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -1312,9 +1312,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans, b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); - if (!IS_ERR_OR_NULL(b)) + int ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + if (b) six_unlock_read(&b->c.lock); - return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); + return 0; } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) From d335bb3fd3a4102f325ef8a353efc3d2fb523f55 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 31 Oct 2024 02:36:21 -0400 Subject: [PATCH 246/281] bcachefs: Ancient versions with bad bkey_formats are no longer supported Syzbot found an assertion pop, by generating an ancient filesystem version with an invalid bkey_format (with fields that can overflow) as well as packed keys that aren't representable unpacked. This breaks key comparisons in all sorts of painful ways. Filesystems have been automatically rewriting nodes with such invalid formats for years; we can safely drop support for them. Reported-by: syzbot+8a0109511de9d4b61217@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 587d7318a2e8..995ba32e9b6e 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c, enum bch_validate_flags flags, struct printbuf *err) { - unsigned i, bits = KEY_PACKED_BITS_START; + unsigned bits = KEY_PACKED_BITS_START; if (f->nr_fields != BKEY_NR_FIELDS) { prt_printf(err, "incorrect number of fields: got %u, should be %u", @@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c, * Verify that the packed format can't represent fields larger than the * unpacked format: */ - for (i = 0; i < f->nr_fields; i++) { - if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) && - bch2_bkey_format_field_overflows(f, i)) { + for (unsigned i = 0; i < f->nr_fields; i++) { + if (bch2_bkey_format_field_overflows(f, i)) { unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); unsigned packed_bits = min(64, f->bits_per_field[i]); From cec136d348e037ea5b6a463164454d6d0174d92f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 31 Oct 2024 02:50:55 -0400 Subject: [PATCH 247/281] bcachefs: Fix topology errors on split after merge If a btree split picks a pivot that's being deleted by a btree node merge, we're going to have problems. Fix this by checking if the pivot is being deleted, the same as we check for deletions in journal replay keys. Found by single_devic.ktest small_nodes. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 64f0928e1137..c12d7b51de0c 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1434,6 +1434,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, } } +static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) +{ + if (insert_keys) + for_each_keylist_key(insert_keys, k) + if (bkey_deleted(&k->k) && bpos_eq(k->k.p, pos)) + return true; + return false; +} + /* * Move keys from n1 (original replacement node, now lower node) to n2 (higher * node) @@ -1441,7 +1450,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, static void __btree_split_node(struct btree_update *as, struct btree_trans *trans, struct btree *b, - struct btree *n[2]) + struct btree *n[2], + struct keylist *insert_keys) { struct bkey_packed *k; struct bpos n1_pos = POS_MIN; @@ -1476,7 +1486,8 @@ static void __btree_split_node(struct btree_update *as, if (b->c.level && u64s < n1_u64s && u64s + k->u64s >= n1_u64s && - bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p)) + (bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p) || + key_deleted_in_insert(insert_keys, uk.p))) n1_u64s += k->u64s; i = u64s >= n1_u64s; @@ -1603,7 +1614,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level); n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level); - __btree_split_node(as, trans, b, n); + __btree_split_node(as, trans, b, n, keys); if (keys) { btree_split_insert_keys(as, trans, path, n1, keys); From ef4f6c322bf4ca8e6d050cd0667a9447b8cbe212 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 31 Oct 2024 03:33:36 -0400 Subject: [PATCH 248/281] bcachefs: Ensure BCH_FS_may_go_rw is set before exiting recovery If BCH_FS_may_go_rw is not yet set, it indicates to the transaction commit path that updates should be done via the list of journal replay keys. This must be set before multithreaded use commences. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 7 +++++++ fs/bcachefs/recovery_passes.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 32d15aacc069..3c7f941dde39 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -862,6 +862,13 @@ use_clean: if (ret) goto err; + /* + * Normally set by the appropriate recovery pass: when cleared, this + * indicates we're in early recovery and btree updates should be done by + * being applied to the journal replay keys. _Must_ be cleared before + * multithreaded use: + */ + set_bit(BCH_FS_may_go_rw, &c->flags); clear_bit(BCH_FS_fsck_running, &c->flags); /* in case we don't run journal replay, i.e. norecovery mode */ diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 735b8adc8f9d..4bbeac9e0526 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -221,6 +221,12 @@ int bch2_run_recovery_passes(struct bch_fs *c) { int ret = 0; + /* + * We can't allow set_may_go_rw to be excluded; that would cause us to + * use the journal replay keys for updates where it's not expected. + */ + c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { if (c->opts.recovery_pass_last && c->curr_recovery_pass > c->opts.recovery_pass_last) From 93d53f1caf2cf861d0f28d096792d3b92efae178 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Wed, 30 Oct 2024 15:48:01 +0800 Subject: [PATCH 249/281] bcachefs: add check NULL return of bio_kmalloc in journal_read_bucket bio_kmalloc may return NULL, will cause NULL pointer dereference. Add check NULL return for bio_kmalloc in journal_read_bucket. Signed-off-by: Pei Xiao Fixes: ac10a9611d87 ("bcachefs: Some fixes for building in userspace") Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/journal_io.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a1bc6c7a8ba0..9c4fe5cdbfb7 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -84,6 +84,7 @@ x(ENOMEM, ENOMEM_dev_alloc) \ x(ENOMEM, ENOMEM_disk_accounting) \ x(ENOMEM, ENOMEM_stripe_head_alloc) \ + x(ENOMEM, ENOMEM_journal_read_bucket) \ x(ENOSPC, ENOSPC_disk_reservation) \ x(ENOSPC, ENOSPC_bucket_alloc) \ x(ENOSPC, ENOSPC_disk_label_add) \ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 954f6a96e0f4..ccaafa90f4f4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1012,6 +1012,8 @@ reread: nr_bvecs = buf_pages(buf->data, sectors_read << 9); bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); + if (!bio) + return -BCH_ERR_ENOMEM_journal_read_bucket; bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ); bio->bi_iter.bi_sector = offset; From 9bb33852f5cc145b17d96f3792ff69148a37e1fd Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 29 Oct 2024 20:53:29 +0800 Subject: [PATCH 250/281] bcachefs: check the invalid parameter for perf test The perf_test does not check the number of iterations and threads when it is zero. If nr_thread is 0, the perf test will keep waiting for wakekup. If iteration is 0, it will cause exception of division by zero. This can be reproduced by: echo "rand_insert 0 1" > /sys/fs/bcachefs/${uuid}/perf_test or echo "rand_insert 1 0" > /sys/fs/bcachefs/${uuid}/perf_test Fixes: 1c6fdbd8f246 ("bcachefs: Initial commit") Signed-off-by: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/tests.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 315038a0a92d..fb5c1543e52f 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -809,6 +809,11 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, unsigned i; u64 time; + if (nr == 0 || nr_threads == 0) { + pr_err("nr of iterations or threads is not allowed to be 0"); + return -EINVAL; + } + atomic_set(&j.ready, nr_threads); init_waitqueue_head(&j.ready_wait); From baefd3f849ed956d4c1aee80889093cf0d9c6a94 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 31 Oct 2024 01:17:54 -0400 Subject: [PATCH 251/281] bcachefs: btree_cache.freeable list fixes When allocating new btree nodes, we were leaving them on the freeable list - unlocked - allowing them to be reclaimed: ouch. Additionally, bch2_btree_node_free_never_used() -> bch2_btree_node_hash_remove was putting it on the freelist, while bch2_btree_node_free_never_used() was putting it back on the btree update reserve list - ouch. Originally, the code was written to always keep btree nodes on a list - live or freeable - and this worked when new nodes were kept locked. But now with the cycle detector, we can't keep nodes locked that aren't tracked by the cycle detector; and this is fine as long as they're not reachable. We also have better and more robust leak detection now, with memory allocation profiling, so the original justification no longer applies. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 100 +++++++++++++++++----------- fs/bcachefs/btree_cache.h | 2 + fs/bcachefs/btree_update_interior.c | 13 ++-- 3 files changed, 67 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6f6225703eef..7123019ab3bc 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -59,16 +59,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list) static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) { + BUG_ON(!list_empty(&b->list)); + if (b->c.lock.readers) - list_move(&b->list, &bc->freed_pcpu); + list_add(&b->list, &bc->freed_pcpu); else - list_move(&b->list, &bc->freed_nonpcpu); + list_add(&b->list, &bc->freed_nonpcpu); } -static void btree_node_data_free(struct bch_fs *c, struct btree *b) +static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(!list_empty(&b->list)); + BUG_ON(!b->data); + + bc->nr_freeable++; + list_add(&b->list, &bc->freeable); +} + +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; + mutex_lock(&bc->lock); + __bch2_btree_node_to_freelist(bc, b); + mutex_unlock(&bc->lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + +static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); /* @@ -94,11 +116,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b) #endif b->aux_data = NULL; - bc->nr_freeable--; - btree_node_to_freedlist(bc, b); } +static void btree_node_data_free(struct btree_cache *bc, struct btree *b) +{ + BUG_ON(list_empty(&b->list)); + list_del_init(&b->list); + --bc->nr_freeable; + __btree_node_data_free(bc, b); +} + static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { @@ -174,21 +202,10 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) bch2_btree_lock_init(&b->c, 0); - bc->nr_freeable++; - list_add(&b->list, &bc->freeable); + __bch2_btree_node_to_freelist(bc, b); return b; } -void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) -{ - mutex_lock(&c->btree_cache.lock); - list_move(&b->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); -} - static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b) { struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); @@ -236,11 +253,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c) /* Btree in memory cache - hash table */ -void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) +void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { lockdep_assert_held(&bc->lock); - int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); + int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); BUG_ON(ret); /* Cause future lookups for this node to fail: */ @@ -248,17 +265,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) if (b->c.btree_id < BTREE_ID_NR) --bc->nr_by_btree[b->c.btree_id]; + --bc->live[btree_node_pinned(b)].nr; + list_del_init(&b->list); +} - bc->live[btree_node_pinned(b)].nr--; - bc->nr_freeable++; - list_move(&b->list, &bc->freeable); +void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) +{ + __bch2_btree_node_hash_remove(bc, b); + __bch2_btree_node_to_freelist(bc, b); } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) { + BUG_ON(!list_empty(&b->list)); BUG_ON(b->hash_val); - b->hash_val = btree_ptr_hash_val(&b->key); + b->hash_val = btree_ptr_hash_val(&b->key); int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params); if (ret) @@ -270,10 +292,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) bool p = __btree_node_pinned(bc, b); mod_bit(BTREE_NODE_pinned, &b->flags, p); - list_move_tail(&b->list, &bc->live[p].list); + list_add_tail(&b->list, &bc->live[p].list); bc->live[p].nr++; - - bc->nr_freeable--; return 0; } @@ -485,7 +505,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, goto out; if (!btree_node_reclaim(c, b, true)) { - btree_node_data_free(c, b); + btree_node_data_free(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); freed++; @@ -501,10 +521,10 @@ restart: bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; --touched;; } else if (!btree_node_reclaim(c, b, true)) { - bch2_btree_node_hash_remove(bc, b); + __bch2_btree_node_hash_remove(bc, b); + __btree_node_data_free(bc, b); freed++; - btree_node_data_free(c, b); bc->nr_freed++; six_unlock_write(&b->c.lock); @@ -587,7 +607,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); - btree_node_data_free(c, b); + btree_node_data_free(bc, b); } BUG_ON(!bch2_journal_error(&c->journal) && @@ -786,8 +806,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); -got_node: +got_node: /* * btree_free() doesn't free memory; it sticks the node on the end of * the list. Check if there's any freed nodes there: @@ -796,7 +816,12 @@ got_node: if (!btree_node_reclaim(c, b2, false)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); + + list_del_init(&b2->list); + --bc->nr_freeable; btree_node_to_freedlist(bc, b2); + mutex_unlock(&bc->lock); + six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); goto got_mem; @@ -810,11 +835,8 @@ got_node: goto err; } - mutex_lock(&bc->lock); - bc->nr_freeable++; got_mem: - mutex_unlock(&bc->lock); - + BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); BUG_ON(btree_node_dirty(b)); BUG_ON(btree_node_write_in_flight(b)); @@ -845,7 +867,7 @@ err: if (bc->alloc_lock == current) { b2 = btree_node_cannibalize(c); clear_btree_node_just_written(b2); - bch2_btree_node_hash_remove(bc, b2); + __bch2_btree_node_hash_remove(bc, b2); if (b) { swap(b->data, b2->data); @@ -855,9 +877,9 @@ err: six_unlock_intent(&b2->c.lock); } else { b = b2; - list_del_init(&b->list); } + BUG_ON(!list_empty(&b->list)); mutex_unlock(&bc->lock); trace_and_count(c, btree_cache_cannibalize, trans); @@ -936,7 +958,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, b->hash_val = 0; mutex_lock(&bc->lock); - list_add(&b->list, &bc->freeable); + __bch2_btree_node_to_freelist(bc, b); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); @@ -1356,7 +1378,7 @@ wait_on_io: mutex_lock(&bc->lock); bch2_btree_node_hash_remove(bc, b); - btree_node_data_free(c, b); + btree_node_data_free(bc, b); mutex_unlock(&bc->lock); out: six_unlock_write(&b->c.lock); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 367acd217c6a..66e86d1a178d 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *); void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *); +void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); + int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, unsigned, enum btree_id); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c12d7b51de0c..22740b605f0a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -237,10 +237,6 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b) BUG_ON(b->will_make_reachable); clear_btree_node_noevict(b); - - mutex_lock(&c->btree_cache.lock); - list_move(&b->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); } static void bch2_btree_node_free_inmem(struct btree_trans *trans, @@ -252,12 +248,12 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, bch2_btree_node_lock_write_nofail(trans, path, &b->c); + __btree_node_free(trans, b); + mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); mutex_unlock(&c->btree_cache.lock); - __btree_node_free(trans, b); - six_unlock_write(&b->c.lock); mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); @@ -289,7 +285,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, clear_btree_node_need_write(b); mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); + __bch2_btree_node_hash_remove(&c->btree_cache, b); mutex_unlock(&c->btree_cache.lock); BUG_ON(p->nr >= ARRAY_SIZE(p->b)); @@ -521,8 +517,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); __btree_node_free(trans, b); - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); + bch2_btree_node_to_freelist(c, b); } } } From f9f0a5390dcef1f96cc506a2cf7d50c8e348fa3d Mon Sep 17 00:00:00 2001 From: Piotr Zalewski Date: Wed, 6 Nov 2024 19:46:30 +0000 Subject: [PATCH 252/281] bcachefs: Change OPT_STR max to be 1 less than the size of choices array Change OPT_STR max value to be 1 less than the "ARRAY_SIZE" of "_choices" array. As a result, remove -1 from (opt->max-1) in bch2_opt_to_text. The "_choices" array is a null-terminated array, so computing the maximum using "ARRAY_SIZE" without subtracting 1 yields an incorrect result. Since bch2_opt_validate don't subtract 1, as bch2_opt_to_text does, values bigger than the actual maximum would pass through option validation. Reported-by: syzbot+bee87a0c3291c06aa8c6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bee87a0c3291c06aa8c6 Fixes: 63c4b2545382 ("bcachefs: Better superblock opt validation") Suggested-by: Kent Overstreet Signed-off-by: Piotr Zalewski Signed-off-by: Kent Overstreet --- fs/bcachefs/opts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 6673cbd8bdb9..0e2ee262fbd4 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -226,7 +226,7 @@ const struct bch_option bch2_opt_table[] = { #define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \ .min = _min, .max = _max #define OPT_STR(_choices) .type = BCH_OPT_STR, \ - .min = 0, .max = ARRAY_SIZE(_choices), \ + .min = 0, .max = ARRAY_SIZE(_choices) - 1, \ .choices = _choices #define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \ .min = 0, .max = U64_MAX, \ @@ -428,7 +428,7 @@ void bch2_opt_to_text(struct printbuf *out, prt_printf(out, "%lli", v); break; case BCH_OPT_STR: - if (v < opt->min || v >= opt->max - 1) + if (v < opt->min || v >= opt->max) prt_printf(out, "(invalid option %lli)", v); else if (flags & OPT_SHOW_FULL_LIST) prt_string_option(out, opt->choices, v); From 8440da933127fc5330c3d1090cdd612fddbc40eb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Nov 2024 16:40:08 -0500 Subject: [PATCH 253/281] bcachefs: Fix UAF in __promote_alloc() error path If we error in data_update_init() after adding to the rhashtable of outstanding promotes, kfree_rcu() is required. Reported-by: Reed Riley Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index ac6a6fcc2bb8..b3b934a87c6d 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -262,7 +262,8 @@ err: bio_free_pages(&(*rbio)->bio); kfree(*rbio); *rbio = NULL; - kfree(op); + /* We may have added to the rhashtable and thus need rcu freeing: */ + kfree_rcu(op, rcu); bch2_write_ref_put(c, BCH_WRITE_REF_promote); return ERR_PTR(ret); } From c928807f6f6b6d595a7e199591ae297c81de3aeb Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 28 Oct 2024 12:26:53 -0600 Subject: [PATCH 254/281] mm/page_alloc: keep track of free highatomic OOM kills due to vastly overestimated free highatomic reserves were observed: ... invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0 ... Node 0 Normal free:1482936kB boost:0kB min:410416kB low:739404kB high:1068392kB reserved_highatomic:1073152KB ... Node 0 Normal: 1292*4kB (ME) 1920*8kB (E) 383*16kB (UE) 220*32kB (ME) 340*64kB (E) 2155*128kB (UE) 3243*256kB (UE) 615*512kB (U) 1*1024kB (M) 0*2048kB 0*4096kB = 1477408kB The second line above shows that the OOM kill was due to the following condition: free (1482936kB) - reserved_highatomic (1073152kB) = 409784KB < min (410416kB) And the third line shows there were no free pages in any MIGRATE_HIGHATOMIC pageblocks, which otherwise would show up as type 'H'. Therefore __zone_watermark_unusable_free() underestimated the usable free memory by over 1GB, which resulted in the unnecessary OOM kill above. The comments in __zone_watermark_unusable_free() warns about the potential risk, i.e., If the caller does not have rights to reserves below the min watermark then subtract the high-atomic reserves. This will over-estimate the size of the atomic reserve but it avoids a search. However, it is possible to keep track of free pages in reserved highatomic pageblocks with a new per-zone counter nr_free_highatomic protected by the zone lock, to avoid a search when calculating the usable free memory. And the cost would be minimal, i.e., simple arithmetics in the highatomic alloc/free/move paths. Note that since nr_free_highatomic can be relatively small, using a per-cpu counter might cause too much drift and defeat its purpose, in addition to the extra memory overhead. Dependson e0932b6c1f94 ("mm: page_alloc: consolidate free page accounting") - see [1] [akpm@linux-foundation.org: s/if/else if/, per Johannes, stealth whitespace tweak] Link: https://lkml.kernel.org/r/20241028182653.3420139-1-yuzhao@google.com Link: https://lkml.kernel.org/r/0d0ddb33-fcdc-43e2-801f-0c1df2031afb@suse.cz [1] Fixes: 0aaa29a56e4f ("mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand") Signed-off-by: Yu Zhao Reported-by: Link Lin Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + mm/page_alloc.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5b1c984daf45..80bc5640bb60 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -823,6 +823,7 @@ struct zone { unsigned long watermark_boost; unsigned long nr_reserved_highatomic; + unsigned long nr_free_highatomic; /* * We don't know if the memory that we're going to allocate will be diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8ad38cd5e574..c6c7bb3ea71b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -635,6 +635,8 @@ compaction_capture(struct capture_control *capc, struct page *page, static inline void account_freepages(struct zone *zone, int nr_pages, int migratetype) { + lockdep_assert_held(&zone->lock); + if (is_migrate_isolate(migratetype)) return; @@ -642,6 +644,9 @@ static inline void account_freepages(struct zone *zone, int nr_pages, if (is_migrate_cma(migratetype)) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); + else if (is_migrate_highatomic(migratetype)) + WRITE_ONCE(zone->nr_free_highatomic, + zone->nr_free_highatomic + nr_pages); } /* Used for pages not on another list */ @@ -3079,11 +3084,10 @@ static inline long __zone_watermark_unusable_free(struct zone *z, /* * If the caller does not have rights to reserves below the min - * watermark then subtract the high-atomic reserves. This will - * over-estimate the size of the atomic reserve but it avoids a search. + * watermark then subtract the free pages reserved for highatomic. */ if (likely(!(alloc_flags & ALLOC_RESERVES))) - unusable_free += z->nr_reserved_highatomic; + unusable_free += READ_ONCE(z->nr_free_highatomic); #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ From cb6fcef8b4b6c655b6a25cc3a415cd9eb81b3da8 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 28 Oct 2024 12:26:27 +0900 Subject: [PATCH 255/281] objpool: fix to make percpu slot allocation more robust Since gfp & GFP_ATOMIC == GFP_ATOMIC is true for GFP_KERNEL | GFP_HIGH, it will use kmalloc if user specifies that combination. Here the reason why combining the __vmalloc_node() and kmalloc_node() is that the vmalloc does not support all GFP flag, especially GFP_ATOMIC. So we should check if gfp & (GFP_ATOMIC | GFP_KERNEL) != GFP_ATOMIC for vmalloc first. This ensures caller can sleep. And for the robustness, even if vmalloc fails, it should retry with kmalloc to allocate it. Link: https://lkml.kernel.org/r/173008598713.1262174.2959179484209897252.stgit@mhiramat.roam.corp.google.com Fixes: aff1871bfc81 ("objpool: fix choosing allocation for percpu slots") Signed-off-by: Masami Hiramatsu (Google) Reported-by: Linus Torvalds Closes: https://lore.kernel.org/all/CAHk-=whO+vSH+XVRio8byJU8idAWES0SPGVZ7KAVdc4qrV0VUA@mail.gmail.com/ Cc: Leo Yan Cc: Linus Torvalds Cc: Matt Wu Cc: Mikel Rychliski Cc: Steven Rostedt (Google) Cc: Viktor Malik Signed-off-by: Andrew Morton --- lib/objpool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/objpool.c b/lib/objpool.c index fd108fe0d095..b998b720c732 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, * warm caches and TLB hits. in default vmalloc is used to * reduce the pressure of kernel slab system. as we know, * mimimal size of vmalloc is one page since vmalloc would - * always align the requested size to page size + * always align the requested size to page size. + * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC) + * allocate percpu slot with kmalloc. */ - if ((pool->gfp & GFP_ATOMIC) == GFP_ATOMIC) - slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); - else + slot = NULL; + + if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC) slot = __vmalloc_node(size, sizeof(void *), pool->gfp, cpu_to_node(i), __builtin_return_address(0)); - if (!slot) - return -ENOMEM; + + if (!slot) { + slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); + if (!slot) + return -ENOMEM; + } memset(slot, 0, size); pool->cpu_slots[i] = slot; From faa242b1d2a97143150bdc50d5b61fd70fcd17cd Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 27 Oct 2024 12:33:21 +0000 Subject: [PATCH 256/281] mm/mlock: set the correct prev on failure After commit 94d7d9233951 ("mm: abstract the vma_merge()/split_vma() pattern for mprotect() et al."), if vma_modify_flags() return error, the vma is set to an error code. This will lead to an invalid prev be returned. Generally this shouldn't matter as the caller should treat an error as indicating state is now invalidated, however unfortunately apply_mlockall_flags() does not check for errors and assumes that mlock_fixup() correctly maintains prev even if an error were to occur. This patch fixes that assumption. [lorenzo.stoakes@oracle.com: provide a better fix and rephrase the log] Link: https://lkml.kernel.org/r/20241027123321.19511-1-richard.weiyang@gmail.com Fixes: 94d7d9233951 ("mm: abstract the vma_merge()/split_vma() pattern for mprotect() et al.") Signed-off-by: Wei Yang Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Vlastimil Babka Cc: Jann Horn Cc: Signed-off-by: Andrew Morton --- mm/mlock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index e3e3dc2b2956..cde076fa7d5e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -725,14 +725,17 @@ static int apply_mlockall_flags(int flags) } for_each_vma(vmi, vma) { + int error; vm_flags_t newflags; newflags = vma->vm_flags & ~VM_LOCKED_MASK; newflags |= to_add; - /* Ignore errors */ - mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end, - newflags); + error = mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end, + newflags); + /* Ignore errors, but prev needs fixing up. */ + if (error) + prev = vma; cond_resched(); } out: From 3488af0970445ff5532c7e8dc5e6456b877aee5e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 31 Oct 2024 11:37:56 -0700 Subject: [PATCH 257/281] mm/damon/core: handle zero {aggregation,ops_update} intervals Patch series "mm/damon/core: fix handling of zero non-sampling intervals". DAMON's internal intervals accounting logic is not correctly handling non-sampling intervals of zero values for a wrong assumption. This could cause unexpected monitoring behavior, and even result in infinite hang of DAMON sysfs interface user threads in case of zero aggregation interval. Fix those by updating the intervals accounting logic. For details of the root case and solutions, please refer to commit messages of fixes. This patch (of 2): DAMON's logics to determine if this is the time to do aggregation and ops update assumes next_{aggregation,ops_update}_sis are always set larger than current passed_sample_intervals. And therefore it further assumes continuously incrementing passed_sample_intervals every sampling interval will make it reaches to the next_{aggregation,ops_update}_sis in future. The logic therefore make the action and update next_{aggregation,ops_updaste}_sis only if passed_sample_intervals is same to the counts, respectively. If Aggregation interval or Ops update interval are zero, however, next_aggregation_sis or next_ops_update_sis are set same to current passed_sample_intervals, respectively. And passed_sample_intervals is incremented before doing the next_{aggregation,ops_update}_sis check. Hence, passed_sample_intervals becomes larger than next_{aggregation,ops_update}_sis, and the logic says it is not the time to do the action and update next_{aggregation,ops_update}_sis forever, until an overflow happens. In other words, DAMON stops doing aggregations or ops updates effectively forever, and users cannot get monitoring results. Based on the documents and the common sense, a reasonable behavior for such inputs is doing an aggregation and an ops update for every sampling interval. Handle the case by removing the assumption. Note that this could incur particular real issue for DAMON sysfs interface users, in case of zero Aggregation interval. When user starts DAMON with zero Aggregation interval and asks online DAMON parameter tuning via DAMON sysfs interface, the request is handled by the aggregation callback. Until the callback finishes the work, the user who requested the online tuning just waits. Hence, the user will be stuck until the passed_sample_intervals overflows. Link: https://lkml.kernel.org/r/20241031183757.49610-1-sj@kernel.org Link: https://lkml.kernel.org/r/20241031183757.49610-2-sj@kernel.org Fixes: 4472edf63d66 ("mm/damon/core: use number of passed access sampling as a timer") Signed-off-by: SeongJae Park Cc: [6.7.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index a83f3b736d51..3131a07569e4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2000,7 +2000,7 @@ static int kdamond_fn(void *data) if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); - if (ctx->passed_sample_intervals == next_aggregation_sis) { + if (ctx->passed_sample_intervals >= next_aggregation_sis) { kdamond_merge_regions(ctx, max_nr_accesses / 10, sz_limit); @@ -2018,7 +2018,7 @@ static int kdamond_fn(void *data) sample_interval = ctx->attrs.sample_interval ? ctx->attrs.sample_interval : 1; - if (ctx->passed_sample_intervals == next_aggregation_sis) { + if (ctx->passed_sample_intervals >= next_aggregation_sis) { ctx->next_aggregation_sis = next_aggregation_sis + ctx->attrs.aggr_interval / sample_interval; @@ -2028,7 +2028,7 @@ static int kdamond_fn(void *data) ctx->ops.reset_aggregated(ctx); } - if (ctx->passed_sample_intervals == next_ops_update_sis) { + if (ctx->passed_sample_intervals >= next_ops_update_sis) { ctx->next_ops_update_sis = next_ops_update_sis + ctx->attrs.ops_update_interval / sample_interval; From 8e7bde615f634a82a44b1f3d293c049fd3ef9ca9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 31 Oct 2024 11:37:57 -0700 Subject: [PATCH 258/281] mm/damon/core: handle zero schemes apply interval DAMON's logics to determine if this is the time to apply damos schemes assumes next_apply_sis is always set larger than current passed_sample_intervals. And therefore assume continuously incrementing passed_sample_intervals will make it reaches to the next_apply_sis in future. The logic hence does apply the scheme and update next_apply_sis only if passed_sample_intervals is same to next_apply_sis. If Schemes apply interval is set as zero, however, next_apply_sis is set same to current passed_sample_intervals, respectively. And passed_sample_intervals is incremented before doing the next_apply_sis check. Hence, next_apply_sis becomes larger than next_apply_sis, and the logic says it is not the time to apply schemes and update next_apply_sis. In other words, DAMON stops applying schemes until passed_sample_intervals overflows. Based on the documents and the common sense, a reasonable behavior for such inputs would be applying the schemes for every sampling interval. Handle the case by removing the assumption. Link: https://lkml.kernel.org/r/20241031183757.49610-3-sj@kernel.org Fixes: 42f994b71404 ("mm/damon/core: implement scheme-specific apply interval") Signed-off-by: SeongJae Park Cc: [6.7.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 3131a07569e4..ce700e694b63 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1412,7 +1412,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; - if (c->passed_sample_intervals != s->next_apply_sis) + if (c->passed_sample_intervals < s->next_apply_sis) continue; if (!s->wmarks.activated) @@ -1622,7 +1622,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c) bool has_schemes_to_apply = false; damon_for_each_scheme(s, c) { - if (c->passed_sample_intervals != s->next_apply_sis) + if (c->passed_sample_intervals < s->next_apply_sis) continue; if (!s->wmarks.activated) @@ -1642,9 +1642,9 @@ static void kdamond_apply_schemes(struct damon_ctx *c) } damon_for_each_scheme(s, c) { - if (c->passed_sample_intervals != s->next_apply_sis) + if (c->passed_sample_intervals < s->next_apply_sis) continue; - s->next_apply_sis += + s->next_apply_sis = c->passed_sample_intervals + (s->apply_interval_us ? s->apply_interval_us : c->attrs.aggr_interval) / sample_interval; } From 4401e9d10ab0281a520b9f8c220f30f60b5c248f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 31 Oct 2024 09:12:03 -0700 Subject: [PATCH 259/281] mm/damon/core: avoid overflow in damon_feed_loop_next_input() damon_feed_loop_next_input() is inefficient and fragile to overflows. Specifically, 'score_goal_diff_bp' calculation can overflow when 'score' is high. The calculation is actually unnecessary at all because 'goal' is a constant of value 10,000. Calculation of 'compensation' is again fragile to overflow. Final calculation of return value for under-achiving case is again fragile to overflow when the current score is under-achieving the target. Add two corner cases handling at the beginning of the function to make the body easier to read, and rewrite the body of the function to avoid overflows and the unnecessary bp value calcuation. Link: https://lkml.kernel.org/r/20241031161203.47751-1-sj@kernel.org Fixes: 9294a037c015 ("mm/damon/core: implement goal-oriented feedback-driven quota auto-tuning") Signed-off-by: SeongJae Park Reported-by: Guenter Roeck Closes: https://lore.kernel.org/944f3d5b-9177-48e7-8ec9-7f1331a3fea3@roeck-us.net Tested-by: Guenter Roeck Cc: [6.8.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index ce700e694b63..511c3f61ab44 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1456,17 +1456,31 @@ static unsigned long damon_feed_loop_next_input(unsigned long last_input, unsigned long score) { const unsigned long goal = 10000; - unsigned long score_goal_diff = max(goal, score) - min(goal, score); - unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal; - unsigned long compensation = last_input * score_goal_diff_bp / 10000; /* Set minimum input as 10000 to avoid compensation be zero */ const unsigned long min_input = 10000; + unsigned long score_goal_diff, compensation; + bool over_achieving = score > goal; - if (goal > score) + if (score == goal) + return last_input; + if (score >= goal * 2) + return min_input; + + if (over_achieving) + score_goal_diff = score - goal; + else + score_goal_diff = goal - score; + + if (last_input < ULONG_MAX / score_goal_diff) + compensation = last_input * score_goal_diff / goal; + else + compensation = last_input / goal * score_goal_diff; + + if (over_achieving) + return max(last_input - compensation, min_input); + if (last_input < ULONG_MAX - compensation) return last_input + compensation; - if (last_input > compensation + min_input) - return last_input - compensation; - return min_input; + return ULONG_MAX; } #ifdef CONFIG_PSI From 652e1a51465f2e8e75590bc3dd1e3a3b61020568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 1 Nov 2024 13:54:05 -0300 Subject: [PATCH 260/281] mm: fix docs for the kernel parameter ``thp_anon=`` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we add ``thp_anon=32,64K:always`` to the kernel command line, we will see the following error: [ 0.000000] huge_memory: thp_anon=32,64K:always: error parsing string, ignoring setting This happens because the correct format isn't ``thp_anon=,[KMG]:```, as [KMG] must follow each number to especify its unit. So, the correct format is ``thp_anon=[KMG],[KMG]:```. Therefore, adjust the documentation to reflect the correct format of the parameter ``thp_anon=``. Link: https://lkml.kernel.org/r/20241101165719.1074234-3-mcanal@igalia.com Fixes: dd4d30d1cdbe ("mm: override mTHP "enabled" defaults at kernel cmdline") Signed-off-by: Maíra Canal Acked-by: Barry Song Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Hugh Dickins Cc: Jonathan Corbet Cc: Lance Yang Cc: Ryan Roberts Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/admin-guide/mm/transhuge.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1518343bbe22..1666576acc0e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6688,7 +6688,7 @@ 0: no polling (default) thp_anon= [KNL] - Format: ,[KMG]:;-[KMG]: + Format: [KMG],[KMG]:;[KMG]-[KMG]: state is one of "always", "madvise", "never" or "inherit". Control the default behavior of the system with respect to anonymous transparent hugepages. diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index cfdd16a52e39..a1bb495eab59 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -303,7 +303,7 @@ control by passing the parameter ``transparent_hugepage=always`` or kernel command line. Alternatively, each supported anonymous THP size can be controlled by -passing ``thp_anon=,[KMG]:;-[KMG]:``, +passing ``thp_anon=[KMG],[KMG]:;[KMG]-[KMG]:``, where ```` is the THP size (must be a power of 2 of PAGE_SIZE and supported anonymous THP) and ```` is one of ``always``, ``madvise``, ``never`` or ``inherit``. From 0268d4579901821ff17259213c2d8c9679995d48 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 1 Nov 2024 19:15:57 +0500 Subject: [PATCH 261/281] selftests: hugetlb_dio: check for initial conditions to skip in the start The test should be skipped if initial conditions aren't fulfilled in the start instead of failing and outputting non-compliant TAP logs. This kind of failure pollutes the results. The initial conditions are: - The test should only execute if /tmp file can be allocated. - The test should only execute if huge pages are free. Before: TAP version 13 1..4 Bail out! Error opening file : Read-only file system (30) # Planned tests != run tests (4 != 0) # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:0 error:0 After: TAP version 13 1..0 # SKIP Unable to allocate file: Read-only file system Link: https://lkml.kernel.org/r/20241101141557.3159432-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Fixes: 3a103b5315b7 ("selftest: mm: Test if hugepage does not get leaked during __bio_release_pages()") Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: Donet Tom Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/hugetlb_dio.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c index f9ac20c657ec..60001c142ce9 100644 --- a/tools/testing/selftests/mm/hugetlb_dio.c +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -44,13 +44,6 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) if (fd < 0) ksft_exit_fail_perror("Error opening file\n"); - /* Get the free huge pages before allocation */ - free_hpage_b = get_free_hugepages(); - if (free_hpage_b == 0) { - close(fd); - ksft_exit_skip("No free hugepage, exiting!\n"); - } - /* Allocate a hugetlb page */ orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0); if (orig_buffer == MAP_FAILED) { @@ -94,8 +87,20 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) int main(void) { size_t pagesize = 0; + int fd; ksft_print_header(); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno)); + close(fd); + + /* Check if huge pages are free */ + if (!get_free_hugepages()) + ksft_exit_skip("No free hugepage, exiting\n"); + ksft_set_plan(4); /* Get base page size */ From 432dc0654c612457285a5dcf9bb13968ac6f0804 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 1 Nov 2024 19:19:40 +0000 Subject: [PATCH 262/281] ucounts: fix counter leak in inc_rlimit_get_ucounts() The inc_rlimit_get_ucounts() increments the specified rlimit counter and then checks its limit. If the value exceeds the limit, the function returns an error without decrementing the counter. Link: https://lkml.kernel.org/r/20241101191940.3211128-1-roman.gushchin@linux.dev Fixes: 15bc01effefe ("ucounts: Fix signal ucount refcounting") Signed-off-by: Andrei Vagin Co-developed-by: Roman Gushchin Signed-off-by: Roman Gushchin Tested-by: Roman Gushchin Acked-by: Alexey Gladkov Cc: Kees Cook Cc: Andrei Vagin Cc: "Eric W. Biederman" Cc: Alexey Gladkov Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton --- kernel/ucount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 8c07714ff27d..9469102c5ac0 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -317,7 +317,7 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) for (iter = ucounts; iter; iter = iter->ns->ucounts) { long new = atomic_long_add_return(1, &iter->rlimit[type]); if (new < 0 || new > max) - goto unwind; + goto dec_unwind; if (iter == ucounts) ret = new; max = get_userns_rlimit_max(iter->ns, type); @@ -334,7 +334,6 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) dec_unwind: dec = atomic_long_sub_return(1, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); -unwind: do_dec_rlimit_put_ucounts(ucounts, iter, type); return 0; } From b8ee299855f08539e04d6c1a6acb3dc9e5423c00 Mon Sep 17 00:00:00 2001 From: Qi Xi Date: Fri, 1 Nov 2024 11:48:03 +0800 Subject: [PATCH 263/281] fs/proc: fix compile warning about variable 'vmcore_mmap_ops' When build with !CONFIG_MMU, the variable 'vmcore_mmap_ops' is defined but not used: >> fs/proc/vmcore.c:458:42: warning: unused variable 'vmcore_mmap_ops' 458 | static const struct vm_operations_struct vmcore_mmap_ops = { Fix this by only defining it when CONFIG_MMU is enabled. Link: https://lkml.kernel.org/r/20241101034803.9298-1-xiqi2@huawei.com Fixes: 9cb218131de1 ("vmcore: introduce remap_oldmem_pfn_range()") Signed-off-by: Qi Xi Reported-by: kernel test robot Closes: https://lore.kernel.org/lkml/202410301936.GcE8yUos-lkp@intel.com/ Cc: Baoquan He Cc: Dave Young Cc: Michael Holzheu Cc: Vivek Goyal Cc: Wang ShaoBo Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b52d85f8ad59..b4521b096058 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -457,10 +457,6 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) #endif } -static const struct vm_operations_struct vmcore_mmap_ops = { - .fault = mmap_vmcore_fault, -}; - /** * vmcore_alloc_buf - allocate buffer in vmalloc memory * @size: size of buffer @@ -488,6 +484,11 @@ static inline char *vmcore_alloc_buf(size_t size) * virtually contiguous user-space in ELF layout. */ #ifdef CONFIG_MMU + +static const struct vm_operations_struct vmcore_mmap_ops = { + .fault = mmap_vmcore_fault, +}; + /* * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages * reported as not being ram with the zero page. From 9e05e5c7ee8758141d2db7e8fea2cab34500c6ed Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 4 Nov 2024 19:54:19 +0000 Subject: [PATCH 264/281] signal: restore the override_rlimit logic Prior to commit d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") UCOUNT_RLIMIT_SIGPENDING rlimit was not enforced for a class of signals. However now it's enforced unconditionally, even if override_rlimit is set. This behavior change caused production issues. For example, if the limit is reached and a process receives a SIGSEGV signal, sigqueue_alloc fails to allocate the necessary resources for the signal delivery, preventing the signal from being delivered with siginfo. This prevents the process from correctly identifying the fault address and handling the error. From the user-space perspective, applications are unaware that the limit has been reached and that the siginfo is effectively 'corrupted'. This can lead to unpredictable behavior and crashes, as we observed with java applications. Fix this by passing override_rlimit into inc_rlimit_get_ucounts() and skip the comparison to max there if override_rlimit is set. This effectively restores the old behavior. Link: https://lkml.kernel.org/r/20241104195419.3962584-1-roman.gushchin@linux.dev Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") Signed-off-by: Roman Gushchin Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Acked-by: Oleg Nesterov Acked-by: Alexey Gladkov Cc: Kees Cook Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton --- include/linux/user_namespace.h | 3 ++- kernel/signal.c | 3 ++- kernel/ucount.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 3625096d5f85..7183e5aca282 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -141,7 +141,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, + bool override_rlimit); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); diff --git a/kernel/signal.c b/kernel/signal.c index 4344860ffcac..cbabb2d05e0a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -419,7 +419,8 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, */ rcu_read_lock(); ucounts = task_ucounts(t); - sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); + sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, + override_rlimit); rcu_read_unlock(); if (!sigpending) return NULL; diff --git a/kernel/ucount.c b/kernel/ucount.c index 9469102c5ac0..696406939be5 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -307,7 +307,8 @@ void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type) do_dec_rlimit_put_ucounts(ucounts, NULL, type); } -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, + bool override_rlimit) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; @@ -320,7 +321,8 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) goto dec_unwind; if (iter == ucounts) ret = new; - max = get_userns_rlimit_max(iter->ns, type); + if (!override_rlimit) + max = get_userns_rlimit_max(iter->ns, type); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. From 0b63c0e01fba40e3992bc627272ec7b618ccaef7 Mon Sep 17 00:00:00 2001 From: Andrew Kanner Date: Sun, 3 Nov 2024 20:38:45 +0100 Subject: [PATCH 265/281] ocfs2: remove entry once instead of null-ptr-dereference in ocfs2_xa_remove() Syzkaller is able to provoke null-ptr-dereference in ocfs2_xa_remove(): [ 57.319872] (a.out,1161,7):ocfs2_xa_remove:2028 ERROR: status = -12 [ 57.320420] (a.out,1161,7):ocfs2_xa_cleanup_value_truncate:1999 ERROR: Partial truncate while removing xattr overlay.upper. Leaking 1 clusters and removing the entry [ 57.321727] BUG: kernel NULL pointer dereference, address: 0000000000000004 [...] [ 57.325727] RIP: 0010:ocfs2_xa_block_wipe_namevalue+0x2a/0xc0 [...] [ 57.331328] Call Trace: [ 57.331477] [...] [ 57.333511] ? do_user_addr_fault+0x3e5/0x740 [ 57.333778] ? exc_page_fault+0x70/0x170 [ 57.334016] ? asm_exc_page_fault+0x2b/0x30 [ 57.334263] ? __pfx_ocfs2_xa_block_wipe_namevalue+0x10/0x10 [ 57.334596] ? ocfs2_xa_block_wipe_namevalue+0x2a/0xc0 [ 57.334913] ocfs2_xa_remove_entry+0x23/0xc0 [ 57.335164] ocfs2_xa_set+0x704/0xcf0 [ 57.335381] ? _raw_spin_unlock+0x1a/0x40 [ 57.335620] ? ocfs2_inode_cache_unlock+0x16/0x20 [ 57.335915] ? trace_preempt_on+0x1e/0x70 [ 57.336153] ? start_this_handle+0x16c/0x500 [ 57.336410] ? preempt_count_sub+0x50/0x80 [ 57.336656] ? _raw_read_unlock+0x20/0x40 [ 57.336906] ? start_this_handle+0x16c/0x500 [ 57.337162] ocfs2_xattr_block_set+0xa6/0x1e0 [ 57.337424] __ocfs2_xattr_set_handle+0x1fd/0x5d0 [ 57.337706] ? ocfs2_start_trans+0x13d/0x290 [ 57.337971] ocfs2_xattr_set+0xb13/0xfb0 [ 57.338207] ? dput+0x46/0x1c0 [ 57.338393] ocfs2_xattr_trusted_set+0x28/0x30 [ 57.338665] ? ocfs2_xattr_trusted_set+0x28/0x30 [ 57.338948] __vfs_removexattr+0x92/0xc0 [ 57.339182] __vfs_removexattr_locked+0xd5/0x190 [ 57.339456] ? preempt_count_sub+0x50/0x80 [ 57.339705] vfs_removexattr+0x5f/0x100 [...] Reproducer uses faultinject facility to fail ocfs2_xa_remove() -> ocfs2_xa_value_truncate() with -ENOMEM. In this case the comment mentions that we can return 0 if ocfs2_xa_cleanup_value_truncate() is going to wipe the entry anyway. But the following 'rc' check is wrong and execution flow do 'ocfs2_xa_remove_entry(loc);' twice: * 1st: in ocfs2_xa_cleanup_value_truncate(); * 2nd: returning back to ocfs2_xa_remove() instead of going to 'out'. Fix this by skipping the 2nd removal of the same entry and making syzkaller repro happy. Link: https://lkml.kernel.org/r/20241103193845.2940988-1-andrew.kanner@gmail.com Fixes: 399ff3a748cf ("ocfs2: Handle errors while setting external xattr values.") Signed-off-by: Andrew Kanner Reported-by: syzbot+386ce9e60fa1b18aac5b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/671e13ab.050a0220.2b8c0f.01d0.GAE@google.com/T/ Tested-by: syzbot+386ce9e60fa1b18aac5b@syzkaller.appspotmail.com Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index dd0a05365e79..73a6f6fd8a8e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2036,8 +2036,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, rc = 0; ocfs2_xa_cleanup_value_truncate(loc, "removing", orig_clusters); - if (rc) - goto out; + goto out; } } From c289f4de8e479251b64988839fd0e87f246e03a2 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 4 Nov 2024 00:44:09 +0100 Subject: [PATCH 266/281] mailmap: add entry for Thorsten Blum Map my previously used email address to my @linux.dev address. Link: https://lkml.kernel.org/r/20241103234411.2522-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Cc: Alex Elder Cc: David S. Miller Cc: Geliang Tang Cc: Kees Cook Cc: Mathieu Othacehe Cc: Matthieu Baerts (NGI0) Cc: Matt Ranostay Cc: Naoya Horiguchi Cc: Neeraj Upadhyay Cc: Quentin Monnet Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 5378f04b2566..5e829da09e7f 100644 --- a/.mailmap +++ b/.mailmap @@ -665,6 +665,7 @@ Tomeu Vizoso Thomas Graf Thomas Körper Thomas Pedersen +Thorsten Blum Tiezhu Yang Tingwei Zhang Tirupathi Reddy From 8de3e97f3d3d62cd9f3067f073e8ac93261597db Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Fri, 1 Nov 2024 16:12:43 +0800 Subject: [PATCH 267/281] i2c: designware: do not hold SCL low when I2C_DYNAMIC_TAR_UPDATE is not set When the Tx FIFO is empty and the last command has no STOP bit set, the master holds SCL low. If I2C_DYNAMIC_TAR_UPDATE is not set, BIT(13) MST_ON_HOLD of IC_RAW_INTR_STAT is not enabled, causing the __i2c_dw_disable() timeout. This is quite similar to commit 2409205acd3c ("i2c: designware: fix __i2c_dw_disable() in case master is holding SCL low"). Also check BIT(7) MST_HOLD_TX_FIFO_EMPTY in IC_STATUS, which is available when IC_STAT_FOR_CLK_STRETCH is set. Fixes: 2409205acd3c ("i2c: designware: fix __i2c_dw_disable() in case master is holding SCL low") Co-developed-by: Xiaowu Ding Signed-off-by: Xiaowu Ding Co-developed-by: Angus Chen Signed-off-by: Angus Chen Signed-off-by: Liu Peibao Acked-by: Jarkko Nikula Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-designware-common.c | 6 ++++-- drivers/i2c/busses/i2c-designware-core.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index f31d352d98b5..9d88b4fa03e4 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -524,7 +524,7 @@ err_release_lock: void __i2c_dw_disable(struct dw_i2c_dev *dev) { struct i2c_timings *t = &dev->timings; - unsigned int raw_intr_stats; + unsigned int raw_intr_stats, ic_stats; unsigned int enable; int timeout = 100; bool abort_needed; @@ -532,9 +532,11 @@ void __i2c_dw_disable(struct dw_i2c_dev *dev) int ret; regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &raw_intr_stats); + regmap_read(dev->map, DW_IC_STATUS, &ic_stats); regmap_read(dev->map, DW_IC_ENABLE, &enable); - abort_needed = raw_intr_stats & DW_IC_INTR_MST_ON_HOLD; + abort_needed = (raw_intr_stats & DW_IC_INTR_MST_ON_HOLD) || + (ic_stats & DW_IC_STATUS_MASTER_HOLD_TX_FIFO_EMPTY); if (abort_needed) { if (!(enable & DW_IC_ENABLE_ENABLE)) { regmap_write(dev->map, DW_IC_ENABLE, DW_IC_ENABLE_ENABLE); diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 8e8854ec9882..2d32896d0673 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -116,6 +116,7 @@ #define DW_IC_STATUS_RFNE BIT(3) #define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) #define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) +#define DW_IC_STATUS_MASTER_HOLD_TX_FIFO_EMPTY BIT(7) #define DW_IC_SDA_HOLD_RX_SHIFT 16 #define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, 16) From ace149e0830c380ddfce7e466fe860ca502fe4ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Sep 2024 13:57:04 -0400 Subject: [PATCH 268/281] filemap: Fix bounds checking in filemap_read() If the caller supplies an iocb->ki_pos value that is close to the filesystem upper limit, and an iterator with a count that causes us to overflow that limit, then filemap_read() enters an infinite loop. This behaviour was discovered when testing xfstests generic/525 with the "localio" optimisation for loopback NFS mounts. Reported-by: Mike Snitzer Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Tested-by: Mike Snitzer Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 36d22968be9a..56fa431c52af 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2625,7 +2625,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(!iov_iter_count(iter))) return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); + iov_iter_truncate(iter, inode->i_sb->s_maxbytes - iocb->ki_pos); folio_batch_init(&fbatch); do { From 2d5404caa8c7bb5c4e0435f94b28834ae5456623 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Nov 2024 14:19:35 -0800 Subject: [PATCH 269/281] Linux 6.12-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b8efbfe9da94..79192a3024bf 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION* From 948ccbd95016f50ce01df5eef9440eede3b8c713 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:26 +0800 Subject: [PATCH 270/281] LoongArch: KVM: Add iocsr and mmio bus simulation in kernel Add iocsr and mmio memory read and write simulation to the kernel. When the VM accesses the device address space through iocsr instructions or mmio, it does not need to return to the qemu user mode but can directly completes the access in the kernel mode. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 82 +++++++++++++++++++++++++++----------- include/linux/kvm_host.h | 1 + include/trace/events/kvm.h | 35 ++++++++++++++++ 3 files changed, 94 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 90894f70ff4a..69f3e3782cc9 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -157,7 +157,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret; - unsigned long val; + unsigned long *val; u32 addr, rd, rj, opcode; /* @@ -170,6 +170,7 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; + val = &vcpu->arch.gprs[rd]; /* LoongArch is Little endian */ switch (opcode) { @@ -202,16 +203,25 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) run->iocsr_io.is_write = 1; break; default: - ret = EMULATE_FAIL; - break; + return EMULATE_FAIL; } - if (ret == EMULATE_DO_IOCSR) { - if (run->iocsr_io.is_write) { - val = vcpu->arch.gprs[rd]; - memcpy(run->iocsr_io.data, &val, run->iocsr_io.len); - } - vcpu->arch.io_gpr = rd; + if (run->iocsr_io.is_write) { + if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save data and let user space to write it */ + memcpy(run->iocsr_io.data, val, run->iocsr_io.len); + + trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); + } else { + if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save register id for iocsr read completion */ + vcpu->arch.io_gpr = rd; + + trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } return ret; @@ -447,19 +457,33 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, run->mmio.phys_addr, NULL); + + /* + * If mmio device such as PCH-PIC is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. + */ + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, + run->mmio.len, &vcpu->arch.gprs[rd]); + if (!ret) { + update_pc(&vcpu->arch); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + /* Set for kvm_complete_mmio_read() use */ vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, run->mmio.len, - run->mmio.phys_addr, NULL); - } else { - kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - vcpu->mmio_needed = 0; + return EMULATE_DO_MMIO; } + kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->mmio_needed = 0; + return ret; } @@ -600,19 +624,29 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, run->mmio.phys_addr, data); + + /* + * If mmio device such as PCH-PIC is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. + */ + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + if (!ret) + return EMULATE_DONE; + run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, - run->mmio.phys_addr, data); - } else { - vcpu->arch.pc = curr_pc; - kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - /* Rollback PC if emulation was unsuccessful */ + return EMULATE_DO_MMIO; } + vcpu->arch.pc = curr_pc; + kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + /* Rollback PC if emulation was unsuccessful */ + return ret; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 45be36e5285f..164d9725cb08 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -219,6 +219,7 @@ enum kvm_bus { KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, + KVM_IOCSR_BUS, KVM_NR_BUSES }; diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 74e40d5d4af4..fc7d0f8ff078 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -236,6 +236,41 @@ TRACE_EVENT(kvm_mmio, __entry->len, __entry->gpa, __entry->val) ); +#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 +#define KVM_TRACE_IOCSR_READ 1 +#define KVM_TRACE_IOCSR_WRITE 2 + +#define kvm_trace_symbol_iocsr \ + { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_IOCSR_READ, "read" }, \ + { KVM_TRACE_IOCSR_WRITE, "write" } + +TRACE_EVENT(kvm_iocsr, + TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), + __entry->len, __entry->gpa, __entry->val) +); + #define kvm_fpu_load_symbol \ {0, "unload"}, \ {1, "load"} From c532de5a67a70f8533d495f8f2aaa9a0491c3ad0 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 271/281] LoongArch: KVM: Add IPI device support Add device model for IPI interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 4 + arch/loongarch/include/asm/kvm_ipi.h | 33 ++++++++ arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/ipi.c | 112 ++++++++++++++++++++++++++ arch/loongarch/kvm/main.c | 7 +- arch/loongarch/kvm/vcpu.c | 3 + include/uapi/linux/kvm.h | 4 + 7 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/include/asm/kvm_ipi.h create mode 100644 arch/loongarch/kvm/intc/ipi.c diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index d6bb72424027..8e5393d21fcb 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -18,6 +18,7 @@ #include #include +#include #include /* Loongarch KVM register ids */ @@ -117,6 +118,7 @@ struct kvm_arch { s64 time_offset; struct kvm_context __percpu *vmcs; + struct loongarch_ipi *ipi; }; #define CSR_MAX_NUMS 0x800 @@ -221,6 +223,8 @@ struct kvm_vcpu_arch { int last_sched_cpu; /* mp state */ struct kvm_mp_state mp_state; + /* ipi state */ + struct ipi_state ipi_state; /* cpucfg */ u32 cpucfg[KVM_MAX_CPUCFG_REGS]; diff --git a/arch/loongarch/include/asm/kvm_ipi.h b/arch/loongarch/include/asm/kvm_ipi.h new file mode 100644 index 000000000000..baaa6253e4dc --- /dev/null +++ b/arch/loongarch/include/asm/kvm_ipi.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_KVM_IPI_H +#define __ASM_KVM_IPI_H + +#include + +#define LARCH_INT_IPI 12 + +struct loongarch_ipi { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; +}; + +struct ipi_state { + spinlock_t lock; + uint32_t status; + uint32_t en; + uint32_t set; + uint32_t clear; + uint64_t buf[4]; +}; + +#define IOCSR_IPI_BASE 0x1000 +#define IOCSR_IPI_SIZE 0x160 + +int kvm_loongarch_register_ipi_device(void); + +#endif diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index b2f4cbe01ae8..36c3009fe89c 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -18,5 +18,6 @@ kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o +kvm-y += intc/ipi.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c new file mode 100644 index 000000000000..9e45571ad76e --- /dev/null +++ b/arch/loongarch/kvm/intc/ipi.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_ipi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static int kvm_ipi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_ipi_ops = { + .read = kvm_ipi_read, + .write = kvm_ipi_write, +}; + +static int kvm_ipi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_ipi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_ipi_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct kvm *kvm; + struct kvm_io_device *device; + struct loongarch_ipi *s; + + if (!dev) { + kvm_err("%s: kvm_device ptr is invalid!\n", __func__); + return -EINVAL; + } + + kvm = dev->kvm; + if (kvm->arch.ipi) { + kvm_err("%s: LoongArch IPI has already been created!\n", __func__); + return -EINVAL; + } + + s = kzalloc(sizeof(struct loongarch_ipi), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_ipi_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, IOCSR_IPI_BASE, IOCSR_IPI_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm_err("%s: Initialize IOCSR dev failed, ret = %d\n", __func__, ret); + goto err; + } + + kvm->arch.ipi = s; + return 0; + +err: + kfree(s); + return -EFAULT; +} + +static void kvm_ipi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_ipi *ipi; + + if (!dev || !dev->kvm || !dev->kvm->arch.ipi) + return; + + kvm = dev->kvm; + ipi = kvm->arch.ipi; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &ipi->device); + kfree(ipi); +} + +static struct kvm_device_ops kvm_ipi_dev_ops = { + .name = "kvm-loongarch-ipi", + .create = kvm_ipi_create, + .destroy = kvm_ipi_destroy, + .set_attr = kvm_ipi_set_attr, + .get_attr = kvm_ipi_get_attr, +}; + +int kvm_loongarch_register_ipi_device(void) +{ + return kvm_register_device_ops(&kvm_ipi_dev_ops, KVM_DEV_TYPE_LOONGARCH_IPI); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 27e9b94c0a0b..14f3f69c5bb9 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -313,7 +313,7 @@ void kvm_arch_disable_virtualization_cpu(void) static int kvm_loongarch_env_init(void) { - int cpu, order; + int cpu, order, ret; void *addr; struct kvm_context *context; @@ -368,7 +368,10 @@ static int kvm_loongarch_env_init(void) kvm_init_gcsr_flag(); - return 0; + /* Register LoongArch IPI interrupt controller interface. */ + ret = kvm_loongarch_register_ipi_device(); + + return ret; } static void kvm_loongarch_env_exit(void) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 174734a23d0a..cab1818be68d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1475,6 +1475,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Init */ vcpu->arch.last_sched_cpu = -1; + /* Init ipi_state lock */ + spin_lock_init(&vcpu->arch.ipi_state.lock); + /* * Initialize guest register state to valid architectural reset state. */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 637efc055145..9fff439c30ea 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1158,7 +1158,11 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { From daee2f9cae5510ba1bd9eed6b0cf0ca8dc276118 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 272/281] LoongArch: KVM: Add IPI read and write function Add implementation of IPI interrupt controller's address space read and write function simulation. Signed-off-by: Min Zhou Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/asm/kvm_ipi.h | 12 ++ arch/loongarch/kvm/intc/ipi.c | 279 +++++++++++++++++++++++++- 3 files changed, 291 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 8e5393d21fcb..a1de884ebb44 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -45,6 +45,8 @@ struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; u64 hugepages; + u64 ipi_read_exits; + u64 ipi_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/include/asm/kvm_ipi.h b/arch/loongarch/include/asm/kvm_ipi.h index baaa6253e4dc..060163dfb4a3 100644 --- a/arch/loongarch/include/asm/kvm_ipi.h +++ b/arch/loongarch/include/asm/kvm_ipi.h @@ -28,6 +28,18 @@ struct ipi_state { #define IOCSR_IPI_BASE 0x1000 #define IOCSR_IPI_SIZE 0x160 +#define IOCSR_IPI_STATUS 0x000 +#define IOCSR_IPI_EN 0x004 +#define IOCSR_IPI_SET 0x008 +#define IOCSR_IPI_CLEAR 0x00c +#define IOCSR_IPI_BUF_20 0x020 +#define IOCSR_IPI_BUF_28 0x028 +#define IOCSR_IPI_BUF_30 0x030 +#define IOCSR_IPI_BUF_38 0x038 +#define IOCSR_IPI_SEND 0x040 +#define IOCSR_MAIL_SEND 0x048 +#define IOCSR_ANY_SEND 0x158 + int kvm_loongarch_register_ipi_device(void); #endif diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 9e45571ad76e..462d6ac084d3 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -7,18 +7,293 @@ #include #include +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + int cpu, action; + uint32_t status; + struct kvm_vcpu *vcpu; + struct kvm_interrupt irq; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + action = BIT(data & 0x1f); + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + vcpu->arch.ipi_state.status |= action; + spin_unlock(&vcpu->arch.ipi_state.lock); + if (status == 0) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) +{ + uint32_t status; + struct kvm_interrupt irq; + + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.status &= ~data; + status = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + if (status == 0) { + irq.irq = -LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static uint64_t read_mailbox(struct kvm_vcpu *vcpu, int offset, int len) +{ + uint64_t data = 0; + + spin_lock(&vcpu->arch.ipi_state.lock); + data = *(ulong *)((void *)vcpu->arch.ipi_state.buf + (offset - 0x20)); + spin_unlock(&vcpu->arch.ipi_state.lock); + + switch (len) { + case 1: + return data & 0xff; + case 2: + return data & 0xffff; + case 4: + return data & 0xffffffff; + case 8: + return data; + default: + kvm_err("%s: unknown data len: %d\n", __func__, len); + return 0; + } +} + +static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int len) +{ + void *pbuf; + + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + + switch (len) { + case 1: + *(unsigned char *)pbuf = (unsigned char)data; + break; + case 2: + *(unsigned short *)pbuf = (unsigned short)data; + break; + case 4: + *(unsigned int *)pbuf = (unsigned int)data; + break; + case 8: + *(unsigned long *)pbuf = (unsigned long)data; + break; + default: + kvm_err("%s: unknown data len: %d\n", __func__, len); + } + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) +{ + int i, ret; + uint32_t val = 0, mask = 0; + + /* + * Bit 27-30 is mask for byte writing. + * If the mask is 0, we need not to do anything. + */ + if ((data >> 27) & 0xf) { + /* Read the old val */ + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) { + kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + return ret; + } + /* Construct the mask by scanning the bit 27-30 */ + for (i = 0; i < 4; i++) { + if (data & (BIT(27 + i))) + mask |= (0xff << (i * 8)); + } + /* Save the old part of val */ + val &= mask; + } + val |= ((uint32_t)(data >> 32) & ~mask); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) + kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + + return ret; +} + +static int mail_send(struct kvm *kvm, uint64_t data) +{ + int cpu, mailbox, offset; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = IOCSR_IPI_BASE + IOCSR_IPI_BUF_20 + mailbox * 4; + + return send_ipi_data(vcpu, offset, data); +} + +static int any_send(struct kvm *kvm, uint64_t data) +{ + int cpu, offset; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + offset = data & 0xffff; + + return send_ipi_data(vcpu, offset, data); +} + +static int loongarch_ipi_readl(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *val) +{ + int ret = 0; + uint32_t offset; + uint64_t res = 0; + + offset = (uint32_t)(addr & 0x1ff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case IOCSR_IPI_STATUS: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_EN: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.en; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SET: + res = 0; + break; + case IOCSR_IPI_CLEAR: + res = 0; + break; + case IOCSR_IPI_BUF_20 ... IOCSR_IPI_BUF_38 + 7: + if (offset + len > IOCSR_IPI_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + res = read_mailbox(vcpu, offset, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + *(uint64_t *)val = res; + + return ret; +} + +static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *val) +{ + int ret = 0; + uint64_t data; + uint32_t offset; + + data = *(uint64_t *)val; + + offset = (uint32_t)(addr & 0x1ff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case IOCSR_IPI_STATUS: + ret = -EINVAL; + break; + case IOCSR_IPI_EN: + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.en = data; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SET: + ret = -EINVAL; + break; + case IOCSR_IPI_CLEAR: + /* Just clear the status of the current vcpu */ + ipi_clear(vcpu, data); + break; + case IOCSR_IPI_BUF_20 ... IOCSR_IPI_BUF_38 + 7: + if (offset + len > IOCSR_IPI_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + write_mailbox(vcpu, offset, data, len); + break; + case IOCSR_IPI_SEND: + ipi_send(vcpu->kvm, data); + break; + case IOCSR_MAIL_SEND: + ret = mail_send(vcpu->kvm, *(uint64_t *)val); + break; + case IOCSR_ANY_SEND: + ret = any_send(vcpu->kvm, *(uint64_t *)val); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + return ret; +} + static int kvm_ipi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + int ret; + struct loongarch_ipi *ipi; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + ipi->kvm->stat.ipi_read_exits++; + ret = loongarch_ipi_readl(vcpu, addr, len, val); + + return ret; } static int kvm_ipi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret; + struct loongarch_ipi *ipi; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + ipi->kvm->stat.ipi_write_exits++; + ret = loongarch_ipi_writel(vcpu, addr, len, val); + + return ret; } static const struct kvm_io_device_ops kvm_ipi_ops = { From 8e3054261bc373f50122b2bc2d726d66a344bf29 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 273/281] LoongArch: KVM: Add IPI user mode read and write function Implement the communication interface between the user mode programs and the kernel in IPI interrupt controller simulation, which is used to obtain or send the simulation data of the interrupt controller in the user mode process, and is also used in VM migration or VM saving and restoration. Signed-off-by: Min Zhou Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/kvm.h | 2 + arch/loongarch/kvm/intc/ipi.c | 92 ++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 70d89070bfeb..c6bbbb34cef4 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -132,4 +132,6 @@ struct kvm_iocsr_entry { #define KVM_IRQCHIP_NUM_PINS 64 #define KVM_MAX_CORES 256 +#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000001 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 462d6ac084d3..a233a323e295 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -301,16 +301,104 @@ static const struct kvm_io_device_ops kvm_ipi_ops = { .write = kvm_ipi_write, }; +static int kvm_ipi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int len = 4; + int cpu, addr; + uint64_t val; + void *p = NULL; + struct kvm_vcpu *vcpu; + + cpu = (attr->attr >> 16) & 0x3ff; + addr = attr->attr & 0xff; + + vcpu = kvm_get_vcpu(dev->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + + switch (addr) { + case IOCSR_IPI_STATUS: + p = &vcpu->arch.ipi_state.status; + break; + case IOCSR_IPI_EN: + p = &vcpu->arch.ipi_state.en; + break; + case IOCSR_IPI_SET: + p = &vcpu->arch.ipi_state.set; + break; + case IOCSR_IPI_CLEAR: + p = &vcpu->arch.ipi_state.clear; + break; + case IOCSR_IPI_BUF_20: + p = &vcpu->arch.ipi_state.buf[0]; + len = 8; + break; + case IOCSR_IPI_BUF_28: + p = &vcpu->arch.ipi_state.buf[1]; + len = 8; + break; + case IOCSR_IPI_BUF_30: + p = &vcpu->arch.ipi_state.buf[2]; + len = 8; + break; + case IOCSR_IPI_BUF_38: + p = &vcpu->arch.ipi_state.buf[3]; + len = 8; + break; + default: + kvm_err("%s: unknown ipi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + if (is_write) { + if (len == 4) { + if (get_user(val, (uint32_t __user *)attr->addr)) + return -EFAULT; + *(uint32_t *)p = (uint32_t)val; + } else if (len == 8) { + if (get_user(val, (uint64_t __user *)attr->addr)) + return -EFAULT; + *(uint64_t *)p = val; + } + } else { + if (len == 4) { + val = *(uint32_t *)p; + return put_user(val, (uint32_t __user *)attr->addr); + } else if (len == 8) { + val = *(uint64_t *)p; + return put_user(val, (uint64_t __user *)attr->addr); + } + } + + return 0; +} + static int kvm_ipi_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_ipi_regs_access(dev, attr, false); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } } static int kvm_ipi_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_ipi_regs_access(dev, attr, true); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } } static int kvm_ipi_create(struct kvm_device *dev, u32 type) From 2e8b9df82631e714cc2b7bf302772c8259673180 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 274/281] LoongArch: KVM: Add EIOINTC device support Add device model for EIOINTC interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_eiointc.h | 93 ++++++++++++++++ arch/loongarch/include/asm/kvm_host.h | 4 +- arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/eiointc.c | 132 +++++++++++++++++++++++ arch/loongarch/kvm/main.c | 6 ++ include/uapi/linux/kvm.h | 2 + 6 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/kvm_eiointc.h create mode 100644 arch/loongarch/kvm/intc/eiointc.c diff --git a/arch/loongarch/include/asm/kvm_eiointc.h b/arch/loongarch/include/asm/kvm_eiointc.h new file mode 100644 index 000000000000..ed65de5a8168 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_eiointc.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_KVM_EIOINTC_H +#define __ASM_KVM_EIOINTC_H + +#include + +#define EIOINTC_IRQS 256 +#define EIOINTC_ROUTE_MAX_VCPUS 256 +#define EIOINTC_IRQS_U8_NUMS (EIOINTC_IRQS / 8) +#define EIOINTC_IRQS_U16_NUMS (EIOINTC_IRQS_U8_NUMS / 2) +#define EIOINTC_IRQS_U32_NUMS (EIOINTC_IRQS_U8_NUMS / 4) +#define EIOINTC_IRQS_U64_NUMS (EIOINTC_IRQS_U8_NUMS / 8) +/* map to ipnum per 32 irqs */ +#define EIOINTC_IRQS_NODETYPE_COUNT 16 + +#define EIOINTC_BASE 0x1400 +#define EIOINTC_SIZE 0x900 + +#define EIOINTC_VIRT_BASE (0x40000000) +#define EIOINTC_VIRT_SIZE (0x1000) + +#define LOONGSON_IP_NUM 8 + +struct loongarch_eiointc { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + struct kvm_io_device device_vext; + uint32_t num_cpu; + uint32_t features; + uint32_t status; + + /* hardware state */ + union nodetype { + u64 reg_u64[EIOINTC_IRQS_NODETYPE_COUNT / 4]; + u32 reg_u32[EIOINTC_IRQS_NODETYPE_COUNT / 2]; + u16 reg_u16[EIOINTC_IRQS_NODETYPE_COUNT]; + u8 reg_u8[EIOINTC_IRQS_NODETYPE_COUNT * 2]; + } nodetype; + + /* one bit shows the state of one irq */ + union bounce { + u64 reg_u64[EIOINTC_IRQS_U64_NUMS]; + u32 reg_u32[EIOINTC_IRQS_U32_NUMS]; + u16 reg_u16[EIOINTC_IRQS_U16_NUMS]; + u8 reg_u8[EIOINTC_IRQS_U8_NUMS]; + } bounce; + + union isr { + u64 reg_u64[EIOINTC_IRQS_U64_NUMS]; + u32 reg_u32[EIOINTC_IRQS_U32_NUMS]; + u16 reg_u16[EIOINTC_IRQS_U16_NUMS]; + u8 reg_u8[EIOINTC_IRQS_U8_NUMS]; + } isr; + union coreisr { + u64 reg_u64[EIOINTC_ROUTE_MAX_VCPUS][EIOINTC_IRQS_U64_NUMS]; + u32 reg_u32[EIOINTC_ROUTE_MAX_VCPUS][EIOINTC_IRQS_U32_NUMS]; + u16 reg_u16[EIOINTC_ROUTE_MAX_VCPUS][EIOINTC_IRQS_U16_NUMS]; + u8 reg_u8[EIOINTC_ROUTE_MAX_VCPUS][EIOINTC_IRQS_U8_NUMS]; + } coreisr; + union enable { + u64 reg_u64[EIOINTC_IRQS_U64_NUMS]; + u32 reg_u32[EIOINTC_IRQS_U32_NUMS]; + u16 reg_u16[EIOINTC_IRQS_U16_NUMS]; + u8 reg_u8[EIOINTC_IRQS_U8_NUMS]; + } enable; + + /* use one byte to config ipmap for 32 irqs at once */ + union ipmap { + u64 reg_u64; + u32 reg_u32[EIOINTC_IRQS_U32_NUMS / 4]; + u16 reg_u16[EIOINTC_IRQS_U16_NUMS / 4]; + u8 reg_u8[EIOINTC_IRQS_U8_NUMS / 4]; + } ipmap; + /* use one byte to config coremap for one irq */ + union coremap { + u64 reg_u64[EIOINTC_IRQS / 8]; + u32 reg_u32[EIOINTC_IRQS / 4]; + u16 reg_u16[EIOINTC_IRQS / 2]; + u8 reg_u8[EIOINTC_IRQS]; + } coremap; + + DECLARE_BITMAP(sw_coreisr[EIOINTC_ROUTE_MAX_VCPUS][LOONGSON_IP_NUM], EIOINTC_IRQS); + uint8_t sw_coremap[EIOINTC_IRQS]; +}; + +int kvm_loongarch_register_eiointc_device(void); + +#endif /* __ASM_KVM_EIOINTC_H */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index a1de884ebb44..2d0476f05148 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -19,6 +19,7 @@ #include #include #include +#include #include /* Loongarch KVM register ids */ @@ -87,7 +88,7 @@ struct kvm_world_switch { * * For LOONGARCH_CSR_CPUID register, max CPUID size if 512 * For IPI hardware, max destination CPUID size 1024 - * For extioi interrupt controller, max destination CPUID size is 256 + * For eiointc interrupt controller, max destination CPUID size is 256 * For msgint interrupt controller, max supported CPUID size is 65536 * * Currently max CPUID is defined as 256 for KVM hypervisor, in future @@ -121,6 +122,7 @@ struct kvm_arch { s64 time_offset; struct kvm_context __percpu *vmcs; struct loongarch_ipi *ipi; + struct loongarch_eiointc *eiointc; }; #define CSR_MAX_NUMS 0x800 diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 36c3009fe89c..bb50fc799c29 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -19,5 +19,6 @@ kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o kvm-y += intc/ipi.o +kvm-y += intc/eiointc.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c new file mode 100644 index 000000000000..10afa6163643 --- /dev/null +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_eiointc_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static int kvm_eiointc_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_eiointc_ops = { + .read = kvm_eiointc_read, + .write = kvm_eiointc_write, +}; + +static int kvm_eiointc_virt_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static int kvm_eiointc_virt_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_eiointc_virt_ops = { + .read = kvm_eiointc_virt_read, + .write = kvm_eiointc_virt_write, +}; + +static int kvm_eiointc_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_eiointc_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_eiointc_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct loongarch_eiointc *s; + struct kvm_io_device *device, *device1; + struct kvm *kvm = dev->kvm; + + /* eiointc has been created */ + if (kvm->arch.eiointc) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_eiointc), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_eiointc_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, + EIOINTC_BASE, EIOINTC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kfree(s); + return ret; + } + + device1 = &s->device_vext; + kvm_iodevice_init(device1, &kvm_eiointc_virt_ops); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, + EIOINTC_VIRT_BASE, EIOINTC_VIRT_SIZE, device1); + if (ret < 0) { + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &s->device); + kfree(s); + return ret; + } + kvm->arch.eiointc = s; + + return 0; +} + +static void kvm_eiointc_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_eiointc *eiointc; + + if (!dev || !dev->kvm || !dev->kvm->arch.eiointc) + return; + + kvm = dev->kvm; + eiointc = kvm->arch.eiointc; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device); + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device_vext); + kfree(eiointc); +} + +static struct kvm_device_ops kvm_eiointc_dev_ops = { + .name = "kvm-loongarch-eiointc", + .create = kvm_eiointc_create, + .destroy = kvm_eiointc_destroy, + .set_attr = kvm_eiointc_set_attr, + .get_attr = kvm_eiointc_get_attr, +}; + +int kvm_loongarch_register_eiointc_device(void) +{ + return kvm_register_device_ops(&kvm_eiointc_dev_ops, KVM_DEV_TYPE_LOONGARCH_EIOINTC); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 14f3f69c5bb9..8de366ade99c 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace.h" unsigned long vpid_mask; @@ -370,6 +371,11 @@ static int kvm_loongarch_env_init(void) /* Register LoongArch IPI interrupt controller interface. */ ret = kvm_loongarch_register_ipi_device(); + if (ret) + return ret; + + /* Register LoongArch EIOINTC interrupt controller interface. */ + ret = kvm_loongarch_register_eiointc_device(); return ret; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9fff439c30ea..0ec5c631d9e9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1160,6 +1160,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_LOONGARCH_IPI, #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_MAX, From 3956a52bc05bd811082a3c9d2b423ee957e6fefc Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 275/281] LoongArch: KVM: Add EIOINTC read and write functions Add implementation of EIOINTC interrupt controller's address space read and write function simulation. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_eiointc.h | 30 + arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/kvm/intc/eiointc.c | 742 ++++++++++++++++++++++- 3 files changed, 771 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_eiointc.h b/arch/loongarch/include/asm/kvm_eiointc.h index ed65de5a8168..a3a40aba8acf 100644 --- a/arch/loongarch/include/asm/kvm_eiointc.h +++ b/arch/loongarch/include/asm/kvm_eiointc.h @@ -20,9 +20,38 @@ #define EIOINTC_BASE 0x1400 #define EIOINTC_SIZE 0x900 +#define EIOINTC_NODETYPE_START 0xa0 +#define EIOINTC_NODETYPE_END 0xbf +#define EIOINTC_IPMAP_START 0xc0 +#define EIOINTC_IPMAP_END 0xc7 +#define EIOINTC_ENABLE_START 0x200 +#define EIOINTC_ENABLE_END 0x21f +#define EIOINTC_BOUNCE_START 0x280 +#define EIOINTC_BOUNCE_END 0x29f +#define EIOINTC_ISR_START 0x300 +#define EIOINTC_ISR_END 0x31f +#define EIOINTC_COREISR_START 0x400 +#define EIOINTC_COREISR_END 0x41f +#define EIOINTC_COREMAP_START 0x800 +#define EIOINTC_COREMAP_END 0x8ff + #define EIOINTC_VIRT_BASE (0x40000000) #define EIOINTC_VIRT_SIZE (0x1000) +#define EIOINTC_VIRT_FEATURES (0x0) +#define EIOINTC_HAS_VIRT_EXTENSION (0) +#define EIOINTC_HAS_ENABLE_OPTION (1) +#define EIOINTC_HAS_INT_ENCODE (2) +#define EIOINTC_HAS_CPU_ENCODE (3) +#define EIOINTC_VIRT_HAS_FEATURES ((1U << EIOINTC_HAS_VIRT_EXTENSION) \ + | (1U << EIOINTC_HAS_ENABLE_OPTION) \ + | (1U << EIOINTC_HAS_INT_ENCODE) \ + | (1U << EIOINTC_HAS_CPU_ENCODE)) +#define EIOINTC_VIRT_CONFIG (0x4) +#define EIOINTC_ENABLE (1) +#define EIOINTC_ENABLE_INT_ENCODE (2) +#define EIOINTC_ENABLE_CPU_ENCODE (3) + #define LOONGSON_IP_NUM 8 struct loongarch_eiointc { @@ -89,5 +118,6 @@ struct loongarch_eiointc { }; int kvm_loongarch_register_eiointc_device(void); +void eiointc_set_irq(struct loongarch_eiointc *s, int irq, int level); #endif /* __ASM_KVM_EIOINTC_H */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 2d0476f05148..a63c2bf6fae0 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -48,6 +48,8 @@ struct kvm_vm_stat { u64 hugepages; u64 ipi_read_exits; u64 ipi_write_exits; + u64 eiointc_read_exits; + u64 eiointc_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index 10afa6163643..7369c23be036 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -7,18 +7,700 @@ #include #include +static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) +{ + int ipnum, cpu, irq_index, irq_mask, irq; + + for (irq = 0; irq < EIOINTC_IRQS; irq++) { + ipnum = s->ipmap.reg_u8[irq / 32]; + if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) { + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0; + } + irq_index = irq / 32; + irq_mask = BIT(irq & 0x1f); + + cpu = s->coremap.reg_u8[irq]; + if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + else + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + } +} + +static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) +{ + int ipnum, cpu, found, irq_index, irq_mask; + struct kvm_vcpu *vcpu; + struct kvm_interrupt vcpu_irq; + + ipnum = s->ipmap.reg_u8[irq / 32]; + if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) { + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0; + } + + cpu = s->sw_coremap[irq]; + vcpu = kvm_get_vcpu(s->kvm, cpu); + irq_index = irq / 32; + irq_mask = BIT(irq & 0x1f); + + if (level) { + /* if not enable return false */ + if (((s->enable.reg_u32[irq_index]) & irq_mask) == 0) + return; + s->coreisr.reg_u32[cpu][irq_index] |= irq_mask; + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS); + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + } else { + s->coreisr.reg_u32[cpu][irq_index] &= ~irq_mask; + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS); + } + + if (found < EIOINTC_IRQS) + return; /* other irq is handling, needn't update parent irq */ + + vcpu_irq.irq = level ? (INT_HWI0 + ipnum) : -(INT_HWI0 + ipnum); + kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq); +} + +static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, + int irq, void *pvalue, u32 len, bool notify) +{ + int i, cpu; + u64 val = *(u64 *)pvalue; + + for (i = 0; i < len; i++) { + cpu = val & 0xff; + val = val >> 8; + + if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 0 : cpu; + } + + if (s->sw_coremap[irq + i] == cpu) + continue; + + if (notify && test_bit(irq + i, (unsigned long *)s->isr.reg_u8)) { + /* lower irq at old cpu and raise irq at new cpu */ + eiointc_update_irq(s, irq + i, 0); + s->sw_coremap[irq + i] = cpu; + eiointc_update_irq(s, irq + i, 1); + } else { + s->sw_coremap[irq + i] = cpu; + } + } +} + +void eiointc_set_irq(struct loongarch_eiointc *s, int irq, int level) +{ + unsigned long flags; + unsigned long *isr = (unsigned long *)s->isr.reg_u8; + + level ? set_bit(irq, isr) : clear_bit(irq, isr); + spin_lock_irqsave(&s->lock, flags); + eiointc_update_irq(s, irq, level); + spin_unlock_irqrestore(&s->lock, flags); +} + +static inline void eiointc_enable_irq(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, int index, u8 mask, int level) +{ + u8 val; + int irq; + + val = mask & s->isr.reg_u8[index]; + irq = ffs(val); + while (irq != 0) { + /* + * enable bit change from 0 to 1, + * need to update irq by pending bits + */ + eiointc_update_irq(s, irq - 1 + index * 8, level); + val &= ~BIT(irq - 1); + irq = ffs(val); + } +} + +static int loongarch_eiointc_readb(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u8 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = offset - EIOINTC_NODETYPE_START; + data = s->nodetype.reg_u8[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = offset - EIOINTC_IPMAP_START; + data = s->ipmap.reg_u8[index]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = offset - EIOINTC_ENABLE_START; + data = s->enable.reg_u8[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = offset - EIOINTC_BOUNCE_START; + data = s->bounce.reg_u8[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = offset - EIOINTC_COREISR_START; + data = s->coreisr.reg_u8[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = offset - EIOINTC_COREMAP_START; + data = s->coremap.reg_u8[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u8 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readw(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u16 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 1; + data = s->nodetype.reg_u16[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 1; + data = s->ipmap.reg_u16[index]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 1; + data = s->enable.reg_u16[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 1; + data = s->bounce.reg_u16[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 1; + data = s->coreisr.reg_u16[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 1; + data = s->coremap.reg_u16[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u16 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readl(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u32 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 2; + data = s->nodetype.reg_u32[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 2; + data = s->ipmap.reg_u32[index]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 2; + data = s->enable.reg_u32[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 2; + data = s->bounce.reg_u32[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 2; + data = s->coreisr.reg_u32[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 2; + data = s->coremap.reg_u32[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u32 *)val = data; + + return ret; +} + +static int loongarch_eiointc_readq(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + u64 data = 0; + gpa_t offset; + + offset = addr - EIOINTC_BASE; + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 3; + data = s->nodetype.reg_u64[index]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + index = (offset - EIOINTC_IPMAP_START) >> 3; + data = s->ipmap.reg_u64; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 3; + data = s->enable.reg_u64[index]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + index = (offset - EIOINTC_BOUNCE_START) >> 3; + data = s->bounce.reg_u64[index]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 3; + data = s->coreisr.reg_u64[vcpu->vcpu_id][index]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + index = (offset - EIOINTC_COREMAP_START) >> 3; + data = s->coremap.reg_u64[index]; + break; + default: + ret = -EINVAL; + break; + } + *(u64 *)val = data; + + return ret; +} + static int kvm_eiointc_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + int ret = -EINVAL; + unsigned long flags; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.eiointc_read_exits++; + spin_lock_irqsave(&eiointc->lock, flags); + switch (len) { + case 1: + ret = loongarch_eiointc_readb(vcpu, eiointc, addr, len, val); + break; + case 2: + ret = loongarch_eiointc_readw(vcpu, eiointc, addr, len, val); + break; + case 4: + ret = loongarch_eiointc_readl(vcpu, eiointc, addr, len, val); + break; + case 8: + ret = loongarch_eiointc_readq(vcpu, eiointc, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n", + __func__, addr, len); + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; +} + +static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int index, irq, bits, ret = 0; + u8 cpu; + u8 data, old_data; + u8 coreisr, old_coreisr; + gpa_t offset; + + data = *(u8 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START); + s->nodetype.reg_u8[index] = data; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of irqchip driver, need not update upper irq level + */ + index = (offset - EIOINTC_IPMAP_START); + s->ipmap.reg_u8[index] = data; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START); + old_data = s->enable.reg_u8[index]; + s->enable.reg_u8[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u8[index] & ~old_data & s->isr.reg_u8[index]; + eiointc_enable_irq(vcpu, s, index, data, 1); + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u8[index] & old_data & s->isr.reg_u8[index]; + eiointc_enable_irq(vcpu, s, index, data, 0); + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = offset - EIOINTC_BOUNCE_START; + s->bounce.reg_u8[index] = data; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START); + /* use attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u8[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u8[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + bits = sizeof(data) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + eiointc_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq; + s->coremap.reg_u8[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u16 data, old_data; + u16 coreisr, old_coreisr; + gpa_t offset; + + data = *(u16 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 1; + s->nodetype.reg_u16[index] = data; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of irqchip driver, need not update upper irq level + */ + index = (offset - EIOINTC_IPMAP_START) >> 1; + s->ipmap.reg_u16[index] = data; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 1; + old_data = s->enable.reg_u32[index]; + s->enable.reg_u16[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index]; + index = index << 1; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EIOINTC_BOUNCE_START) >> 1; + s->bounce.reg_u16[index] = data; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 1; + /* use attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u16[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u16[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + bits = sizeof(data) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + eiointc_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq >> 1; + s->coremap.reg_u16[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u32 data, old_data; + u32 coreisr, old_coreisr; + gpa_t offset; + + data = *(u32 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 2; + s->nodetype.reg_u32[index] = data; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of irqchip driver, need not update upper irq level + */ + index = (offset - EIOINTC_IPMAP_START) >> 2; + s->ipmap.reg_u32[index] = data; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 2; + old_data = s->enable.reg_u32[index]; + s->enable.reg_u32[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; + index = index << 2; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EIOINTC_BOUNCE_START) >> 2; + s->bounce.reg_u32[index] = data; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 2; + /* use attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u32[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u32[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + bits = sizeof(data) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + eiointc_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq >> 2; + s->coremap.reg_u32[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, + struct loongarch_eiointc *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u64 data, old_data; + u64 coreisr, old_coreisr; + gpa_t offset; + + data = *(u64 *)val; + offset = addr - EIOINTC_BASE; + + switch (offset) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + index = (offset - EIOINTC_NODETYPE_START) >> 3; + s->nodetype.reg_u64[index] = data; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of irqchip driver, need not update upper irq level + */ + index = (offset - EIOINTC_IPMAP_START) >> 3; + s->ipmap.reg_u64 = data; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + index = (offset - EIOINTC_ENABLE_START) >> 3; + old_data = s->enable.reg_u64[index]; + s->enable.reg_u64[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; + index = index << 3; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + eiointc_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EIOINTC_BOUNCE_START) >> 3; + s->bounce.reg_u64[index] = data; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + index = (offset - EIOINTC_COREISR_START) >> 3; + /* use attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u64[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u64[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + bits = sizeof(data) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + eiointc_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + irq = offset - EIOINTC_COREMAP_START; + index = irq >> 3; + s->coremap.reg_u64[index] = data; + eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + break; + default: + ret = -EINVAL; + break; + } + + return ret; } static int kvm_eiointc_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret = -EINVAL; + unsigned long flags; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.eiointc_write_exits++; + spin_lock_irqsave(&eiointc->lock, flags); + switch (len) { + case 1: + ret = loongarch_eiointc_writeb(vcpu, eiointc, addr, len, val); + break; + case 2: + ret = loongarch_eiointc_writew(vcpu, eiointc, addr, len, val); + break; + case 4: + ret = loongarch_eiointc_writel(vcpu, eiointc, addr, len, val); + break; + case 8: + ret = loongarch_eiointc_writeq(vcpu, eiointc, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n", + __func__, addr, len); + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; } static const struct kvm_io_device_ops kvm_eiointc_ops = { @@ -30,6 +712,29 @@ static int kvm_eiointc_virt_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { + unsigned long flags; + u32 *data = val; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr -= EIOINTC_VIRT_BASE; + spin_lock_irqsave(&eiointc->lock, flags); + switch (addr) { + case EIOINTC_VIRT_FEATURES: + *data = eiointc->features; + break; + case EIOINTC_VIRT_CONFIG: + *data = eiointc->status; + break; + default: + break; + } + spin_unlock_irqrestore(&eiointc->lock, flags); + return 0; } @@ -37,7 +742,38 @@ static int kvm_eiointc_virt_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret = 0; + unsigned long flags; + u32 value = *(u32 *)val; + struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc; + + if (!eiointc) { + kvm_err("%s: eiointc irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr -= EIOINTC_VIRT_BASE; + spin_lock_irqsave(&eiointc->lock, flags); + switch (addr) { + case EIOINTC_VIRT_FEATURES: + ret = -EPERM; + break; + case EIOINTC_VIRT_CONFIG: + /* + * eiointc features can only be set at disabled status + */ + if ((eiointc->status & BIT(EIOINTC_ENABLE)) && value) { + ret = -EPERM; + break; + } + eiointc->status = value & eiointc->features; + break; + default: + break; + } + spin_unlock_irqrestore(&eiointc->lock, flags); + + return ret; } static const struct kvm_io_device_ops kvm_eiointc_virt_ops = { From 1ad7efa552fd5cf4e8c49fea863c5c6a5dcf9f00 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 276/281] LoongArch: KVM: Add EIOINTC user mode read and write functions Implement the communication interface between the user mode programs and the kernel in EIOINTC interrupt controller simulation, which is used to obtain or send the simulation data of the interrupt controller in the user mode process, and is also used in VM migration or VM saving and restoration. Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/kvm.h | 12 ++ arch/loongarch/kvm/intc/eiointc.c | 163 +++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index c6bbbb34cef4..98126c034eb8 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -134,4 +134,16 @@ struct kvm_iocsr_entry { #define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000001 +#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000002 + +#define KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS 0x40000003 +#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU 0x0 +#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE 0x1 +#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE 0x2 + +#define KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL 0x40000004 +#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU 0x0 +#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE 0x1 +#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED 0x3 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index 7369c23be036..f39929d7bf8a 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -781,16 +781,175 @@ static const struct kvm_io_device_ops kvm_eiointc_virt_ops = { .write = kvm_eiointc_virt_write, }; +static int kvm_eiointc_ctrl_access(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret = 0; + unsigned long flags; + unsigned long type = (unsigned long)attr->attr; + u32 i, start_irq; + void __user *data; + struct loongarch_eiointc *s = dev->kvm->arch.eiointc; + + data = (void __user *)attr->addr; + spin_lock_irqsave(&s->lock, flags); + switch (type) { + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: + if (copy_from_user(&s->num_cpu, data, 4)) + ret = -EFAULT; + break; + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: + if (copy_from_user(&s->features, data, 4)) + ret = -EFAULT; + if (!(s->features & BIT(EIOINTC_HAS_VIRT_EXTENSION))) + s->status |= BIT(EIOINTC_ENABLE); + break; + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED: + eiointc_set_sw_coreisr(s); + for (i = 0; i < (EIOINTC_IRQS / 4); i++) { + start_irq = i * 4; + eiointc_update_sw_coremap(s, start_irq, + (void *)&s->coremap.reg_u32[i], sizeof(u32), false); + } + break; + default: + break; + } + spin_unlock_irqrestore(&s->lock, flags); + + return ret; +} + +static int kvm_eiointc_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, cpuid, offset, ret = 0; + unsigned long flags; + void *p = NULL; + void __user *data; + struct loongarch_eiointc *s; + + s = dev->kvm->arch.eiointc; + addr = attr->attr; + cpuid = addr >> 16; + addr &= 0xffff; + data = (void __user *)attr->addr; + switch (addr) { + case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: + offset = (addr - EIOINTC_NODETYPE_START) / 4; + p = &s->nodetype.reg_u32[offset]; + break; + case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: + offset = (addr - EIOINTC_IPMAP_START) / 4; + p = &s->ipmap.reg_u32[offset]; + break; + case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: + offset = (addr - EIOINTC_ENABLE_START) / 4; + p = &s->enable.reg_u32[offset]; + break; + case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: + offset = (addr - EIOINTC_BOUNCE_START) / 4; + p = &s->bounce.reg_u32[offset]; + break; + case EIOINTC_ISR_START ... EIOINTC_ISR_END: + offset = (addr - EIOINTC_ISR_START) / 4; + p = &s->isr.reg_u32[offset]; + break; + case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + offset = (addr - EIOINTC_COREISR_START) / 4; + p = &s->coreisr.reg_u32[cpuid][offset]; + break; + case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: + offset = (addr - EIOINTC_COREMAP_START) / 4; + p = &s->coremap.reg_u32[offset]; + break; + default: + kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + spin_lock_irqsave(&s->lock, flags); + if (is_write) { + if (copy_from_user(p, data, 4)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, 4)) + ret = -EFAULT; + } + spin_unlock_irqrestore(&s->lock, flags); + + return ret; +} + +static int kvm_eiointc_sw_status_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, ret = 0; + unsigned long flags; + void *p = NULL; + void __user *data; + struct loongarch_eiointc *s; + + s = dev->kvm->arch.eiointc; + addr = attr->attr; + addr &= 0xffff; + + data = (void __user *)attr->addr; + switch (addr) { + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + p = &s->num_cpu; + break; + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + p = &s->features; + break; + case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: + p = &s->status; + break; + default: + kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); + return -EINVAL; + } + spin_lock_irqsave(&s->lock, flags); + if (is_write) { + if (copy_from_user(p, data, 4)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, 4)) + ret = -EFAULT; + } + spin_unlock_irqrestore(&s->lock, flags); + + return ret; +} + static int kvm_eiointc_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: + return kvm_eiointc_regs_access(dev, attr, false); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: + return kvm_eiointc_sw_status_access(dev, attr, false); + default: + return -EINVAL; + } } static int kvm_eiointc_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL: + return kvm_eiointc_ctrl_access(dev, attr); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: + return kvm_eiointc_regs_access(dev, attr, true); + case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: + return kvm_eiointc_sw_status_access(dev, attr, true); + default: + return -EINVAL; + } } static int kvm_eiointc_create(struct kvm_device *dev, u32 type) From e785dfacf7e7fe94370fa0e8e3ff1bc8fe179831 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 277/281] LoongArch: KVM: Add PCHPIC device support Add device model for PCHPIC interrupt controller, implemente basic create & destroy interface, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/asm/kvm_pch_pic.h | 31 +++++++++ arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/pch_pic.c | 88 ++++++++++++++++++++++++ arch/loongarch/kvm/main.c | 6 ++ include/uapi/linux/kvm.h | 2 + 6 files changed, 130 insertions(+) create mode 100644 arch/loongarch/include/asm/kvm_pch_pic.h create mode 100644 arch/loongarch/kvm/intc/pch_pic.c diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index a63c2bf6fae0..a6b82c8ab7bc 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -20,6 +20,7 @@ #include #include #include +#include #include /* Loongarch KVM register ids */ @@ -125,6 +126,7 @@ struct kvm_arch { struct kvm_context __percpu *vmcs; struct loongarch_ipi *ipi; struct loongarch_eiointc *eiointc; + struct loongarch_pch_pic *pch_pic; }; #define CSR_MAX_NUMS 0x800 diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h new file mode 100644 index 000000000000..914be4fd35e9 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_KVM_PCH_PIC_H +#define __ASM_KVM_PCH_PIC_H + +#include + +struct loongarch_pch_pic { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + uint64_t mask; /* 1:disable irq, 0:enable irq */ + uint64_t htmsi_en; /* 1:msi */ + uint64_t edge; /* 1:edge triggered, 0:level triggered */ + uint64_t auto_ctrl0; /* only use default value 00b */ + uint64_t auto_ctrl1; /* only use default value 00b */ + uint64_t last_intirr; /* edge detection */ + uint64_t irr; /* interrupt request register */ + uint64_t isr; /* interrupt service register */ + uint64_t polarity; /* 0: high level trigger, 1: low level trigger */ + uint8_t route_entry[64]; /* default value 0, route to int0: eiointc */ + uint8_t htmsi_vector[64]; /* irq route table for routing to eiointc */ + uint64_t pch_pic_base; +}; + +int kvm_loongarch_register_pch_pic_device(void); + +#endif /* __ASM_KVM_PCH_PIC_H */ diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index bb50fc799c29..97b2adf08206 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -20,5 +20,6 @@ kvm-y += vcpu.o kvm-y += vm.o kvm-y += intc/ipi.o kvm-y += intc/eiointc.o +kvm-y += intc/pch_pic.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c new file mode 100644 index 000000000000..564b7dcbbbed --- /dev/null +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_pch_pic_ops = { + .read = kvm_pch_pic_read, + .write = kvm_pch_pic_write, +}; + +static int kvm_pch_pic_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_pch_pic_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) +{ + struct kvm *kvm = dev->kvm; + struct loongarch_pch_pic *s; + + /* pch pic should not has been created */ + if (kvm->arch.pch_pic) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + kvm->arch.pch_pic = s; + + return 0; +} + +static void kvm_pch_pic_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_pch_pic *s; + + if (!dev || !dev->kvm || !dev->kvm->arch.pch_pic) + return; + + kvm = dev->kvm; + s = kvm->arch.pch_pic; + /* unregister pch pic device and free it's memory */ + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &s->device); + kfree(s); +} + +static struct kvm_device_ops kvm_pch_pic_dev_ops = { + .name = "kvm-loongarch-pch-pic", + .create = kvm_pch_pic_create, + .destroy = kvm_pch_pic_destroy, + .set_attr = kvm_pch_pic_set_attr, + .get_attr = kvm_pch_pic_get_attr, +}; + +int kvm_loongarch_register_pch_pic_device(void) +{ + return kvm_register_device_ops(&kvm_pch_pic_dev_ops, KVM_DEV_TYPE_LOONGARCH_PCHPIC); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 8de366ade99c..396fed2665a5 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace.h" unsigned long vpid_mask; @@ -376,6 +377,11 @@ static int kvm_loongarch_env_init(void) /* Register LoongArch EIOINTC interrupt controller interface. */ ret = kvm_loongarch_register_eiointc_device(); + if (ret) + return ret; + + /* Register LoongArch PCH-PIC interrupt controller interface. */ + ret = kvm_loongarch_register_pch_pic_device(); return ret; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0ec5c631d9e9..502ea63b5d2e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1162,6 +1162,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_EIOINTC, #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_MAX, From f5f31efa3c2d51c03afab5ab6e3f004d2d529013 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 278/281] LoongArch: KVM: Add PCHPIC read and write functions Add implementation of IPI interrupt controller's address space read and write function simulation. Implement interrupt injection interface under loongarch. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/asm/kvm_pch_pic.h | 31 +++ arch/loongarch/kvm/intc/pch_pic.c | 291 ++++++++++++++++++++++- 3 files changed, 322 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index a6b82c8ab7bc..d1f75b854107 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -51,6 +51,8 @@ struct kvm_vm_stat { u64 ipi_write_exits; u64 eiointc_read_exits; u64 eiointc_write_exits; + u64 pch_pic_read_exits; + u64 pch_pic_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h index 914be4fd35e9..e6df6a4c1c70 100644 --- a/arch/loongarch/include/asm/kvm_pch_pic.h +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -8,6 +8,35 @@ #include +#define PCH_PIC_SIZE 0x3e8 + +#define PCH_PIC_INT_ID_START 0x0 +#define PCH_PIC_INT_ID_END 0x7 +#define PCH_PIC_MASK_START 0x20 +#define PCH_PIC_MASK_END 0x27 +#define PCH_PIC_HTMSI_EN_START 0x40 +#define PCH_PIC_HTMSI_EN_END 0x47 +#define PCH_PIC_EDGE_START 0x60 +#define PCH_PIC_EDGE_END 0x67 +#define PCH_PIC_CLEAR_START 0x80 +#define PCH_PIC_CLEAR_END 0x87 +#define PCH_PIC_AUTO_CTRL0_START 0xc0 +#define PCH_PIC_AUTO_CTRL0_END 0xc7 +#define PCH_PIC_AUTO_CTRL1_START 0xe0 +#define PCH_PIC_AUTO_CTRL1_END 0xe7 +#define PCH_PIC_ROUTE_ENTRY_START 0x100 +#define PCH_PIC_ROUTE_ENTRY_END 0x13f +#define PCH_PIC_HTMSI_VEC_START 0x200 +#define PCH_PIC_HTMSI_VEC_END 0x23f +#define PCH_PIC_INT_IRR_START 0x380 +#define PCH_PIC_INT_IRR_END 0x38f +#define PCH_PIC_INT_ISR_START 0x3a0 +#define PCH_PIC_INT_ISR_END 0x3af +#define PCH_PIC_POLARITY_START 0x3e0 +#define PCH_PIC_POLARITY_END 0x3e7 +#define PCH_PIC_INT_ID_VAL 0x7000000UL +#define PCH_PIC_INT_ID_VER 0x1UL + struct loongarch_pch_pic { spinlock_t lock; struct kvm *kvm; @@ -27,5 +56,7 @@ struct loongarch_pch_pic { }; int kvm_loongarch_register_pch_pic_device(void); +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level); +void pch_msi_set_irq(struct kvm *kvm, int irq, int level); #endif /* __ASM_KVM_PCH_PIC_H */ diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 564b7dcbbbed..1269ab264ac2 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -8,18 +8,305 @@ #include #include +/* update the isr according to irq level and route irq to eiointc */ +static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = BIT(irq); + + /* + * set isr and route irq to eiointc and + * the route table is in htmsi_vector[] + */ + if (level) { + if (mask & s->irr & ~s->mask) { + s->isr |= mask; + irq = s->htmsi_vector[irq]; + eiointc_set_irq(s->kvm->arch.eiointc, irq, level); + } + } else { + if (mask & s->isr & ~s->irr) { + s->isr &= ~mask; + irq = s->htmsi_vector[irq]; + eiointc_set_irq(s->kvm->arch.eiointc, irq, level); + } + } +} + +/* update batch irqs, the irq_mask is a bitmap of irqs */ +static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) +{ + int irq, bits; + + /* find each irq by irqs bitmap and update each irq */ + bits = sizeof(irq_mask) * 8; + irq = find_first_bit((void *)&irq_mask, bits); + while (irq < bits) { + pch_pic_update_irq(s, irq, level); + bitmap_clear((void *)&irq_mask, irq, 1); + irq = find_first_bit((void *)&irq_mask, bits); + } +} + +/* called when a irq is triggered in pch pic */ +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = BIT(irq); + + spin_lock(&s->lock); + if (level) + s->irr |= mask; /* set irr */ + else { + /* + * In edge triggered mode, 0 does not mean to clear irq + * The irr register variable is cleared when cpu writes to the + * PCH_PIC_CLEAR_START address area + */ + if (s->edge & mask) { + spin_unlock(&s->lock); + return; + } + s->irr &= ~mask; + } + pch_pic_update_irq(s, irq, level); + spin_unlock(&s->lock); +} + +/* msi irq handler */ +void pch_msi_set_irq(struct kvm *kvm, int irq, int level) +{ + eiointc_set_irq(kvm->arch.eiointc, irq, level); +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to read. + */ +static u32 pch_pic_read_reg(u64 *s, int high) +{ + u64 val = *s; + + /* read the high 32 bits when high is 1 */ + return high ? (u32)(val >> 32) : (u32)val; +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to write. + */ +static u32 pch_pic_write_reg(u64 *s, int high, u32 v) +{ + u64 val = *s, data = v; + + if (high) { + /* + * Clear val high 32 bits + * Write the high 32 bits when the high is 1 + */ + *s = (val << 32 >> 32) | (data << 32); + val >>= 32; + } else + /* + * Clear val low 32 bits + * Write the low 32 bits when the high is 0 + */ + *s = (val >> 32 << 32) | v; + + return (u32)val; +} + +static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) +{ + int offset, index, ret = 0; + u32 data = 0; + u64 int_id = 0; + + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: + /* int id version */ + int_id |= (u64)PCH_PIC_INT_ID_VER << 32; + /* irq number */ + int_id |= (u64)31 << (32 + 16); + /* int id value */ + int_id |= PCH_PIC_INT_ID_VAL; + *(u64 *)val = int_id; + break; + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + index = offset >> 2; + /* read mask reg */ + data = pch_pic_read_reg(&s->mask, index); + *(u32 *)val = data; + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + /* read htmsi enable reg */ + data = pch_pic_read_reg(&s->htmsi_en, index); + *(u32 *)val = data; + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* read edge enable reg */ + data = pch_pic_read_reg(&s->edge, index); + *(u32 *)val = data; + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + /* we only use default mode: fixed interrupt distribution mode */ + *(u32 *)val = 0; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + /* only route to int0: eiointc */ + *(u8 *)val = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + offset -= PCH_PIC_HTMSI_VEC_START; + /* read htmsi vector */ + data = s->htmsi_vector[offset]; + *(u8 *)val = data; + break; + case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END: + /* we only use defalut value 0: high level triggered */ + *(u32 *)val = 0; + break; + default: + ret = -EINVAL; + } + spin_unlock(&s->lock); + + return ret; +} + static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic reading */ + vcpu->kvm->stat.pch_pic_read_exits++; + ret = loongarch_pch_pic_read(s, addr, len, val); + + return ret; +} + +static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, + int len, const void *val) +{ + int ret; + u32 old, data, offset, index; + u64 irq; + + ret = 0; + data = *(u32 *)val; + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + /* get whether high or low 32 bits we want to write */ + index = offset >> 2; + old = pch_pic_write_reg(&s->mask, index, data); + /* enable irq when mask value change to 0 */ + irq = (old & ~data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 1); + /* disable irq when mask value change to 1 */ + irq = (~old & data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + pch_pic_write_reg(&s->htmsi_en, index, data); + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* 1: edge triggered, 0: level triggered */ + pch_pic_write_reg(&s->edge, index, data); + break; + case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END: + offset -= PCH_PIC_CLEAR_START; + index = offset >> 2; + /* write 1 to clear edge irq */ + old = pch_pic_read_reg(&s->irr, index); + /* + * get the irq bitmap which is edge triggered and + * already set and to be cleared + */ + irq = old & pch_pic_read_reg(&s->edge, index) & data; + /* write irr to the new state where irqs have been cleared */ + pch_pic_write_reg(&s->irr, index, old & ~irq); + /* update cleared irqs */ + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + offset -= PCH_PIC_AUTO_CTRL0_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl0, index, 0); + break; + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + offset -= PCH_PIC_AUTO_CTRL1_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl1, index, 0); + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset -= PCH_PIC_ROUTE_ENTRY_START; + /* only route to int0: eiointc */ + s->route_entry[offset] = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + /* route table to eiointc */ + offset -= PCH_PIC_HTMSI_VEC_START; + s->htmsi_vector[offset] = (u8)data; + break; + case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END: + offset -= PCH_PIC_POLARITY_START; + index = offset >> 2; + /* we only use defalut value 0: high level triggered */ + pch_pic_write_reg(&s->polarity, index, 0); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock(&s->lock); + + return ret; } static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic writing */ + vcpu->kvm->stat.pch_pic_write_exits++; + ret = loongarch_pch_pic_write(s, addr, len, val); + + return ret; } static const struct kvm_io_device_ops kvm_pch_pic_ops = { From d206d951487326b535007639d58e2a98d18e3dee Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 279/281] LoongArch: KVM: Add PCHPIC user mode read and write functions Implement the communication interface between the user mode programs and the kernel in PCHPIC interrupt control simulation, which is used to obtain or send the simulation data of the interrupt controller in the user mode process, and is also used in VM migration or VM saving and restoration. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/kvm.h | 4 + arch/loongarch/kvm/intc/pch_pic.c | 121 +++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 98126c034eb8..72af0e854a50 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -146,4 +146,8 @@ struct kvm_iocsr_entry { #define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE 0x1 #define KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED 0x3 +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS 0x40000005 +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000006 +#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 1269ab264ac2..fe1f966d6c8a 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -314,16 +314,133 @@ static const struct kvm_io_device_ops kvm_pch_pic_ops = { .write = kvm_pch_pic_write, }; +static int kvm_pch_pic_init(struct kvm_device *dev, u64 addr) +{ + int ret; + struct kvm *kvm = dev->kvm; + struct kvm_io_device *device; + struct loongarch_pch_pic *s = dev->kvm->arch.pch_pic; + + s->pch_pic_base = addr; + device = &s->device; + /* init device by pch pic writing and reading ops */ + kvm_iodevice_init(device, &kvm_pch_pic_ops); + mutex_lock(&kvm->slots_lock); + /* register pch pic device */ + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, PCH_PIC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + + return (ret < 0) ? -EFAULT : 0; +} + +/* used by user space to get or set pch pic registers */ +static int kvm_pch_pic_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, offset, len = 8, ret = 0; + void __user *data; + void *p = NULL; + struct loongarch_pch_pic *s; + + s = dev->kvm->arch.pch_pic; + addr = attr->attr; + data = (void __user *)attr->addr; + + /* get pointer to pch pic register by addr */ + switch (addr) { + case PCH_PIC_MASK_START: + p = &s->mask; + break; + case PCH_PIC_HTMSI_EN_START: + p = &s->htmsi_en; + break; + case PCH_PIC_EDGE_START: + p = &s->edge; + break; + case PCH_PIC_AUTO_CTRL0_START: + p = &s->auto_ctrl0; + break; + case PCH_PIC_AUTO_CTRL1_START: + p = &s->auto_ctrl1; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset = addr - PCH_PIC_ROUTE_ENTRY_START; + p = &s->route_entry[offset]; + len = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + offset = addr - PCH_PIC_HTMSI_VEC_START; + p = &s->htmsi_vector[offset]; + len = 1; + break; + case PCH_PIC_INT_IRR_START: + p = &s->irr; + break; + case PCH_PIC_INT_ISR_START: + p = &s->isr; + break; + case PCH_PIC_POLARITY_START: + p = &s->polarity; + break; + default: + return -EINVAL; + } + + spin_lock(&s->lock); + /* write or read value according to is_write */ + if (is_write) { + if (copy_from_user(p, data, len)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, len)) + ret = -EFAULT; + } + spin_unlock(&s->lock); + + return ret; +} + static int kvm_pch_pic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + return kvm_pch_pic_regs_access(dev, attr, false); + default: + return -EINVAL; + } } static int kvm_pch_pic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + u64 addr; + void __user *uaddr = (void __user *)(long)attr->addr; + + switch (attr->group) { + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT: + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + if (!dev->kvm->arch.pch_pic) { + kvm_err("%s: please create pch_pic irqchip first!\n", __func__); + return -ENODEV; + } + + return kvm_pch_pic_init(dev, addr); + default: + kvm_err("%s: unknown group (%d) attr (%lld)\n", __func__, attr->group, + attr->attr); + return -EINVAL; + } + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + return kvm_pch_pic_regs_access(dev, attr, true); + default: + return -EINVAL; + } } static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) From 1928254c5ccb7bdffd7f0334e1ce250e9ce4de94 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 280/281] LoongArch: KVM: Add irqfd support Enable the KVM_IRQ_ROUTING/KVM_IRQCHIP/KVM_MSI configuration items, add the KVM_CAP_IRQCHIP capability, and implement the query interface of the in-kernel irqchip. Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/uapi/asm/kvm.h | 2 + arch/loongarch/kvm/Kconfig | 5 +- arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/pch_pic.c | 27 ++++++++ arch/loongarch/kvm/irqfd.c | 89 +++++++++++++++++++++++++++ arch/loongarch/kvm/vm.c | 21 +++++++ 7 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/kvm/irqfd.c diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index d1f75b854107..7b8367c39da8 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -23,6 +23,8 @@ #include #include +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + /* Loongarch KVM register ids */ #define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT) #define KVM_GET_IOC_CPUCFG_IDX(id) ((id & KVM_CPUCFG_IDX_MASK) >> LOONGARCH_REG_SHIFT) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 72af0e854a50..5f354f5c6847 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -8,6 +8,8 @@ #include +#define __KVM_HAVE_IRQ_LINE + /* * KVM LoongArch specific structures and definitions. * diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 248744b4d086..97a811077ac3 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -21,13 +21,16 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on AS_HAS_LVZ_EXTENSION select HAVE_KVM_DIRTY_RING_ACQ_REL + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_IRQCHIP + select HAVE_KVM_MSI + select HAVE_KVM_READONLY_MEM select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO - select HAVE_KVM_READONLY_MEM select KVM_XFER_TO_GUEST_WORK select SCHED_INFO help diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 97b2adf08206..3a01292f71cc 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -21,5 +21,6 @@ kvm-y += vm.o kvm-y += intc/ipi.o kvm-y += intc/eiointc.o kvm-y += intc/pch_pic.o +kvm-y += irqfd.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index fe1f966d6c8a..08fce845f668 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -443,8 +443,31 @@ static int kvm_pch_pic_set_attr(struct kvm_device *dev, } } +static int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + int i, ret; + u32 nr = KVM_IRQCHIP_NUM_PINS; + struct kvm_irq_routing_entry *entries; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + + return ret; +} + static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) { + int ret; struct kvm *kvm = dev->kvm; struct loongarch_pch_pic *s; @@ -452,6 +475,10 @@ static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) if (kvm->arch.pch_pic) return -EINVAL; + ret = kvm_setup_default_irq_routing(kvm); + if (ret) + return -ENOMEM; + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); if (!s) return -ENOMEM; diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c new file mode 100644 index 000000000000..9a39627aecf0 --- /dev/null +++ b/arch/loongarch/kvm/irqfd.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + /* PCH-PIC pin (0 ~ 64) <---> GSI (0 ~ 64) */ + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + + return 0; +} + +/* + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + + pch_msi_set_irq(kvm, e->msi.data, level); + + return 0; +} + +/* + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = kvm_set_pic_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + return -EINVAL; + + return 0; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + return 0; + default: + return -EINVAL; + } +} + +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + switch (e->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + return 0; + case KVM_IRQ_ROUTING_MSI: + pch_msi_set_irq(kvm, e->msi.data, level); + return 0; + default: + return -EWOULDBLOCK; + } +} + +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return kvm_arch_irqchip_in_kernel(kvm); +} diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 4ba734aaef87..b8b3e1972d6e 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -76,6 +78,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { + case KVM_CAP_IRQCHIP: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: @@ -161,6 +164,8 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) struct kvm_device_attr attr; switch (ioctl) { + case KVM_CREATE_IRQCHIP: + return 0; case KVM_HAS_DEVICE_ATTR: if (copy_from_user(&attr, argp, sizeof(attr))) return -EFAULT; @@ -170,3 +175,19 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } } + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status) +{ + if (!kvm_arch_irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, line_status); + + return 0; +} + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return (kvm->arch.ipi && kvm->arch.eiointc && kvm->arch.pch_pic); +} From 9899b8201025d00b23aee143594a30c55cc4cc35 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: [PATCH 281/281] irqchip/loongson-eiointc: Add virt extension support Interrupts can be routed to maximal four virtual CPUs with real HW EIOINTC interrupt controller model, since interrupt routing is encoded with CPU bitmap and EIOINTC node combined method. Here add the EIOINTC virt extension support so that interrupts can be routed to 256 vCPUs in virtual machine mode. CPU bitmap is replaced with normal encoding and EIOINTC node type is removed, so there are 8 bits for cpu selection, at most 256 vCPUs are supported for interrupt routing. Reviewed-by: Thomas Gleixner Co-developed-by: Song Gao Signed-off-by: Song Gao Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- .../arch/loongarch/irq-chip-model.rst | 64 +++++++++++ .../zh_CN/arch/loongarch/irq-chip-model.rst | 55 ++++++++++ arch/loongarch/include/asm/irq.h | 1 + drivers/irqchip/irq-loongson-eiointc.c | 102 ++++++++++++++---- 4 files changed, 204 insertions(+), 18 deletions(-) diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst index 6dd48256e39f..a7ecce11e445 100644 --- a/Documentation/arch/loongarch/irq-chip-model.rst +++ b/Documentation/arch/loongarch/irq-chip-model.rst @@ -85,6 +85,70 @@ to CPUINTC directly:: | Devices | +---------+ +Virtual Extended IRQ model +========================== + +In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt +go to CPUINTC directly, CPU UARTS interrupts go to PCH-PIC, while all other +devices interrupts go to PCH-PIC/PCH-MSI and gathered by V-EIOINTC (Virtual +Extended I/O Interrupt Controller), and then go to CPUINTC directly:: + + +-----+ +-------------------+ +-------+ + | IPI |--> | CPUINTC(0-255vcpu)| <-- | Timer | + +-----+ +-------------------+ +-------+ + ^ + | + +-----------+ + | V-EIOINTC | + +-----------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +--------+ +---------+ +---------+ + | UARTs | | Devices | | Devices | + +--------+ +---------+ +---------+ + + +Description +----------- +V-EIOINTC (Virtual Extended I/O Interrupt Controller) is an extension of +EIOINTC, it only works in VM mode which runs in KVM hypervisor. Interrupts can +be routed to up to four vCPUs via standard EIOINTC, however with V-EIOINTC +interrupts can be routed to up to 256 virtual cpus. + +With standard EIOINTC, interrupt routing setting includes two parts: eight +bits for CPU selection and four bits for CPU IP (Interrupt Pin) selection. +For CPU selection there is four bits for EIOINTC node selection, four bits +for EIOINTC CPU selection. Bitmap method is used for CPU selection and +CPU IP selection, so interrupt can only route to CPU0 - CPU3 and IP0-IP3 in +one EIOINTC node. + +With V-EIOINTC it supports to route more CPUs and CPU IP (Interrupt Pin), +there are two newly added registers with V-EIOINTC. + +EXTIOI_VIRT_FEATURES +-------------------- +This register is read-only register, which indicates supported features with +V-EIOINTC. Feature EXTIOI_HAS_INT_ENCODE and EXTIOI_HAS_CPU_ENCODE is added. + +Feature EXTIOI_HAS_INT_ENCODE is part of standard EIOINTC. If it is 1, it +indicates that CPU Interrupt Pin selection can be normal method rather than +bitmap method, so interrupt can be routed to IP0 - IP15. + +Feature EXTIOI_HAS_CPU_ENCODE is entension of V-EIOINTC. If it is 1, it +indicates that CPU selection can be normal method rather than bitmap method, +so interrupt can be routed to CPU0 - CPU255. + +EXTIOI_VIRT_CONFIG +------------------ +This register is read-write register, for compatibility intterupt routed uses +the default method which is the same with standard EIOINTC. If the bit is set +with 1, it indicated HW to use normal method rather than bitmap method. + Advanced Extended IRQ model =========================== diff --git a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst index 472761938682..d4ff80de47b6 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst @@ -87,6 +87,61 @@ PCH-LPC/PCH-MSI,然后被EIOINTC统一收集,再直接到达CPUINTC:: | Devices | +---------+ +虚拟扩展IRQ模型 +=============== + +在这种模型里面, IPI(Inter-Processor Interrupt) 和CPU本地时钟中断直接发送到CPUINTC, +CPU串口 (UARTs) 中断发送到PCH-PIC, 而其他所有设备的中断则分别发送到所连接的PCH_PIC/ +PCH-MSI, 然后V-EIOINTC统一收集,再直接到达CPUINTC:: + + +-----+ +-------------------+ +-------+ + | IPI |--> | CPUINTC(0-255vcpu)| <-- | Timer | + +-----+ +-------------------+ +-------+ + ^ + | + +-----------+ + | V-EIOINTC | + +-----------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +--------+ +---------+ +---------+ + | UARTs | | Devices | | Devices | + +--------+ +---------+ +---------+ + +V-EIOINTC 是EIOINTC的扩展, 仅工作在虚拟机模式下, 中断经EIOINTC最多可个路由到 +4个虚拟CPU. 但中断经V-EIOINTC最多可个路由到256个虚拟CPU. + +传统的EIOINTC中断控制器,中断路由分为两个部分:8比特用于控制路由到哪个CPU, +4比特用于控制路由到特定CPU的哪个中断管脚。控制CPU路由的8比特前4比特用于控制 +路由到哪个EIOINTC节点,后4比特用于控制此节点哪个CPU。中断路由在选择CPU路由 +和CPU中断管脚路由时,使用bitmap编码方式而不是正常编码方式,所以对于一个 +EIOINTC中断控制器节点,中断只能路由到CPU0 - CPU3,中断管脚IP0-IP3。 + +V-EIOINTC新增了两个寄存器,支持中断路由到更多CPU个和中断管脚。 + +V-EIOINTC功能寄存器 +------------------- +功能寄存器是只读寄存器,用于显示V-EIOINTC支持的特性,目前两个支持两个特性 +EXTIOI_HAS_INT_ENCODE 和 EXTIOI_HAS_CPU_ENCODE。 + +特性EXTIOI_HAS_INT_ENCODE是传统EIOINTC中断控制器的一个特性,如果此比特为1, +显示CPU中断管脚路由方式支持正常编码,而不是bitmap编码,所以中断可以路由到 +管脚IP0 - IP15。 + +特性EXTIOI_HAS_CPU_ENCODE是V-EIOINTC新增特性,如果此比特为1,表示CPU路由 +方式支持正常编码,而不是bitmap编码,所以中断可以路由到CPU0 - CPU255。 + +V-EIOINTC配置寄存器 +------------------- +配置寄存器是可读写寄存器,为了兼容性考虑,如果不写此寄存器,中断路由采用 +和传统EIOINTC相同的路由设置。如果对应比特设置为1,表示采用正常路由方式而 +不是bitmap编码的路由方式。 + 高级扩展IRQ模型 =============== diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index 9c2ca785faa9..a0ca84da8541 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -65,6 +65,7 @@ extern struct acpi_vector_group pch_group[MAX_IO_PICS]; extern struct acpi_vector_group msi_group[MAX_IO_PICS]; #define CORES_PER_EIO_NODE 4 +#define CORES_PER_VEIO_NODE 256 #define LOONGSON_CPU_UART0_VEC 10 /* CPU UART0 */ #define LOONGSON_CPU_THSENS_VEC 14 /* CPU Thsens */ diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index e24db71a8783..bb79e19dfb59 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -26,15 +27,37 @@ #define EIOINTC_REG_ISR 0x1800 #define EIOINTC_REG_ROUTE 0x1c00 +#define EXTIOI_VIRT_FEATURES 0x40000000 +#define EXTIOI_HAS_VIRT_EXTENSION BIT(0) +#define EXTIOI_HAS_ENABLE_OPTION BIT(1) +#define EXTIOI_HAS_INT_ENCODE BIT(2) +#define EXTIOI_HAS_CPU_ENCODE BIT(3) +#define EXTIOI_VIRT_CONFIG 0x40000004 +#define EXTIOI_ENABLE BIT(1) +#define EXTIOI_ENABLE_INT_ENCODE BIT(2) +#define EXTIOI_ENABLE_CPU_ENCODE BIT(3) + #define VEC_REG_COUNT 4 #define VEC_COUNT_PER_REG 64 #define VEC_COUNT (VEC_REG_COUNT * VEC_COUNT_PER_REG) #define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) #define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) #define EIOINTC_ALL_ENABLE 0xffffffff +#define EIOINTC_ALL_ENABLE_VEC_MASK(vector) (EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1f)) +#define EIOINTC_REG_ENABLE_VEC(vector) (EIOINTC_REG_ENABLE + ((vector >> 5) << 2)) +#define EIOINTC_USE_CPU_ENCODE BIT(0) #define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE) +/* + * Routing registers are 32bit, and there is 8-bit route setting for every + * interrupt vector. So one Route register contains four vectors routing + * information. + */ +#define EIOINTC_REG_ROUTE_VEC(vector) (EIOINTC_REG_ROUTE + (vector & ~0x03)) +#define EIOINTC_REG_ROUTE_VEC_SHIFT(vector) ((vector & 0x03) << 3) +#define EIOINTC_REG_ROUTE_VEC_MASK(vector) (0xff << EIOINTC_REG_ROUTE_VEC_SHIFT(vector)) + static int nr_pics; struct eiointc_priv { @@ -44,6 +67,7 @@ struct eiointc_priv { cpumask_t cpuspan_map; struct fwnode_handle *domain_handle; struct irq_domain *eiointc_domain; + int flags; }; static struct eiointc_priv *eiointc_priv[MAX_IO_PICS]; @@ -59,7 +83,10 @@ static void eiointc_enable(void) static int cpu_to_eio_node(int cpu) { - return cpu_logical_map(cpu) / CORES_PER_EIO_NODE; + if (!kvm_para_has_feature(KVM_FEATURE_VIRT_EXTIOI)) + return cpu_logical_map(cpu) / CORES_PER_EIO_NODE; + else + return cpu_logical_map(cpu) / CORES_PER_VEIO_NODE; } #ifdef CONFIG_SMP @@ -89,6 +116,17 @@ static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, } } +static void veiointc_set_irq_route(unsigned int vector, unsigned int cpu) +{ + unsigned long reg = EIOINTC_REG_ROUTE_VEC(vector); + unsigned int data; + + data = iocsr_read32(reg); + data &= ~EIOINTC_REG_ROUTE_VEC_MASK(vector); + data |= cpu_logical_map(cpu) << EIOINTC_REG_ROUTE_VEC_SHIFT(vector); + iocsr_write32(data, reg); +} + static DEFINE_RAW_SPINLOCK(affinity_lock); static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) @@ -107,18 +145,24 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af } vector = d->hwirq; - regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); + regaddr = EIOINTC_REG_ENABLE_VEC(vector); - /* Mask target vector */ - csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), - 0x0, priv->node * CORES_PER_EIO_NODE); + if (priv->flags & EIOINTC_USE_CPU_ENCODE) { + iocsr_write32(EIOINTC_ALL_ENABLE_VEC_MASK(vector), regaddr); + veiointc_set_irq_route(vector, cpu); + iocsr_write32(EIOINTC_ALL_ENABLE, regaddr); + } else { + /* Mask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE_VEC_MASK(vector), + 0x0, priv->node * CORES_PER_EIO_NODE); - /* Set route for target vector */ - eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); + /* Set route for target vector */ + eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); - /* Unmask target vector */ - csr_any_send(regaddr, EIOINTC_ALL_ENABLE, - 0x0, priv->node * CORES_PER_EIO_NODE); + /* Unmask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE, + 0x0, priv->node * CORES_PER_EIO_NODE); + } irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -142,17 +186,23 @@ static int eiointc_index(int node) static int eiointc_router_init(unsigned int cpu) { - int i, bit; - uint32_t data; - uint32_t node = cpu_to_eio_node(cpu); - int index = eiointc_index(node); + int i, bit, cores, index, node; + unsigned int data; + + node = cpu_to_eio_node(cpu); + index = eiointc_index(node); if (index < 0) { pr_err("Error: invalid nodemap!\n"); - return -1; + return -EINVAL; } - if ((cpu_logical_map(cpu) % CORES_PER_EIO_NODE) == 0) { + if (!(eiointc_priv[index]->flags & EIOINTC_USE_CPU_ENCODE)) + cores = CORES_PER_EIO_NODE; + else + cores = CORES_PER_VEIO_NODE; + + if ((cpu_logical_map(cpu) % cores) == 0) { eiointc_enable(); for (i = 0; i < eiointc_priv[0]->vec_count / 32; i++) { @@ -168,7 +218,9 @@ static int eiointc_router_init(unsigned int cpu) for (i = 0; i < eiointc_priv[0]->vec_count / 4; i++) { /* Route to Node-0 Core-0 */ - if (index == 0) + if (eiointc_priv[index]->flags & EIOINTC_USE_CPU_ENCODE) + bit = cpu_logical_map(0); + else if (index == 0) bit = BIT(cpu_logical_map(0)); else bit = (eiointc_priv[index]->node << 4) | 1; @@ -375,7 +427,7 @@ static int __init acpi_cascade_irqdomain_init(void) static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, u64 node_map) { - int i; + int i, val; node_map = node_map ? node_map : -1ULL; for_each_possible_cpu(i) { @@ -395,6 +447,20 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, return -ENOMEM; } + if (kvm_para_has_feature(KVM_FEATURE_VIRT_EXTIOI)) { + val = iocsr_read32(EXTIOI_VIRT_FEATURES); + /* + * With EXTIOI_ENABLE_CPU_ENCODE set + * interrupts can route to 256 vCPUs. + */ + if (val & EXTIOI_HAS_CPU_ENCODE) { + val = iocsr_read32(EXTIOI_VIRT_CONFIG); + val |= EXTIOI_ENABLE_CPU_ENCODE; + iocsr_write32(val, EXTIOI_VIRT_CONFIG); + priv->flags = EIOINTC_USE_CPU_ENCODE; + } + } + eiointc_priv[nr_pics++] = priv; eiointc_router_init(0); irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);