From b21a18f730e4b659be951a041eff682a60c0f4a0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Sep 2024 13:21:08 +0200 Subject: [PATCH 001/127] PCI: hotplug: Reorganize kerneldoc parameter names Reorganize kerneldoc parameter names to match the parameter order in the function header. Problems identified using Coccinelle. Link: https://lore.kernel.org/r/20240930112121.95324-23-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/pci_hotplug_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 058d5937d8a9..db09d4992e6e 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -388,8 +388,8 @@ static struct hotplug_slot *get_slot_from_name(const char *name) /** * __pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem - * @bus: bus this slot is on * @slot: pointer to the &struct hotplug_slot to register + * @bus: bus this slot is on * @devnr: device number * @name: name registered with kobject core * @owner: caller module owner From 43ee11adcb940204948ac0ca3a05d6178f0e8b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 30 Sep 2024 15:44:36 +0300 Subject: [PATCH 002/127] PCI: hotplug: Remove "Returns" kerneldoc from void functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_hp_deregister() was converted to void by the commit 51bbf9bee34f ("PCI: hotplug: Demidlayer registration with the core") but its kerneldoc still describes the return value. pci_hp_del() and pci_hp_destroy() have been void since they were introduced in that same commit. Remove the return value description from the kerneldoc of those functions. Link: https://lore.kernel.org/r/20240930124436.17908-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/pci_hotplug_core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index db09d4992e6e..36236ac88fd5 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -498,8 +498,6 @@ EXPORT_SYMBOL_GPL(pci_hp_add); * * The @slot must have been registered with the pci hotplug subsystem * previously with a call to pci_hp_register(). - * - * Returns 0 if successful, anything else for an error. */ void pci_hp_deregister(struct hotplug_slot *slot) { @@ -513,8 +511,6 @@ EXPORT_SYMBOL_GPL(pci_hp_deregister); * @slot: pointer to the &struct hotplug_slot to unpublish * * Remove a hotplug slot's sysfs interface. - * - * Returns 0 on success or a negative int on error. */ void pci_hp_del(struct hotplug_slot *slot) { @@ -545,8 +541,6 @@ EXPORT_SYMBOL_GPL(pci_hp_del); * the driver may no longer invoke hotplug_slot_name() to get the slot's * unique name. The driver no longer needs to handle a ->reset_slot callback * from this point on. - * - * Returns 0 on success or a negative int on error. */ void pci_hp_destroy(struct hotplug_slot *slot) { From 5efa23224bf573d4bceb51bc646dd67b6ccb83b5 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Wed, 25 Sep 2024 18:57:46 +0800 Subject: [PATCH 003/127] dt-bindings: PCI: mediatek-gen3: Allow exact number of clocks only In MediaTek PCIe gen3 bindings, "clocks" accepts a range of 1-6 clocks across all SoCs. But in practice, each SoC requires a particular number of clocks as defined in "clock-names", and the length of "clocks" and "clock-names" can be inconsistent with current bindings. For example: - MT8188, MT8192 and MT8195 all require 6 clocks, while the bindings accept 4-6 clocks. - MT7986 requires 4 clocks, while the bindings accept 4-6 clocks. Update minItems and maxItems properties for individual SoCs as needed to only accept the correct number of clocks. Fixes: c6abd0eadec6 ("dt-bindings: PCI: mediatek-gen3: Add support for Airoha EN7581") Link: https://lore.kernel.org/r/20240925110044.3678055-3-fshao@chromium.org Signed-off-by: Fei Shao Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Kozlowski --- .../devicetree/bindings/pci/mediatek-pcie-gen3.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml index 898c1be2d6a4..f05aab2b1add 100644 --- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml @@ -149,7 +149,7 @@ allOf: then: properties: clocks: - minItems: 4 + minItems: 6 clock-names: items: @@ -178,7 +178,7 @@ allOf: then: properties: clocks: - minItems: 4 + minItems: 6 clock-names: items: @@ -207,6 +207,7 @@ allOf: properties: clocks: minItems: 4 + maxItems: 4 clock-names: items: From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:50 -0500 Subject: [PATCH 004/127] PCI: Add TLP Processing Hints (TPH) support Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2, sec 6.17). Add TPH register definitions in pci_regs.h, including the TPH Requester capability register, TPH Requester control register, TPH Completer capability, and the ST fields of MSI-X entry. Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to toggle TPH support and configure specific ST mode as needed. Also add a new kernel parameter, "pci=notph", allowing users to disable TPH support across the entire system. Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com Co-developed-by: Jing Liu Co-developed-by: Paul Luse Co-developed-by: Eric Van Tassell Signed-off-by: Jing Liu Signed-off-by: Paul Luse Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner --- .../admin-guide/kernel-parameters.txt | 4 + drivers/pci/Kconfig | 9 + drivers/pci/Makefile | 1 + drivers/pci/pci.c | 4 + drivers/pci/pci.h | 12 ++ drivers/pci/probe.c | 1 + drivers/pci/tph.c | 197 ++++++++++++++++++ include/linux/pci-tph.h | 21 ++ include/linux/pci.h | 7 + include/uapi/linux/pci_regs.h | 37 +++- 10 files changed, 285 insertions(+), 8 deletions(-) create mode 100644 drivers/pci/tph.c create mode 100644 include/linux/pci-tph.h diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1518343bbe22..178995b07451 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4678,6 +4678,10 @@ nomio [S390] Do not use MIO instructions. norid [S390] ignore the RID field and force use of one PCI domain per PCI function + notph [PCIE] If the PCIE_TPH kernel config parameter + is enabled, this kernel boot option can be used + to disable PCIe TLP Processing Hints support + system-wide. pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power Management. diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0d94e4a967d8..2f270e4414b3 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -173,6 +173,15 @@ config PCI_PASID If unsure, say N. +config PCIE_TPH + bool "TLP Processing Hints" + help + This option adds support for PCIe TLP Processing Hints (TPH). + TPH allows endpoint devices to provide optimization hints, such as + desired caching behavior, for requests that target memory space. + These hints, called Steering Tags, can empower the system hardware + to optimize the utilization of platform resources. + config PCI_P2PDMA bool "PCI peer-to-peer transfer support" depends on ZONE_DEVICE diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 374c5c06d92f..b2a100f2e24a 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_VGA_ARB) += vgaarb.o obj-$(CONFIG_PCI_DOE) += doe.o obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o obj-$(CONFIG_PCI_NPEM) += npem.o +obj-$(CONFIG_PCIE_TPH) += tph.o # Endpoint library must be initialized before its users obj-$(CONFIG_PCI_ENDPOINT) += endpoint/ diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..89dafecc869b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1828,6 +1828,7 @@ int pci_save_state(struct pci_dev *dev) pci_save_dpc_state(dev); pci_save_aer_state(dev); pci_save_ptm_state(dev); + pci_save_tph_state(dev); return pci_save_vc_state(dev); } EXPORT_SYMBOL(pci_save_state); @@ -1933,6 +1934,7 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_rebar_state(dev); pci_restore_dpc_state(dev); pci_restore_ptm_state(dev); + pci_restore_tph_state(dev); pci_aer_clear_status(dev); pci_restore_aer_state(dev); @@ -6896,6 +6898,8 @@ static int __init pci_setup(char *str) pci_no_domains(); } else if (!strncmp(str, "noari", 5)) { pcie_ari_disabled = true; + } else if (!strncmp(str, "notph", 5)) { + pci_no_tph(); } else if (!strncmp(str, "cbiosize=", 9)) { pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..d89fdbf04f36 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -597,6 +597,18 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) #endif /* CONFIG_PCI_IOV */ +#ifdef CONFIG_PCIE_TPH +void pci_restore_tph_state(struct pci_dev *dev); +void pci_save_tph_state(struct pci_dev *dev); +void pci_no_tph(void); +void pci_tph_init(struct pci_dev *dev); +#else +static inline void pci_restore_tph_state(struct pci_dev *dev) { } +static inline void pci_save_tph_state(struct pci_dev *dev) { } +static inline void pci_no_tph(void) { } +static inline void pci_tph_init(struct pci_dev *dev) { } +#endif + #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); void pci_save_ptm_state(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..b086d53a9048 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2495,6 +2495,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_dpc_init(dev); /* Downstream Port Containment */ pci_rcec_init(dev); /* Root Complex Event Collector */ pci_doe_init(dev); /* Data Object Exchange */ + pci_tph_init(dev); /* TLP Processing Hints */ pcie_report_downtraining(dev); pci_init_reset_methods(dev); diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c new file mode 100644 index 000000000000..4d6317cbf8a6 --- /dev/null +++ b/drivers/pci/tph.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TPH (TLP Processing Hints) support + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell + * Wei Huang + */ +#include +#include +#include + +#include "pci.h" + +/* System-wide TPH disabled */ +static bool pci_tph_disabled; + +static u8 get_st_modes(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + reg &= PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV | PCI_TPH_CAP_ST_DS; + + return reg; +} + +/* Return device's Root Port completer capability */ +static u8 get_rp_completer_type(struct pci_dev *pdev) +{ + struct pci_dev *rp; + u32 reg; + int ret; + + rp = pcie_find_root_port(pdev); + if (!rp) + return 0; + + ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, ®); + if (ret) + return 0; + + return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); +} + +/** + * pcie_disable_tph - Turn off TPH support for device + * @pdev: PCI device + * + * Return: none + */ +void pcie_disable_tph(struct pci_dev *pdev) +{ + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, 0); + + pdev->tph_mode = 0; + pdev->tph_req_type = 0; + pdev->tph_enabled = 0; +} +EXPORT_SYMBOL(pcie_disable_tph); + +/** + * pcie_enable_tph - Enable TPH support for device using a specific ST mode + * @pdev: PCI device + * @mode: ST mode to enable. Current supported modes include: + * + * - PCI_TPH_ST_NS_MODE: NO ST Mode + * - PCI_TPH_ST_IV_MODE: Interrupt Vector Mode + * - PCI_TPH_ST_DS_MODE: Device Specific Mode + * + * Check whether the mode is actually supported by the device before enabling + * and return an error if not. Additionally determine what types of requests, + * TPH or extended TPH, can be issued by the device based on its TPH requester + * capability and the Root Port's completer capability. + * + * Return: 0 on success, otherwise negative value (-errno) + */ +int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ + u32 reg; + u8 dev_modes; + u8 rp_req_type; + + /* Honor "notph" kernel parameter */ + if (pci_tph_disabled) + return -EINVAL; + + if (!pdev->tph_cap) + return -EINVAL; + + if (pdev->tph_enabled) + return -EBUSY; + + /* Sanitize and check ST mode compatibility */ + mode &= PCI_TPH_CTRL_MODE_SEL_MASK; + dev_modes = get_st_modes(pdev); + if (!((1 << mode) & dev_modes)) + return -EINVAL; + + pdev->tph_mode = mode; + + /* Get req_type supported by device and its Root Port */ + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + if (FIELD_GET(PCI_TPH_CAP_EXT_TPH, reg)) + pdev->tph_req_type = PCI_TPH_REQ_EXT_TPH; + else + pdev->tph_req_type = PCI_TPH_REQ_TPH_ONLY; + + rp_req_type = get_rp_completer_type(pdev); + + /* Final req_type is the smallest value of two */ + pdev->tph_req_type = min(pdev->tph_req_type, rp_req_type); + + if (pdev->tph_req_type == PCI_TPH_REQ_DISABLE) + return -EINVAL; + + /* Write them into TPH control register */ + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ®); + + reg &= ~PCI_TPH_CTRL_MODE_SEL_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_MODE_SEL_MASK, pdev->tph_mode); + + reg &= ~PCI_TPH_CTRL_REQ_EN_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, pdev->tph_req_type); + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); + + pdev->tph_enabled = 1; + + return 0; +} +EXPORT_SYMBOL(pcie_enable_tph); + +void pci_restore_tph_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH); + if (!save_state) + return; + + /* Restore control register and all ST entries */ + cap = &save_state->cap.data[0]; + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++); +} + +void pci_save_tph_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH); + if (!save_state) + return; + + /* Save control register */ + cap = &save_state->cap.data[0]; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++); +} + +void pci_no_tph(void) +{ + pci_tph_disabled = true; + + pr_info("PCIe TPH is disabled\n"); +} + +void pci_tph_init(struct pci_dev *pdev) +{ + u32 save_size; + + pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH); + if (!pdev->tph_cap) + return; + + save_size = sizeof(u32); + pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size); +} diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h new file mode 100644 index 000000000000..58654a334ffb --- /dev/null +++ b/include/linux/pci-tph.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TPH (TLP Processing Hints) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell + * Wei Huang + */ +#ifndef LINUX_PCI_TPH_H +#define LINUX_PCI_TPH_H + +#ifdef CONFIG_PCIE_TPH +void pcie_disable_tph(struct pci_dev *pdev); +int pcie_enable_tph(struct pci_dev *pdev, int mode); +#else +static inline void pcie_disable_tph(struct pci_dev *pdev) { } +static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ return -EINVAL; } +#endif + +#endif /* LINUX_PCI_TPH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..8351d76b6e12 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -434,6 +434,7 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ + unsigned int tph_enabled:1; /* TLP Processing Hints */ unsigned int is_managed:1; /* Managed via devres */ unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ @@ -534,6 +535,12 @@ struct pci_dev { /* These methods index pci_reset_fn_methods[] */ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ + +#ifdef CONFIG_PCIE_TPH + u16 tph_cap; /* TPH capability offset */ + u8 tph_mode; /* TPH mode */ + u8 tph_req_type; /* TPH requester type */ +#endif }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..155dea741615 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -340,7 +340,8 @@ #define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ #define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ #define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* Mask Bit */ +#define PCI_MSIX_ENTRY_CTRL_ST 0xffff0000 /* Steering Tag */ /* CompactPCI Hotswap Register */ @@ -659,6 +660,7 @@ #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */ #define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_TPH_COMP_MASK 0x00003000 /* TPH completer support */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ @@ -1023,15 +1025,34 @@ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ +/* TPH Completer Support */ +#define PCI_EXP_DEVCAP2_TPH_COMP_NONE 0x0 /* None */ +#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY 0x1 /* TPH only */ +#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH 0x3 /* TPH and Extended TPH */ + /* TPH Requester */ #define PCI_TPH_CAP 4 /* capability register */ -#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ -#define PCI_TPH_LOC_NONE 0x000 /* no location */ -#define PCI_TPH_LOC_CAP 0x200 /* in capability */ -#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ -#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ +#define PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported */ +#define PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode Supported */ +#define PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode Supported */ +#define PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester Supported */ +#define PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */ +#define PCI_TPH_LOC_NONE 0x00000000 /* Not present */ +#define PCI_TPH_LOC_CAP 0x00000200 /* In capability */ +#define PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */ + +#define PCI_TPH_CTRL 8 /* control register */ +#define PCI_TPH_CTRL_MODE_SEL_MASK 0x00000007 /* ST Mode Select */ +#define PCI_TPH_ST_NS_MODE 0x0 /* No ST Mode */ +#define PCI_TPH_ST_IV_MODE 0x1 /* Interrupt Vector Mode */ +#define PCI_TPH_ST_DS_MODE 0x2 /* Device Specific Mode */ +#define PCI_TPH_CTRL_REQ_EN_MASK 0x00000300 /* TPH Requester Enable */ +#define PCI_TPH_REQ_DISABLE 0x0 /* No TPH requests allowed */ +#define PCI_TPH_REQ_TPH_ONLY 0x1 /* TPH only requests allowed */ +#define PCI_TPH_REQ_EXT_TPH 0x3 /* Extended TPH requests allowed */ /* Downstream Port Containment */ #define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ From d2e8a34876ce69b27f450eebfc550ab8e316f752 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:51 -0500 Subject: [PATCH 005/127] PCI/TPH: Add Steering Tag support Add pcie_tph_get_cpu_st() to allow a caller to retrieve Steering Tags for a target memory associated with a specific CPU. The ST tag is retrieved by invoking PCI ACPI "_DSM to Query Cache Locality TPH Features" method (rev=0x7, func=0xF) of the device's Root Port device. Add pcie_tph_set_st_entry() to update the device's Steering Tags. The tags will be written into the device's MSI-X table or the ST table located in the TPH Extended Capability space. Co-developed-by: Eric Van Tassell Link: https://lore.kernel.org/r/20241002165954.128085-3-wei.huang2@amd.com Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek --- drivers/pci/tph.c | 352 +++++++++++++++++++++++++++++++++++++++- include/linux/pci-tph.h | 23 +++ 2 files changed, 374 insertions(+), 1 deletion(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 4d6317cbf8a6..1e604fbbda65 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -7,6 +7,8 @@ * Wei Huang */ #include +#include +#include #include #include @@ -15,6 +17,134 @@ /* System-wide TPH disabled */ static bool pci_tph_disabled; +#ifdef CONFIG_ACPI +/* + * The st_info struct defines the Steering Tag (ST) info returned by the + * firmware PCI ACPI _DSM method (rev=0x7, func=0xF, "_DSM to Query Cache + * Locality TPH Features"), as specified in the approved ECN for PCI Firmware + * Spec and available at https://members.pcisig.com/wg/PCI-SIG/document/15470. + * + * @vm_st_valid: 8-bit ST for volatile memory is valid + * @vm_xst_valid: 16-bit extended ST for volatile memory is valid + * @vm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @vm_st: 8-bit ST for volatile mem + * @vm_xst: 16-bit extended ST for volatile mem + * @pm_st_valid: 8-bit ST for persistent memory is valid + * @pm_xst_valid: 16-bit extended ST for persistent memory is valid + * @pm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @pm_st: 8-bit ST for persistent mem + * @pm_xst: 16-bit extended ST for persistent mem + */ +union st_info { + struct { + u64 vm_st_valid : 1; + u64 vm_xst_valid : 1; + u64 vm_ph_ignore : 1; + u64 rsvd1 : 5; + u64 vm_st : 8; + u64 vm_xst : 16; + u64 pm_st_valid : 1; + u64 pm_xst_valid : 1; + u64 pm_ph_ignore : 1; + u64 rsvd2 : 5; + u64 pm_st : 8; + u64 pm_xst : 16; + }; + u64 value; +}; + +static u16 tph_extract_tag(enum tph_mem_type mem_type, u8 req_type, + union st_info *info) +{ + switch (req_type) { + case PCI_TPH_REQ_TPH_ONLY: /* 8-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_st_valid) + return info->vm_st; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_st_valid) + return info->pm_st; + break; + } + break; + case PCI_TPH_REQ_EXT_TPH: /* 16-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_xst_valid) + return info->vm_xst; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_xst_valid) + return info->pm_xst; + break; + } + break; + default: + return 0; + } + + return 0; +} + +#define TPH_ST_DSM_FUNC_INDEX 0xF +static acpi_status tph_invoke_dsm(acpi_handle handle, u32 cpu_uid, + union st_info *st_out) +{ + union acpi_object arg3[3], in_obj, *out_obj; + + if (!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 7, + BIT(TPH_ST_DSM_FUNC_INDEX))) + return AE_ERROR; + + /* DWORD: feature ID (0 for processor cache ST query) */ + arg3[0].integer.type = ACPI_TYPE_INTEGER; + arg3[0].integer.value = 0; + + /* DWORD: target UID */ + arg3[1].integer.type = ACPI_TYPE_INTEGER; + arg3[1].integer.value = cpu_uid; + + /* QWORD: properties, all 0's */ + arg3[2].integer.type = ACPI_TYPE_INTEGER; + arg3[2].integer.value = 0; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = ARRAY_SIZE(arg3); + in_obj.package.elements = arg3; + + out_obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 7, + TPH_ST_DSM_FUNC_INDEX, &in_obj); + if (!out_obj) + return AE_ERROR; + + if (out_obj->type != ACPI_TYPE_BUFFER) { + ACPI_FREE(out_obj); + return AE_ERROR; + } + + st_out->value = *((u64 *)(out_obj->buffer.pointer)); + + ACPI_FREE(out_obj); + + return AE_OK; +} +#endif + +/* Update the TPH Requester Enable field of TPH Control Register */ +static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ®); + + reg &= ~PCI_TPH_CTRL_REQ_EN_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, req_type); + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); +} + static u8 get_st_modes(struct pci_dev *pdev) { u32 reg; @@ -25,6 +155,37 @@ static u8 get_st_modes(struct pci_dev *pdev) return reg; } +static u32 get_st_table_loc(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg); +} + +/* + * Return the size of ST table. If ST table is not in TPH Requester Extended + * Capability space, return 0. Otherwise return the ST Table Size + 1. + */ +static u16 get_st_table_size(struct pci_dev *pdev) +{ + u32 reg; + u32 loc; + + /* Check ST table location first */ + loc = get_st_table_loc(pdev); + + /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + if (loc != PCI_TPH_LOC_CAP) + return 0; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_ST_MASK, reg) + 1; +} + /* Return device's Root Port completer capability */ static u8 get_rp_completer_type(struct pci_dev *pdev) { @@ -43,6 +204,171 @@ static u8 get_rp_completer_type(struct pci_dev *pdev) return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); } +/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */ +static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag) +{ +#ifdef CONFIG_PCI_MSI + struct msi_desc *msi_desc = NULL; + void __iomem *vec_ctrl; + u32 val; + int err = 0; + + msi_lock_descs(&pdev->dev); + + /* Find the msi_desc entry with matching msix_idx */ + msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) { + if (msi_desc->msi_index == msix_idx) + break; + } + + if (!msi_desc) { + err = -ENXIO; + goto err_out; + } + + /* Get the vector control register (offset 0xc) pointed by msix_idx */ + vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE; + vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = readl(vec_ctrl); + val &= ~PCI_MSIX_ENTRY_CTRL_ST; + val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); + writel(val, vec_ctrl); + + /* Read back to flush the update */ + val = readl(vec_ctrl); + +err_out: + msi_unlock_descs(&pdev->dev); + return err; +#else + return -ENODEV; +#endif +} + +/* Write tag to ST table - Return 0 if OK, otherwise -errno */ +static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag) +{ + int st_table_size; + int offset; + + /* Check if index is out of bound */ + st_table_size = get_st_table_size(pdev); + if (index >= st_table_size) + return -ENXIO; + + offset = pdev->tph_cap + PCI_TPH_BASE_SIZEOF + index * sizeof(u16); + + return pci_write_config_word(pdev, offset, tag); +} + +/** + * pcie_tph_get_cpu_st() - Retrieve Steering Tag for a target memory associated + * with a specific CPU + * @pdev: PCI device + * @mem_type: target memory type (volatile or persistent RAM) + * @cpu_uid: associated CPU id + * @tag: Steering Tag to be returned + * + * Return the Steering Tag for a target memory that is associated with a + * specific CPU as indicated by cpu_uid. + * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ +#ifdef CONFIG_ACPI + struct pci_dev *rp; + acpi_handle rp_acpi_handle; + union st_info info; + + rp = pcie_find_root_port(pdev); + if (!rp || !rp->bus || !rp->bus->bridge) + return -ENODEV; + + rp_acpi_handle = ACPI_HANDLE(rp->bus->bridge); + + if (tph_invoke_dsm(rp_acpi_handle, cpu_uid, &info) != AE_OK) { + *tag = 0; + return -EINVAL; + } + + *tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info); + + pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%d, tag=%#04x\n", + (mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent", + cpu_uid, *tag); + + return 0; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(pcie_tph_get_cpu_st); + +/** + * pcie_tph_set_st_entry() - Set Steering Tag in the ST table entry + * @pdev: PCI device + * @index: ST table entry index + * @tag: Steering Tag to be written + * + * Figure out the proper location of ST table, either in the MSI-X table or + * in the TPH Extended Capability space, and write the Steering Tag into + * the ST entry pointed by index. + * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + u32 loc; + int err = 0; + + if (!pdev->tph_cap) + return -EINVAL; + + if (!pdev->tph_enabled) + return -EINVAL; + + /* No need to write tag if device is in "No ST Mode" */ + if (pdev->tph_mode == PCI_TPH_ST_NS_MODE) + return 0; + + /* + * Disable TPH before updating ST to avoid potential instability as + * cautioned in PCIe r6.2, sec 6.17.3, "ST Modes of Operation" + */ + set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE); + + loc = get_st_table_loc(pdev); + /* Convert loc to match with PCI_TPH_LOC_* */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + + switch (loc) { + case PCI_TPH_LOC_MSIX: + err = write_tag_to_msix(pdev, index, tag); + break; + case PCI_TPH_LOC_CAP: + err = write_tag_to_st_table(pdev, index, tag); + break; + default: + err = -EINVAL; + } + + if (err) { + pcie_disable_tph(pdev); + return err; + } + + set_ctrl_reg_req_en(pdev, pdev->tph_mode); + + pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", + (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag); + + return 0; +} +EXPORT_SYMBOL(pcie_tph_set_st_entry); + /** * pcie_disable_tph - Turn off TPH support for device * @pdev: PCI device @@ -140,6 +466,8 @@ EXPORT_SYMBOL(pcie_enable_tph); void pci_restore_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -155,11 +483,21 @@ void pci_restore_tph_state(struct pci_dev *pdev) /* Restore control register and all ST entries */ cap = &save_state->cap.data[0]; pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++); + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_write_config_word(pdev, pdev->tph_cap + offset, + *st_entry++); + offset += sizeof(u16); + } } void pci_save_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -175,6 +513,16 @@ void pci_save_tph_state(struct pci_dev *pdev) /* Save control register */ cap = &save_state->cap.data[0]; pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++); + + /* Save all ST entries in extended capability structure */ + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_read_config_word(pdev, pdev->tph_cap + offset, + st_entry++); + offset += sizeof(u16); + } } void pci_no_tph(void) @@ -186,12 +534,14 @@ void pci_no_tph(void) void pci_tph_init(struct pci_dev *pdev) { + int num_entries; u32 save_size; pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH); if (!pdev->tph_cap) return; - save_size = sizeof(u32); + num_entries = get_st_table_size(pdev); + save_size = sizeof(u32) + num_entries * sizeof(u16); pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size); } diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index 58654a334ffb..c3e806c13d64 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -9,10 +9,33 @@ #ifndef LINUX_PCI_TPH_H #define LINUX_PCI_TPH_H +/* + * According to the ECN for PCI Firmware Spec, Steering Tag can be different + * depending on the memory type: Volatile Memory or Persistent Memory. When a + * caller query about a target's Steering Tag, it must provide the target's + * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470. + */ +enum tph_mem_type { + TPH_MEM_TYPE_VM, /* volatile memory */ + TPH_MEM_TYPE_PM /* persistent memory */ +}; + #ifdef CONFIG_PCIE_TPH +int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag); +int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); #else +static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag) +{ return -EINVAL; } +static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ return -EINVAL; } static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } From 2985b1844f3f3447f2d938eff1ef6762592065a5 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 1 Oct 2024 23:11:47 +0000 Subject: [PATCH 006/127] PCI: Fix reset_method_store() memory leak In reset_method_store(), a string is allocated via kstrndup() and assigned to the local "options". options is then used in with strsep() to find spaces: while ((name = strsep(&options, " ")) != NULL) { If there are no remaining spaces, then options is set to NULL by strsep(), so the subsequent kfree(options) doesn't free the memory allocated via kstrndup(). Fix by using a separate tmp_options to iterate with strsep() so options is preserved. Link: https://lore.kernel.org/r/20241001231147.3583649-1-tkjos@google.com Fixes: d88f521da3ef ("PCI: Allow userspace to query and set device reset mechanism") Signed-off-by: Todd Kjos Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..0e6562ff3dcf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5244,7 +5244,7 @@ static ssize_t reset_method_store(struct device *dev, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); - char *options, *name; + char *options, *tmp_options, *name; int m, n; u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 }; @@ -5264,7 +5264,8 @@ static ssize_t reset_method_store(struct device *dev, return -ENOMEM; n = 0; - while ((name = strsep(&options, " ")) != NULL) { + tmp_options = options; + while ((name = strsep(&tmp_options, " ")) != NULL) { if (sysfs_streq(name, "")) continue; From 3c87b3c85a5344098f239bf8fceee1db691e3a0c Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Mon, 23 Sep 2024 08:57:06 +0200 Subject: [PATCH 007/127] PCI: controller: Switch back to struct platform_driver::remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all PCI controller drivers to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer. Link: https://lore.kernel.org/r/20240923065706.728769-1-sergio.paracuellos@gmail.com Signed-off-by: Sergio Paracuellos [bhelgaas: add pcie-xilinx-nwl.c and tidy whitespace per Uwe Kleine-König: https://lore.kernel.org/r/tdxrmmqyzcufupnwkdbg7lwgadizm7v3lxjirykijbml7x54ze@upbdzycdsilm] Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/cadence/pci-j721e.c | 2 +- drivers/pci/controller/dwc/pci-exynos.c | 2 +- drivers/pci/controller/dwc/pci-keystone.c | 2 +- drivers/pci/controller/dwc/pcie-bt1.c | 2 +- drivers/pci/controller/dwc/pcie-histb.c | 2 +- drivers/pci/controller/dwc/pcie-intel-gw.c | 2 +- drivers/pci/controller/dwc/pcie-kirin.c | 2 +- drivers/pci/controller/dwc/pcie-qcom-ep.c | 2 +- drivers/pci/controller/dwc/pcie-rcar-gen4.c | 2 +- drivers/pci/controller/dwc/pcie-tegra194.c | 2 +- drivers/pci/controller/pci-aardvark.c | 2 +- drivers/pci/controller/pci-host-generic.c | 2 +- drivers/pci/controller/pci-mvebu.c | 2 +- drivers/pci/controller/pci-tegra.c | 2 +- drivers/pci/controller/pci-xgene-msi.c | 2 +- drivers/pci/controller/pcie-altera-msi.c | 2 +- drivers/pci/controller/pcie-altera.c | 6 +++--- drivers/pci/controller/pcie-brcmstb.c | 2 +- drivers/pci/controller/pcie-hisi-error.c | 2 +- drivers/pci/controller/pcie-iproc-platform.c | 2 +- drivers/pci/controller/pcie-mediatek-gen3.c | 2 +- drivers/pci/controller/pcie-mediatek.c | 2 +- drivers/pci/controller/pcie-mt7621.c | 2 +- drivers/pci/controller/pcie-rockchip-host.c | 2 +- drivers/pci/controller/pcie-xilinx-nwl.c | 2 +- drivers/pci/controller/plda/pcie-starfive.c | 2 +- 26 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index 284f2e0e4d26..ca24df236976 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -712,7 +712,7 @@ static DEFINE_NOIRQ_DEV_PM_OPS(j721e_pcie_pm_ops, static struct platform_driver j721e_pcie_driver = { .probe = j721e_pcie_probe, - .remove_new = j721e_pcie_remove, + .remove = j721e_pcie_remove, .driver = { .name = "j721e-pcie", .of_match_table = of_j721e_pcie_match, diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c index fa45da28a218..6a830166d37f 100644 --- a/drivers/pci/controller/dwc/pci-exynos.c +++ b/drivers/pci/controller/dwc/pci-exynos.c @@ -383,7 +383,7 @@ static const struct of_device_id exynos_pcie_of_match[] = { static struct platform_driver exynos_pcie_driver = { .probe = exynos_pcie_probe, - .remove_new = exynos_pcie_remove, + .remove = exynos_pcie_remove, .driver = { .name = "exynos-pcie", .of_match_table = exynos_pcie_of_match, diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index 2219b1a866fa..b82e13e65e1a 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -1363,7 +1363,7 @@ static void ks_pcie_remove(struct platform_device *pdev) static struct platform_driver ks_pcie_driver = { .probe = ks_pcie_probe, - .remove_new = ks_pcie_remove, + .remove = ks_pcie_remove, .driver = { .name = "keystone-pcie", .of_match_table = ks_pcie_of_match, diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c index 76d0ddea8007..1340edc18d12 100644 --- a/drivers/pci/controller/dwc/pcie-bt1.c +++ b/drivers/pci/controller/dwc/pcie-bt1.c @@ -632,7 +632,7 @@ MODULE_DEVICE_TABLE(of, bt1_pcie_of_match); static struct platform_driver bt1_pcie_driver = { .probe = bt1_pcie_probe, - .remove_new = bt1_pcie_remove, + .remove = bt1_pcie_remove, .driver = { .name = "bt1-pcie", .of_match_table = bt1_pcie_of_match, diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c index 7a11c618b9d9..615a0e3e6d7e 100644 --- a/drivers/pci/controller/dwc/pcie-histb.c +++ b/drivers/pci/controller/dwc/pcie-histb.c @@ -439,7 +439,7 @@ MODULE_DEVICE_TABLE(of, histb_pcie_of_match); static struct platform_driver histb_pcie_platform_driver = { .probe = histb_pcie_probe, - .remove_new = histb_pcie_remove, + .remove = histb_pcie_remove, .driver = { .name = "histb-pcie", .of_match_table = histb_pcie_of_match, diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c index 676d2aba4fbd..9b53b8f6f268 100644 --- a/drivers/pci/controller/dwc/pcie-intel-gw.c +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c @@ -443,7 +443,7 @@ static const struct of_device_id of_intel_pcie_match[] = { static struct platform_driver intel_pcie_driver = { .probe = intel_pcie_probe, - .remove_new = intel_pcie_remove, + .remove = intel_pcie_remove, .driver = { .name = "intel-gw-pcie", .of_match_table = of_intel_pcie_match, diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c index 85a2c77b1835..1b2088acb538 100644 --- a/drivers/pci/controller/dwc/pcie-kirin.c +++ b/drivers/pci/controller/dwc/pcie-kirin.c @@ -769,7 +769,7 @@ static int kirin_pcie_probe(struct platform_device *pdev) static struct platform_driver kirin_pcie_driver = { .probe = kirin_pcie_probe, - .remove_new = kirin_pcie_remove, + .remove = kirin_pcie_remove, .driver = { .name = "kirin-pcie", .of_match_table = kirin_pcie_match, diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index e588fcc54589..d663c5206450 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -937,7 +937,7 @@ MODULE_DEVICE_TABLE(of, qcom_pcie_ep_match); static struct platform_driver qcom_pcie_ep_driver = { .probe = qcom_pcie_ep_probe, - .remove_new = qcom_pcie_ep_remove, + .remove = qcom_pcie_ep_remove, .driver = { .name = "qcom-pcie-ep", .of_match_table = qcom_pcie_ep_match, diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c index 3a5511c3f7d9..fc872dd35029 100644 --- a/drivers/pci/controller/dwc/pcie-rcar-gen4.c +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c @@ -775,7 +775,7 @@ static struct platform_driver rcar_gen4_pcie_driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, .probe = rcar_gen4_pcie_probe, - .remove_new = rcar_gen4_pcie_remove, + .remove = rcar_gen4_pcie_remove, }; module_platform_driver(rcar_gen4_pcie_driver); diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index c1394f2ab63f..f4bd7f5275cf 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -2493,7 +2493,7 @@ static const struct dev_pm_ops tegra_pcie_dw_pm_ops = { static struct platform_driver tegra_pcie_dw_driver = { .probe = tegra_pcie_dw_probe, - .remove_new = tegra_pcie_dw_remove, + .remove = tegra_pcie_dw_remove, .shutdown = tegra_pcie_dw_shutdown, .driver = { .name = "tegra194-pcie", diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index a598a98247ce..a29796cce420 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -1996,7 +1996,7 @@ static struct platform_driver advk_pcie_driver = { .of_match_table = advk_pcie_of_match_table, }, .probe = advk_pcie_probe, - .remove_new = advk_pcie_remove, + .remove = advk_pcie_remove, }; module_platform_driver(advk_pcie_driver); diff --git a/drivers/pci/controller/pci-host-generic.c b/drivers/pci/controller/pci-host-generic.c index 5f06f94db7b1..4051b9b61dac 100644 --- a/drivers/pci/controller/pci-host-generic.c +++ b/drivers/pci/controller/pci-host-generic.c @@ -82,7 +82,7 @@ static struct platform_driver gen_pci_driver = { .of_match_table = gen_pci_of_match, }, .probe = pci_host_common_probe, - .remove_new = pci_host_common_remove, + .remove = pci_host_common_remove, }; module_platform_driver(gen_pci_driver); diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 29fe09c99e7d..46d3afe1d308 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1727,7 +1727,7 @@ static struct platform_driver mvebu_pcie_driver = { .pm = &mvebu_pcie_pm_ops, }, .probe = mvebu_pcie_probe, - .remove_new = mvebu_pcie_remove, + .remove = mvebu_pcie_remove, }; module_platform_driver(mvebu_pcie_driver); diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index d7517c3976e7..1f0a872790e5 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2800,6 +2800,6 @@ static struct platform_driver tegra_pcie_driver = { .pm = &tegra_pcie_pm_ops, }, .probe = tegra_pcie_probe, - .remove_new = tegra_pcie_remove, + .remove = tegra_pcie_remove, }; module_platform_driver(tegra_pcie_driver); diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 3ce38dfd0d29..88c0977bc41a 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -518,7 +518,7 @@ static struct platform_driver xgene_msi_driver = { .of_match_table = xgene_msi_match_table, }, .probe = xgene_msi_probe, - .remove_new = xgene_msi_remove, + .remove = xgene_msi_remove, }; static int __init xgene_pcie_msi_init(void) diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index e36a6e158d23..e1cee3c0575f 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -267,7 +267,7 @@ static struct platform_driver altera_msi_driver = { .of_match_table = altera_msi_of_match, }, .probe = altera_msi_probe, - .remove_new = altera_msi_remove, + .remove = altera_msi_remove, }; static int __init altera_msi_init(void) diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c index 650b2dd81c48..eb55a7f8573a 100644 --- a/drivers/pci/controller/pcie-altera.c +++ b/drivers/pci/controller/pcie-altera.c @@ -815,10 +815,10 @@ static void altera_pcie_remove(struct platform_device *pdev) } static struct platform_driver altera_pcie_driver = { - .probe = altera_pcie_probe, - .remove_new = altera_pcie_remove, + .probe = altera_pcie_probe, + .remove = altera_pcie_remove, .driver = { - .name = "altera-pcie", + .name = "altera-pcie", .of_match_table = altera_pcie_of_match, }, }; diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 9321280f6edb..e733a27dc8df 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -1928,7 +1928,7 @@ static const struct dev_pm_ops brcm_pcie_pm_ops = { static struct platform_driver brcm_pcie_driver = { .probe = brcm_pcie_probe, - .remove_new = brcm_pcie_remove, + .remove = brcm_pcie_remove, .driver = { .name = "brcm-pcie", .of_match_table = brcm_pcie_match, diff --git a/drivers/pci/controller/pcie-hisi-error.c b/drivers/pci/controller/pcie-hisi-error.c index ad9d5ffcd9e3..aaf1ed2b6e59 100644 --- a/drivers/pci/controller/pcie-hisi-error.c +++ b/drivers/pci/controller/pcie-hisi-error.c @@ -317,7 +317,7 @@ static struct platform_driver hisi_pcie_error_handler_driver = { .acpi_match_table = hisi_pcie_acpi_match, }, .probe = hisi_pcie_error_handler_probe, - .remove_new = hisi_pcie_error_handler_remove, + .remove = hisi_pcie_error_handler_remove, }; module_platform_driver(hisi_pcie_error_handler_driver); diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c index 4e6aa882a567..0cb78c583c7e 100644 --- a/drivers/pci/controller/pcie-iproc-platform.c +++ b/drivers/pci/controller/pcie-iproc-platform.c @@ -134,7 +134,7 @@ static struct platform_driver iproc_pltfm_pcie_driver = { .of_match_table = of_match_ptr(iproc_pcie_of_match_table), }, .probe = iproc_pltfm_pcie_probe, - .remove_new = iproc_pltfm_pcie_remove, + .remove = iproc_pltfm_pcie_remove, .shutdown = iproc_pltfm_pcie_shutdown, }; module_platform_driver(iproc_pltfm_pcie_driver); diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 66ce4b5d309b..969f62e9cf01 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -1225,7 +1225,7 @@ MODULE_DEVICE_TABLE(of, mtk_pcie_of_match); static struct platform_driver mtk_pcie_driver = { .probe = mtk_pcie_probe, - .remove_new = mtk_pcie_remove, + .remove = mtk_pcie_remove, .driver = { .name = "mtk-pcie-gen3", .of_match_table = mtk_pcie_of_match, diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index 7f7d04c2ea57..3bcfc4e58ba2 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -1235,7 +1235,7 @@ MODULE_DEVICE_TABLE(of, mtk_pcie_ids); static struct platform_driver mtk_pcie_driver = { .probe = mtk_pcie_probe, - .remove_new = mtk_pcie_remove, + .remove = mtk_pcie_remove, .driver = { .name = "mtk-pcie", .of_match_table = mtk_pcie_ids, diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index 9b4754a45515..776caa0b1011 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -541,7 +541,7 @@ MODULE_DEVICE_TABLE(of, mt7621_pcie_ids); static struct platform_driver mt7621_pcie_driver = { .probe = mt7621_pcie_probe, - .remove_new = mt7621_pcie_remove, + .remove = mt7621_pcie_remove, .driver = { .name = "mt7621-pci", .of_match_table = mt7621_pcie_ids, diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index cbec71114825..06cdb68f0920 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -1050,7 +1050,7 @@ static struct platform_driver rockchip_pcie_driver = { .pm = &rockchip_pcie_pm_ops, }, .probe = rockchip_pcie_probe, - .remove_new = rockchip_pcie_remove, + .remove = rockchip_pcie_remove, }; module_platform_driver(rockchip_pcie_driver); diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index a8ae14474dd0..8d6e2a89b067 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -916,6 +916,6 @@ static struct platform_driver nwl_pcie_driver = { .of_match_table = nwl_pcie_of_match, }, .probe = nwl_pcie_probe, - .remove_new = nwl_pcie_remove, + .remove = nwl_pcie_remove, }; builtin_platform_driver(nwl_pcie_driver); diff --git a/drivers/pci/controller/plda/pcie-starfive.c b/drivers/pci/controller/plda/pcie-starfive.c index c9933ecf6833..0567ec373a3e 100644 --- a/drivers/pci/controller/plda/pcie-starfive.c +++ b/drivers/pci/controller/plda/pcie-starfive.c @@ -480,7 +480,7 @@ static struct platform_driver starfive_pcie_driver = { .pm = pm_sleep_ptr(&starfive_pcie_pm_ops), }, .probe = starfive_pcie_probe, - .remove_new = starfive_pcie_remove, + .remove = starfive_pcie_remove, }; module_platform_driver(starfive_pcie_driver); From 43d6961cdf1d0621c55638ed33554027978ae1fe Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Fri, 27 Sep 2024 11:24:49 +0200 Subject: [PATCH 008/127] PCI: acpiphp_ampere_altra: Switch back to struct platform_driver::remove() After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all PCI controller drivers to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer. Link: https://lore.kernel.org/r/20240927092449.44628-1-sergio.paracuellos@gmail.com Signed-off-by: Sergio Paracuellos Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/acpiphp_ampere_altra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_ampere_altra.c b/drivers/pci/hotplug/acpiphp_ampere_altra.c index f5c9e741c1d4..70dbc0431fc6 100644 --- a/drivers/pci/hotplug/acpiphp_ampere_altra.c +++ b/drivers/pci/hotplug/acpiphp_ampere_altra.c @@ -119,7 +119,7 @@ static struct platform_driver altra_led_driver = { .acpi_match_table = altra_led_ids, }, .probe = altra_led_probe, - .remove_new = altra_led_remove, + .remove = altra_led_remove, }; module_platform_driver(altra_led_driver); From 3ca258994b32c486b0ed59307ec31ee413524598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 4 Oct 2024 18:22:40 +0300 Subject: [PATCH 009/127] PCI: Simplify pci_create_slot() logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify pci_create_slot() by splitting an "if" statement into two parts. In order to not duplicate error handling, add a new label to handle kobj put. Link: https://lore.kernel.org/r/20241004152240.7926-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/slot.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 0f87cade10f7..9ac5a4f26794 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -244,12 +244,13 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, slot = get_slot(parent, slot_nr); if (slot) { if (hotplug) { - if ((err = slot->hotplug ? -EBUSY : 0) - || (err = rename_slot(slot, name))) { - kobject_put(&slot->kobj); - slot = NULL; - goto err; + if (slot->hotplug) { + err = -EBUSY; + goto put_slot; } + err = rename_slot(slot, name); + if (err) + goto put_slot; } goto out; } @@ -278,10 +279,8 @@ placeholder: err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL, "%s", slot_name); - if (err) { - kobject_put(&slot->kobj); - goto err; - } + if (err) + goto put_slot; down_read(&pci_bus_sem); list_for_each_entry(dev, &parent->devices, bus_list) @@ -296,6 +295,9 @@ out: kfree(slot_name); mutex_unlock(&pci_slot_mutex); return slot; + +put_slot: + kobject_put(&slot->kobj); err: slot = ERR_PTR(err); goto out; From 9fb6fef0fb49124291837af1da5028f79d53f98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 14 Jun 2024 13:06:03 +0300 Subject: [PATCH 010/127] resource: Add resource set range and size helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the end address for a resource with a given size lacks a helper and is therefore coded manually unlike the getter side which has a helper for resource size calculation. Also, almost all callsites that calculate the end address for a resource also set the start address right before it like this: res->start = start_addr; res->end = res->start + size - 1; Add resource_set_range(res, start_addr, size) that sets the start address and calculates the end address to simplify this often repeated fragment. Also add resource_set_size() for the cases where setting the start address of the resource is not necessary but mention in its kerneldoc that resource_set_range() is preferred when setting both addresses. Link: https://lore.kernel.org/r/20240614100606.15830-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/ioport.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6e9fb667a1c5..5385349f0b8a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); + +/** + * resource_set_size - Calculate resource end address from size and start + * @res: Resource descriptor + * @size: Size of the resource + * + * Calculate the end address for @res based on @size. + * + * Note: The start address of @res must be set when calling this function. + * Prefer resource_set_range() if setting both the start address and @size. + */ +static inline void resource_set_size(struct resource *res, resource_size_t size) +{ + res->end = res->start + size - 1; +} + +/** + * resource_set_range - Set resource start and end addresses + * @res: Resource descriptor + * @start: Start address for the resource + * @size: Size of the resource + * + * Set @res start address and calculate the end address based on @size. + */ +static inline void resource_set_range(struct resource *res, + resource_size_t start, + resource_size_t size) +{ + res->start = start; + resource_set_size(res, size); +} + static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; From 783602c920e90023e6d38274d40e32979787d130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 14 Jun 2024 13:06:04 +0300 Subject: [PATCH 011/127] PCI: Use resource_set_{range,size}() helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert open-coded resource size calculations to use resource_set_{range,size}() helpers. While at it, use SZ_* for size parameter where appropriate which makes the intent of code more obvious. Also, cast sizes to resource_size_t, not u64. Link: https://lore.kernel.org/r/20240614100606.15830-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/controller/pci-tegra.c | 2 +- drivers/pci/controller/pci-thunder-pem.c | 4 +-- drivers/pci/ecam.c | 2 +- drivers/pci/iov.c | 6 ++-- drivers/pci/pci.c | 3 +- drivers/pci/quirks.c | 20 ++++++-------- drivers/pci/setup-bus.c | 35 ++++++++++-------------- drivers/pci/setup-res.c | 7 ++--- 8 files changed, 34 insertions(+), 45 deletions(-) diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index d7517c3976e7..0471f2f7a6d1 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -1460,7 +1460,7 @@ static int tegra_pcie_get_resources(struct tegra_pcie *pcie) pcie->cs = *res; /* constrain configuration space to 4 KiB */ - pcie->cs.end = pcie->cs.start + SZ_4K - 1; + resource_set_size(&pcie->cs, SZ_4K); pcie->cfg = devm_ioremap_resource(dev, &pcie->cs); if (IS_ERR(pcie->cfg)) { diff --git a/drivers/pci/controller/pci-thunder-pem.c b/drivers/pci/controller/pci-thunder-pem.c index 06a9855cb431..f1bd5de67997 100644 --- a/drivers/pci/controller/pci-thunder-pem.c +++ b/drivers/pci/controller/pci-thunder-pem.c @@ -400,9 +400,9 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) * Reserve 64K size PEM specific resources. The full 16M range * size is required for thunder_pem_init() call. */ - res_pem->end = res_pem->start + SZ_64K - 1; + resource_set_size(res_pem, SZ_64K); thunder_pem_reserve_range(dev, root->segment, res_pem); - res_pem->end = res_pem->start + SZ_16M - 1; + resource_set_size(res_pem, SZ_16M); /* Reserve PCI configuration space as well. */ thunder_pem_reserve_range(dev, root->segment, &cfg->res); diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 1c40d2506aef..260b7de2dbd5 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -55,7 +55,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, bus_range_max = resource_size(cfgres) >> bus_shift; if (bus_range > bus_range_max) { bus_range = bus_range_max; - cfg->busr.end = busr->start + bus_range - 1; + resource_set_size(&cfg->busr, bus_range); dev_warn(dev, "ECAM area %pR can only accommodate %pR (reduced from %pR desired)\n", cfgres, &cfg->busr, busr); } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index aaa33e8dc4c9..4be402fe9ab9 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -327,8 +327,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id) virtfn->resource[i].name = pci_name(virtfn); virtfn->resource[i].flags = res->flags; size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - virtfn->resource[i].start = res->start + size * id; - virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + resource_set_range(&virtfn->resource[i], + res->start + size * id, size); rc = request_resource(res, &virtfn->resource[i]); BUG_ON(rc); } @@ -804,7 +804,7 @@ found: goto failed; } iov->barsz[i] = resource_size(res); - res->end = res->start + resource_size(res) * total - 1; + resource_set_size(res, resource_size(res) * total); pci_info(dev, "%s %pR: contains BAR %d for %d VFs\n", res_name, res, i, total); i += bar64; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..25ced4073813 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6649,8 +6649,7 @@ static void pci_request_resource_alignment(struct pci_dev *dev, int bar, } else { r->flags &= ~IORESOURCE_SIZEALIGN; r->flags |= IORESOURCE_STARTALIGN; - r->start = align; - r->end = r->start + size - 1; + resource_set_range(r, align, size); } r->flags |= IORESOURCE_UNSET; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dccb60c1d9cc..e25476a766a7 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "pci.h" @@ -586,8 +587,7 @@ static void quirk_extend_bar_to_page(struct pci_dev *dev) const char *r_name = pci_resource_name(dev, i); if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) { - r->end = PAGE_SIZE - 1; - r->start = 0; + resource_set_range(r, 0, PAGE_SIZE); r->flags |= IORESOURCE_UNSET; pci_info(dev, "%s %pR: expanded to page size\n", r_name, r); @@ -606,8 +606,7 @@ static void quirk_s3_64M(struct pci_dev *dev) if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) { r->flags |= IORESOURCE_UNSET; - r->start = 0; - r->end = 0x3ffffff; + resource_set_range(r, 0, SZ_64M); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M); @@ -1342,8 +1341,7 @@ static void quirk_dunord(struct pci_dev *dev) struct resource *r = &dev->resource[1]; r->flags |= IORESOURCE_UNSET; - r->start = 0; - r->end = 0xffffff; + resource_set_range(r, 0, SZ_16M); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DUNORD, PCI_DEVICE_ID_DUNORD_I3000, quirk_dunord); @@ -2340,8 +2338,7 @@ static void quirk_tc86c001_ide(struct pci_dev *dev) if (r->start & 0x8) { r->flags |= IORESOURCE_UNSET; - r->start = 0; - r->end = 0xf; + resource_set_range(r, 0, SZ_16); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2, @@ -2369,8 +2366,7 @@ static void quirk_plx_pci9050(struct pci_dev *dev) pci_info(dev, "Re-allocating PLX PCI 9050 BAR %u to length 256 to avoid bit 7 bug\n", bar); r->flags |= IORESOURCE_UNSET; - r->start = 0; - r->end = 0xff; + resource_set_range(r, 0, SZ_256); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, @@ -3522,13 +3518,13 @@ static void quirk_intel_ntb(struct pci_dev *dev) if (rc) return; - dev->resource[2].end = dev->resource[2].start + ((u64) 1 << val) - 1; + resource_set_size(&dev->resource[2], (resource_size_t)1 << val); rc = pci_read_config_byte(dev, 0x00D1, &val); if (rc) return; - dev->resource[4].end = dev->resource[4].start + ((u64) 1 << val) - 1; + resource_set_size(&dev->resource[4], (resource_size_t)1 << val); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb); diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 23082bc0ca37..7293958ab234 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -246,8 +246,7 @@ static void reassign_resources_sorted(struct list_head *realloc_head, add_size = add_res->add_size; align = add_res->min_align; if (!resource_size(res)) { - res->start = align; - res->end = res->start + add_size - 1; + resource_set_range(res, align, add_size); if (pci_assign_resource(add_res->dev, idx)) reset_resource(res); } else { @@ -938,8 +937,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, return; } - b_res->start = min_align; - b_res->end = b_res->start + size0 - 1; + resource_set_range(b_res, min_align, size0); b_res->flags |= IORESOURCE_STARTALIGN; if (bus->self && size1 > size0 && realloc_head) { add_to_list(realloc_head, bus->self, b_res, size1-size0, @@ -1202,8 +1200,7 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, * Reserve some resources for CardBus. We reserve a fixed amount * of bus space for CardBus bridges. */ - b_res->start = pci_cardbus_io_size; - b_res->end = b_res->start + pci_cardbus_io_size - 1; + resource_set_range(b_res, pci_cardbus_io_size, pci_cardbus_io_size); b_res->flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN; if (realloc_head) { b_res->end -= pci_cardbus_io_size; @@ -1215,8 +1212,7 @@ handle_b_res_1: b_res = &bridge->resource[PCI_CB_BRIDGE_IO_1_WINDOW]; if (b_res->parent) goto handle_b_res_2; - b_res->start = pci_cardbus_io_size; - b_res->end = b_res->start + pci_cardbus_io_size - 1; + resource_set_range(b_res, pci_cardbus_io_size, pci_cardbus_io_size); b_res->flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN; if (realloc_head) { b_res->end -= pci_cardbus_io_size; @@ -1249,8 +1245,8 @@ handle_b_res_2: * Otherwise, allocate one region of twice the size. */ if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { - b_res->start = pci_cardbus_mem_size; - b_res->end = b_res->start + pci_cardbus_mem_size - 1; + resource_set_range(b_res, pci_cardbus_mem_size, + pci_cardbus_mem_size); b_res->flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_STARTALIGN; if (realloc_head) { @@ -1267,8 +1263,7 @@ handle_b_res_3: b_res = &bridge->resource[PCI_CB_BRIDGE_MEM_1_WINDOW]; if (b_res->parent) goto handle_done; - b_res->start = pci_cardbus_mem_size; - b_res->end = b_res->start + b_res_3_size - 1; + resource_set_range(b_res, pci_cardbus_mem_size, b_res_3_size); b_res->flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN; if (realloc_head) { b_res->end -= b_res_3_size; @@ -1847,7 +1842,7 @@ static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res, return; } - res->end = res->start + new_size - 1; + resource_set_size(res, new_size); /* If the resource is part of the add_list, remove it now */ if (add_list) @@ -2010,8 +2005,8 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, * what is available). */ align = pci_resource_alignment(dev, res); - io.end = align ? io.start + ALIGN_DOWN(io_per_b, align) - 1 - : io.start + io_per_b - 1; + resource_set_size(&io, align ? ALIGN_DOWN(io_per_b, align) + : io_per_b); /* * The x_per_b holds the extra resource space that can be @@ -2023,15 +2018,15 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_b, align) - 1 - : mmio.start + mmio_per_b - 1; + resource_set_size(&mmio, align ? ALIGN_DOWN(mmio_per_b, align) + : mmio_per_b); mmio.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - mmio_pref.end = align ? mmio_pref.start + - ALIGN_DOWN(mmio_pref_per_b, align) - 1 - : mmio_pref.start + mmio_pref_per_b - 1; + resource_set_size(&mmio_pref, + align ? ALIGN_DOWN(mmio_pref_per_b, align) + : mmio_pref_per_b); mmio_pref.start -= resource_size(res); pci_bus_distribute_available_resources(b, add_list, io, mmio, diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index c6d933ddfd46..ca14576bf2bf 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -211,8 +211,7 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, start = res->start; end = res->end; - res->start = fw_addr; - res->end = res->start + size - 1; + resource_set_range(res, fw_addr, size); res->flags &= ~IORESOURCE_UNSET; root = pci_find_parent_resource(dev, res); @@ -463,7 +462,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size) if (ret) return ret; - res->end = res->start + pci_rebar_size_to_bytes(size) - 1; + resource_set_size(res, pci_rebar_size_to_bytes(size)); /* Check if the new config works by trying to assign everything. */ if (dev->bus->self) { @@ -475,7 +474,7 @@ int pci_resize_resource(struct pci_dev *dev, int resno, int size) error_resize: pci_rebar_set_size(dev, resno, old); - res->end = res->start + pci_rebar_size_to_bytes(old) - 1; + resource_set_size(res, pci_rebar_size_to_bytes(old)); return ret; } EXPORT_SYMBOL(pci_resize_resource); From 9d3faf229c06d953999e2aa6b6dbb57afd252f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 14 Jun 2024 13:06:05 +0300 Subject: [PATCH 012/127] PCI: Use align and resource helpers, and SZ_* in quirk_s3_64M() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use IS_ALIGNED(), resource_size(), and SZ_* defines in quirk_s3_64M(). Link: https://lore.kernel.org/r/20240614100606.15830-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e25476a766a7..e85b89ca5601 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -12,6 +12,7 @@ * file, where their drivers can use them. */ +#include #include #include #include @@ -604,7 +605,7 @@ static void quirk_s3_64M(struct pci_dev *dev) { struct resource *r = &dev->resource[0]; - if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) { + if (!IS_ALIGNED(r->start, SZ_64M) || resource_size(r) != SZ_64M) { r->flags |= IORESOURCE_UNSET; resource_set_range(r, 0, SZ_64M); } From e3bdd2dd3f618d0bb9f500aae17df2b4f726dca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 14 Jun 2024 13:06:06 +0300 Subject: [PATCH 013/127] PCI: Add ALIGN_DOWN_IF_NONZERO() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_bus_distribute_available_resources() performs alignment in case of non-zero alignment requirement on three occasions. Add ALIGN_DOWN_IF_NONZERO() helper to avoid coding the non-zero check three times. Suggested-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240614100606.15830-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/setup-bus.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 7293958ab234..27da27c59e89 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1894,6 +1894,9 @@ static void remove_dev_resources(struct pci_dev *dev, struct resource *io, } } +#define ALIGN_DOWN_IF_NONZERO(addr, align) \ + ((align) ? ALIGN_DOWN((addr), (align)) : (addr)) + /* * io, mmio and mmio_pref contain the total amount of bridge window space * available. This includes the minimal space needed to cover all the @@ -2005,8 +2008,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, * what is available). */ align = pci_resource_alignment(dev, res); - resource_set_size(&io, align ? ALIGN_DOWN(io_per_b, align) - : io_per_b); + resource_set_size(&io, ALIGN_DOWN_IF_NONZERO(io_per_b, align)); /* * The x_per_b holds the extra resource space that can be @@ -2018,15 +2020,14 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - resource_set_size(&mmio, align ? ALIGN_DOWN(mmio_per_b, align) - : mmio_per_b); + resource_set_size(&mmio, + ALIGN_DOWN_IF_NONZERO(mmio_per_b,align)); mmio.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; align = pci_resource_alignment(dev, res); resource_set_size(&mmio_pref, - align ? ALIGN_DOWN(mmio_pref_per_b, align) - : mmio_pref_per_b); + ALIGN_DOWN_IF_NONZERO(mmio_pref_per_b, align)); mmio_pref.start -= resource_size(res); pci_bus_distribute_available_resources(b, add_list, io, mmio, From ca3c342fb3c76eee739a1cfc4ff59841722ebee7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:41 +0900 Subject: [PATCH 014/127] PCI: endpoint: Introduce pci_epc_function_is_valid() Introduce the epc core helper function pci_epc_function_is_valid() to verify that an epc pointer, a physical function number and a virtual function number are all valid. This avoids repeating the code pattern: if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) return err; if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) return err; in many functions of the endpoint controller core code. Signed-off-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/20241012113246.95634-2-dlemoal@kernel.org Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-core.c | 79 +++++++++++------------------ 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 17f007109255..b854f1bab26f 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -128,6 +128,18 @@ enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features } EXPORT_SYMBOL_GPL(pci_epc_get_next_free_bar); +static bool pci_epc_function_is_valid(struct pci_epc *epc, + u8 func_no, u8 vfunc_no) +{ + if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) + return false; + + if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + return false; + + return true; +} + /** * pci_epc_get_features() - get the features supported by EPC * @epc: the features supported by *this* EPC device will be returned @@ -145,10 +157,7 @@ const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc, { const struct pci_epc_features *epc_features; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return NULL; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return NULL; if (!epc->ops->get_features) @@ -218,10 +227,7 @@ int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, { int ret; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return -EINVAL; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; if (!epc->ops->raise_irq) @@ -262,10 +268,7 @@ int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, { int ret; - if (IS_ERR_OR_NULL(epc)) - return -EINVAL; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; if (!epc->ops->map_msi_irq) @@ -293,10 +296,7 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no) { int interrupt; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return 0; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return 0; if (!epc->ops->get_msi) @@ -329,11 +329,10 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 interrupts) int ret; u8 encode_int; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions || - interrupts < 1 || interrupts > 32) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (interrupts < 1 || interrupts > 32) return -EINVAL; if (!epc->ops->set_msi) @@ -361,10 +360,7 @@ int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no) { int interrupt; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return 0; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return 0; if (!epc->ops->get_msix) @@ -397,11 +393,10 @@ int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, { int ret; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions || - interrupts < 1 || interrupts > 2048) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (interrupts < 1 || interrupts > 2048) return -EINVAL; if (!epc->ops->set_msix) @@ -428,10 +423,7 @@ EXPORT_SYMBOL_GPL(pci_epc_set_msix); void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr) { - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return; if (!epc->ops->unmap_addr) @@ -459,10 +451,7 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, { int ret; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return -EINVAL; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; if (!epc->ops->map_addr) @@ -489,12 +478,11 @@ EXPORT_SYMBOL_GPL(pci_epc_map_addr); void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar) { - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions || - (epf_bar->barno == BAR_5 && - epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return; - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (epf_bar->barno == BAR_5 && + epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64) return; if (!epc->ops->clear_bar) @@ -521,18 +509,16 @@ int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, int ret; int flags = epf_bar->flags; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions || - (epf_bar->barno == BAR_5 && - flags & PCI_BASE_ADDRESS_MEM_TYPE_64) || + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return -EINVAL; + + if ((epf_bar->barno == BAR_5 && flags & PCI_BASE_ADDRESS_MEM_TYPE_64) || (flags & PCI_BASE_ADDRESS_SPACE_IO && flags & PCI_BASE_ADDRESS_IO_MASK) || (upper_32_bits(epf_bar->size) && !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))) return -EINVAL; - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) - return -EINVAL; - if (!epc->ops->set_bar) return 0; @@ -561,10 +547,7 @@ int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no, { int ret; - if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) - return -EINVAL; - - if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no])) + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) return -EINVAL; /* Only Virtual Function #1 has deviceID */ From 2314c6ffe8113ac3c22c8112fa9623e30eec6c4a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:42 +0900 Subject: [PATCH 015/127] PCI: endpoint: Improve pci_epc_mem_alloc_addr() There is no point in attempting to allocate memory from an endpoint controller memory window if the requested size is larger than the memory window size. Add a check to skip bitmap_find_free_region() calls for such case. This check can be done without the mem->lock mutex held as memory window sizes are constant and never modified at runtime. Also change the final return to return NULL to simplify the code. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-3-dlemoal@kernel.org Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-mem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c index a9c028f58da1..218a60e945db 100644 --- a/drivers/pci/endpoint/pci-epc-mem.c +++ b/drivers/pci/endpoint/pci-epc-mem.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pci_epc_mem_exit); void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size) { - void __iomem *virt_addr = NULL; + void __iomem *virt_addr; struct pci_epc_mem *mem; unsigned int page_shift; size_t align_size; @@ -188,10 +188,13 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, for (i = 0; i < epc->num_windows; i++) { mem = epc->windows[i]; - mutex_lock(&mem->lock); + if (size > mem->window.size) + continue; + align_size = ALIGN(size, mem->window.page_size); order = pci_epc_mem_get_order(mem, align_size); + mutex_lock(&mem->lock); pageno = bitmap_find_free_region(mem->bitmap, mem->pages, order); if (pageno >= 0) { @@ -211,7 +214,7 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, mutex_unlock(&mem->lock); } - return virt_addr; + return NULL; } EXPORT_SYMBOL_GPL(pci_epc_mem_alloc_addr); From 86efc62d031307e53ad4011e0aa8898e029cef47 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 4 Oct 2024 12:28:28 -0400 Subject: [PATCH 016/127] PCI/DOE: Poll DOE Busy bit for up to 1 second in pci_doe_send_req() During initial device probe, the PCI DOE busy bit for some CXL devices may be left set for a longer period than expected by the current driver logic. Despite local comments stating DOE Busy is unlikely to be detected, it appears commonly specifically during boot when CXL devices are being probed. The symptom was messages like this: endpoint6: DOE failed -EBUSY produced by cxl_cdat_get_length() or cxl_cdat_read_table(). This was observed on a single socket AMD platform with 2 CXL memory expanders attached to the single socket. It was not the case that concurrent accesses were being made, as validated by monitoring mailbox commands on the device side. This behavior has been observed with multiple CXL memory expanders from different vendors - so it appears unrelated to the model. In all observed tests, only a small period of the retry window is actually used - typically only a handful of loop iterations. Polling on the PCI DOE Busy Bit for (at max) one PCI DOE timeout interval (1 second), resolves this issue cleanly. Per PCIe r6.2 sec 6.30.3, the DOE Busy Bit being cleared does not raise an interrupt, so polling is the best option in this scenario. Subsequent code in doe_statemachine_work() and abort paths also wait for up to 1 PCI DOE timeout interval, so this order of (potential) additional delay is presumed acceptable. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/20241004162828.314-1-gourry@gourry.net Signed-off-by: Gregory Price [bhelgaas: fix nits and add error message to commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- drivers/pci/doe.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 652d63df9d22..7bd7892c5222 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -146,6 +146,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, { struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; + unsigned long timeout_jiffies; size_t length, remainder; u32 val; int i; @@ -155,8 +156,19 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, * someone other than Linux (e.g. firmware) is using the mailbox. Note * it is expected that firmware and OS will negotiate access rights via * an, as yet to be defined, method. + * + * Wait up to one PCI_DOE_TIMEOUT period to allow the prior command to + * finish. Otherwise, simply error out as unable to field the request. + * + * PCIe r6.2 sec 6.30.3 states no interrupt is raised when the DOE Busy + * bit is cleared, so polling here is our best option for the moment. */ - pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val); + timeout_jiffies = jiffies + PCI_DOE_TIMEOUT; + do { + pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val); + } while (FIELD_GET(PCI_DOE_STATUS_BUSY, val) && + !time_after(jiffies, timeout_jiffies)); + if (FIELD_GET(PCI_DOE_STATUS_BUSY, val)) return -EBUSY; From ce1dfe6d328966b75821c1f043a940eb2569768a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:43 +0900 Subject: [PATCH 017/127] PCI: endpoint: Introduce pci_epc_mem_map()/unmap() Some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region. For instance, the endpoint controller of the RK3399 SoC uses at most the lower 20 bits of a physical memory address region as the lower bits of a RC PCI address region. For mapping a PCI address region of size bytes starting from pci_addr, the exact number of address bits used is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1]. For this example, this creates the following constraints: 1) The offset into the controller physical memory allocated for a mapping depends on the mapping size *and* the starting PCI address for the mapping. 2) A mapping size cannot exceed the controller windows size (1MB) minus the offset needed into the allocated physical memory, which can end up being a smaller size than the desired mapping size. Handling these constraints independently of the controller being used in an endpoint function driver is not possible with the current EPC API as only the ->align field in struct pci_epc_features is provided but used for BAR (inbound ATU mappings) mapping only. A new API is needed for function drivers to discover mapping constraints and handle non-static requirements based on the RC PCI address range to access. Introduce the endpoint controller operation ->align_addr() to allow the EPC core functions to obtain the size and the offset into a controller address region that must be allocated and mapped to access a RC PCI address region. The size of the mapping provided by the align_addr() operation can then be used as the size argument for the function pci_epc_mem_alloc_addr() and the offset into the allocated controller memory provided can be used to correctly handle data transfers. For endpoint controllers that have PCI address alignment constraints, the align_addr() operation may indicate upon return an effective PCI address mapping size that is smaller (but not 0) than the requested PCI address region size. The controller ->align_addr() operation is optional: controllers that do not have any alignment constraints for mapping RC PCI address regions do not need to implement this operation. For such controllers, it is always assumed that the mapping size is equal to the requested size of the PCI region and that the mapping offset is 0. The function pci_epc_mem_map() is introduced to use this new controller operation (if it is defined) to handle controller memory allocation and mapping to a RC PCI address region in endpoint function drivers. This function first uses the ->align_addr() controller operation to determine the controller memory address size (and offset into) needed for mapping an RC PCI address region. The result of this operation is used to allocate a controller physical memory region using pci_epc_mem_alloc_addr() and then to map that memory to the RC PCI address space with pci_epc_map_addr(). Since ->align_addr() () may indicate that not all of a RC PCI address region can be mapped, pci_epc_mem_map() may only partially map the RC PCI address region specified. It is the responsibility of the caller (an endpoint function driver) to handle such smaller mapping by repeatedly using pci_epc_mem_map() over the desried PCI address range. The counterpart of pci_epc_mem_map() to unmap and free a mapped controller memory address region is pci_epc_mem_unmap(). Both functions operate using the new struct pci_epc_map data structure. This new structure represents a mapping PCI address, mapping effective size, the size of the controller memory needed for the mapping as well as the physical and virtual CPU addresses of the mapping (phys_base and virt_base fields). For convenience, the physical and virtual CPU addresses within that mapping to use to access the target RC PCI address region are also provided (phys_addr and virt_addr fields). Endpoint function drivers can use struct pci_epc_map to access the mapped RC PCI address region using the ->virt_addr and ->pci_size fields. Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-4-dlemoal@kernel.org [mani: squashed the patch that changed phy_addr_t to u64] Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-core.c | 103 ++++++++++++++++++++++++++++ include/linux/pci-epc.h | 38 ++++++++++ 2 files changed, 141 insertions(+) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index b854f1bab26f..04a85d2f7e2a 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -466,6 +466,109 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, } EXPORT_SYMBOL_GPL(pci_epc_map_addr); +/** + * pci_epc_mem_map() - allocate and map a PCI address to a CPU address + * @epc: the EPC device on which the CPU address is to be allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @pci_addr: PCI address to which the CPU address should be mapped + * @pci_size: the number of bytes to map starting from @pci_addr + * @map: where to return the mapping information + * + * Allocate a controller memory address region and map it to a RC PCI address + * region, taking into account the controller physical address mapping + * constraints using the controller operation align_addr(). If this operation is + * not defined, we assume that there are no alignment constraints for the + * mapping. + * + * The effective size of the PCI address range mapped from @pci_addr is + * indicated by @map->pci_size. This size may be less than the requested + * @pci_size. The local virtual CPU address for the mapping is indicated by + * @map->virt_addr (@map->phys_addr indicates the physical address). + * The size and CPU address of the controller memory allocated and mapped are + * respectively indicated by @map->map_size and @map->virt_base (and + * @map->phys_base for the physical address of @map->virt_base). + * + * Returns 0 on success and a negative error code in case of error. + */ +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map) +{ + size_t map_size = pci_size; + size_t map_offset = 0; + int ret; + + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return -EINVAL; + + if (!pci_size || !map) + return -EINVAL; + + /* + * Align the PCI address to map. If the controller defines the + * .align_addr() operation, use it to determine the PCI address to map + * and the size of the mapping. Otherwise, assume that the controller + * has no alignment constraint. + */ + memset(map, 0, sizeof(*map)); + map->pci_addr = pci_addr; + if (epc->ops->align_addr) + map->map_pci_addr = + epc->ops->align_addr(epc, pci_addr, + &map_size, &map_offset); + else + map->map_pci_addr = pci_addr; + map->map_size = map_size; + if (map->map_pci_addr + map->map_size < pci_addr + pci_size) + map->pci_size = map->map_pci_addr + map->map_size - pci_addr; + else + map->pci_size = pci_size; + + map->virt_base = pci_epc_mem_alloc_addr(epc, &map->phys_base, + map->map_size); + if (!map->virt_base) + return -ENOMEM; + + map->phys_addr = map->phys_base + map_offset; + map->virt_addr = map->virt_base + map_offset; + + ret = pci_epc_map_addr(epc, func_no, vfunc_no, map->phys_base, + map->map_pci_addr, map->map_size); + if (ret) { + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(pci_epc_mem_map); + +/** + * pci_epc_mem_unmap() - unmap and free a CPU address region + * @epc: the EPC device on which the CPU address is allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @map: the mapping information + * + * Unmap and free a CPU address region that was allocated and mapped with + * pci_epc_mem_map(). + */ +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map) +{ + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return; + + if (!map || !map->virt_base) + return; + + pci_epc_unmap_addr(epc, func_no, vfunc_no, map->phys_base); + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); +} +EXPORT_SYMBOL_GPL(pci_epc_mem_unmap); + /** * pci_epc_clear_bar() - reset the BAR * @epc: the EPC device for which the BAR has to be cleared diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 42ef06136bd1..de8cc3658220 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type) } } +/** + * struct pci_epc_map - information about EPC memory for mapping a RC PCI + * address range + * @pci_addr: start address of the RC PCI address range to map + * @pci_size: size of the RC PCI address range mapped from @pci_addr + * @map_pci_addr: RC PCI address used as the first address mapped (may be lower + * than @pci_addr) + * @map_size: size of the controller memory needed for mapping the RC PCI address + * range @pci_addr..@pci_addr+@pci_size + * @phys_base: base physical address of the allocated EPC memory for mapping the + * RC PCI address range + * @phys_addr: physical address at which @pci_addr is mapped + * @virt_base: base virtual address of the allocated EPC memory for mapping the + * RC PCI address range + * @virt_addr: virtual address at which @pci_addr is mapped + */ +struct pci_epc_map { + u64 pci_addr; + size_t pci_size; + + u64 map_pci_addr; + size_t map_size; + + phys_addr_t phys_base; + phys_addr_t phys_addr; + void __iomem *virt_base; + void __iomem *virt_addr; +}; + /** * struct pci_epc_ops - set of function pointers for performing EPC operations * @write_header: ops to populate configuration space header * @set_bar: ops to configure the BAR * @clear_bar: ops to reset the BAR + * @align_addr: operation to get the mapping address, mapping size and offset + * into a controller memory window needed to map an RC PCI address + * region * @map_addr: ops to map CPU address to PCI address * @unmap_addr: ops to unmap CPU address and PCI address * @set_msi: ops to set the requested number of MSI interrupts in the MSI @@ -61,6 +93,8 @@ struct pci_epc_ops { struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); + u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size, + size_t *offset); int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map); +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map); #else static inline void pci_epc_init_notify(struct pci_epc *epc) From 6067ccf36fbc8a5e5a32f15b267e3dc6aa6e7eb4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:44 +0900 Subject: [PATCH 018/127] PCI: endpoint: Update documentation Document the new functions pci_epc_mem_map() and pci_epc_mem_unmap(). Also add the documentation for the functions pci_epc_map_addr() and pci_epc_unmap_addr() that were missing. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-5-dlemoal@kernel.org Signed-off-by: Manivannan Sadhasivam --- Documentation/PCI/endpoint/pci-endpoint.rst | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/PCI/endpoint/pci-endpoint.rst b/Documentation/PCI/endpoint/pci-endpoint.rst index 21507e3cc238..35f82f2d45f5 100644 --- a/Documentation/PCI/endpoint/pci-endpoint.rst +++ b/Documentation/PCI/endpoint/pci-endpoint.rst @@ -117,6 +117,35 @@ by the PCI endpoint function driver. The PCI endpoint function driver should use pci_epc_mem_free_addr() to free the memory space allocated using pci_epc_mem_alloc_addr(). +* pci_epc_map_addr() + + A PCI endpoint function driver should use pci_epc_map_addr() to map to a RC + PCI address the CPU address of local memory obtained with + pci_epc_mem_alloc_addr(). + +* pci_epc_unmap_addr() + + A PCI endpoint function driver should use pci_epc_unmap_addr() to unmap the + CPU address of local memory mapped to a RC address with pci_epc_map_addr(). + +* pci_epc_mem_map() + + A PCI endpoint controller may impose constraints on the RC PCI addresses that + can be mapped. The function pci_epc_mem_map() allows endpoint function + drivers to allocate and map controller memory while handling such + constraints. This function will determine the size of the memory that must be + allocated with pci_epc_mem_alloc_addr() for successfully mapping a RC PCI + address range. This function will also indicate the size of the PCI address + range that was actually mapped, which can be less than the requested size, as + well as the offset into the allocated memory to use for accessing the mapped + RC PCI address range. + +* pci_epc_mem_unmap() + + A PCI endpoint function driver can use pci_epc_mem_unmap() to unmap and free + controller memory that was allocated and mapped using pci_epc_mem_map(). + + Other EPC APIs ~~~~~~~~~~~~~~ From 08cac1006bfcc35fe363055a40e05d5410cdf010 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:45 +0900 Subject: [PATCH 019/127] PCI: endpoint: test: Use pci_epc_mem_map/unmap() Modify the endpoint test driver to use the functions pci_epc_mem_map() and pci_epc_mem_unmap() for the read, write and copy tests. For each test case, the transfer (dma or mmio) are executed in a loop to ensure that potentially partial mappings returned by pci_epc_mem_map() are correctly handled. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-6-dlemoal@kernel.org Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/functions/pci-epf-test.c | 374 +++++++++--------- 1 file changed, 194 insertions(+), 180 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 7c2ed6eae53a..a73bc0771d35 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -317,91 +317,92 @@ static void pci_epf_test_print_rate(struct pci_epf_test *epf_test, static void pci_epf_test_copy(struct pci_epf_test *epf_test, struct pci_epf_test_reg *reg) { - int ret; - void __iomem *src_addr; - void __iomem *dst_addr; - phys_addr_t src_phys_addr; - phys_addr_t dst_phys_addr; + int ret = 0; struct timespec64 start, end; struct pci_epf *epf = epf_test->epf; - struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; + struct device *dev = &epf->dev; + struct pci_epc_map src_map, dst_map; + u64 src_addr = reg->src_addr; + u64 dst_addr = reg->dst_addr; + size_t copy_size = reg->size; + ssize_t map_size = 0; + void *copy_buf = NULL, *buf; - src_addr = pci_epc_mem_alloc_addr(epc, &src_phys_addr, reg->size); - if (!src_addr) { - dev_err(dev, "Failed to allocate source address\n"); - reg->status = STATUS_SRC_ADDR_INVALID; - ret = -ENOMEM; - goto err; - } - - ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr, - reg->src_addr, reg->size); - if (ret) { - dev_err(dev, "Failed to map source address\n"); - reg->status = STATUS_SRC_ADDR_INVALID; - goto err_src_addr; - } - - dst_addr = pci_epc_mem_alloc_addr(epc, &dst_phys_addr, reg->size); - if (!dst_addr) { - dev_err(dev, "Failed to allocate destination address\n"); - reg->status = STATUS_DST_ADDR_INVALID; - ret = -ENOMEM; - goto err_src_map_addr; - } - - ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr, - reg->dst_addr, reg->size); - if (ret) { - dev_err(dev, "Failed to map destination address\n"); - reg->status = STATUS_DST_ADDR_INVALID; - goto err_dst_addr; - } - - ktime_get_ts64(&start); if (reg->flags & FLAG_USE_DMA) { if (epf_test->dma_private) { dev_err(dev, "Cannot transfer data using DMA\n"); ret = -EINVAL; - goto err_map_addr; + goto set_status; } - - ret = pci_epf_test_data_transfer(epf_test, dst_phys_addr, - src_phys_addr, reg->size, 0, - DMA_MEM_TO_MEM); - if (ret) - dev_err(dev, "Data transfer failed\n"); } else { - void *buf; - - buf = kzalloc(reg->size, GFP_KERNEL); - if (!buf) { + copy_buf = kzalloc(copy_size, GFP_KERNEL); + if (!copy_buf) { ret = -ENOMEM; - goto err_map_addr; + goto set_status; + } + buf = copy_buf; + } + + while (copy_size) { + ret = pci_epc_mem_map(epc, epf->func_no, epf->vfunc_no, + src_addr, copy_size, &src_map); + if (ret) { + dev_err(dev, "Failed to map source address\n"); + reg->status = STATUS_SRC_ADDR_INVALID; + goto free_buf; } - memcpy_fromio(buf, src_addr, reg->size); - memcpy_toio(dst_addr, buf, reg->size); - kfree(buf); + ret = pci_epc_mem_map(epf->epc, epf->func_no, epf->vfunc_no, + dst_addr, copy_size, &dst_map); + if (ret) { + dev_err(dev, "Failed to map destination address\n"); + reg->status = STATUS_DST_ADDR_INVALID; + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, + &src_map); + goto free_buf; + } + + map_size = min_t(size_t, dst_map.pci_size, src_map.pci_size); + + ktime_get_ts64(&start); + if (reg->flags & FLAG_USE_DMA) { + ret = pci_epf_test_data_transfer(epf_test, + dst_map.phys_addr, src_map.phys_addr, + map_size, 0, DMA_MEM_TO_MEM); + if (ret) { + dev_err(dev, "Data transfer failed\n"); + goto unmap; + } + } else { + memcpy_fromio(buf, src_map.virt_addr, map_size); + memcpy_toio(dst_map.virt_addr, buf, map_size); + buf += map_size; + } + ktime_get_ts64(&end); + + copy_size -= map_size; + src_addr += map_size; + dst_addr += map_size; + + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &dst_map); + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &src_map); + map_size = 0; } - ktime_get_ts64(&end); - pci_epf_test_print_rate(epf_test, "COPY", reg->size, &start, &end, - reg->flags & FLAG_USE_DMA); -err_map_addr: - pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr); + pci_epf_test_print_rate(epf_test, "COPY", reg->size, &start, + &end, reg->flags & FLAG_USE_DMA); -err_dst_addr: - pci_epc_mem_free_addr(epc, dst_phys_addr, dst_addr, reg->size); +unmap: + if (map_size) { + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &dst_map); + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &src_map); + } -err_src_map_addr: - pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr); +free_buf: + kfree(copy_buf); -err_src_addr: - pci_epc_mem_free_addr(epc, src_phys_addr, src_addr, reg->size); - -err: +set_status: if (!ret) reg->status |= STATUS_COPY_SUCCESS; else @@ -411,82 +412,89 @@ err: static void pci_epf_test_read(struct pci_epf_test *epf_test, struct pci_epf_test_reg *reg) { - int ret; - void __iomem *src_addr; - void *buf; + int ret = 0; + void *src_buf, *buf; u32 crc32; - phys_addr_t phys_addr; + struct pci_epc_map map; phys_addr_t dst_phys_addr; struct timespec64 start, end; struct pci_epf *epf = epf_test->epf; - struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; + struct device *dev = &epf->dev; struct device *dma_dev = epf->epc->dev.parent; + u64 src_addr = reg->src_addr; + size_t src_size = reg->size; + ssize_t map_size = 0; - src_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size); - if (!src_addr) { - dev_err(dev, "Failed to allocate address\n"); - reg->status = STATUS_SRC_ADDR_INVALID; + src_buf = kzalloc(src_size, GFP_KERNEL); + if (!src_buf) { ret = -ENOMEM; - goto err; + goto set_status; } + buf = src_buf; - ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr, - reg->src_addr, reg->size); - if (ret) { - dev_err(dev, "Failed to map address\n"); - reg->status = STATUS_SRC_ADDR_INVALID; - goto err_addr; - } - - buf = kzalloc(reg->size, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; - goto err_map_addr; - } - - if (reg->flags & FLAG_USE_DMA) { - dst_phys_addr = dma_map_single(dma_dev, buf, reg->size, - DMA_FROM_DEVICE); - if (dma_mapping_error(dma_dev, dst_phys_addr)) { - dev_err(dev, "Failed to map destination buffer addr\n"); - ret = -ENOMEM; - goto err_dma_map; + while (src_size) { + ret = pci_epc_mem_map(epc, epf->func_no, epf->vfunc_no, + src_addr, src_size, &map); + if (ret) { + dev_err(dev, "Failed to map address\n"); + reg->status = STATUS_SRC_ADDR_INVALID; + goto free_buf; } - ktime_get_ts64(&start); - ret = pci_epf_test_data_transfer(epf_test, dst_phys_addr, - phys_addr, reg->size, - reg->src_addr, DMA_DEV_TO_MEM); - if (ret) - dev_err(dev, "Data transfer failed\n"); - ktime_get_ts64(&end); + map_size = map.pci_size; + if (reg->flags & FLAG_USE_DMA) { + dst_phys_addr = dma_map_single(dma_dev, buf, map_size, + DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dst_phys_addr)) { + dev_err(dev, + "Failed to map destination buffer addr\n"); + ret = -ENOMEM; + goto unmap; + } - dma_unmap_single(dma_dev, dst_phys_addr, reg->size, - DMA_FROM_DEVICE); - } else { - ktime_get_ts64(&start); - memcpy_fromio(buf, src_addr, reg->size); - ktime_get_ts64(&end); + ktime_get_ts64(&start); + ret = pci_epf_test_data_transfer(epf_test, + dst_phys_addr, map.phys_addr, + map_size, src_addr, DMA_DEV_TO_MEM); + if (ret) + dev_err(dev, "Data transfer failed\n"); + ktime_get_ts64(&end); + + dma_unmap_single(dma_dev, dst_phys_addr, map_size, + DMA_FROM_DEVICE); + + if (ret) + goto unmap; + } else { + ktime_get_ts64(&start); + memcpy_fromio(buf, map.virt_addr, map_size); + ktime_get_ts64(&end); + } + + src_size -= map_size; + src_addr += map_size; + buf += map_size; + + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &map); + map_size = 0; } - pci_epf_test_print_rate(epf_test, "READ", reg->size, &start, &end, - reg->flags & FLAG_USE_DMA); + pci_epf_test_print_rate(epf_test, "READ", reg->size, &start, + &end, reg->flags & FLAG_USE_DMA); - crc32 = crc32_le(~0, buf, reg->size); + crc32 = crc32_le(~0, src_buf, reg->size); if (crc32 != reg->checksum) ret = -EIO; -err_dma_map: - kfree(buf); +unmap: + if (map_size) + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &map); -err_map_addr: - pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr); +free_buf: + kfree(src_buf); -err_addr: - pci_epc_mem_free_addr(epc, phys_addr, src_addr, reg->size); - -err: +set_status: if (!ret) reg->status |= STATUS_READ_SUCCESS; else @@ -496,71 +504,79 @@ err: static void pci_epf_test_write(struct pci_epf_test *epf_test, struct pci_epf_test_reg *reg) { - int ret; - void __iomem *dst_addr; - void *buf; - phys_addr_t phys_addr; + int ret = 0; + void *dst_buf, *buf; + struct pci_epc_map map; phys_addr_t src_phys_addr; struct timespec64 start, end; struct pci_epf *epf = epf_test->epf; - struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; + struct device *dev = &epf->dev; struct device *dma_dev = epf->epc->dev.parent; + u64 dst_addr = reg->dst_addr; + size_t dst_size = reg->size; + ssize_t map_size = 0; - dst_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size); - if (!dst_addr) { - dev_err(dev, "Failed to allocate address\n"); - reg->status = STATUS_DST_ADDR_INVALID; + dst_buf = kzalloc(dst_size, GFP_KERNEL); + if (!dst_buf) { ret = -ENOMEM; - goto err; + goto set_status; } + get_random_bytes(dst_buf, dst_size); + reg->checksum = crc32_le(~0, dst_buf, dst_size); + buf = dst_buf; - ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr, - reg->dst_addr, reg->size); - if (ret) { - dev_err(dev, "Failed to map address\n"); - reg->status = STATUS_DST_ADDR_INVALID; - goto err_addr; - } - - buf = kzalloc(reg->size, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; - goto err_map_addr; - } - - get_random_bytes(buf, reg->size); - reg->checksum = crc32_le(~0, buf, reg->size); - - if (reg->flags & FLAG_USE_DMA) { - src_phys_addr = dma_map_single(dma_dev, buf, reg->size, - DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, src_phys_addr)) { - dev_err(dev, "Failed to map source buffer addr\n"); - ret = -ENOMEM; - goto err_dma_map; + while (dst_size) { + ret = pci_epc_mem_map(epc, epf->func_no, epf->vfunc_no, + dst_addr, dst_size, &map); + if (ret) { + dev_err(dev, "Failed to map address\n"); + reg->status = STATUS_DST_ADDR_INVALID; + goto free_buf; } - ktime_get_ts64(&start); + map_size = map.pci_size; + if (reg->flags & FLAG_USE_DMA) { + src_phys_addr = dma_map_single(dma_dev, buf, map_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, src_phys_addr)) { + dev_err(dev, + "Failed to map source buffer addr\n"); + ret = -ENOMEM; + goto unmap; + } - ret = pci_epf_test_data_transfer(epf_test, phys_addr, - src_phys_addr, reg->size, - reg->dst_addr, - DMA_MEM_TO_DEV); - if (ret) - dev_err(dev, "Data transfer failed\n"); - ktime_get_ts64(&end); + ktime_get_ts64(&start); - dma_unmap_single(dma_dev, src_phys_addr, reg->size, - DMA_TO_DEVICE); - } else { - ktime_get_ts64(&start); - memcpy_toio(dst_addr, buf, reg->size); - ktime_get_ts64(&end); + ret = pci_epf_test_data_transfer(epf_test, + map.phys_addr, src_phys_addr, + map_size, dst_addr, + DMA_MEM_TO_DEV); + if (ret) + dev_err(dev, "Data transfer failed\n"); + ktime_get_ts64(&end); + + dma_unmap_single(dma_dev, src_phys_addr, map_size, + DMA_TO_DEVICE); + + if (ret) + goto unmap; + } else { + ktime_get_ts64(&start); + memcpy_toio(map.virt_addr, buf, map_size); + ktime_get_ts64(&end); + } + + dst_size -= map_size; + dst_addr += map_size; + buf += map_size; + + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &map); + map_size = 0; } - pci_epf_test_print_rate(epf_test, "WRITE", reg->size, &start, &end, - reg->flags & FLAG_USE_DMA); + pci_epf_test_print_rate(epf_test, "WRITE", reg->size, &start, + &end, reg->flags & FLAG_USE_DMA); /* * wait 1ms inorder for the write to complete. Without this delay L3 @@ -568,16 +584,14 @@ static void pci_epf_test_write(struct pci_epf_test *epf_test, */ usleep_range(1000, 2000); -err_dma_map: - kfree(buf); +unmap: + if (map_size) + pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &map); -err_map_addr: - pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr); +free_buf: + kfree(dst_buf); -err_addr: - pci_epc_mem_free_addr(epc, phys_addr, dst_addr, reg->size); - -err: +set_status: if (!ret) reg->status |= STATUS_WRITE_SUCCESS; else From 6eaa83ec229b1e99130456c4f6658430d509c76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 17:11:08 +0300 Subject: [PATCH 020/127] PCI: Remove unused PCI_SUBTRACTIVE_DECODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2fe2abf896c1 ("PCI: augment bus resource table with a list") added PCI_SUBTRACTIVE_DECODE which is put into the struct pci_bus_resource flags field but is never read. There seems to never have been users for it. Remove both PCI_SUBTRACTIVE_DECODE and the flags field from the struct pci_bus_resource. Link: https://lore.kernel.org/r/20241017141111.44612-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- arch/s390/pci/pci_bus.c | 2 +- arch/x86/pci/fixup.c | 2 +- drivers/pci/bus.c | 4 +--- drivers/pci/probe.c | 5 ++--- include/linux/pci.h | 12 +----------- 5 files changed, 6 insertions(+), 19 deletions(-) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index daa5d7450c7d..5630af5deb8b 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -53,7 +53,7 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) zpci_setup_bus_resources(zdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (zdev->bars[i].res) - pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0); + pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res); } } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 98a9bb92d75c..0681ecfe3430 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -757,7 +757,7 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", res); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - pci_bus_add_resource(dev->bus, res, 0); + pci_bus_add_resource(dev->bus, res); } base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 55c853686051..9cf6d0f3ab2b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -46,8 +46,7 @@ void pci_free_resource_list(struct list_head *resources) } EXPORT_SYMBOL(pci_free_resource_list); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags) +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res) { struct pci_bus_resource *bus_res; @@ -58,7 +57,6 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, } bus_res->res = res; - bus_res->flags = flags; list_add_tail(&bus_res->list, &bus->resources); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..4243b1e6ece2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -546,8 +546,7 @@ void pci_read_bridge_bases(struct pci_bus *child) if (dev->transparent) { pci_bus_for_each_resource(child->parent, res) { if (res && res->flags) { - pci_bus_add_resource(child, res, - PCI_SUBTRACTIVE_DECODE); + pci_bus_add_resource(child, res); pci_info(dev, " bridge window %pR (subtractive decode)\n", res); } @@ -1032,7 +1031,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) if (res->flags & IORESOURCE_BUS) pci_bus_insert_busn_res(bus, bus->number, res->end); else - pci_bus_add_resource(bus, res, 0); + pci_bus_add_resource(bus, res); if (offset) { if (resource_type(res) == IORESOURCE_IO) diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..6a9cf80d0d4b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -633,18 +633,9 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); * Use pci_bus_for_each_resource() to iterate through all the resources. */ -/* - * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly - * and there's no way to program the bridge with the details of the window. - * This does not apply to ACPI _CRS windows, even with the _DEC subtractive- - * decode bit set, because they are explicit and can be programmed with _SRS. - */ -#define PCI_SUBTRACTIVE_DECODE 0x1 - struct pci_bus_resource { struct list_head list; struct resource *res; - unsigned int flags; }; #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -1498,8 +1489,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset); void pci_free_resource_list(struct list_head *resources); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags); +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); From 469c9cb941480718db52876bbdb95a7a79b34889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 17:11:09 +0300 Subject: [PATCH 021/127] PCI: Move struct pci_bus_resource into bus.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct pci_bus_resource is only used in bus.c, so move it there. Link: https://lore.kernel.org/r/20241017141111.44612-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/bus.c | 12 ++++++++++++ include/linux/pci.h | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9cf6d0f3ab2b..e0a2441be6d3 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -18,6 +18,18 @@ #include "pci.h" +/* + * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond + * to P2P or CardBus bridge windows) go in a table. Additional ones (for + * buses below host bridges or subtractive decode bridges) go in the list. + * Use pci_bus_for_each_resource() to iterate through all the resources. + */ + +struct pci_bus_resource { + struct list_head list; + struct resource *res; +}; + void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 6a9cf80d0d4b..5a9d849b28ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,18 +626,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); -/* - * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond - * to P2P or CardBus bridge windows) go in a table. Additional ones (for - * buses below host bridges or subtractive decode bridges) go in the list. - * Use pci_bus_for_each_resource() to iterate through all the resources. - */ - -struct pci_bus_resource { - struct list_head list; - struct resource *res; -}; - #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { From 32ec7b362d904c1e67f8e64524fb0b6c43d5c9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 17:11:10 +0300 Subject: [PATCH 022/127] PCI: Simplify pci_read_bridge_bases() logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use reverse logic combined with return and continue to significantly reduce indentation level in pci_read_bridge_bases(). Link: https://lore.kernel.org/r/20241017141111.44612-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/probe.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4243b1e6ece2..f9524b90bdbc 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -543,14 +543,15 @@ void pci_read_bridge_bases(struct pci_bus *child) pci_read_bridge_mmio(child->self, child->resource[1], false); pci_read_bridge_mmio_pref(child->self, child->resource[2], false); - if (dev->transparent) { - pci_bus_for_each_resource(child->parent, res) { - if (res && res->flags) { - pci_bus_add_resource(child, res); - pci_info(dev, " bridge window %pR (subtractive decode)\n", - res); - } - } + if (!dev->transparent) + return; + + pci_bus_for_each_resource(child->parent, res) { + if (!res || !res->flags) + continue; + + pci_bus_add_resource(child, res); + pci_info(dev, " bridge window %pR (subtractive decode)\n", res); } } From 5b036cada481a7a3bf30d333298f6d83dfb19bed Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Mon, 14 Oct 2024 10:19:17 -0300 Subject: [PATCH 023/127] PCI: cpcihp: Remove unused struct cpci_hp_controller_ops.hardware_test The 'hardware_test' field in struct cpci_hp_controller_ops is unused; remove it to reduce resource consumption. Link: https://lore.kernel.org/r/20241014131917.324667-1-trintaeoitogc@gmail.com Signed-off-by: Guilherme Giacomo Simoes [bhelgas: commit log] Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpci_hotplug.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h index 6d8970d8c3f2..03fa39ab0c88 100644 --- a/drivers/pci/hotplug/cpci_hotplug.h +++ b/drivers/pci/hotplug/cpci_hotplug.h @@ -44,7 +44,6 @@ struct cpci_hp_controller_ops { int (*enable_irq)(void); int (*disable_irq)(void); int (*check_irq)(void *dev_id); - int (*hardware_test)(struct slot *slot, u32 value); u8 (*get_power)(struct slot *slot); int (*set_power)(struct slot *slot, int value); }; From 91a6296793591983f27db40f41daab53500ffc6e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 12 Oct 2024 21:53:42 +0200 Subject: [PATCH 024/127] PCI: cpqphp: Remove unused struct ctrl_dbg.ctrl 'ctrl' is unused; remove it to save a few bytes when the structure is allocated. Link: https://lore.kernel.org/r/551d0cdaabcf69fcd09a565475c428e09c61e1a3.1728762751.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpqphp_sysfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index fed1360ee9b1..6143ebf71f21 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -123,7 +123,6 @@ static int spew_debug_info(struct controller *ctrl, char *data, int size) struct ctrl_dbg { int size; char *data; - struct controller *ctrl; }; #define MAX_OUTPUT (4*PAGE_SIZE) From 19f73e938df2b8edfa2e93e0280bd26fd4df5b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Oct 2024 12:55:45 +0300 Subject: [PATCH 025/127] PCI: Improve pdev_sort_resources() warning message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use pci_resource_name() helper in pdev_sort_resources() to print resources in user-friendly format. Also replace the vague "bogus alignment" with a more precise explanation of the problem. Link: https://lore.kernel.org/r/20241017095545.1424-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Philipp Stanner --- drivers/pci/setup-bus.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 27da27c59e89..5e00cecf1f1a 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -134,6 +134,7 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) int i; pci_dev_for_each_resource(dev, r, i) { + const char *r_name = pci_resource_name(dev, i); struct pci_dev_resource *dev_res, *tmp; resource_size_t r_align; struct list_head *n; @@ -146,8 +147,8 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) r_align = pci_resource_alignment(dev, r); if (!r_align) { - pci_warn(dev, "BAR %d: %pR has bogus alignment\n", - i, r); + pci_warn(dev, "%s %pR: alignment must not be zero\n", + r_name, r); continue; } From 48d0fd2b903e397c2a9621ab35f3d8877f61aee4 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:52 -0500 Subject: [PATCH 026/127] PCI/TPH: Add TPH documentation Add a document for the TPH feature, including description of "notph" kernel parameter and the API interface. Co-developed-by: Eric Van Tassell Link: https://lore.kernel.org/r/20241002165954.128085-4-wei.huang2@amd.com Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek --- Documentation/PCI/index.rst | 1 + Documentation/PCI/tph.rst | 132 +++++++++++++++++++++++++++ Documentation/driver-api/pci/pci.rst | 3 + 3 files changed, 136 insertions(+) create mode 100644 Documentation/PCI/tph.rst diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst index e73f84aebde3..5e7c4e6e726b 100644 --- a/Documentation/PCI/index.rst +++ b/Documentation/PCI/index.rst @@ -18,3 +18,4 @@ PCI Bus Subsystem pcieaer-howto endpoint/index boot-interrupts + tph diff --git a/Documentation/PCI/tph.rst b/Documentation/PCI/tph.rst new file mode 100644 index 000000000000..e8993be64fd6 --- /dev/null +++ b/Documentation/PCI/tph.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: GPL-2.0 + + +=========== +TPH Support +=========== + +:Copyright: 2024 Advanced Micro Devices, Inc. +:Authors: - Eric van Tassell + - Wei Huang + + +Overview +======== + +TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices +to provide optimization hints for requests that target memory space. +These hints, in a format called Steering Tags (STs), are embedded in the +requester's TLP headers, enabling the system hardware, such as the Root +Complex, to better manage platform resources for these requests. + +For example, on platforms with TPH-based direct data cache injection +support, an endpoint device can include appropriate STs in its DMA +traffic to specify which cache the data should be written to. This allows +the CPU core to have a higher probability of getting data from cache, +potentially improving performance and reducing latency in data +processing. + + +How to Use TPH +============== + +TPH is presented as an optional extended capability in PCIe. The Linux +kernel handles TPH discovery during boot, but it is up to the device +driver to request TPH enablement if it is to be utilized. Once enabled, +the driver uses the provided API to obtain the Steering Tag for the +target memory and to program the ST into the device's ST table. + +Enable TPH support in Linux +--------------------------- + +To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option +enabled. + +Manage TPH +---------- + +To enable TPH for a device, use the following function:: + + int pcie_enable_tph(struct pci_dev *pdev, int mode); + +This function enables TPH support for device with a specific ST mode. +Current supported modes include: + + * PCI_TPH_ST_NS_MODE - NO ST Mode + * PCI_TPH_ST_IV_MODE - Interrupt Vector Mode + * PCI_TPH_ST_DS_MODE - Device Specific Mode + +`pcie_enable_tph()` checks whether the requested mode is actually +supported by the device before enabling. The device driver can figure out +which TPH mode is supported and can be properly enabled based on the +return value of `pcie_enable_tph()`. + +To disable TPH, use the following function:: + + void pcie_disable_tph(struct pci_dev *pdev); + +Manage ST +--------- + +Steering Tags are platform specific. PCIe spec does not specify where STs +are from. Instead PCI Firmware Specification defines an ACPI _DSM method +(see the `Revised _DSM for Cache Locality TPH Features ECN +`_) for retrieving +STs for a target memory of various properties. This method is what is +supported in this implementation. + +To retrieve a Steering Tag for a target memory associated with a specific +CPU, use the following function:: + + int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type, + unsigned int cpu_uid, u16 *tag); + +The `type` argument is used to specify the memory type, either volatile +or persistent, of the target memory. The `cpu_uid` argument specifies the +CPU where the memory is associated to. + +After the ST value is retrieved, the device driver can use the following +function to write the ST into the device:: + + int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, + u16 tag); + +The `index` argument is the ST table entry index the ST tag will be +written into. `pcie_tph_set_st_entry()` will figure out the proper +location of ST table, either in the MSI-X table or in the TPH Extended +Capability space, and write the Steering Tag into the ST entry pointed by +the `index` argument. + +It is completely up to the driver to decide how to use these TPH +functions. For example a network device driver can use the TPH APIs above +to update the Steering Tag when interrupt affinity of a RX/TX queue has +been changed. Here is a sample code for IRQ affinity notifier: + +.. code-block:: c + + static void irq_affinity_notified(struct irq_affinity_notify *notify, + const cpumask_t *mask) + { + struct drv_irq *irq; + unsigned int cpu_id; + u16 tag; + + irq = container_of(notify, struct drv_irq, affinity_notify); + cpumask_copy(irq->cpu_mask, mask); + + /* Pick a right CPU as the target - here is just an example */ + cpu_id = cpumask_first(irq->cpu_mask); + + if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id, + &tag)) + return; + + if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag)) + return; + } + +Disable TPH system-wide +----------------------- + +There is a kernel command line option available to control TPH feature: + * "notph": TPH will be disabled for all endpoint devices. diff --git a/Documentation/driver-api/pci/pci.rst b/Documentation/driver-api/pci/pci.rst index aa40b1cc243b..59d86e827198 100644 --- a/Documentation/driver-api/pci/pci.rst +++ b/Documentation/driver-api/pci/pci.rst @@ -46,6 +46,9 @@ PCI Support Library .. kernel-doc:: drivers/pci/pci-sysfs.c :internal: +.. kernel-doc:: drivers/pci/tph.c + :export: + PCI Hotplug Support Library --------------------------- From e2226dbc4a4919d9c8bd9293299b532090bdf020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 22 Oct 2024 12:11:37 +0300 Subject: [PATCH 027/127] PCI: cpqphp: Fix PCIBIOS_* return value confusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code in and related to PCI_RefinedAccessConfig() has three types of return type confusion: - PCI_RefinedAccessConfig() tests pci_bus_read_config_dword() return value against -1. - PCI_RefinedAccessConfig() returns both -1 and PCIBIOS_* return codes. - Callers of PCI_RefinedAccessConfig() only test for -1. Make PCI_RefinedAccessConfig() return PCIBIOS_* codes consistently and adapt callers accordingly. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/20241022091140.3504-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpqphp_pci.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 718bc6cf12cb..974c7db3265b 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -135,11 +135,13 @@ int cpqhp_unconfigure_device(struct pci_func *func) static int PCI_RefinedAccessConfig(struct pci_bus *bus, unsigned int devfn, u8 offset, u32 *value) { u32 vendID = 0; + int ret; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &vendID) == -1) - return -1; + ret = pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &vendID); + if (ret != PCIBIOS_SUCCESSFUL) + return PCIBIOS_DEVICE_NOT_FOUND; if (PCI_POSSIBLE_ERROR(vendID)) - return -1; + return PCIBIOS_DEVICE_NOT_FOUND; return pci_bus_read_config_dword(bus, devfn, offset, value); } @@ -202,13 +204,15 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ { u16 tdevice; u32 work; + int ret; u8 tbus; ctrl->pci_bus->number = bus_num; for (tdevice = 0; tdevice < 0xFF; tdevice++) { /* Scan for access first */ - if (PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work) == -1) + ret = PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work); + if (ret) continue; dbg("Looking for nonbridge bus_num %d dev_num %d\n", bus_num, tdevice); /* Yep we got one. Not a bridge ? */ @@ -220,7 +224,8 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ } for (tdevice = 0; tdevice < 0xFF; tdevice++) { /* Scan for access first */ - if (PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work) == -1) + ret = PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work); + if (ret) continue; dbg("Looking for bridge bus_num %d dev_num %d\n", bus_num, tdevice); /* Yep we got one. bridge ? */ From 752430d1d33ed69653a6e8dbdf01f83cdf5be574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 22 Oct 2024 12:11:38 +0300 Subject: [PATCH 028/127] PCI: cpqphp: Use pci_bus_read_dev_vendor_id() to detect presence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent of the first part in PCI_RefinedAccessConfig() is to read Vendor ID register and detect presence of the device that way. Remove PCI_RefinedAccessConfig() (which was not named very helpfully to begin with) and replace the call with pci_bus_read_dev_vendor_id() + read config because it makes the logic more obvious at the caller side. Link: https://lore.kernel.org/r/20241022091140.3504-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpqphp_pci.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 974c7db3265b..7844007dbc86 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -132,20 +132,6 @@ int cpqhp_unconfigure_device(struct pci_func *func) return 0; } -static int PCI_RefinedAccessConfig(struct pci_bus *bus, unsigned int devfn, u8 offset, u32 *value) -{ - u32 vendID = 0; - int ret; - - ret = pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &vendID); - if (ret != PCIBIOS_SUCCESSFUL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (PCI_POSSIBLE_ERROR(vendID)) - return PCIBIOS_DEVICE_NOT_FOUND; - return pci_bus_read_config_dword(bus, devfn, offset, value); -} - - /* * cpqhp_set_irq * @@ -211,7 +197,9 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ for (tdevice = 0; tdevice < 0xFF; tdevice++) { /* Scan for access first */ - ret = PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work); + if (!pci_bus_read_dev_vendor_id(ctrl->pci_bus, tdevice, &work, 0)) + continue; + ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, 0x08, &work); if (ret) continue; dbg("Looking for nonbridge bus_num %d dev_num %d\n", bus_num, tdevice); @@ -224,7 +212,9 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ } for (tdevice = 0; tdevice < 0xFF; tdevice++) { /* Scan for access first */ - ret = PCI_RefinedAccessConfig(ctrl->pci_bus, tdevice, 0x08, &work); + if (!pci_bus_read_dev_vendor_id(ctrl->pci_bus, tdevice, &work, 0)) + continue; + ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, 0x08, &work); if (ret) continue; dbg("Looking for bridge bus_num %d dev_num %d\n", bus_num, tdevice); From de2cdf110a39a660e810979e480761fed9842436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 22 Oct 2024 12:11:39 +0300 Subject: [PATCH 029/127] PCI: cpqphp: Use define to read class/revision dword MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace literal 0x08 with PCI_CLASS_REVISION. Link: https://lore.kernel.org/r/20241022091140.3504-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpqphp_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 7844007dbc86..558866c15e03 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -199,7 +199,7 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ /* Scan for access first */ if (!pci_bus_read_dev_vendor_id(ctrl->pci_bus, tdevice, &work, 0)) continue; - ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, 0x08, &work); + ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, PCI_CLASS_REVISION, &work); if (ret) continue; dbg("Looking for nonbridge bus_num %d dev_num %d\n", bus_num, tdevice); @@ -214,7 +214,7 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ /* Scan for access first */ if (!pci_bus_read_dev_vendor_id(ctrl->pci_bus, tdevice, &work, 0)) continue; - ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, 0x08, &work); + ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, PCI_CLASS_REVISION, &work); if (ret) continue; dbg("Looking for bridge bus_num %d dev_num %d\n", bus_num, tdevice); From 5a02413a4586a7cfa10b7380377138e66db9df4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 22 Oct 2024 12:11:40 +0300 Subject: [PATCH 030/127] PCI: cpqphp: Simplify PCI_ScanBusForNonBridge() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI_ScanBusForNonBridge() has two loops, one to search for non-bridges and a second to look for bridges. The second loop has hints in a debug print it should do recursion for buses underneath the bridge, but no recursion is attempted. Since the second loop is quite useless in its current form, just eliminate it. This code hasn't been touched for very long time so either it's unused or the missing parts are not important enough for anyone to attempt to add them. Leave only a warning print and comment about the missing recursion for the unlikely case that somebody comes across the lack of functionality. In any case, search whether an endpoint exists downstream of a bridge sounds generic enough to belong to core so if the functionality is to be extended it should probably be moved into PCI core. Link: https://lore.kernel.org/r/20241022091140.3504-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpqphp_pci.c | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 558866c15e03..ef7534a3ca40 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -12,8 +12,11 @@ * */ +#define pr_fmt(fmt) "cpqphp: " fmt + #include #include +#include #include #include #include @@ -190,8 +193,7 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ { u16 tdevice; u32 work; - int ret; - u8 tbus; + int ret = -1; ctrl->pci_bus->number = bus_num; @@ -208,26 +210,20 @@ static int PCI_ScanBusForNonBridge(struct controller *ctrl, u8 bus_num, u8 *dev_ *dev_num = tdevice; dbg("found it !\n"); return 0; - } - } - for (tdevice = 0; tdevice < 0xFF; tdevice++) { - /* Scan for access first */ - if (!pci_bus_read_dev_vendor_id(ctrl->pci_bus, tdevice, &work, 0)) - continue; - ret = pci_bus_read_config_dword(ctrl->pci_bus, tdevice, PCI_CLASS_REVISION, &work); - if (ret) - continue; - dbg("Looking for bridge bus_num %d dev_num %d\n", bus_num, tdevice); - /* Yep we got one. bridge ? */ - if ((work >> 8) == PCI_TO_PCI_BRIDGE_CLASS) { - pci_bus_read_config_byte(ctrl->pci_bus, PCI_DEVFN(tdevice, 0), PCI_SECONDARY_BUS, &tbus); - /* XXX: no recursion, wtf? */ - dbg("Recurse on bus_num %d tdevice %d\n", tbus, tdevice); - return 0; + } else { + /* + * XXX: Code whose debug printout indicated + * recursion to buses underneath bridges might be + * necessary was removed because it never did + * any recursion. + */ + ret = 0; + pr_warn("missing feature: bridge scan recursion not implemented\n"); } } - return -1; + + return ret; } From 7447990137bf06b2aeecad9c6081e01a9f47f2aa Mon Sep 17 00:00:00 2001 From: Ajay Agarwal Date: Mon, 7 Oct 2024 08:59:17 +0530 Subject: [PATCH 031/127] PCI/ASPM: Disable L1 before disabling L1 PM Substates PCIe r6.2, sec 5.5.4, requires that: If setting either or both of the enable bits for ASPM L1 PM Substates, both ports must be configured as described in this section while ASPM L1 is disabled. Previously, pcie_config_aspm_l1ss() assumed that "setting enable bits" meant "setting them to 1", and it configured L1SS as follows: - Clear L1SS enable bits - Disable L1 - Configure L1SS enable bits as required - Enable L1 if required With this sequence, when disabling L1SS on an ARM A-core with a Synopsys DesignWare PCIe core, the CPU occasionally hangs when reading PCI_L1SS_CTL1, leading to a reboot when the CPU watchdog expires. Move the L1 disable to the caller (pcie_config_aspm_link(), where L1 was already enabled) so L1 is always disabled while updating the L1SS bits: - Disable L1 - Clear L1SS enable bits - Configure L1SS enable bits as required - Enable L1 if required Change pcie_aspm_cap_init() similarly. Link: https://lore.kernel.org/r/20241007032917.872262-1-ajayagarwal@google.com Signed-off-by: Ajay Agarwal [bhelgaas: comments, commit log, compute L1SS setting before config access] Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aspm.c | 94 ++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index cee2365e54b8..e943691bc931 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -805,6 +805,15 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); + /* Disable L0s/L1 before updating L1SS config */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(child, PCI_EXP_LNKCTL, + child_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, + parent_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + } + /* * Setup L0s state * @@ -829,6 +838,13 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) aspm_l1ss_init(link); + /* Restore L0s/L1 if they were enabled */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_lnkctl); + pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl); + } + /* Save default state */ link->aspm_default = link->aspm_enabled; @@ -845,43 +861,12 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) } } -/* Configure the ASPM L1 substates */ +/* Configure the ASPM L1 substates. Caller must disable L1 first. */ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) { - u32 val, enable_req; + u32 val; struct pci_dev *child = link->downstream, *parent = link->pdev; - enable_req = (link->aspm_enabled ^ state) & state; - - /* - * Here are the rules specified in the PCIe spec for enabling L1SS: - * - When enabling L1.x, enable bit at parent first, then at child - * - When disabling L1.x, disable bit at child first, then at parent - * - When enabling ASPM L1.x, need to disable L1 - * (at child followed by parent). - * - The ASPM/PCIPM L1.2 must be disabled while programming timing - * parameters - * - * To keep it simple, disable all L1SS bits first, and later enable - * what is needed. - */ - - /* Disable all L1 substates */ - pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, 0); - pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, - PCI_L1SS_CTL1_L1SS_MASK, 0); - /* - * If needed, disable L1, and it gets enabled later - * in pcie_config_aspm_link(). - */ - if (enable_req & (PCIE_LINK_STATE_L1_1 | PCIE_LINK_STATE_L1_2)) { - pcie_capability_clear_word(child, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - } - val = 0; if (state & PCIE_LINK_STATE_L1_1) val |= PCI_L1SS_CTL1_ASPM_L1_1; @@ -892,6 +877,20 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) if (state & PCIE_LINK_STATE_L1_2_PCIPM) val |= PCI_L1SS_CTL1_PCIPM_L1_2; + /* + * PCIe r6.2, sec 5.5.4, rules for enabling L1 PM Substates: + * - Clear L1.x enable bits at child first, then at parent + * - Set L1.x enable bits at parent first, then at child + * - ASPM/PCIPM L1.2 must be disabled while programming timing + * parameters + */ + + /* Disable all L1 substates */ + pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1SS_MASK, 0); + /* Enable what we need to enable */ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, PCI_L1SS_CTL1_L1SS_MASK, val); @@ -937,21 +936,30 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) dwstream |= PCI_EXP_LNKCTL_ASPM_L1; } + /* + * Per PCIe r6.2, sec 5.5.4, setting either or both of the enable + * bits for ASPM L1 PM Substates must be done while ASPM L1 is + * disabled. Disable L1 here and apply new configuration after L1SS + * configuration has been completed. + * + * Per sec 7.5.3.7, when disabling ASPM L1, software must disable + * it in the Downstream component prior to disabling it in the + * Upstream component, and ASPM L1 must be enabled in the Upstream + * component prior to enabling it in the Downstream component. + * + * Sec 7.5.3.7 also recommends programming the same ASPM Control + * value for all functions of a multi-function device. + */ + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_config_aspm_dev(child, 0); + pcie_config_aspm_dev(parent, 0); + if (link->aspm_capable & PCIE_LINK_STATE_L1SS) pcie_config_aspm_l1ss(link, state); - /* - * Spec 2.0 suggests all functions should be configured the - * same setting for ASPM. Enabling ASPM L1 should be done in - * upstream component first and then downstream, and vice - * versa for disabling ASPM L1. Spec doesn't mention L0S. - */ - if (state & PCIE_LINK_STATE_L1) - pcie_config_aspm_dev(parent, upstream); + pcie_config_aspm_dev(parent, upstream); list_for_each_entry(child, &linkbus->devices, bus_list) pcie_config_aspm_dev(child, dwstream); - if (!(state & PCIE_LINK_STATE_L1)) - pcie_config_aspm_dev(parent, upstream); link->aspm_enabled = state; From fad610b987132868e3410c530871086552ce6155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:47 +0300 Subject: [PATCH 032/127] Documentation PCI: Reformat RMW ops documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the list of RMW protected PCIe Capability registers into a bullet list to make them easier to pick up on a glance. An upcoming change is going to add one more register among them so it will be much cleaner to have them as bullets. Link: https://lore.kernel.org/r/20241018144755.7875-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- Documentation/PCI/pciebus-howto.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst index f344452651e1..e48d01422efc 100644 --- a/Documentation/PCI/pciebus-howto.rst +++ b/Documentation/PCI/pciebus-howto.rst @@ -217,8 +217,11 @@ capability structure except the PCI Express capability structure, that is shared between many drivers including the service drivers. RMW Capability accessors (pcie_capability_clear_and_set_word(), pcie_capability_set_word(), and pcie_capability_clear_word()) protect -a selected set of PCI Express Capability Registers (Link Control -Register and Root Control Register). Any change to those registers -should be performed using RMW accessors to avoid problems due to -concurrent updates. For the up-to-date list of protected registers, -see pcie_capability_clear_and_set_word(). +a selected set of PCI Express Capability Registers: + +* Link Control Register +* Root Control Register + +Any change to those registers should be performed using RMW accessors to +avoid problems due to concurrent updates. For the up-to-date list of +protected registers, see pcie_capability_clear_and_set_word(). From 04af8a399fa40310f831b4f1dc9f757085f41983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:48 +0300 Subject: [PATCH 033/127] PCI: Protect Link Control 2 Register with RMW locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe Bandwidth Controller performs RMW accesses the Link Control 2 Register which can occur concurrently to other sources of Link Control 2 Register writes. Therefore, add Link Control 2 Register among the PCI Express Capability Registers that need RMW locking. Link: https://lore.kernel.org/r/20241018144755.7875-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- Documentation/PCI/pciebus-howto.rst | 1 + include/linux/pci.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst index e48d01422efc..375d9ce171f6 100644 --- a/Documentation/PCI/pciebus-howto.rst +++ b/Documentation/PCI/pciebus-howto.rst @@ -221,6 +221,7 @@ a selected set of PCI Express Capability Registers: * Link Control Register * Root Control Register +* Link Control 2 Register Any change to those registers should be performed using RMW accessors to avoid problems due to concurrent updates. For the up-to-date list of diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..be5ed534c39c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1274,6 +1274,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, { switch (pos) { case PCI_EXP_LNKCTL: + case PCI_EXP_LNKCTL2: case PCI_EXP_RTCTL: return pcie_capability_clear_and_set_word_locked(dev, pos, clear, set); From 12dd12821f1eb2755643914df2cc900c5e8c9d12 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 4 Oct 2024 23:10:00 +0900 Subject: [PATCH 034/127] PCI: dwc: endpoint: Clear outbound address on unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear the address mapped (outbound_addr array) in dw_pcie_ep_unmap_addr(), to ensure that dw_pcie_find_index() does not match an ATU index that was already unmapped. This is in addition to clearing the ATU index bit in ob_window_map. Link: https://lore.kernel.org/linux-pci/20241004141000.5080-1-dlemoal@kernel.org Signed-off-by: Damien Le Moal [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware-ep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 43ba5c6738df..b07ace7dc92e 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -280,6 +280,7 @@ static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, if (ret < 0) return; + ep->outbound_addr[atu_index] = 0; dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, atu_index); clear_bit(atu_index, ep->ob_window_map); } From d9d959c36bec59f11c0eb6b5308729e3c4901b5e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:34 +0100 Subject: [PATCH 035/127] PCI: Make pcim_request_all_regions() a public function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to remove the deprecated function pcim_iomap_regions_request_all(), a few drivers need an interface to request all BARs a PCI device offers. Make pcim_request_all_regions() a public interface. Link: https://lore.kernel.org/r/20241030112743.104395-2-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Ilpo Järvinen --- drivers/pci/devres.c | 3 ++- include/linux/pci.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index b133967faef8..2a64da5c91fb 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -939,7 +939,7 @@ static void pcim_release_all_regions(struct pci_dev *pdev) * desired, release individual regions with pcim_release_region() or all of * them at once with pcim_release_all_regions(). */ -static int pcim_request_all_regions(struct pci_dev *pdev, const char *name) +int pcim_request_all_regions(struct pci_dev *pdev, const char *name) { int ret; int bar; @@ -957,6 +957,7 @@ err: return ret; } +EXPORT_SYMBOL(pcim_request_all_regions); /** * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..3b151c8331e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2293,6 +2293,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } #endif +int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); From bdcddd0cdc39dab62813bad760905f69918d7a85 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:35 +0100 Subject: [PATCH 036/127] ata: ahci: Replace deprecated PCI functions pcim_iomap_regions_request_all() and pcim_iomap_table() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-3-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Damien Le Moal --- drivers/ata/acard-ahci.c | 6 ++++-- drivers/ata/ahci.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c index 547f56341705..3999305b5356 100644 --- a/drivers/ata/acard-ahci.c +++ b/drivers/ata/acard-ahci.c @@ -370,7 +370,7 @@ static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id /* AHCI controllers often implement SFF compatible interface. * Grab all PCI BARs just in case. */ - rc = pcim_iomap_regions_request_all(pdev, 1 << AHCI_PCI_BAR, DRV_NAME); + rc = pcim_request_all_regions(pdev, DRV_NAME); if (rc == -EBUSY) pcim_pin_device(pdev); if (rc) @@ -386,7 +386,9 @@ static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) pci_enable_msi(pdev); - hpriv->mmio = pcim_iomap_table(pdev)[AHCI_PCI_BAR]; + hpriv->mmio = pcim_iomap(pdev, AHCI_PCI_BAR, 0); + if (!hpriv->mmio) + return -ENOMEM; /* save initial config */ ahci_save_initial_config(&pdev->dev, hpriv); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 45f63b09828a..2043dfb52ae8 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1869,7 +1869,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* AHCI controllers often implement SFF compatible interface. * Grab all PCI BARs just in case. */ - rc = pcim_iomap_regions_request_all(pdev, 1 << ahci_pci_bar, DRV_NAME); + rc = pcim_request_all_regions(pdev, DRV_NAME); if (rc == -EBUSY) pcim_pin_device(pdev); if (rc) @@ -1893,7 +1893,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ahci_sb600_enable_64bit(pdev)) hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY; - hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; + hpriv->mmio = pcim_iomap(pdev, ahci_pci_bar, 0); + if (!hpriv->mmio) + return -ENOMEM; /* detect remapped nvme devices */ ahci_remap_check(pdev, ahci_pci_bar, hpriv); From 86d17afd1a9fbb57b56035a86936f9cc7817674e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:36 +0100 Subject: [PATCH 037/127] crypto: qat - replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-4-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Giovanni Cabiddu --- drivers/crypto/intel/qat/qat_420xx/adf_drv.c | 11 ++++++++--- drivers/crypto/intel/qat/qat_4xxx/adf_drv.c | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c index f49818a13013..788a11cdb34b 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c @@ -129,16 +129,21 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_GEN4_BAR_MASK; - ret = pcim_iomap_regions_request_all(pdev, bar_mask, pci_name(pdev)); + ret = pcim_request_all_regions(pdev, pci_name(pdev)); if (ret) { - dev_err(&pdev->dev, "Failed to map pci regions.\n"); + dev_err(&pdev->dev, "Failed to request PCI regions.\n"); goto out_err; } i = 0; for_each_set_bit(bar_nr, &bar_mask, PCI_STD_NUM_BARS) { bar = &accel_pci_dev->pci_bars[i++]; - bar->virt_addr = pcim_iomap_table(pdev)[bar_nr]; + bar->virt_addr = pcim_iomap(pdev, bar_nr, 0); + if (!bar->virt_addr) { + dev_err(&pdev->dev, "Failed to ioremap PCI region.\n"); + ret = -ENOMEM; + goto out_err; + } } pci_set_master(pdev); diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 659905e45950..115eabfd1f6b 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -131,16 +131,21 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_GEN4_BAR_MASK; - ret = pcim_iomap_regions_request_all(pdev, bar_mask, pci_name(pdev)); + ret = pcim_request_all_regions(pdev, pci_name(pdev)); if (ret) { - dev_err(&pdev->dev, "Failed to map pci regions.\n"); + dev_err(&pdev->dev, "Failed to request PCI regions.\n"); goto out_err; } i = 0; for_each_set_bit(bar_nr, &bar_mask, PCI_STD_NUM_BARS) { bar = &accel_pci_dev->pci_bars[i++]; - bar->virt_addr = pcim_iomap_table(pdev)[bar_nr]; + bar->virt_addr = pcim_iomap(pdev, bar_nr, 0); + if (!bar->virt_addr) { + dev_err(&pdev->dev, "Failed to ioremap PCI region.\n"); + ret = -ENOMEM; + goto out_err; + } } pci_set_master(pdev); From cf43d998fd9b0dbfafecdfeaae0b70c2862b810a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:37 +0100 Subject: [PATCH 038/127] crypto: marvell - replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-5-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Bharat Bhushan --- drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c | 14 +++++++++----- drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c | 13 +++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 400e36d9908f..94d0e73e42de 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -739,18 +739,22 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, dev_err(dev, "Unable to get usable DMA configuration\n"); goto clear_drvdata; } - /* Map PF's configuration registers */ - err = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, - OTX2_CPT_DRV_NAME); + err = pcim_request_all_regions(pdev, OTX2_CPT_DRV_NAME); if (err) { - dev_err(dev, "Couldn't get PCI resources 0x%x\n", err); + dev_err(dev, "Couldn't request PCI resources 0x%x\n", err); goto clear_drvdata; } pci_set_master(pdev); pci_set_drvdata(pdev, cptpf); cptpf->pdev = pdev; - cptpf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + /* Map PF's configuration registers */ + cptpf->reg_base = pcim_iomap(pdev, PCI_PF_REG_BAR_NUM, 0); + if (!cptpf->reg_base) { + err = -ENOMEM; + dev_err(dev, "Couldn't ioremap PCI resource 0x%x\n", err); + goto clear_drvdata; + } /* Check if AF driver is up, otherwise defer probe */ err = cpt_is_pf_usable(cptpf); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 527d34cc258b..d0b6ee901f62 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -358,9 +358,8 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, dev_err(dev, "Unable to get usable DMA configuration\n"); goto clear_drvdata; } - /* Map VF's configuration registers */ - ret = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, - OTX2_CPTVF_DRV_NAME); + + ret = pcim_request_all_regions(pdev, OTX2_CPTVF_DRV_NAME); if (ret) { dev_err(dev, "Couldn't get PCI resources 0x%x\n", ret); goto clear_drvdata; @@ -369,7 +368,13 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, cptvf); cptvf->pdev = pdev; - cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + /* Map VF's configuration registers */ + cptvf->reg_base = pcim_iomap(pdev, PCI_PF_REG_BAR_NUM, 0); + if (!cptvf->reg_base) { + ret = -ENOMEM; + dev_err(dev, "Couldn't ioremap PCI resource 0x%x\n", ret); + goto clear_drvdata; + } otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag); From abbc299c71aa1ff118c48b392d1235122884c85a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:38 +0100 Subject: [PATCH 039/127] intel_th: pci: Replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-6-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Alexander Shishkin --- drivers/hwtracing/intel_th/pci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 0d7b9839e5b6..e9d8d28e055f 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -23,7 +23,6 @@ enum { TH_PCI_RTIT_BAR = 4, }; -#define BAR_MASK (BIT(TH_PCI_CONFIG_BAR) | BIT(TH_PCI_STH_SW_BAR)) #define PCI_REG_NPKDSC 0x80 #define NPKDSC_TSACT BIT(5) @@ -83,10 +82,16 @@ static int intel_th_pci_probe(struct pci_dev *pdev, if (err) return err; - err = pcim_iomap_regions_request_all(pdev, BAR_MASK, DRIVER_NAME); + err = pcim_request_all_regions(pdev, DRIVER_NAME); if (err) return err; + if (!pcim_iomap(pdev, TH_PCI_CONFIG_BAR, 0)) + return -ENOMEM; + + if (!pcim_iomap(pdev, TH_PCI_STH_SW_BAR, 0)) + return -ENOMEM; + if (pdev->resource[TH_PCI_RTIT_BAR].start) { resource[TH_MMIO_RTIT] = pdev->resource[TH_PCI_RTIT_BAR]; r++; From 3dd6ed25a90cf3fa0e414afe8c42d28de4a05928 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:39 +0100 Subject: [PATCH 040/127] wifi: iwlwifi: replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-7-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 3b9943eb6934..4839dcb199c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3533,7 +3533,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie, **priv; struct iwl_trans *trans; int ret, addr_size; - void __iomem * const *table; u32 bar0; /* reassign our BAR 0 if invalid due to possible runtime PM races */ @@ -3659,22 +3658,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } - ret = pcim_iomap_regions_request_all(pdev, BIT(0), DRV_NAME); + ret = pcim_request_all_regions(pdev, DRV_NAME); if (ret) { - dev_err(&pdev->dev, "pcim_iomap_regions_request_all failed\n"); + dev_err(&pdev->dev, "Requesting all PCI BARs failed.\n"); goto out_no_pci; } - table = pcim_iomap_table(pdev); - if (!table) { - dev_err(&pdev->dev, "pcim_iomap_table failed\n"); - ret = -ENOMEM; - goto out_no_pci; - } - - trans_pcie->hw_base = table[0]; + trans_pcie->hw_base = pcim_iomap(pdev, 0, 0); if (!trans_pcie->hw_base) { - dev_err(&pdev->dev, "couldn't find IO mem in first BAR\n"); + dev_err(&pdev->dev, "Could not ioremap PCI BAR 0.\n"); ret = -ENODEV; goto out_no_pci; } From 5915997a8eb4b8a8e8aa3b0fe6cb85265822c2ba Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:40 +0100 Subject: [PATCH 041/127] ntb: idt: Replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-8-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Serge Semin --- drivers/ntb/hw/idt/ntb_hw_idt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 6fc9dfe82474..544d8a4d2af5 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2671,15 +2671,20 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) */ pci_set_master(pdev); - /* Request all BARs resources and map BAR0 only */ - ret = pcim_iomap_regions_request_all(pdev, 1, NTB_NAME); + /* Request all BARs resources */ + ret = pcim_request_all_regions(pdev, NTB_NAME); if (ret != 0) { dev_err(&pdev->dev, "Failed to request resources\n"); goto err_clear_master; } - /* Retrieve virtual address of BAR0 - PCI configuration space */ - ndev->cfgspc = pcim_iomap_table(pdev)[0]; + /* ioremap BAR0 - PCI configuration space */ + ndev->cfgspc = pcim_iomap(pdev, 0, 0); + if (!ndev->cfgspc) { + dev_err(&pdev->dev, "Failed to ioremap BAR 0\n"); + ret = -ENOMEM; + goto err_clear_master; + } /* Put the IDT driver data pointer to the PCI-device private pointer */ pci_set_drvdata(pdev, ndev); From 55285d8fa2a17c25ae5f4b76df26f2823631baae Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:41 +0100 Subject: [PATCH 042/127] serial: rp2: Replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-9-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Jiri Slaby --- drivers/tty/serial/rp2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c index 8bab2aedc499..6d99a02dd439 100644 --- a/drivers/tty/serial/rp2.c +++ b/drivers/tty/serial/rp2.c @@ -698,7 +698,6 @@ static int rp2_probe(struct pci_dev *pdev, const struct firmware *fw; struct rp2_card *card; struct rp2_uart_port *ports; - void __iomem * const *bars; int rc; card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL); @@ -711,13 +710,16 @@ static int rp2_probe(struct pci_dev *pdev, if (rc) return rc; - rc = pcim_iomap_regions_request_all(pdev, 0x03, DRV_NAME); + rc = pcim_request_all_regions(pdev, DRV_NAME); if (rc) return rc; - bars = pcim_iomap_table(pdev); - card->bar0 = bars[0]; - card->bar1 = bars[1]; + card->bar0 = pcim_iomap(pdev, 0, 0); + if (!card->bar0) + return -ENOMEM; + card->bar1 = pcim_iomap(pdev, 1, 0); + if (!card->bar1) + return -ENOMEM; card->pdev = pdev; rp2_decode_cap(id, &card->n_ports, &card->smpte); From bfeb07b8aea64db7538cafd65ee2bf8a810aa2e8 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:42 +0100 Subject: [PATCH 043/127] ALSA: korg1212: Replace deprecated PCI functions pcim_iomap_table() and pcim_iomap_regions_request_all() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace these functions with their successors, pcim_iomap() and pcim_request_all_regions(). Link: https://lore.kernel.org/r/20241030112743.104395-10-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Takashi Iwai --- sound/pci/korg1212/korg1212.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index e62fb1ad6d77..49b71082c485 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2108,7 +2108,7 @@ static int snd_korg1212_create(struct snd_card *card, struct pci_dev *pci) for (i=0; ivolumePhase[i] = 0; - err = pcim_iomap_regions_request_all(pci, 1 << 0, "korg1212"); + err = pcim_request_all_regions(pci, "korg1212"); if (err < 0) return err; @@ -2130,7 +2130,9 @@ static int snd_korg1212_create(struct snd_card *card, struct pci_dev *pci) korg1212->iomem2, iomem2_size, stateName[korg1212->cardState]); - korg1212->iobase = pcim_iomap_table(pci)[0]; + korg1212->iobase = pcim_iomap(pci, 0, 0); + if (!korg1212->iobase) + return -ENOMEM; err = devm_request_irq(&pci->dev, pci->irq, snd_korg1212_interrupt, IRQF_SHARED, From 6d9c59212523e719a63575222f1cd1b0aca3da4f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:43 +0100 Subject: [PATCH 044/127] PCI: Remove pcim_iomap_regions_request_all() pcim_iomap_regions_request_all() have been deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). All users of this function have been ported to other interfaces by now. Remove pcim_iomap_regions_request_all(). Link: https://lore.kernel.org/r/20241030112743.104395-11-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal --- .../driver-api/driver-model/devres.rst | 1 - drivers/pci/devres.c | 56 ------------------- include/linux/pci.h | 2 - 3 files changed, 59 deletions(-) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 5f2ee8d717b1..3a30cf4f6c0d 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -394,7 +394,6 @@ PCI pcim_enable_device() : after success, some PCI ops become managed pcim_iomap() : do iomap() on a single BAR pcim_iomap_regions() : do request_region() and iomap() on multiple BARs - pcim_iomap_regions_request_all() : do request_region() on all and iomap() on multiple BARs pcim_iomap_table() : array of mapped addresses indexed by BAR pcim_iounmap() : do iounmap() on a single BAR pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 2a64da5c91fb..319a477a2135 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -959,62 +959,6 @@ err: } EXPORT_SYMBOL(pcim_request_all_regions); -/** - * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones - * (DEPRECATED) - * @pdev: PCI device to map IO resources for - * @mask: Mask of BARs to iomap - * @name: Name associated with the requests - * - * Returns: 0 on success, negative error code on failure. - * - * Request all PCI BARs and iomap regions specified by @mask. - * - * To release these resources manually, call pcim_release_region() for the - * regions and pcim_iounmap() for the mappings. - * - * This function is DEPRECATED. Don't use it in new code. Instead, use one - * of the pcim_* region request functions in combination with a pcim_* - * mapping function. - */ -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name) -{ - int bar; - int ret; - void __iomem **legacy_iomap_table; - - ret = pcim_request_all_regions(pdev, name); - if (ret != 0) - return ret; - - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { - if (!mask_contains_bar(mask, bar)) - continue; - if (!pcim_iomap(pdev, bar, 0)) - goto err; - } - - return 0; - -err: - /* - * If bar is larger than 0, then pcim_iomap() above has most likely - * failed because of -EINVAL. If it is equal 0, most likely the table - * couldn't be created, indicating -ENOMEM. - */ - ret = bar > 0 ? -EINVAL : -ENOMEM; - legacy_iomap_table = (void __iomem **)pcim_iomap_table(pdev); - - while (--bar >= 0) - pcim_iounmap(pdev, legacy_iomap_table[bar]); - - pcim_release_all_regions(pdev); - - return ret; -} -EXPORT_SYMBOL(pcim_iomap_regions_request_all); - /** * pcim_iounmap_regions - Unmap and release PCI BARs * @pdev: PCI device to map IO resources for diff --git a/include/linux/pci.h b/include/linux/pci.h index 3b151c8331e5..b59197635c5c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2301,8 +2301,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name); void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); From 4a6afd60733c75369680f4a40c82b7c8528f4a7a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:04 +0200 Subject: [PATCH 045/127] PCI: Make pcim_iounmap_region() a public function The function pcim_iounmap_regions() is problematic because it uses a bitmask mechanism to release / iounmap multiple BARs at once. It, thus, prevents getting rid of the problematic iomap table mechanism which was deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). pcim_iounmap_region() does not have that problem. Make it public as the successor of pcim_iounmap_regions(). Link: https://lore.kernel.org/r/20241016094911.24818-3-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- drivers/pci/devres.c | 3 ++- include/linux/pci.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 319a477a2135..fc50d2456aed 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -773,7 +773,7 @@ EXPORT_SYMBOL(pcim_iomap_region); * Unmap a BAR and release its region manually. Only pass BARs that were * previously mapped by pcim_iomap_region(). */ -static void pcim_iounmap_region(struct pci_dev *pdev, int bar) +void pcim_iounmap_region(struct pci_dev *pdev, int bar) { struct pcim_addr_devres res_searched; @@ -784,6 +784,7 @@ static void pcim_iounmap_region(struct pci_dev *pdev, int bar) devres_release(&pdev->dev, pcim_addr_resource_release, pcim_addr_resources_match, &res_searched); } +EXPORT_SYMBOL(pcim_iounmap_region); /** * pcim_iomap_regions - Request and iomap PCI BARs (DEPRECATED) diff --git a/include/linux/pci.h b/include/linux/pci.h index b59197635c5c..b8d248c136ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2297,6 +2297,7 @@ int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); +void pcim_iounmap_region(struct pci_dev *pdev, int bar); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); From c7acef99642b763ba585f4a43af999fcdbcc3dc4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 10 Oct 2024 19:10:34 +0200 Subject: [PATCH 046/127] PCI: Fix use-after-free of slot->bus on hot remove Dennis reports a boot crash on recent Lenovo laptops with a USB4 dock. Since commit 0fc70886569c ("thunderbolt: Reset USB4 v2 host router") and commit 59a54c5f3dbd ("thunderbolt: Reset topology created by the boot firmware"), USB4 v2 and v1 Host Routers are reset on probe of the thunderbolt driver. The reset clears the Presence Detect State and Data Link Layer Link Active bits at the USB4 Host Router's Root Port and thus causes hot removal of the dock. The crash occurs when pciehp is unbound from one of the dock's Downstream Ports: pciehp creates a pci_slot on bind and destroys it on unbind. The pci_slot contains a pointer to the pci_bus below the Downstream Port, but a reference on that pci_bus is never acquired. The pci_bus is destroyed before the pci_slot, so a use-after-free ensues when pci_slot_release() accesses slot->bus. In principle this should not happen because pci_stop_bus_device() unbinds pciehp (and therefore destroys the pci_slot) before the pci_bus is destroyed by pci_remove_bus_device(). However the stacktrace provided by Dennis shows that pciehp is unbound from pci_remove_bus_device() instead of pci_stop_bus_device(). To understand the significance of this, one needs to know that the PCI core uses a two step process to remove a portion of the hierarchy: It first unbinds all drivers in the sub-hierarchy in pci_stop_bus_device() and then actually removes the devices in pci_remove_bus_device(). There is no precaution to prevent driver binding in-between pci_stop_bus_device() and pci_remove_bus_device(). In Dennis' case, it seems removal of the hierarchy by pciehp races with driver binding by pci_bus_add_devices(). pciehp is bound to the Downstream Port after pci_stop_bus_device() has run, so it is unbound by pci_remove_bus_device() instead of pci_stop_bus_device(). Because the pci_bus has already been destroyed at that point, accesses to it result in a use-after-free. One might conclude that driver binding needs to be prevented after pci_stop_bus_device() has run. However it seems risky that pci_slot points to pci_bus without holding a reference. Solely relying on correct ordering of driver unbind versus pci_bus destruction is certainly not defensive programming. If pci_slot has a need to access data in pci_bus, it ought to acquire a reference. Amend pci_create_slot() accordingly. Dennis reports that the crash is not reproducible with this change. Abridged stacktrace: pcieport 0000:00:07.0: PME: Signaling with IRQ 156 pcieport 0000:00:07.0: pciehp: Slot #12 AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+ IbPresDis- LLActRep+ pci_bus 0000:20: dev 00, created physical slot 12 pcieport 0000:00:07.0: pciehp: Slot(12): Card not present ... pcieport 0000:21:02.0: pciehp: pcie_disable_notification: SLOTCTRL d8 write cmd 0 Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b6b: 0000 [#1] PREEMPT SMP NOPTI CPU: 13 UID: 0 PID: 134 Comm: irq/156-pciehp Not tainted 6.11.0-devel+ #1 RIP: 0010:dev_driver_string+0x12/0x40 pci_destroy_slot pciehp_remove pcie_port_remove_service device_release_driver_internal bus_remove_device device_del device_unregister remove_iter device_for_each_child pcie_portdrv_remove pci_device_remove device_release_driver_internal bus_remove_device device_del pci_remove_bus_device (recursive invocation) pci_remove_bus_device pciehp_unconfigure_device pciehp_disable_slot pciehp_handle_presence_or_link_change pciehp_ist Link: https://lore.kernel.org/r/4bfd4c0e976c1776cd08e76603903b338cf25729.1728579288.git.lukas@wunner.de Reported-by: Dennis Wassenberg Closes: https://lore.kernel.org/r/6de4b45ff2b32dd91a805ec02ec8ec73ef411bf6.camel@secunet.com/ Tested-by: Dennis Wassenberg Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Cc: stable@vger.kernel.org --- drivers/pci/slot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 0f87cade10f7..ed645c7a4e4b 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -79,6 +79,7 @@ static void pci_slot_release(struct kobject *kobj) up_read(&pci_bus_sem); list_del(&slot->list); + pci_bus_put(slot->bus); kfree(slot); } @@ -261,7 +262,7 @@ placeholder: goto err; } - slot->bus = parent; + slot->bus = pci_bus_get(parent); slot->number = slot_nr; slot->kobj.kset = pci_slots_kset; @@ -269,6 +270,7 @@ placeholder: slot_name = make_slot_name(name); if (!slot_name) { err = -ENOMEM; + pci_bus_put(slot->bus); kfree(slot); goto err; } From d38cc57c14ff9590e03da77987217eca19ea350d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 17 Oct 2024 21:04:47 +0300 Subject: [PATCH 047/127] dt-bindings: PCI: qcom,pcie-sm8550: Add SAR2130P compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the Qualcomm SAR2130P platform the PCIe host is compatible with the DWC controller present on the SM8550 platorm, just using one additional clock. Link: https://lore.kernel.org/r/20241017-sar2130p-pci-v1-1-5b95e63d9624@linaro.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Krzysztof Wilczyński Reviewed-by: Krzysztof Kozlowski --- Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml index 24cb38673581..2b5498a35dcc 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml @@ -20,6 +20,7 @@ properties: - const: qcom,pcie-sm8550 - items: - enum: + - qcom,sar2130p-pcie - qcom,pcie-sm8650 - const: qcom,pcie-sm8550 @@ -39,7 +40,7 @@ properties: clocks: minItems: 7 - maxItems: 8 + maxItems: 9 clock-names: minItems: 7 @@ -52,6 +53,7 @@ properties: - const: ddrss_sf_tbu # PCIe SF TBU clock - const: noc_aggr # Aggre NoC PCIe AXI clock - const: cnoc_sf_axi # Config NoC PCIe1 AXI clock + - const: qmip_pcie_ahb # QMIP PCIe AHB clock interrupts: minItems: 8 From ba4a2e2317b9faeca9193ed6d3193ddc3cf2aba3 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 7 Oct 2024 10:42:55 +0530 Subject: [PATCH 048/127] PCI: qcom: Enable MSI interrupts together with Link up if 'Global IRQ' is supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if 'Global IRQ' is supported by the platform, only the Link up interrupt is enabled in the PARF_INT_ALL_MASK register. This masks MSIs on some platforms. The MSI bits in PARF_INT_ALL_MASK register are enabled by default in the hardware, but commit 4581403f6792 ("PCI: qcom: Enumerate endpoints based on Link up event in 'global_irq' interrupt") disabled them and enabled only the Link up interrupt. While MSI continued to work on the SM8450 platform that was used to test the offending commit, on other platforms like SM8250, X1E80100, MSIs are getting masked. And they require enabling the MSI interrupt bits in the register to unmask (enable) the MSIs. Even though the MSI interrupt enable bits in PARF_INT_ALL_MASK are described as 'diagnostic' interrupts in the internal documentation, disabling them masks MSI on these platforms. Due to this, MSIs were not reported to be received these platforms while supporting 'Global IRQ'. So, enable the MSI interrupts along with the Link up interrupt in the PARF_INT_ALL_MASK register if 'Global IRQ' is supported. This ensures that the MSIs continue to work and also the driver is able to catch the Link up interrupt for enumerating endpoint devices. Fixes: 4581403f6792 ("PCI: qcom: Enumerate endpoints based on Link up event in 'global_irq' interrupt") Closes: https://lore.kernel.org/linux-pci/9a692c98-eb0a-4d86-b642-ea655981ff53@kernel.org/ Link: https://lore.kernel.org/r/20241007051255.4378-1-manivannan.sadhasivam@linaro.org Reported-by: Konrad Dybcio Tested-by: Konrad Dybcio # SL7 Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Reviewed-by: Qiang Yu --- drivers/pci/controller/dwc/pcie-qcom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index ef44a82be058..2b33d03ed054 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -133,6 +133,7 @@ /* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */ #define PARF_INT_ALL_LINK_UP BIT(13) +#define PARF_INT_MSI_DEV_0_7 GENMASK(30, 23) /* PARF_NO_SNOOP_OVERIDE register fields */ #define WR_NO_SNOOP_OVERIDE_EN BIT(1) @@ -1716,7 +1717,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_host_deinit; } - writel_relaxed(PARF_INT_ALL_LINK_UP, pcie->parf + PARF_INT_ALL_MASK); + writel_relaxed(PARF_INT_ALL_LINK_UP | PARF_INT_MSI_DEV_0_7, + pcie->parf + PARF_INT_ALL_MASK); } qcom_pcie_icc_opp_update(pcie); From 7d7cf89b119af433354f865fc01017b9f8aa411a Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 17 Aug 2024 11:09:03 +0530 Subject: [PATCH 049/127] PCI: qcom-ep: Move controller cleanups to qcom_pcie_perst_deassert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the endpoint cleanup function dw_pcie_ep_cleanup() and EPF deinit notify function pci_epc_deinit_notify() are called during the execution of qcom_pcie_perst_assert() i.e., when the host has asserted PERST#. But quickly after this step, refclk will also be disabled by the host. All of the Qcom endpoint SoCs supported as of now depend on the refclk from the host for keeping the controller operational. Due to this limitation, any access to the hardware registers in the absence of refclk will result in a whole endpoint crash. Unfortunately, most of the controller cleanups require accessing the hardware registers (like eDMA cleanup performed in dw_pcie_ep_cleanup(), powering down MHI EPF etc...). So these cleanup functions are currently causing the crash in the endpoint SoC once host asserts PERST#. One way to address this issue is by generating the refclk in the endpoint itself and not depending on the host. But that is not always possible as some of the endpoint designs do require the endpoint to consume refclk from the host (as I was told by the Qcom engineers). Thus, fix this crash by moving the controller cleanups to the start of the qcom_pcie_perst_deassert() function. qcom_pcie_perst_deassert() is called whenever the host has deasserted PERST# and it is guaranteed that the refclk would be active at this point. So at the start of this function (after enabling resources), the controller cleanup can be performed. Once finished, rest of the code execution for PERST# deassert can continue as usual. Fixes: 473b2cf9c4d1 ("PCI: endpoint: Introduce 'epc_deinit' event and notify the EPF drivers") Fixes: 570d7715eed8 ("PCI: dwc: ep: Introduce dw_pcie_ep_cleanup() API for drivers supporting PERST#") Link: https://lore.kernel.org/r/20240817-pci-qcom-ep-cleanup-v1-1-d6b958226559@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index e588fcc54589..b5ca5260f904 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -396,6 +396,10 @@ static int qcom_pcie_perst_deassert(struct dw_pcie *pci) return ret; } + /* Perform cleanup that requires refclk */ + pci_epc_deinit_notify(pci->ep.epc); + dw_pcie_ep_cleanup(&pci->ep); + /* Assert WAKE# to RC to indicate device is ready */ gpiod_set_value_cansleep(pcie_ep->wake, 1); usleep_range(WAKE_DELAY_US, WAKE_DELAY_US + 500); @@ -540,8 +544,6 @@ static void qcom_pcie_perst_assert(struct dw_pcie *pci) { struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci); - pci_epc_deinit_notify(pci->ep.epc); - dw_pcie_ep_cleanup(&pci->ep); qcom_pcie_disable_resources(pcie_ep); pcie_ep->link_status = QCOM_PCIE_EP_LINK_DISABLED; } From 40e2125381dc11379112485e3eefdd25c6df5375 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 17 Aug 2024 11:09:04 +0530 Subject: [PATCH 050/127] PCI: tegra194: Move controller cleanups to pex_ep_event_pex_rst_deassert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the endpoint cleanup function dw_pcie_ep_cleanup() and EPF deinit notify function pci_epc_deinit_notify() are called during the execution of pex_ep_event_pex_rst_assert() i.e., when the host has asserted PERST#. But quickly after this step, refclk will also be disabled by the host. All of the tegra194 endpoint SoCs supported as of now depend on the refclk from the host for keeping the controller operational. Due to this limitation, any access to the hardware registers in the absence of refclk will result in a whole endpoint crash. Unfortunately, most of the controller cleanups require accessing the hardware registers (like eDMA cleanup performed in dw_pcie_ep_cleanup(), etc...). So these cleanup functions can cause the crash in the endpoint SoC once host asserts PERST#. One way to address this issue is by generating the refclk in the endpoint itself and not depending on the host. But that is not always possible as some of the endpoint designs do require the endpoint to consume refclk from the host. Thus, fix this crash by moving the controller cleanups to the start of the pex_ep_event_pex_rst_deassert() function. This function is called whenever the host has deasserted PERST# and it is guaranteed that the refclk would be active at this point. So at the start of this function (after enabling resources) the controller cleanup can be performed. Once finished, rest of the code execution for PERST# deassert can continue as usual. Fixes: 473b2cf9c4d1 ("PCI: endpoint: Introduce 'epc_deinit' event and notify the EPF drivers") Fixes: 570d7715eed8 ("PCI: dwc: ep: Introduce dw_pcie_ep_cleanup() API for drivers supporting PERST#") Link: https://lore.kernel.org/r/20240817-pci-qcom-ep-cleanup-v1-2-d6b958226559@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Cc: Jonathan Hunter Cc: Thierry Reding Cc: Vidya Sagar Cc: linux-tegra@vger.kernel.org --- drivers/pci/controller/dwc/pcie-tegra194.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index c1394f2ab63f..ced3b7e7bdad 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -1704,9 +1704,6 @@ static void pex_ep_event_pex_rst_assert(struct tegra_pcie_dw *pcie) if (ret) dev_err(pcie->dev, "Failed to go Detect state: %d\n", ret); - pci_epc_deinit_notify(pcie->pci.ep.epc); - dw_pcie_ep_cleanup(&pcie->pci.ep); - reset_control_assert(pcie->core_rst); tegra_pcie_disable_phy(pcie); @@ -1785,6 +1782,10 @@ static void pex_ep_event_pex_rst_deassert(struct tegra_pcie_dw *pcie) goto fail_phy; } + /* Perform cleanup that requires refclk */ + pci_epc_deinit_notify(pcie->pci.ep.epc); + dw_pcie_ep_cleanup(&pcie->pci.ep); + /* Clear any stale interrupt statuses */ appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L0); appl_writel(pcie, 0xFFFFFFFF, APPL_INTR_STATUS_L1_0_0); From e0662dae178ddb2d890df0b6f622aa7f6f921791 Mon Sep 17 00:00:00 2001 From: devi priya Date: Thu, 1 Aug 2024 11:18:00 +0530 Subject: [PATCH 051/127] dt-bindings: PCI: qcom: Document the IPQ9574 PCIe controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the PCIe controller on IPQ9574 platform. Link: https://lore.kernel.org/r/20240801054803.3015572-2-quic_srichara@quicinc.com Signed-off-by: devi priya Signed-off-by: Sricharan Ramabadhran Signed-off-by: Krzysztof Wilczyński Reviewed-by: Krzysztof Kozlowski Reviewed-by: Manivannan Sadhasivam --- .../devicetree/bindings/pci/qcom,pcie.yaml | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml index ffabbac57fc1..bd87f6b49d68 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml @@ -26,6 +26,7 @@ properties: - qcom,pcie-ipq8064-v2 - qcom,pcie-ipq8074 - qcom,pcie-ipq8074-gen3 + - qcom,pcie-ipq9574 - qcom,pcie-msm8996 - qcom,pcie-qcs404 - qcom,pcie-sdm845 @@ -164,6 +165,7 @@ allOf: enum: - qcom,pcie-ipq6018 - qcom,pcie-ipq8074-gen3 + - qcom,pcie-ipq9574 then: properties: reg: @@ -400,6 +402,53 @@ allOf: - const: axi_m_sticky # AXI Master Sticky reset - const: axi_s_sticky # AXI Slave Sticky reset + - if: + properties: + compatible: + contains: + enum: + - qcom,pcie-ipq9574 + then: + properties: + clocks: + minItems: 6 + maxItems: 6 + clock-names: + items: + - const: axi_m # AXI Master clock + - const: axi_s # AXI Slave clock + - const: axi_bridge + - const: rchng + - const: ahb + - const: aux + + resets: + minItems: 8 + maxItems: 8 + reset-names: + items: + - const: pipe # PIPE reset + - const: sticky # Core Sticky reset + - const: axi_s_sticky # AXI Slave Sticky reset + - const: axi_s # AXI Slave reset + - const: axi_m_sticky # AXI Master Sticky reset + - const: axi_m # AXI Master reset + - const: aux # AUX Reset + - const: ahb # AHB Reset + + interrupts: + minItems: 8 + interrupt-names: + items: + - const: msi0 + - const: msi1 + - const: msi2 + - const: msi3 + - const: msi4 + - const: msi5 + - const: msi6 + - const: msi7 + - if: properties: compatible: @@ -510,6 +559,7 @@ allOf: - qcom,pcie-ipq8064v2 - qcom,pcie-ipq8074 - qcom,pcie-ipq8074-gen3 + - qcom,pcie-ipq9574 - qcom,pcie-qcs404 then: required: From 08e835268c35e851b308f326357224248cfb445b Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Fri, 24 May 2024 14:53:49 +0530 Subject: [PATCH 052/127] PCI: j721e: Add PCIe support for J722S SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TI's J722S SoC has one instance of PCIe namely PCIe0 which is a Gen3 single lane PCIe controller. Add support for the "ti,j722s-pcie-host" compatible specific to J722S SoC. Link: https://lore.kernel.org/r/20240524092349.158443-1-s-vadapalli@ti.com Signed-off-by: Siddharth Vadapalli Signed-off-by: Krzysztof Wilczyński --- drivers/pci/controller/cadence/pci-j721e.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index 284f2e0e4d26..c9f3103d68e1 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -386,6 +386,13 @@ static const struct j721e_pcie_data j784s4_pcie_ep_data = { .max_lanes = 4, }; +static const struct j721e_pcie_data j722s_pcie_rc_data = { + .mode = PCI_MODE_RC, + .linkdown_irq_regfield = J7200_LINK_DOWN, + .byte_access_allowed = true, + .max_lanes = 1, +}; + static const struct of_device_id of_j721e_pcie_match[] = { { .compatible = "ti,j721e-pcie-host", @@ -419,6 +426,10 @@ static const struct of_device_id of_j721e_pcie_match[] = { .compatible = "ti,j784s4-pcie-ep", .data = &j784s4_pcie_ep_data, }, + { + .compatible = "ti,j722s-pcie-host", + .data = &j722s_pcie_rc_data, + }, {}, }; From a63b74f2e35be3829f256922037ae5cee6bb844a Mon Sep 17 00:00:00 2001 From: devi priya Date: Thu, 1 Aug 2024 11:18:03 +0530 Subject: [PATCH 053/127] PCI: qcom: Add support for IPQ9574 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new IPQ9574 platform which is based on the Qcom IP rev. 1.27.0 and Synopsys IP rev. 5.80a. The platform itself has four PCIe Gen3 controllers: two single-lane and two dual-lane, all are based on Synopsys IP rev. 5.70a. As such, reuse all the members of 'ops_2_9_0'. Link: https://lore.kernel.org/r/20240801054803.3015572-5-quic_srichara@quicinc.com Co-developed-by: Anusha Rao Signed-off-by: Anusha Rao Signed-off-by: devi priya Signed-off-by: Sricharan Ramabadhran [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Dmitry Baryshkov Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 2b33d03ed054..07286fb9adc1 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1830,6 +1830,7 @@ static const struct of_device_id qcom_pcie_match[] = { { .compatible = "qcom,pcie-ipq8064-v2", .data = &cfg_2_1_0 }, { .compatible = "qcom,pcie-ipq8074", .data = &cfg_2_3_3 }, { .compatible = "qcom,pcie-ipq8074-gen3", .data = &cfg_2_9_0 }, + { .compatible = "qcom,pcie-ipq9574", .data = &cfg_2_9_0 }, { .compatible = "qcom,pcie-msm8996", .data = &cfg_2_3_2 }, { .compatible = "qcom,pcie-qcs404", .data = &cfg_2_4_0 }, { .compatible = "qcom,pcie-sa8540p", .data = &cfg_sc8280xp }, From 39a06b55df6c269315fc66c53804f8eb26502c12 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 31 Oct 2024 20:08:58 -0700 Subject: [PATCH 054/127] dt-bindings: PCI: qcom: Move OPP table to qcom,pcie-common.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OPP table is a generic property that is also required by other Qcom platforms. Hence move this property to qcom,pcie-common.yaml so that PCIe on other Qcom platforms is able to adjust power domain performance state and ICC peak bandwidth according to the given PCIe generation speed and link width. Link: https://lore.kernel.org/r/20241101030902.579789-2-quic_qianyu@quicinc.com Signed-off-by: Qiang Yu [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Krzysztof Kozlowski Reviewed-by: Manivannan Sadhasivam --- Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml | 4 ++++ Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml index e18900c41576..0480c58f7d99 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml @@ -81,6 +81,10 @@ properties: vddpe-3v3-supply: description: PCIe endpoint power supply + operating-points-v2: true + opp-table: + type: object + required: - reg - reg-names diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml index 46bd59eefadb..6e0a6d8f0ed0 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml @@ -70,10 +70,6 @@ properties: - const: msi7 - const: global - operating-points-v2: true - opp-table: - type: object - resets: maxItems: 1 From 66dc205962c5a2c03b9861cd9ccc39178b49a003 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 31 Oct 2024 20:08:59 -0700 Subject: [PATCH 055/127] dt-bindings: PCI: qcom,pcie-x1e80100: Add 'global' interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qcom PCIe RC controllers are capable of generating 'global' SPI interrupt to the host CPU. This interrupt can be used by the device driver to handle PCIe link specific events such as Link up and Link down, which give the driver a chance to start bus enumeration on its own when link is up and initiate link training if link goes to a bad state. The PCIe driver can still work without this interrupt but it will provide a nice user experience when device gets plugged and removed. Hence, document it in the binding along with the existing MSI interrupts. Global interrupt is parsed as optional in driver, so adding it in bindings will not break the ABI. Link: https://lore.kernel.org/r/20241101030902.579789-3-quic_qianyu@quicinc.com Signed-off-by: Qiang Yu Signed-off-by: Krzysztof Wilczyński Reviewed-by: Krzysztof Kozlowski --- .../devicetree/bindings/pci/qcom,pcie-x1e80100.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml index a9db0a231563..257068a18264 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml @@ -47,9 +47,10 @@ properties: interrupts: minItems: 8 - maxItems: 8 + maxItems: 9 interrupt-names: + minItems: 8 items: - const: msi0 - const: msi1 @@ -59,6 +60,7 @@ properties: - const: msi5 - const: msi6 - const: msi7 + - const: global resets: minItems: 1 @@ -130,9 +132,10 @@ examples: , , , - ; + , + ; interrupt-names = "msi0", "msi1", "msi2", "msi3", - "msi4", "msi5", "msi6", "msi7"; + "msi4", "msi5", "msi6", "msi7", "global"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ From 22a9120479a40a56c13c5e473a0100fad2e017c0 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Mon, 4 Nov 2024 13:14:20 +0530 Subject: [PATCH 056/127] PCI: j721e: Deassert PERST# after a delay of PCIE_T_PVPERL_MS milliseconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Section 2.2 of the PCI Express Card Electromechanical Specification (Revision 5.1), in order to ensure that the power and the reference clock are stable, PERST# has to be deasserted after a delay of 100 milliseconds (TPVPERL). Currently, it is being assumed that the power is already stable, which is not necessarily true. Hence, change the delay to PCIE_T_PVPERL_MS to guarantee that power and reference clock are stable. Fixes: f3e25911a430 ("PCI: j721e: Add TI J721E PCIe driver") Fixes: f96b69713733 ("PCI: j721e: Use T_PERST_CLK_US macro") Link: https://lore.kernel.org/r/20241104074420.1862932-1-s-vadapalli@ti.com Signed-off-by: Siddharth Vadapalli Signed-off-by: Krzysztof Wilczyński --- drivers/pci/controller/cadence/pci-j721e.c | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index c9f3103d68e1..dc10d51c1352 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -583,15 +583,14 @@ static int j721e_pcie_probe(struct platform_device *pdev) pcie->refclk = clk; /* - * The "Power Sequencing and Reset Signal Timings" table of the - * PCI Express Card Electromechanical Specification, Revision - * 5.1, Section 2.9.2, Symbol "T_PERST-CLK", indicates PERST# - * should be deasserted after minimum of 100us once REFCLK is - * stable. The REFCLK to the connector in RC mode is selected - * while enabling the PHY. So deassert PERST# after 100 us. + * Section 2.2 of the PCI Express Card Electromechanical + * Specification (Revision 5.1) mandates that the deassertion + * of the PERST# signal should be delayed by 100 ms (TPVPERL). + * This shall ensure that the power and the reference clock + * are stable. */ if (gpiod) { - fsleep(PCIE_T_PERST_CLK_US); + msleep(PCIE_T_PVPERL_MS); gpiod_set_value_cansleep(gpiod, 1); } @@ -682,15 +681,14 @@ static int j721e_pcie_resume_noirq(struct device *dev) return ret; /* - * The "Power Sequencing and Reset Signal Timings" table of the - * PCI Express Card Electromechanical Specification, Revision - * 5.1, Section 2.9.2, Symbol "T_PERST-CLK", indicates PERST# - * should be deasserted after minimum of 100us once REFCLK is - * stable. The REFCLK to the connector in RC mode is selected - * while enabling the PHY. So deassert PERST# after 100 us. + * Section 2.2 of the PCI Express Card Electromechanical + * Specification (Revision 5.1) mandates that the deassertion + * of the PERST# signal should be delayed by 100 ms (TPVPERL). + * This shall ensure that the power and the reference clock + * are stable. */ if (pcie->reset_gpio) { - fsleep(PCIE_T_PERST_CLK_US); + msleep(PCIE_T_PVPERL_MS); gpiod_set_value_cansleep(pcie->reset_gpio, 1); } From 1362af92bcf5f18cdb25c095e3ba5155b823f570 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 31 Oct 2024 20:09:00 -0700 Subject: [PATCH 057/127] PCI: qcom: Remove BDF2SID mapping config for SC8280X family SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SC8280XP PCIe devicetree nodes do not specify an 'iommu-map' so the config_sid() callback is effectively a no-op. Hence introduce a new ops struct, namely ops_1_21_0 which is same as ops_1_9_0 except that it doesn't have config_sid() callback to clean it up. Link: https://lore.kernel.org/r/20241101030902.579789-4-quic_qianyu@quicinc.com Signed-off-by: Qiang Yu [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Dmitry Baryshkov Reviewed-by: Johan Hovold Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-qcom.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 07286fb9adc1..4448ca54bdda 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1365,6 +1365,16 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .config_sid = qcom_pcie_config_sid_1_9_0, }; +/* Qcom IP rev.: 1.21.0 Synopsys IP rev.: 5.60a */ +static const struct qcom_pcie_ops ops_1_21_0 = { + .get_resources = qcom_pcie_get_resources_2_7_0, + .init = qcom_pcie_init_2_7_0, + .post_init = qcom_pcie_post_init_2_7_0, + .host_post_init = qcom_pcie_host_post_init_2_7_0, + .deinit = qcom_pcie_deinit_2_7_0, + .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, +}; + /* Qcom IP rev.: 2.9.0 Synopsys IP rev.: 5.00a */ static const struct qcom_pcie_ops ops_2_9_0 = { .get_resources = qcom_pcie_get_resources_2_9_0, @@ -1412,7 +1422,7 @@ static const struct qcom_pcie_cfg cfg_2_9_0 = { }; static const struct qcom_pcie_cfg cfg_sc8280xp = { - .ops = &ops_1_9_0, + .ops = &ops_1_21_0, .no_l0s = true, }; From fba6045161d686adc102b6ef71b2fd1e5f90a616 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 31 Oct 2024 20:09:01 -0700 Subject: [PATCH 058/127] PCI: qcom: Disable ASPM L0s for X1E80100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the cfg_1_9_0 which is being used for X1E80100 doesn't disable ASPM L0s. However, hardware team recommends to disable L0s as the PHY init sequence is not tuned support L0s. Hence reuse cfg_sc8280xp for X1E80100. Note that the config_sid() callback is not present in cfg_sc8280xp, don't concern about this because config_sid() callback is originally a no-op for X1E80100. Fixes: 6d0c39324c5f ("PCI: qcom: Add X1E80100 PCIe support") Link: https://lore.kernel.org/r/20241101030902.579789-5-quic_qianyu@quicinc.com Signed-off-by: Qiang Yu Signed-off-by: Krzysztof Wilczyński Reviewed-by: Dmitry Baryshkov Reviewed-by: Johan Hovold Reviewed-by: Manivannan Sadhasivam Cc: # 6.9 --- drivers/pci/controller/dwc/pcie-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 4448ca54bdda..dc102d8bd58c 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1856,7 +1856,7 @@ static const struct of_device_id qcom_pcie_match[] = { { .compatible = "qcom,pcie-sm8450-pcie0", .data = &cfg_1_9_0 }, { .compatible = "qcom,pcie-sm8450-pcie1", .data = &cfg_1_9_0 }, { .compatible = "qcom,pcie-sm8550", .data = &cfg_1_9_0 }, - { .compatible = "qcom,pcie-x1e80100", .data = &cfg_1_9_0 }, + { .compatible = "qcom,pcie-x1e80100", .data = &cfg_sc8280xp }, { } }; From ade7da14954a5d1003ceb316a230189c445ba357 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 4 Nov 2024 12:49:34 +0100 Subject: [PATCH 059/127] PCI: mediatek-gen3: Add support for setting max-link-speed limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for respecting the max-link-speed devicetree property, forcing a maximum speed (Gen) for a PCI-Express port. Since the MediaTek PCIe Gen3 controllers also expose the maximum supported link speed in the PCIE_BASE_CFG register, if property max-link-speed is specified in devicetree, validate it against the controller capabilities and proceed setting the limitations only if the wanted Gen is lower than the maximum one that is supported by the controller itself (otherwise it makes no sense!). Link: https://lore.kernel.org/r/20241104114935.172908-2-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno [kwilczynski: change dev_dbg() to dev_info() and update message wording] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Fei Shao Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-mediatek-gen3.c | 55 ++++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 66ce4b5d309b..1d208d58e94a 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -28,7 +28,11 @@ #include "../pci.h" +#define PCIE_BASE_CFG_REG 0x14 +#define PCIE_BASE_CFG_SPEED GENMASK(15, 8) + #define PCIE_SETTING_REG 0x80 +#define PCIE_SETTING_GEN_SUPPORT GENMASK(14, 12) #define PCIE_PCI_IDS_1 0x9c #define PCI_CLASS(class) (class << 8) #define PCIE_RC_MODE BIT(0) @@ -125,6 +129,9 @@ struct mtk_gen3_pcie; +#define PCIE_CONF_LINK2_CTL_STS (PCIE_CFG_OFFSET_ADDR + 0xb0) +#define PCIE_CONF_LINK2_LCR2_LINK_SPEED GENMASK(3, 0) + /** * struct mtk_gen3_pcie_pdata - differentiate between host generations * @power_up: pcie power_up callback @@ -160,6 +167,7 @@ struct mtk_msi_set { * @phy: PHY controller block * @clks: PCIe clocks * @num_clks: PCIe clocks count for this port + * @max_link_speed: Maximum link speed (PCIe Gen) for this port * @irq: PCIe controller interrupt number * @saved_irq_state: IRQ enable state saved at suspend time * @irq_lock: lock protecting IRQ register access @@ -180,6 +188,7 @@ struct mtk_gen3_pcie { struct phy *phy; struct clk_bulk_data *clks; int num_clks; + u8 max_link_speed; int irq; u32 saved_irq_state; @@ -381,11 +390,27 @@ static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie) int err; u32 val; - /* Set as RC mode */ + /* Set as RC mode and set controller PCIe Gen speed restriction, if any */ val = readl_relaxed(pcie->base + PCIE_SETTING_REG); val |= PCIE_RC_MODE; + if (pcie->max_link_speed) { + val &= ~PCIE_SETTING_GEN_SUPPORT; + + /* Can enable link speed support only from Gen2 onwards */ + if (pcie->max_link_speed >= 2) + val |= FIELD_PREP(PCIE_SETTING_GEN_SUPPORT, + GENMASK(pcie->max_link_speed - 2, 0)); + } writel_relaxed(val, pcie->base + PCIE_SETTING_REG); + /* Set Link Control 2 (LNKCTL2) speed restriction, if any */ + if (pcie->max_link_speed) { + val = readl_relaxed(pcie->base + PCIE_CONF_LINK2_CTL_STS); + val &= ~PCIE_CONF_LINK2_LCR2_LINK_SPEED; + val |= FIELD_PREP(PCIE_CONF_LINK2_LCR2_LINK_SPEED, pcie->max_link_speed); + writel_relaxed(val, pcie->base + PCIE_CONF_LINK2_CTL_STS); + } + /* Set class code */ val = readl_relaxed(pcie->base + PCIE_PCI_IDS_1); val &= ~GENMASK(31, 8); @@ -1004,9 +1029,21 @@ static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie) reset_control_bulk_assert(pcie->soc->phy_resets.num_resets, pcie->phy_resets); } +static int mtk_pcie_get_controller_max_link_speed(struct mtk_gen3_pcie *pcie) +{ + u32 val; + int ret; + + val = readl_relaxed(pcie->base + PCIE_BASE_CFG_REG); + val = FIELD_GET(PCIE_BASE_CFG_SPEED, val); + ret = fls(val); + + return ret > 0 ? ret : -EINVAL; +} + static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie) { - int err; + int err, max_speed; err = mtk_pcie_parse_port(pcie); if (err) @@ -1031,6 +1068,20 @@ static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie) if (err) return err; + err = of_pci_get_max_link_speed(pcie->dev->of_node); + if (err) { + /* Get the maximum speed supported by the controller */ + max_speed = mtk_pcie_get_controller_max_link_speed(pcie); + + /* Set max_link_speed only if the controller supports it */ + if (max_speed >= 0 && max_speed <= err) { + pcie->max_link_speed = err; + dev_info(pcie->dev, + "maximum controller link speed Gen%d, overriding to Gen%u", + max_speed, pcie->max_link_speed); + } + } + /* Try link up */ err = mtk_pcie_startup_port(pcie); if (err) From e73ea1c2d4d8f7ba5daaf7aa51171f63cf79bcd8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:46 +0900 Subject: [PATCH 060/127] PCI: dwc: endpoint: Implement the pci_epc_ops::align_addr() operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function dw_pcie_prog_outbound_atu() used to program outbound ATU entries for mapping RC PCI addresses to local CPU addresses does not allow PCI addresses that are not aligned to the value of region_align of struct dw_pcie. This value is determined from the iATU hardware registers during probing of the iATU (done by dw_pcie_iatu_detect()). This value is thus valid for all DWC PCIe controllers, and valid regardless of the hardware configuration used when synthesizing the DWC PCIe controller. Implement the ->align_addr() endpoint controller operation to allow this mapping alignment to be transparently handled by endpoint function drivers through the function pci_epc_mem_map(). Link: https://lore.kernel.org/linux-pci/20241012113246.95634-7-dlemoal@kernel.org Link: https://lore.kernel.org/linux-pci/20241015090712.112674-1-dlemoal@kernel.org Link: https://lore.kernel.org/linux-pci/20241017132052.4014605-5-cassel@kernel.org Co-developed-by: Niklas Cassel Signed-off-by: Damien Le Moal [mani: squashed the patch that changed phy_addr_t to u64] Signed-off-by: Manivannan Sadhasivam [kwilczynski: squashed patch that updated the pci_size variable] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware-ep.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 43ba5c6738df..20f67fd85e83 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -268,6 +268,20 @@ static int dw_pcie_find_index(struct dw_pcie_ep *ep, phys_addr_t addr, return -EINVAL; } +static u64 dw_pcie_ep_align_addr(struct pci_epc *epc, u64 pci_addr, + size_t *pci_size, size_t *offset) +{ + struct dw_pcie_ep *ep = epc_get_drvdata(epc); + struct dw_pcie *pci = to_dw_pcie_from_ep(ep); + u64 mask = pci->region_align - 1; + size_t ofst = pci_addr & mask; + + *pci_size = ALIGN(ofst + *pci_size, epc->mem->window.page_size); + *offset = ofst; + + return pci_addr & ~mask; +} + static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr) { @@ -444,6 +458,7 @@ static const struct pci_epc_ops epc_ops = { .write_header = dw_pcie_ep_write_header, .set_bar = dw_pcie_ep_set_bar, .clear_bar = dw_pcie_ep_clear_bar, + .align_addr = dw_pcie_ep_align_addr, .map_addr = dw_pcie_ep_map_addr, .unmap_addr = dw_pcie_ep_unmap_addr, .set_msi = dw_pcie_ep_set_msi, From 97110d42680e692020154f9aa89298c038196055 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:06:48 +0900 Subject: [PATCH 061/127] PCI: endpoint: test: Synchronously cancel command handler work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use cancel_delayed_work_sync() in pci_epf_test_epc_deinit() to ensure that the command handler is really stopped before proceeding with DMA and BAR cleanup. The same change is also done in pci_epf_test_link_down() to ensure that the link down handling completes with the command handler fully stopped. Link: https://lore.kernel.org/r/20241017010648.189889-1-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Reviewed-by: Frank Li --- drivers/pci/endpoint/functions/pci-epf-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index a73bc0771d35..c2e7f67e5107 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -800,7 +800,7 @@ static void pci_epf_test_epc_deinit(struct pci_epf *epf) { struct pci_epf_test *epf_test = epf_get_drvdata(epf); - cancel_delayed_work(&epf_test->cmd_handler); + cancel_delayed_work_sync(&epf_test->cmd_handler); pci_epf_test_clean_dma_chan(epf_test); pci_epf_test_clear_bar(epf); } @@ -931,7 +931,7 @@ static void pci_epf_test_unbind(struct pci_epf *epf) struct pci_epf_test *epf_test = epf_get_drvdata(epf); struct pci_epc *epc = epf->epc; - cancel_delayed_work(&epf_test->cmd_handler); + cancel_delayed_work_sync(&epf_test->cmd_handler); if (epc->init_complete) { pci_epf_test_clean_dma_chan(epf_test); pci_epf_test_clear_bar(epf); From 718c157a0b941fe2d3b4cca689148775e6ea2330 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 5 Nov 2024 15:32:16 -0600 Subject: [PATCH 062/127] dt-bindings: PCI: snps,dw-pcie: Drop "#interrupt-cells" from example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "#interrupt-cells" is not valid without a corresponding "interrupt-map" or "interrupt-controller" property. As the example has neither, drop "#interrupt-cells". This fixes a dtc interrupt_provider warning. Link: https://lore.kernel.org/r/20241105213217.442809-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) Signed-off-by: Krzysztof Wilczyński --- Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 548f59d76ef2..205326fb2d75 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -230,7 +230,6 @@ examples: interrupts = <25>, <24>; interrupt-names = "msi", "hp"; - #interrupt-cells = <1>; reset-gpios = <&port0 0 1>; From b609a15e7969d6a50d65067ee783b5d9365b04dd Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 4 Nov 2024 12:49:35 +0100 Subject: [PATCH 063/127] PCI: mediatek-gen3: Add support for restricting link width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for restricting the port's link width by specifying the num-lanes devicetree property in the PCIe node. The setting is done in the GEN_SETTINGS register (in the driver named as PCIE_SETTING_REG), where each set bit in [11:8] activates a set of lanes (from bits 11 to 8 respectively, x16/x8/x4/x2). Link: https://lore.kernel.org/r/20241104114935.172908-3-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Krzysztof Wilczyński Reviewed-by: Fei Shao Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-mediatek-gen3.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 1d208d58e94a..bbfbfe5752e6 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -32,6 +32,7 @@ #define PCIE_BASE_CFG_SPEED GENMASK(15, 8) #define PCIE_SETTING_REG 0x80 +#define PCIE_SETTING_LINK_WIDTH GENMASK(11, 8) #define PCIE_SETTING_GEN_SUPPORT GENMASK(14, 12) #define PCIE_PCI_IDS_1 0x9c #define PCI_CLASS(class) (class << 8) @@ -168,6 +169,7 @@ struct mtk_msi_set { * @clks: PCIe clocks * @num_clks: PCIe clocks count for this port * @max_link_speed: Maximum link speed (PCIe Gen) for this port + * @num_lanes: Number of PCIe lanes for this port * @irq: PCIe controller interrupt number * @saved_irq_state: IRQ enable state saved at suspend time * @irq_lock: lock protecting IRQ register access @@ -189,6 +191,7 @@ struct mtk_gen3_pcie { struct clk_bulk_data *clks; int num_clks; u8 max_link_speed; + u8 num_lanes; int irq; u32 saved_irq_state; @@ -401,6 +404,14 @@ static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie) val |= FIELD_PREP(PCIE_SETTING_GEN_SUPPORT, GENMASK(pcie->max_link_speed - 2, 0)); } + if (pcie->num_lanes) { + val &= ~PCIE_SETTING_LINK_WIDTH; + + /* Zero means one lane, each bit activates x2/x4/x8/x16 */ + if (pcie->num_lanes > 1) + val |= FIELD_PREP(PCIE_SETTING_LINK_WIDTH, + GENMASK(fls(pcie->num_lanes >> 2), 0)); + }; writel_relaxed(val, pcie->base + PCIE_SETTING_REG); /* Set Link Control 2 (LNKCTL2) speed restriction, if any */ @@ -838,6 +849,7 @@ static int mtk_pcie_parse_port(struct mtk_gen3_pcie *pcie) struct device *dev = pcie->dev; struct platform_device *pdev = to_platform_device(dev); struct resource *regs; + u32 num_lanes; regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcie-mac"); if (!regs) @@ -883,6 +895,14 @@ static int mtk_pcie_parse_port(struct mtk_gen3_pcie *pcie) return pcie->num_clks; } + ret = of_property_read_u32(dev->of_node, "num-lanes", &num_lanes); + if (ret == 0) { + if (num_lanes == 0 || num_lanes > 16 || (num_lanes != 1 && num_lanes % 2)) + dev_warn(dev, "invalid num-lanes, using controller defaults\n"); + else + pcie->num_lanes = num_lanes; + } + return 0; } From e1714f3b1f4d2af5df26e4a0e4a70ef64b15e3ee Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 1 Oct 2024 16:34:40 +0800 Subject: [PATCH 064/127] PCI/ASPM: Add notes about enabling PCI-PM L1SS to pci_enable_link_state(_locked) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to "PCIe r6.0, sec 5.5.4", add note about D0 requirement in pci_enable_link_state() kernel-doc. Link: https://lore.kernel.org/r/20241001083438.10070-6-jhp@endlessos.org Signed-off-by: Jian-Hong Pan Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas --- drivers/pci/pcie/aspm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e943691bc931..28567d457613 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1450,6 +1450,9 @@ static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) * touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable */ @@ -1466,6 +1469,9 @@ EXPORT_SYMBOL(pci_enable_link_state); * can't touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable * From 0a726f542d7c8cc0f9c5ed7df5a4bd4b59ac21b3 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 30 Oct 2024 11:32:45 +0100 Subject: [PATCH 065/127] PCI: imx6: Fix suspend/resume support on i.MX6QDL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The suspend/resume functionality is currently broken on the i.MX6QDL platform, as documented in the NXP errata (ERR005723): https://www.nxp.com/docs/en/errata/IMX6DQCE.pdf This patch addresses the issue by sharing most of the suspend/resume sequences used by other i.MX devices, while avoiding modifications to critical registers that disrupt the PCIe functionality. It targets the same problem as the following downstream commit: https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995 Unlike the downstream commit, this patch also resets the connected PCIe device if possible. Without this reset, certain drivers, such as ath10k or iwlwifi, will crash on resume. The device reset is also done by the driver on other i.MX platforms, making this patch consistent with existing practices. Upon resuming, the kernel will hang and display an error. Here's an example of the error encountered with the ath10k driver: ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible Unhandled fault: imprecise external abort (0x1406) at 0x0106f944 Without this patch, suspend/resume will fail on i.MX6QDL devices if a PCIe device is connected. Link: https://lore.kernel.org/r/20241030103250.83640-1-eichest@gmail.com Signed-off-by: Stefan Eichenberger [kwilczynski: commit log, added tag for stable releases] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Acked-by: Richard Zhu Cc: stable@vger.kernel.org --- drivers/pci/controller/dwc/pci-imx6.c | 57 +++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 808d1f105417..c8d5c90aa4d4 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -82,6 +82,11 @@ enum imx_pcie_variants { #define IMX_PCIE_FLAG_HAS_SERDES BIT(6) #define IMX_PCIE_FLAG_SUPPORT_64BIT BIT(7) #define IMX_PCIE_FLAG_CPU_ADDR_FIXUP BIT(8) +/* + * Because of ERR005723 (PCIe does not support L2 power down) we need to + * workaround suspend resume on some devices which are affected by this errata. + */ +#define IMX_PCIE_FLAG_BROKEN_SUSPEND BIT(9) #define imx_check_flag(pci, val) (pci->drvdata->flags & val) @@ -1237,9 +1242,19 @@ static int imx_pcie_suspend_noirq(struct device *dev) return 0; imx_pcie_msi_save_restore(imx_pcie, true); - imx_pcie_pm_turnoff(imx_pcie); - imx_pcie_stop_link(imx_pcie->pci); - imx_pcie_host_exit(pp); + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_BROKEN_SUSPEND)) { + /* + * The minimum for a workaround would be to set PERST# and to + * set the PCIE_TEST_PD flag. However, we can also disable the + * clock which saves some power. + */ + imx_pcie_assert_core_reset(imx_pcie); + imx_pcie->drvdata->enable_ref_clk(imx_pcie, false); + } else { + imx_pcie_pm_turnoff(imx_pcie); + imx_pcie_stop_link(imx_pcie->pci); + imx_pcie_host_exit(pp); + } return 0; } @@ -1253,14 +1268,32 @@ static int imx_pcie_resume_noirq(struct device *dev) if (!(imx_pcie->drvdata->flags & IMX_PCIE_FLAG_SUPPORTS_SUSPEND)) return 0; - ret = imx_pcie_host_init(pp); - if (ret) - return ret; - imx_pcie_msi_save_restore(imx_pcie, false); - dw_pcie_setup_rc(pp); + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_BROKEN_SUSPEND)) { + ret = imx_pcie->drvdata->enable_ref_clk(imx_pcie, true); + if (ret) + return ret; + ret = imx_pcie_deassert_core_reset(imx_pcie); + if (ret) + return ret; + /* + * Using PCIE_TEST_PD seems to disable MSI and powers down the + * root complex. This is why we have to setup the rc again and + * why we have to restore the MSI register. + */ + ret = dw_pcie_setup_rc(&imx_pcie->pci->pp); + if (ret) + return ret; + imx_pcie_msi_save_restore(imx_pcie, false); + } else { + ret = imx_pcie_host_init(pp); + if (ret) + return ret; + imx_pcie_msi_save_restore(imx_pcie, false); + dw_pcie_setup_rc(pp); - if (imx_pcie->link_is_up) - imx_pcie_start_link(imx_pcie->pci); + if (imx_pcie->link_is_up) + imx_pcie_start_link(imx_pcie->pci); + } return 0; } @@ -1485,7 +1518,9 @@ static const struct imx_pcie_drvdata drvdata[] = { [IMX6Q] = { .variant = IMX6Q, .flags = IMX_PCIE_FLAG_IMX_PHY | - IMX_PCIE_FLAG_IMX_SPEED_CHANGE, + IMX_PCIE_FLAG_IMX_SPEED_CHANGE | + IMX_PCIE_FLAG_BROKEN_SUSPEND | + IMX_PCIE_FLAG_SUPPORTS_SUSPEND, .dbi_length = 0x200, .gpr = "fsl,imx6q-iomuxc-gpr", .clk_names = imx6q_clks, From b727484cace4be22be9321cc0bc9487648ba447b Mon Sep 17 00:00:00 2001 From: Nirmal Patel Date: Fri, 11 Oct 2024 10:56:57 -0700 Subject: [PATCH 066/127] PCI: vmd: Add DID 8086:B06F and 8086:B60B for Intel client SKUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for this VMD device which supports the bus restriction mode. The feature that turns off vector 0 for MSI-X remapping is also enabled. Link: https://lore.kernel.org/r/20241011175657.249948-1-nirmal.patel@linux.intel.com Signed-off-by: Nirmal Patel Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/vmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 264a180403a0..8a036d6b7d49 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -1100,6 +1100,10 @@ static const struct pci_device_id vmd_ids[] = { .driver_data = VMD_FEATS_CLIENT,}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B), .driver_data = VMD_FEATS_CLIENT,}, + {PCI_VDEVICE(INTEL, 0xb60b), + .driver_data = VMD_FEATS_CLIENT,}, + {PCI_VDEVICE(INTEL, 0xb06f), + .driver_data = VMD_FEATS_CLIENT,}, {0,} }; MODULE_DEVICE_TABLE(pci, vmd_ids); From d66041063192497a4a97d21dbf86b79a03a7f4fb Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 1 Oct 2024 16:34:38 +0800 Subject: [PATCH 067/127] PCI: vmd: Set devices to D0 before enabling PM L1 Substates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remapped PCIe Root Port and the child device have PM L1 Substates capability, but they are disabled originally. Here is a failed example on ASUS B1400CEAE: Capabilities: [900 v1] L1 PM Substates L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1- ASPM_L1.2+ ASPM_L1.1- L1_PM_Substates+ PortCommonModeRestoreTime=32us PortTPowerOnTime=10us L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2+ ASPM_L1.1- T_CommonMode=0us LTR1.2_Threshold=101376ns L1SubCtl2: T_PwrOn=50us Enable PCI-PM L1 PM Substates for devices below VMD while they are in D0 (see PCIe r6.0, sec 5.5.4). Link: https://lore.kernel.org/r/20241001083438.10070-4-jhp@endlessos.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=218394 Signed-off-by: Jian-Hong Pan Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/controller/vmd.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 264a180403a0..11870d1fc818 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -740,11 +740,9 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) if (!(features & VMD_FEAT_BIOS_PM_QUIRK)) return 0; - pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR); if (!pos) - return 0; + goto out_state_change; /* * Skip if the max snoop LTR is non-zero, indicating BIOS has set it @@ -752,7 +750,7 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) */ pci_read_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, <r_reg); if (!!(ltr_reg & (PCI_LTR_VALUE_MASK | PCI_LTR_SCALE_MASK))) - return 0; + goto out_state_change; /* * Set the default values to the maximum required by the platform to @@ -764,6 +762,13 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) pci_write_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, ltr_reg); pci_info(pdev, "VMD: Default LTR value set by driver\n"); +out_state_change: + /* + * Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + */ + pci_set_power_state_locked(pdev, PCI_D0); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); return 0; } From 3b96b895127b7c0aed63d82c974b46340e8466c1 Mon Sep 17 00:00:00 2001 From: Esther Shimanovich Date: Tue, 10 Sep 2024 17:57:45 +0000 Subject: [PATCH 068/127] PCI: Detect and trust built-in Thunderbolt chips Some computers with CPUs that lack Thunderbolt features use discrete Thunderbolt chips to add Thunderbolt functionality. These Thunderbolt chips are located within the chassis; between the Root Port labeled ExternalFacingPort and the USB-C port. These Thunderbolt PCIe devices should be labeled as fixed and trusted, as they are built into the computer. Otherwise, security policies that rely on those flags may have unintended results, such as preventing USB-C ports from enumerating. Detect the above scenario through the process of elimination. 1) Integrated Thunderbolt host controllers already have Thunderbolt implemented, so anything outside their external facing Root Port is removable and untrusted. Detect them using the following properties: - Most integrated host controllers have the "usb4-host-interface" ACPI property, as described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#mapping-native-protocols-pcie-displayport-tunneled-through-usb4-to-usb4-host-routers - Integrated Thunderbolt PCIe Root Ports before Alder Lake do not have the "usb4-host-interface" ACPI property. Identify those by their PCI IDs instead. 2) If a Root Port does not have integrated Thunderbolt capabilities, but has the "ExternalFacingPort" ACPI property, that means the manufacturer has opted to use a discrete Thunderbolt host controller that is built into the computer. This host controller can be identified by virtue of being located directly below an external-facing Root Port that lacks integrated Thunderbolt. Label it as trusted and fixed. Everything downstream from it is untrusted and removable. The "ExternalFacingPort" ACPI property is described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#identifying-externally-exposed-pcie-root-ports Link: https://lore.kernel.org/r/20240910-trust-tbt-fix-v5-1-7a7a42a5f496@chromium.org Suggested-by: Mika Westerberg Signed-off-by: Esther Shimanovich Signed-off-by: Bjorn Helgaas Tested-by: Mika Westerberg Tested-by: Mario Limonciello Reviewed-by: Mika Westerberg Reviewed-by: Mario Limonciello --- arch/x86/pci/acpi.c | 119 ++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/probe.c | 30 ++++++++--- include/linux/pci.h | 6 +++ 3 files changed, 148 insertions(+), 7 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 55c4b07ec1f6..0c316bae1726 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -250,6 +250,125 @@ void __init pci_acpi_crs_quirks(void) pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } +/* + * Check if pdev is part of a PCIe switch that is directly below the + * specified bridge. + */ +static bool pcie_switch_directly_under(struct pci_dev *bridge, + struct pci_dev *pdev) +{ + struct pci_dev *parent = pci_upstream_bridge(pdev); + + /* If the device doesn't have a parent, it's not under anything */ + if (!parent) + return false; + + /* + * If the device has a PCIe type, check if it is below the + * corresponding PCIe switch components (if applicable). Then check + * if its upstream port is directly beneath the specified bridge. + */ + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_UPSTREAM: + return parent == bridge; + + case PCI_EXP_TYPE_DOWNSTREAM: + if (pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) + return false; + parent = pci_upstream_bridge(parent); + return parent == bridge; + + case PCI_EXP_TYPE_ENDPOINT: + if (pci_pcie_type(parent) != PCI_EXP_TYPE_DOWNSTREAM) + return false; + parent = pci_upstream_bridge(parent); + if (!parent || pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) + return false; + parent = pci_upstream_bridge(parent); + return parent == bridge; + } + + return false; +} + +static bool pcie_has_usb4_host_interface(struct pci_dev *pdev) +{ + struct fwnode_handle *fwnode; + + /* + * For USB4, the tunneled PCIe Root or Downstream Ports are marked + * with the "usb4-host-interface" ACPI property, so we look for + * that first. This should cover most cases. + */ + fwnode = fwnode_find_reference(dev_fwnode(&pdev->dev), + "usb4-host-interface", 0); + if (!IS_ERR(fwnode)) { + fwnode_handle_put(fwnode); + return true; + } + + /* + * Any integrated Thunderbolt 3/4 PCIe Root Ports from Intel + * before Alder Lake do not have the "usb4-host-interface" + * property so we use their PCI IDs instead. All these are + * tunneled. This list is not expected to grow. + */ + if (pdev->vendor == PCI_VENDOR_ID_INTEL) { + switch (pdev->device) { + /* Ice Lake Thunderbolt 3 PCIe Root Ports */ + case 0x8a1d: + case 0x8a1f: + case 0x8a21: + case 0x8a23: + /* Tiger Lake-LP Thunderbolt 4 PCIe Root Ports */ + case 0x9a23: + case 0x9a25: + case 0x9a27: + case 0x9a29: + /* Tiger Lake-H Thunderbolt 4 PCIe Root Ports */ + case 0x9a2b: + case 0x9a2d: + case 0x9a2f: + case 0x9a31: + return true; + } + } + + return false; +} + +bool arch_pci_dev_is_removable(struct pci_dev *pdev) +{ + struct pci_dev *parent, *root; + + /* pdev without a parent or Root Port is never tunneled */ + parent = pci_upstream_bridge(pdev); + if (!parent) + return false; + root = pcie_find_root_port(pdev); + if (!root) + return false; + + /* Internal PCIe devices are not tunneled */ + if (!root->external_facing) + return false; + + /* Anything directly behind a "usb4-host-interface" is tunneled */ + if (pcie_has_usb4_host_interface(parent)) + return true; + + /* + * Check if this is a discrete Thunderbolt/USB4 controller that is + * directly behind the non-USB4 PCIe Root Port marked as + * "ExternalFacingPort". Those are not behind a PCIe tunnel. + */ + if (pcie_switch_directly_under(root, pdev)) + return false; + + /* PCIe devices after the discrete chip are tunneled */ + return true; +} + #ifdef CONFIG_PCI_MMCONFIG static int check_segment(u16 seg, struct device *dev, char *estr) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..924b23962c45 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1633,23 +1633,33 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) static void set_pcie_untrusted(struct pci_dev *dev) { - struct pci_dev *parent; + struct pci_dev *parent = pci_upstream_bridge(dev); + if (!parent) + return; /* - * If the upstream bridge is untrusted we treat this device + * If the upstream bridge is untrusted we treat this device as * untrusted as well. */ - parent = pci_upstream_bridge(dev); - if (parent && (parent->untrusted || parent->external_facing)) + if (parent->untrusted) { dev->untrusted = true; + return; + } + + if (arch_pci_dev_is_removable(dev)) { + pci_dbg(dev, "marking as untrusted\n"); + dev->untrusted = true; + } } static void pci_set_removable(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); + if (!parent) + return; /* - * We (only) consider everything downstream from an external_facing + * We (only) consider everything tunneled below an external_facing * device to be removable by the user. We're mainly concerned with * consumer platforms with user accessible thunderbolt ports that are * vulnerable to DMA attacks, and we expect those ports to be marked by @@ -1659,9 +1669,15 @@ static void pci_set_removable(struct pci_dev *dev) * accessible to user / may not be removed by end user, and thus not * exposed as "removable" to userspace. */ - if (parent && - (parent->external_facing || dev_is_removable(&parent->dev))) + if (dev_is_removable(&parent->dev)) { dev_set_removable(&dev->dev, DEVICE_REMOVABLE); + return; + } + + if (arch_pci_dev_is_removable(dev)) { + pci_dbg(dev, "marking as removable\n"); + dev_set_removable(&dev->dev, DEVICE_REMOVABLE); + } } /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..4e77c4230c0a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2609,6 +2609,12 @@ pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif +#if defined(CONFIG_X86) && defined(CONFIG_ACPI) +bool arch_pci_dev_is_removable(struct pci_dev *pdev); +#else +static inline bool arch_pci_dev_is_removable(struct pci_dev *pdev) { return false; } +#endif + #ifdef CONFIG_EEH static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) { From 083b0ac4f880e54ec7f6a611a899477bbfee9faf Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:05 +0200 Subject: [PATCH 069/127] PCI: Deprecate pcim_iounmap_regions() pcim_ioumap_region() has recently been made a public function and does not have the disadvantage of having to deal with the legacy iomap table, as pcim_iounmap_regions() does. Deprecate pcim_iounmap_regions(). Link: https://lore.kernel.org/r/20241016094911.24818-4-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- drivers/pci/devres.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index fc50d2456aed..3b59a86a764b 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -961,11 +961,14 @@ err: EXPORT_SYMBOL(pcim_request_all_regions); /** - * pcim_iounmap_regions - Unmap and release PCI BARs + * pcim_iounmap_regions - Unmap and release PCI BARs (DEPRECATED) * @pdev: PCI device to map IO resources for * @mask: Mask of BARs to unmap and release * * Unmap and release regions specified by @mask. + * + * This function is DEPRECATED. Do not use it in new code. + * Use pcim_iounmap_region() instead. */ void pcim_iounmap_regions(struct pci_dev *pdev, int mask) { From 4365792438902be22cc26ef0624b024c3fb2defa Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:06 +0200 Subject: [PATCH 070/127] fpga/dfl-pci.c: Replace deprecated PCI functions pcim_iomap_regions() and pcim_iomap_table() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Port dfl-pci.c to the successor, pcim_iomap_region(). Consistently, replace pcim_iounmap_regions() with pcim_iounmap_region(). Link: https://lore.kernel.org/r/20241016094911.24818-5-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Xu Yilun --- drivers/fpga/dfl-pci.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c index 80cac3a5f976..602807d6afcc 100644 --- a/drivers/fpga/dfl-pci.c +++ b/drivers/fpga/dfl-pci.c @@ -39,14 +39,6 @@ struct cci_drvdata { struct dfl_fpga_cdev *cdev; /* container device */ }; -static void __iomem *cci_pci_ioremap_bar0(struct pci_dev *pcidev) -{ - if (pcim_iomap_regions(pcidev, BIT(0), DRV_NAME)) - return NULL; - - return pcim_iomap_table(pcidev)[0]; -} - static int cci_pci_alloc_irq(struct pci_dev *pcidev) { int ret, nvec = pci_msix_vec_count(pcidev); @@ -235,9 +227,9 @@ static int find_dfls_by_default(struct pci_dev *pcidev, u64 v; /* start to find Device Feature List from Bar 0 */ - base = cci_pci_ioremap_bar0(pcidev); - if (!base) - return -ENOMEM; + base = pcim_iomap_region(pcidev, 0, DRV_NAME); + if (IS_ERR(base)) + return PTR_ERR(base); /* * PF device has FME and Ports/AFUs, and VF device only has one @@ -296,7 +288,7 @@ static int find_dfls_by_default(struct pci_dev *pcidev, } /* release I/O mappings for next step enumeration */ - pcim_iounmap_regions(pcidev, BIT(0)); + pcim_iounmap_region(pcidev, 0); return ret; } From 499665679af4ed1e72202e448815dc9c2c1220a7 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:08 +0200 Subject: [PATCH 071/127] gpio: Replace deprecated PCI functions pcim_iomap_regions() and pcim_iomap_table() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace those functions with calls to pcim_iomap_region(). Link: https://lore.kernel.org/r/20241016094911.24818-7-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Bartosz Golaszewski --- drivers/gpio/gpio-merrifield.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 421d7e3a6c66..cd20604f26de 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -78,24 +78,25 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id if (retval) return retval; - retval = pcim_iomap_regions(pdev, BIT(1) | BIT(0), pci_name(pdev)); - if (retval) - return dev_err_probe(dev, retval, "I/O memory mapping error\n"); - - base = pcim_iomap_table(pdev)[1]; + base = pcim_iomap_region(pdev, 1, pci_name(pdev)); + if (IS_ERR(base)) + return dev_err_probe(dev, PTR_ERR(base), "I/O memory mapping error\n"); irq_base = readl(base + 0 * sizeof(u32)); gpio_base = readl(base + 1 * sizeof(u32)); /* Release the IO mapping, since we already get the info from BAR1 */ - pcim_iounmap_regions(pdev, BIT(1)); + pcim_iounmap_region(pdev, 1); priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->dev = dev; - priv->reg_base = pcim_iomap_table(pdev)[0]; + priv->reg_base = pcim_iomap_region(pdev, 0, pci_name(pdev)); + if (IS_ERR(priv->reg_base)) + return dev_err_probe(dev, PTR_ERR(priv->reg_base), + "I/O memory mapping error\n"); priv->pin_info.pin_ranges = mrfld_gpio_ranges; priv->pin_info.nranges = ARRAY_SIZE(mrfld_gpio_ranges); From 64fe9bc34f781538682af34e7adb6aee1a52c425 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:09 +0200 Subject: [PATCH 072/127] ethernet: cavium: Replace deprecated PCI functions pcim_iomap_regions() and pcim_iomap_table() have been deprecated by the PCI subsystem in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). Replace the deprecated PCI functions with their successors. Link: https://lore.kernel.org/r/20241016094911.24818-8-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Acked-by: Paolo Abeni --- drivers/net/ethernet/cavium/common/cavium_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c index 9fd717b9cf69..984f0dd7b62e 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.c +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c @@ -239,12 +239,11 @@ static int cavium_ptp_probe(struct pci_dev *pdev, if (err) goto error_free; - err = pcim_iomap_regions(pdev, 1 << PCI_PTP_BAR_NO, pci_name(pdev)); + clock->reg_base = pcim_iomap_region(pdev, PCI_PTP_BAR_NO, pci_name(pdev)); + err = PTR_ERR_OR_ZERO(clock->reg_base); if (err) goto error_free; - clock->reg_base = pcim_iomap_table(pdev)[PCI_PTP_BAR_NO]; - spin_lock_init(&clock->spin_lock); cc = &clock->cycle_counter; @@ -292,7 +291,7 @@ error_stop: clock_cfg = readq(clock->reg_base + PTP_CLOCK_CFG); clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; writeq(clock_cfg, clock->reg_base + PTP_CLOCK_CFG); - pcim_iounmap_regions(pdev, 1 << PCI_PTP_BAR_NO); + pcim_iounmap_region(pdev, PCI_PTP_BAR_NO); error_free: devm_kfree(dev, clock); From e329b762a31eb59da1b78cb69cbe1b9ff843e081 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 7 Nov 2024 10:59:34 +0000 Subject: [PATCH 073/127] dt-bindings: PCI: microchip,pcie-host: Add reg for Root Port 2 The PCI host controller on PolarFire SoC has multiple Root Port instances, each with their own bridge and ctrl address spaces. The original binding has an "apb" register region, and it is expected to be set to the base address of the Root Complex register space. Some defines in the Linux driver were used to compute the addresses of the bridge and ctrl address ranges corresponding to Root Port instance 1. Some customers want to use Root Port instance 2 however, which requires changing the defines in the driver, which is clearly not a portable solution. Remove this "apb" register region from the binding and add "bridge" & "ctrl" regions instead, that will directly communicate the address of these regions for a specific Root Port. Fixes: 6ee6c89aac35 ("dt-bindings: PCI: microchip: Add Microchip PolarFire host binding") Link: https://lore.kernel.org/r/20241107-barcode-whinny-b1a4e8834b4f@spud Signed-off-by: Conor Dooley [bhelgaas: Capitalize PCIe spec terms] Signed-off-by: Bjorn Helgaas Acked-by: Krzysztof Kozlowski Acked-by: Daire McNamara --- .../bindings/pci/microchip,pcie-host.yaml | 11 +++++++++-- .../bindings/pci/plda,xpressrich3-axi-common.yaml | 14 ++++++++++---- .../bindings/pci/starfive,jh7110-pcie.yaml | 7 +++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml b/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml index 612633ba59e2..2e1547569702 100644 --- a/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml +++ b/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml @@ -17,6 +17,12 @@ properties: compatible: const: microchip,pcie-host-1.0 # PolarFire + reg: + minItems: 3 + + reg-names: + minItems: 3 + clocks: description: Fabric Interface Controllers, FICs, are the interface between the FPGA @@ -62,8 +68,9 @@ examples: pcie0: pcie@2030000000 { compatible = "microchip,pcie-host-1.0"; reg = <0x0 0x70000000 0x0 0x08000000>, - <0x0 0x43000000 0x0 0x00010000>; - reg-names = "cfg", "apb"; + <0x0 0x43008000 0x0 0x00002000>, + <0x0 0x4300a000 0x0 0x00002000>; + reg-names = "cfg", "bridge", "ctrl"; device_type = "pci"; #address-cells = <3>; #size-cells = <2>; diff --git a/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml b/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml index 7a57a80052a0..039eecdbd6aa 100644 --- a/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml +++ b/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml @@ -18,12 +18,18 @@ allOf: properties: reg: - maxItems: 2 + maxItems: 3 + minItems: 2 reg-names: - items: - - const: cfg - - const: apb + oneOf: + - items: + - const: cfg + - const: apb + - items: + - const: cfg + - const: bridge + - const: ctrl interrupts: minItems: 1 diff --git a/Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml b/Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml index 67151aaa3948..5f432452c815 100644 --- a/Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml @@ -16,6 +16,13 @@ properties: compatible: const: starfive,jh7110-pcie + + reg: + maxItems: 2 + + reg-names: + maxItems: 2 + clocks: items: - description: NOC bus clock From ac7f53b7e7283fee35ad12de8359f20989a47eb5 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 7 Nov 2024 10:59:35 +0000 Subject: [PATCH 074/127] PCI: microchip: Add support for using either Root Port 1 or 2 The PCI host controller on PolarFire SoC has multiple Root Port instances, each with their own bridge and ctrl address spaces. The original binding has an "apb" register region, and it is expected to be set to the base address of the Root Complex register space. Some defines in the Linux driver were used to compute the addresses of the bridge and ctrl address ranges corresponding to Root Port instance 1. Some customers want to use Root Port instance 2 however, which requires changing the defines in the driver, which is clearly not a portable solution. The binding has been changed from a single register region to a pair, corresponding to the bridge and ctrl regions respectively, so modify the driver to read these regions directly from the devicetree rather than compute them from the base address of the abp region. To maintain backwards compatibility with the existing binding, the driver retains code to handle the "abp" reg and computes the base address of the bridge and ctrl regions using the defines if it is present. reg-names has always been a required property, so this is safe to do. Link: https://lore.kernel.org/r/20241107-surrender-brisket-287d563a5de1@spud Signed-off-by: Conor Dooley [bhelgaas: Capitalize PCIe spec terms] Signed-off-by: Bjorn Helgaas --- .../pci/controller/plda/pcie-microchip-host.c | 126 ++++++++++-------- 1 file changed, 67 insertions(+), 59 deletions(-) diff --git a/drivers/pci/controller/plda/pcie-microchip-host.c b/drivers/pci/controller/plda/pcie-microchip-host.c index 48f60a04b740..6630cacef301 100644 --- a/drivers/pci/controller/plda/pcie-microchip-host.c +++ b/drivers/pci/controller/plda/pcie-microchip-host.c @@ -25,9 +25,6 @@ #define MC_PCIE1_BRIDGE_ADDR 0x00008000u #define MC_PCIE1_CTRL_ADDR 0x0000a000u -#define MC_PCIE_BRIDGE_ADDR (MC_PCIE1_BRIDGE_ADDR) -#define MC_PCIE_CTRL_ADDR (MC_PCIE1_CTRL_ADDR) - /* PCIe Controller Phy Regs */ #define SEC_ERROR_EVENT_CNT 0x20 #define DED_ERROR_EVENT_CNT 0x24 @@ -128,7 +125,6 @@ [EVENT_LOCAL_ ## x] = { __stringify(x), s } #define PCIE_EVENT(x) \ - .base = MC_PCIE_CTRL_ADDR, \ .offset = PCIE_EVENT_INT, \ .mask_offset = PCIE_EVENT_INT, \ .mask_high = 1, \ @@ -136,7 +132,6 @@ .enb_mask = PCIE_EVENT_INT_ENB_MASK #define SEC_EVENT(x) \ - .base = MC_PCIE_CTRL_ADDR, \ .offset = SEC_ERROR_INT, \ .mask_offset = SEC_ERROR_INT_MASK, \ .mask = SEC_ERROR_INT_ ## x ## _INT, \ @@ -144,7 +139,6 @@ .enb_mask = 0 #define DED_EVENT(x) \ - .base = MC_PCIE_CTRL_ADDR, \ .offset = DED_ERROR_INT, \ .mask_offset = DED_ERROR_INT_MASK, \ .mask_high = 1, \ @@ -152,7 +146,6 @@ .enb_mask = 0 #define LOCAL_EVENT(x) \ - .base = MC_PCIE_BRIDGE_ADDR, \ .offset = ISTATUS_LOCAL, \ .mask_offset = IMASK_LOCAL, \ .mask_high = 0, \ @@ -179,7 +172,8 @@ struct event_map { struct mc_pcie { struct plda_pcie_rp plda; - void __iomem *axi_base_addr; + void __iomem *bridge_base_addr; + void __iomem *ctrl_base_addr; }; struct cause { @@ -253,7 +247,6 @@ static struct event_map local_status_to_event[] = { }; static struct { - u32 base; u32 offset; u32 mask; u32 shift; @@ -325,8 +318,7 @@ static inline u32 reg_to_event(u32 reg, struct event_map field) static u32 pcie_events(struct mc_pcie *port) { - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; - u32 reg = readl_relaxed(ctrl_base_addr + PCIE_EVENT_INT); + u32 reg = readl_relaxed(port->ctrl_base_addr + PCIE_EVENT_INT); u32 val = 0; int i; @@ -338,8 +330,7 @@ static u32 pcie_events(struct mc_pcie *port) static u32 sec_errors(struct mc_pcie *port) { - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; - u32 reg = readl_relaxed(ctrl_base_addr + SEC_ERROR_INT); + u32 reg = readl_relaxed(port->ctrl_base_addr + SEC_ERROR_INT); u32 val = 0; int i; @@ -351,8 +342,7 @@ static u32 sec_errors(struct mc_pcie *port) static u32 ded_errors(struct mc_pcie *port) { - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; - u32 reg = readl_relaxed(ctrl_base_addr + DED_ERROR_INT); + u32 reg = readl_relaxed(port->ctrl_base_addr + DED_ERROR_INT); u32 val = 0; int i; @@ -364,8 +354,7 @@ static u32 ded_errors(struct mc_pcie *port) static u32 local_events(struct mc_pcie *port) { - void __iomem *bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR; - u32 reg = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL); + u32 reg = readl_relaxed(port->bridge_base_addr + ISTATUS_LOCAL); u32 val = 0; int i; @@ -412,8 +401,12 @@ static void mc_ack_event_irq(struct irq_data *data) void __iomem *addr; u32 mask; - addr = mc_port->axi_base_addr + event_descs[event].base + - event_descs[event].offset; + if (event_descs[event].offset == ISTATUS_LOCAL) + addr = mc_port->bridge_base_addr; + else + addr = mc_port->ctrl_base_addr; + + addr += event_descs[event].offset; mask = event_descs[event].mask; mask |= event_descs[event].enb_mask; @@ -429,8 +422,12 @@ static void mc_mask_event_irq(struct irq_data *data) u32 mask; u32 val; - addr = mc_port->axi_base_addr + event_descs[event].base + - event_descs[event].mask_offset; + if (event_descs[event].offset == ISTATUS_LOCAL) + addr = mc_port->bridge_base_addr; + else + addr = mc_port->ctrl_base_addr; + + addr += event_descs[event].mask_offset; mask = event_descs[event].mask; if (event_descs[event].enb_mask) { mask <<= PCIE_EVENT_INT_ENB_SHIFT; @@ -460,8 +457,12 @@ static void mc_unmask_event_irq(struct irq_data *data) u32 mask; u32 val; - addr = mc_port->axi_base_addr + event_descs[event].base + - event_descs[event].mask_offset; + if (event_descs[event].offset == ISTATUS_LOCAL) + addr = mc_port->bridge_base_addr; + else + addr = mc_port->ctrl_base_addr; + + addr += event_descs[event].mask_offset; mask = event_descs[event].mask; if (event_descs[event].enb_mask) @@ -554,26 +555,20 @@ static const struct plda_event mc_event = { static inline void mc_clear_secs(struct mc_pcie *port) { - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; - - writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, ctrl_base_addr + - SEC_ERROR_INT); - writel_relaxed(0, ctrl_base_addr + SEC_ERROR_EVENT_CNT); + writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, + port->ctrl_base_addr + SEC_ERROR_INT); + writel_relaxed(0, port->ctrl_base_addr + SEC_ERROR_EVENT_CNT); } static inline void mc_clear_deds(struct mc_pcie *port) { - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; - - writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, ctrl_base_addr + - DED_ERROR_INT); - writel_relaxed(0, ctrl_base_addr + DED_ERROR_EVENT_CNT); + writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, + port->ctrl_base_addr + DED_ERROR_INT); + writel_relaxed(0, port->ctrl_base_addr + DED_ERROR_EVENT_CNT); } static void mc_disable_interrupts(struct mc_pcie *port) { - void __iomem *bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR; - void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR; u32 val; /* Ensure ECC bypass is enabled */ @@ -581,22 +576,22 @@ static void mc_disable_interrupts(struct mc_pcie *port) ECC_CONTROL_RX_RAM_ECC_BYPASS | ECC_CONTROL_PCIE2AXI_RAM_ECC_BYPASS | ECC_CONTROL_AXI2PCIE_RAM_ECC_BYPASS; - writel_relaxed(val, ctrl_base_addr + ECC_CONTROL); + writel_relaxed(val, port->ctrl_base_addr + ECC_CONTROL); /* Disable SEC errors and clear any outstanding */ - writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, ctrl_base_addr + - SEC_ERROR_INT_MASK); + writel_relaxed(SEC_ERROR_INT_ALL_RAM_SEC_ERR_INT, + port->ctrl_base_addr + SEC_ERROR_INT_MASK); mc_clear_secs(port); /* Disable DED errors and clear any outstanding */ - writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, ctrl_base_addr + - DED_ERROR_INT_MASK); + writel_relaxed(DED_ERROR_INT_ALL_RAM_DED_ERR_INT, + port->ctrl_base_addr + DED_ERROR_INT_MASK); mc_clear_deds(port); /* Disable local interrupts and clear any outstanding */ - writel_relaxed(0, bridge_base_addr + IMASK_LOCAL); - writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_LOCAL); - writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_MSI); + writel_relaxed(0, port->bridge_base_addr + IMASK_LOCAL); + writel_relaxed(GENMASK(31, 0), port->bridge_base_addr + ISTATUS_LOCAL); + writel_relaxed(GENMASK(31, 0), port->bridge_base_addr + ISTATUS_MSI); /* Disable PCIe events and clear any outstanding */ val = PCIE_EVENT_INT_L2_EXIT_INT | @@ -605,11 +600,11 @@ static void mc_disable_interrupts(struct mc_pcie *port) PCIE_EVENT_INT_L2_EXIT_INT_MASK | PCIE_EVENT_INT_HOTRST_EXIT_INT_MASK | PCIE_EVENT_INT_DLUP_EXIT_INT_MASK; - writel_relaxed(val, ctrl_base_addr + PCIE_EVENT_INT); + writel_relaxed(val, port->ctrl_base_addr + PCIE_EVENT_INT); /* Disable host interrupts and clear any outstanding */ - writel_relaxed(0, bridge_base_addr + IMASK_HOST); - writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_HOST); + writel_relaxed(0, port->bridge_base_addr + IMASK_HOST); + writel_relaxed(GENMASK(31, 0), port->bridge_base_addr + ISTATUS_HOST); } static int mc_platform_init(struct pci_config_window *cfg) @@ -617,12 +612,10 @@ static int mc_platform_init(struct pci_config_window *cfg) struct device *dev = cfg->parent; struct platform_device *pdev = to_platform_device(dev); struct pci_host_bridge *bridge = platform_get_drvdata(pdev); - void __iomem *bridge_base_addr = - port->axi_base_addr + MC_PCIE_BRIDGE_ADDR; int ret; /* Configure address translation table 0 for PCIe config space */ - plda_pcie_setup_window(bridge_base_addr, 0, cfg->res.start, + plda_pcie_setup_window(port->bridge_base_addr, 0, cfg->res.start, cfg->res.start, resource_size(&cfg->res)); @@ -649,7 +642,7 @@ static int mc_platform_init(struct pci_config_window *cfg) static int mc_host_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - void __iomem *bridge_base_addr; + void __iomem *apb_base_addr; struct plda_pcie_rp *plda; int ret; u32 val; @@ -661,30 +654,45 @@ static int mc_host_probe(struct platform_device *pdev) plda = &port->plda; plda->dev = dev; - port->axi_base_addr = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(port->axi_base_addr)) - return PTR_ERR(port->axi_base_addr); + port->bridge_base_addr = devm_platform_ioremap_resource_byname(pdev, + "bridge"); + port->ctrl_base_addr = devm_platform_ioremap_resource_byname(pdev, + "ctrl"); + if (!IS_ERR(port->bridge_base_addr) && !IS_ERR(port->ctrl_base_addr)) + goto addrs_set; + /* + * The original, incorrect, binding that lumped the control and + * bridge addresses together still needs to be handled by the driver. + */ + apb_base_addr = devm_platform_ioremap_resource_byname(pdev, "apb"); + if (IS_ERR(apb_base_addr)) + return dev_err_probe(dev, PTR_ERR(apb_base_addr), + "both legacy apb register and ctrl/bridge regions missing"); + + port->bridge_base_addr = apb_base_addr + MC_PCIE1_BRIDGE_ADDR; + port->ctrl_base_addr = apb_base_addr + MC_PCIE1_CTRL_ADDR; + +addrs_set: mc_disable_interrupts(port); - bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR; - plda->bridge_addr = bridge_base_addr; + plda->bridge_addr = port->bridge_base_addr; plda->num_events = NUM_EVENTS; /* Allow enabling MSI by disabling MSI-X */ - val = readl(bridge_base_addr + PCIE_PCI_IRQ_DW0); + val = readl(port->bridge_base_addr + PCIE_PCI_IRQ_DW0); val &= ~MSIX_CAP_MASK; - writel(val, bridge_base_addr + PCIE_PCI_IRQ_DW0); + writel(val, port->bridge_base_addr + PCIE_PCI_IRQ_DW0); /* Pick num vectors from bitfile programmed onto FPGA fabric */ - val = readl(bridge_base_addr + PCIE_PCI_IRQ_DW0); + val = readl(port->bridge_base_addr + PCIE_PCI_IRQ_DW0); val &= NUM_MSI_MSGS_MASK; val >>= NUM_MSI_MSGS_SHIFT; plda->msi.num_vectors = 1 << val; /* Pick vector address from design */ - plda->msi.vector_phy = readl_relaxed(bridge_base_addr + IMSI_ADDR); + plda->msi.vector_phy = readl_relaxed(port->bridge_base_addr + IMSI_ADDR); ret = mc_pcie_init_clks(dev); if (ret) { From 5a938ed9481b0c06cb97aec45e722a80568256fd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 24 May 2024 16:27:13 +0530 Subject: [PATCH 075/127] PCI: keystone: Set mode as Root Complex for "ti,keystone-pcie" compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 23284ad677a9 ("PCI: keystone: Add support for PCIe EP in AM654x Platforms") introduced configuring "enum dw_pcie_device_mode" as part of device data ("struct ks_pcie_of_data"). However it failed to set the mode for "ti,keystone-pcie" compatible. Since the mode defaults to "DW_PCIE_UNKNOWN_TYPE", the following error message is displayed for the v3.65a controller: "INVALID device type 0" Despite the driver probing successfully, the controller may not be functional in the Root Complex mode of operation. So, set the mode as Root Complex for "ti,keystone-pcie" compatible to fix this. Fixes: 23284ad677a9 ("PCI: keystone: Add support for PCIe EP in AM654x Platforms") Link: https://lore.kernel.org/r/20240524105714.191642-2-s-vadapalli@ti.com Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Siddharth Vadapalli [kwilczynski: commit log, added tag for stable releases] Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org --- drivers/pci/controller/dwc/pci-keystone.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index 2219b1a866fa..b99bc4071fe9 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -1093,6 +1093,7 @@ static int ks_pcie_am654_set_mode(struct device *dev, static const struct ks_pcie_of_data ks_pcie_rc_of_data = { .host_ops = &ks_pcie_host_ops, + .mode = DW_PCIE_RC_TYPE, .version = DW_PCIE_VER_365A, }; From 9e9ec8d8692a6f64d81ef67d4fb6255af6be684b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 24 May 2024 16:27:14 +0530 Subject: [PATCH 076/127] PCI: keystone: Add link up check to ks_pcie_other_map_bus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K2G forwards the error triggered by a link-down state (e.g., no connected endpoint device) on the system bus for PCI configuration transactions; these errors are reported as an SError at system level, which is fatal and hangs the system. So, apply fix similar to how it was done in the DesignWare Core driver commit 15b23906347c ("PCI: dwc: Add link up check in dw_child_pcie_ops.map_bus()"). Fixes: 10a797c6e54a ("PCI: dwc: keystone: Use pci_ops for config space accessors") Link: https://lore.kernel.org/r/20240524105714.191642-3-s-vadapalli@ti.com Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Siddharth Vadapalli [kwilczynski: commit log, added tag for stable releases] Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org --- drivers/pci/controller/dwc/pci-keystone.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index b99bc4071fe9..44b34559de1a 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -455,6 +455,17 @@ static void __iomem *ks_pcie_other_map_bus(struct pci_bus *bus, struct keystone_pcie *ks_pcie = to_keystone_pcie(pci); u32 reg; + /* + * Checking whether the link is up here is a last line of defense + * against platforms that forward errors on the system bus as + * SError upon PCI configuration transactions issued when the link + * is down. This check is racy by definition and does not stop + * the system from triggering an SError if the link goes down + * after this check is performed. + */ + if (!dw_pcie_link_up(pci)) + return NULL; + reg = CFG_BUS(bus->number) | CFG_DEVICE(PCI_SLOT(devfn)) | CFG_FUNC(PCI_FUNC(devfn)); if (!pci_is_root_bus(bus->parent)) From 93093ea1f05928b123dae38b710631362bef1601 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:47 -0700 Subject: [PATCH 077/127] PCI: Make pci_stop_dev() concurrent safe Use the atomic ADDED flag to ensure concurrent callers can't attempt to stop the device multiple times. Callers should currently all be holding the pci_rescan_remove_lock, so there shouldn't be an existing race. But that global lock can cause lock dependency issues, so this is preparing to reduce reliance on that lock by using the existing existing atomic bit ops. Link: https://lore.kernel.org/r/20241022224851.340648-2-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: squash https://lore.kernel.org/r/20241111180659.3321671-1-kbusch@meta.com] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/bus.c | 2 +- drivers/pci/pci.h | 11 +++++++++-- drivers/pci/remove.c | 17 ++++++++--------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 55c853686051..e41dfece0d96 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -348,7 +348,7 @@ void pci_bus_add_device(struct pci_dev *dev) if (retval < 0 && retval != -EPROBE_DEFER) pci_warn(dev, "device attach failed (%d)\n", retval); - pci_dev_assign_added(dev, true); + pci_dev_assign_added(dev); if (dev_of_node(&dev->dev) && pci_is_bridge(dev)) { retval = of_platform_populate(dev_of_node(&dev->dev), NULL, NULL, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..0d9169e0e7d2 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -470,9 +470,16 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 -static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) +static inline void pci_dev_assign_added(struct pci_dev *dev) { - assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); + smp_mb__before_atomic(); + set_bit(PCI_DEV_ADDED, &dev->priv_flags); + smp_mb__after_atomic(); +} + +static inline bool pci_dev_test_and_clear_added(struct pci_dev *dev) +{ + return test_and_clear_bit(PCI_DEV_ADDED, &dev->priv_flags); } static inline bool pci_dev_is_added(const struct pci_dev *dev) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index e4ce1145aa3e..78863f241a88 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -33,16 +33,15 @@ static void pci_stop_dev(struct pci_dev *dev) { pci_pme_active(dev, false); - if (pci_dev_is_added(dev)) { - device_for_each_child(dev->dev.parent, dev_of_node(&dev->dev), - pci_pwrctl_unregister); - device_release_driver(&dev->dev); - pci_proc_detach_device(dev); - pci_remove_sysfs_dev_files(dev); - of_pci_remove_node(dev); + if (!pci_dev_test_and_clear_added(dev)) + return; - pci_dev_assign_added(dev, false); - } + device_for_each_child(dev->dev.parent, dev_of_node(&dev->dev), + pci_pwrctl_unregister); + device_release_driver(&dev->dev); + pci_proc_detach_device(dev); + pci_remove_sysfs_dev_files(dev); + of_pci_remove_node(dev); } static void pci_destroy_dev(struct pci_dev *dev) From e3f30d563a388220a7c4e3b9a7b52ac0b0324b26 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:48 -0700 Subject: [PATCH 078/127] PCI: Make pci_destroy_dev() concurrent safe Use an atomic flag instead of the racy check against the device's kobj parent. We shouldn't be poking into device implementation details at this level anyway. Link: https://lore.kernel.org/r/20241022224851.340648-3-kbusch@meta.com Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 6 ++++++ drivers/pci/remove.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0d9169e0e7d2..187c5c83412d 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -469,6 +469,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 +#define PCI_DEV_REMOVED 3 static inline void pci_dev_assign_added(struct pci_dev *dev) { @@ -487,6 +488,11 @@ static inline bool pci_dev_is_added(const struct pci_dev *dev) return test_bit(PCI_DEV_ADDED, &dev->priv_flags); } +static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) +{ + return test_and_set_bit(PCI_DEV_REMOVED, &dev->priv_flags); +} + #ifdef CONFIG_PCIEAER #include diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 78863f241a88..1f35945459fd 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -46,7 +46,7 @@ static void pci_stop_dev(struct pci_dev *dev) static void pci_destroy_dev(struct pci_dev *dev) { - if (!dev->dev.kobj.parent) + if (pci_dev_test_and_set_removed(dev)) return; pci_npem_remove(dev); From 4d6dcd6c2fa3a80898651d323c150e5ebc03881d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:49 -0700 Subject: [PATCH 079/127] PCI: Move __pci_walk_bus() mutex to where we need it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify __pci_walk_bus() by moving the pci_bus_sem mutex into pci_walk_bus(), the only place it is needed, and removing the parameter that told __pci_walk_bus() whether to acquire the mutex. Link: https://lore.kernel.org/r/20241022224851.340648-4-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Ilpo Järvinen --- drivers/pci/bus.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index e41dfece0d96..7c07a141e877 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -390,7 +390,7 @@ void pci_bus_add_devices(const struct pci_bus *bus) EXPORT_SYMBOL(pci_bus_add_devices); static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata, bool locked) + void *userdata) { struct pci_dev *dev; struct pci_bus *bus; @@ -398,8 +398,6 @@ static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void int retval; bus = top; - if (!locked) - down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -422,8 +420,6 @@ static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void if (retval) break; } - if (!locked) - up_read(&pci_bus_sem); } /** @@ -441,7 +437,9 @@ static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void */ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) { - __pci_walk_bus(top, cb, userdata, false); + down_read(&pci_bus_sem); + __pci_walk_bus(top, cb, userdata); + up_read(&pci_bus_sem); } EXPORT_SYMBOL_GPL(pci_walk_bus); @@ -449,7 +447,7 @@ void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void * { lockdep_assert_held(&pci_bus_sem); - __pci_walk_bus(top, cb, userdata, true); + __pci_walk_bus(top, cb, userdata); } EXPORT_SYMBOL_GPL(pci_walk_bus_locked); From ee061da777f704976c6d3fdc1707788d11a052c5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:50 -0700 Subject: [PATCH 080/127] PCI: Convert __pci_walk_bus() to be recursive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original implementation of __pci_walk_bus() chose a non-recursive walk, presumably as a precaution on stack use. We do recursive bus walking in other places though. For example: pci_bus_resettable() pci_stop_bus_device() pci_remove_bus_device() pci_bus_allocate_dev_resources() So recursive pci bus walking is well tested and safe, and is easier to follow. Convert __pci_walk_bus() to be recursive to make it easier to introduce finer grain locking in the future. Link: https://lore.kernel.org/r/20241022224851.340648-5-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Ilpo Järvinen --- drivers/pci/bus.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 7c07a141e877..8491e9c7f058 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -389,37 +389,23 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static int __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata) { struct pci_dev *dev; - struct pci_bus *bus; - struct list_head *next; - int retval; + int ret = 0; - bus = top; - next = top->devices.next; - for (;;) { - if (next == &bus->devices) { - /* end of this bus, go up or finish */ - if (bus == top) - break; - next = bus->self->bus_list.next; - bus = bus->self->bus; - continue; - } - dev = list_entry(next, struct pci_dev, bus_list); - if (dev->subordinate) { - /* this is a pci-pci bridge, do its devices next */ - next = dev->subordinate->devices.next; - bus = dev->subordinate; - } else - next = dev->bus_list.next; - - retval = cb(dev, userdata); - if (retval) + list_for_each_entry(dev, &top->devices, bus_list) { + ret = cb(dev, userdata); + if (ret) break; + if (dev->subordinate) { + ret = __pci_walk_bus(dev->subordinate, cb, userdata); + if (ret) + break; + } } + return ret; } /** From d2bd39c0456b75be9dfc7d774b8d021355c26ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:49 +0300 Subject: [PATCH 081/127] PCI: Store all PCIe Supported Link Speeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe bandwidth controller added by a subsequent commit will require selecting PCIe Link Speeds that are lower than the Maximum Link Speed. The struct pci_bus only stores max_bus_speed. Even if PCIe r6.1 sec 8.2.1 currently disallows gaps in supported Link Speeds, the Implementation Note in PCIe r6.1 sec 7.5.3.18, recommends determining supported Link Speeds using the Supported Link Speeds Vector in the Link Capabilities 2 Register (when available) to "avoid software being confused if a future specification defines Links that do not require support for all slower speeds." Reuse code in pcie_get_speed_cap() to add pcie_get_supported_speeds() to query the Supported Link Speeds Vector of a PCIe device. The value is taken directly from the Supported Link Speeds Vector or synthesized from the Max Link Speed in the Link Capabilities Register when the Link Capabilities 2 Register is not available. The Supported Link Speeds Vector in the Link Capabilities Register 2 corresponds to the bus below on Root Ports and Downstream Ports, whereas it corresponds to the bus above on Upstream Ports and Endpoints (PCIe r6.1 sec 7.5.3.18): Supported Link Speeds Vector - This field indicates the supported Link speed(s) of the associated Port. Add supported_speeds into the struct pci_dev that caches the Supported Link Speeds Vector. supported_speeds contains a set of Link Speeds only in the case where PCIe Link Speed can be determined. Root Complex Integrated Endpoints do not have a well-defined Link Speed because they do not implement either of the Link Capabilities Registers, which is allowed by PCIe r6.1 sec 7.5.3 (the same limitation applies to determining cur_bus_speed and max_bus_speed that are PCI_SPEED_UNKNOWN in such case). This is of no concern from PCIe bandwidth controller point of view because such devices are not attached into a PCIe Root Port that could be controlled. The supported_speeds field keeps the extra reserved zero at the least significant bit to match the Link Capabilities 2 Register layout. An attempt was made to store supported_speeds field into the struct pci_bus as an intersection of both ends of the Link, however, the subordinate struct pci_bus is not available early enough. The Target Speed quirk (in pcie_failed_link_retrain()) can run either during initial scan or later, requiring it to use the API provided by the PCIe bandwidth controller to set the Target Link Speed in order to co-exist with the bandwidth controller. When the Target Speed quirk is calling the bandwidth controller during initial scan, the struct pci_bus is not yet initialized. As such, storing supported_speeds into the struct pci_bus is not viable. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/20241018144755.7875-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: move pcie_get_supported_speeds() decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.c | 78 +++++++++++++++++++++++------------ drivers/pci/pci.h | 1 + drivers/pci/probe.c | 3 ++ include/linux/pci.h | 10 ++++- include/uapi/linux/pci_regs.h | 1 + 5 files changed, 66 insertions(+), 27 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..3d67e8b50ba2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6188,39 +6188,65 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, } EXPORT_SYMBOL(pcie_bandwidth_available); +/** + * pcie_get_supported_speeds - query Supported Link Speed Vector + * @dev: PCI device to query + * + * Query @dev supported link speeds. + * + * Implementation Note in PCIe r6.0 sec 7.5.3.18 recommends determining + * supported link speeds using the Supported Link Speeds Vector in the Link + * Capabilities 2 Register (when available). + * + * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. + * + * Without Link Capabilities 2, i.e., prior to PCIe r3.0, Supported Link + * Speeds field in Link Capabilities is used and only 2.5 GT/s and 5.0 GT/s + * speeds were defined. + * + * For @dev without Supported Link Speed Vector, the field is synthesized + * from the Max Link Speed field in the Link Capabilities Register. + * + * Return: Supported Link Speeds Vector (+ reserved 0 at LSB). + */ +u8 pcie_get_supported_speeds(struct pci_dev *dev) +{ + u32 lnkcap2, lnkcap; + u8 speeds; + + /* + * Speeds retain the reserved 0 at LSB before PCIe Supported Link + * Speeds Vector to allow using SLS Vector bit defines directly. + */ + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); + speeds = lnkcap2 & PCI_EXP_LNKCAP2_SLS; + + /* PCIe r3.0-compliant */ + if (speeds) + return speeds; + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); + + /* Synthesize from the Max Link Speed field */ + if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB) + speeds = PCI_EXP_LNKCAP2_SLS_5_0GB | PCI_EXP_LNKCAP2_SLS_2_5GB; + else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB) + speeds = PCI_EXP_LNKCAP2_SLS_2_5GB; + + return speeds; +} + /** * pcie_get_speed_cap - query for the PCI device's link speed capability * @dev: PCI device to query * - * Query the PCI device speed capability. Return the maximum link speed - * supported by the device. + * Query the PCI device speed capability. + * + * Return: the maximum link speed supported by the device. */ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) { - u32 lnkcap2, lnkcap; - - /* - * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. The - * implementation note there recommends using the Supported Link - * Speeds Vector in Link Capabilities 2 when supported. - * - * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software - * should use the Supported Link Speeds field in Link Capabilities, - * where only 2.5 GT/s and 5.0 GT/s speeds were defined. - */ - pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); - - /* PCIe r3.0-compliant */ - if (lnkcap2) - return PCIE_LNKCAP2_SLS2SPEED(lnkcap2); - - pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); - if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB) - return PCIE_SPEED_5_0GT; - else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB) - return PCIE_SPEED_2_5GT; - - return PCI_SPEED_UNKNOWN; + return PCIE_LNKCAP2_SLS2SPEED(dev->supported_speeds); } EXPORT_SYMBOL(pcie_get_speed_cap); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..d0a46ecf7289 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -373,6 +373,7 @@ static inline int pcie_dev_speed_mbps(enum pci_bus_speed speed) return -EINVAL; } +u8 pcie_get_supported_speeds(struct pci_dev *dev); const char *pci_speed_string(enum pci_bus_speed speed); enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..af153a8e8225 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1947,6 +1947,9 @@ int pci_setup_device(struct pci_dev *dev) set_pcie_untrusted(dev); + if (pci_is_pcie(dev)) + dev->supported_speeds = pcie_get_supported_speeds(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; diff --git a/include/linux/pci.h b/include/linux/pci.h index be5ed534c39c..99c6fa30d25b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -318,7 +318,14 @@ struct pci_sriov; struct pci_p2pdma; struct rcec_ea; -/* The pci_dev structure describes PCI devices */ +/* struct pci_dev - describes a PCI device + * + * @supported_speeds: PCIe Supported Link Speeds Vector (+ reserved 0 at + * LSB). 0 when the supported speeds cannot be + * determined (e.g., for Root Complex Integrated + * Endpoints without the relevant Capability + * Registers). + */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ struct pci_bus *bus; /* Bus this device is on */ @@ -522,6 +529,7 @@ struct pci_dev { struct npem *npem; /* Native PCIe Enclosure Management */ #endif u16 acs_cap; /* ACS Capability offset */ + u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ /* diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..f3c9de0a497c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -678,6 +678,7 @@ #define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ #define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ +#define PCI_EXP_LNKCAP2_SLS 0x000000fe /* Supported Link Speeds Vector */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ From e93d9fcfd7dc643eb5fce43053774d27bea2b263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:50 +0300 Subject: [PATCH 082/127] PCI: Refactor pcie_update_link_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pcie_update_link_speed() is passed the Link Status register but not all callers have that value at hand nor need the value. Refactor pcie_update_link_speed() to include reading the Link Status register and create __pcie_update_link_speed() which can be used by the hotplug code that has the register value at hand beforehand (and needs the value for other purposes). Link: https://lore.kernel.org/r/20241018144755.7875-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/hotplug/pciehp_hpc.c | 2 +- drivers/pci/pci.h | 7 ++++++- drivers/pci/probe.c | 12 +++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 736ad8baa2a5..bb5a8d9f03ad 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -319,7 +319,7 @@ int pciehp_check_link_status(struct controller *ctrl) return -1; } - pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status); + __pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status); if (!found) { ctrl_info(ctrl, "Slot(%s): No device found\n", diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d0a46ecf7289..8137ad2fead8 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -379,7 +379,12 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); -void pcie_update_link_speed(struct pci_bus *bus, u16 link_status); + +static inline void __pcie_update_link_speed(struct pci_bus *bus, u16 linksta) +{ + bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS]; +} +void pcie_update_link_speed(struct pci_bus *bus); /* Single Root I/O Virtualization */ struct pci_sriov { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index af153a8e8225..c138daf78961 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -742,9 +742,13 @@ const char *pci_speed_string(enum pci_bus_speed speed) } EXPORT_SYMBOL_GPL(pci_speed_string); -void pcie_update_link_speed(struct pci_bus *bus, u16 linksta) +void pcie_update_link_speed(struct pci_bus *bus) { - bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS]; + struct pci_dev *bridge = bus->self; + u16 linksta; + + pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta); + __pcie_update_link_speed(bus, linksta); } EXPORT_SYMBOL_GPL(pcie_update_link_speed); @@ -827,13 +831,11 @@ static void pci_set_bus_speed(struct pci_bus *bus) if (pci_is_pcie(bridge)) { u32 linkcap; - u16 linksta; pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap); bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS]; - pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta); - pcie_update_link_speed(bus, linksta); + pcie_update_link_speed(bus); } } From 3491f509666865412ad344cd4dde9f3c5a52326e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:51 +0300 Subject: [PATCH 083/127] PCI: Abstract LBMS seen check into pcie_lbms_seen() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Target Speed quirk in pcie_failed_link_retrain() uses the presence of LBMS bit as one of the triggering conditions, effectively monopolizing the use of that bit. An upcoming change will introduce a PCIe bandwidth controller which sets up an interrupt to track LBMS. As LBMS will be cleared by the interrupt handler, the Target Speed quirk will no longer be able to observe LBMS directly. As a preparatory step for the change, extract the LBMS seen check from pcie_failed_link_retrain() into a new function pcie_lmbs_seen(). Link: https://lore.kernel.org/r/20241018144755.7875-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/quirks.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dccb60c1d9cc..a560ea403b8e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -33,6 +33,11 @@ #include #include "pci.h" +static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) +{ + return lnksta & PCI_EXP_LNKSTA_LBMS; +} + /* * Retrain the link of a downstream PCIe port by hand if necessary. * @@ -96,8 +101,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); - if ((lnksta & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_DLLLA)) == - PCI_EXP_LNKSTA_LBMS) { + if (!(lnksta & PCI_EXP_LNKSTA_DLLLA) && pcie_lbms_seen(dev, lnksta)) { u16 oldlnkctl2 = lnkctl2; pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); From 38a18dfe9035d5a02a53271824de1854129c61dc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:51 -0700 Subject: [PATCH 084/127] PCI: Unexport pci_walk_bus_locked() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's only one user of pci_walk_bus_locked(), and it's internal to the PCI core. Unexport it and make it private to drivers/pci/. Link: https://lore.kernel.org/r/20241022224851.340648-6-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: move decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Ilpo Järvinen --- drivers/pci/bus.c | 1 - drivers/pci/pci.h | 3 +++ include/linux/pci.h | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 8491e9c7f058..30620f3bb0e2 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -435,7 +435,6 @@ void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void * __pci_walk_bus(top, cb, userdata); } -EXPORT_SYMBOL_GPL(pci_walk_bus_locked); struct pci_bus *pci_bus_get(struct pci_bus *bus) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 187c5c83412d..28cccfeb8076 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -323,6 +323,9 @@ void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head); bool pci_bus_clip_resource(struct pci_dev *dev, int idx); +void pci_walk_bus_locked(struct pci_bus *top, + int (*cb)(struct pci_dev *, void *), + void *userdata); const char *pci_resource_name(struct pci_dev *dev, unsigned int i); diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..056290377273 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1623,8 +1623,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); -void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); From e3e309b2bea8d2c37ed6f52c837848b601d3245e Mon Sep 17 00:00:00 2001 From: Bartosz Wawrzyniak Date: Fri, 18 Oct 2024 11:30:43 +0000 Subject: [PATCH 085/127] PCI: cadence: Lower severity of message when phy-names property is absent in DTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "phy-names" property is optional, so the message indicating its absence during the probe should be of "info" severity rather than "error" severity. Link: https://lore.kernel.org/r/20241018113045.2050295-1-bwawrzyn@cisco.com Signed-off-by: Bartosz Wawrzyniak [kwilczynski: update log messages wording, commit log] Signed-off-by: Krzysztof Wilczyński --- drivers/pci/controller/cadence/pcie-cadence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence.c b/drivers/pci/controller/cadence/pcie-cadence.c index 4251fac5e310..204e045aed8c 100644 --- a/drivers/pci/controller/cadence/pcie-cadence.c +++ b/drivers/pci/controller/cadence/pcie-cadence.c @@ -197,7 +197,7 @@ int cdns_pcie_init_phy(struct device *dev, struct cdns_pcie *pcie) phy_count = of_property_count_strings(np, "phy-names"); if (phy_count < 1) { - dev_err(dev, "no phy-names. PHY will not be initialized\n"); + dev_info(dev, "no \"phy-names\" property found; PHY will not be initialized\n"); pcie->phy_count = 0; return 0; } @@ -260,7 +260,7 @@ static int cdns_pcie_resume_noirq(struct device *dev) ret = cdns_pcie_enable_phy(pcie); if (ret) { - dev_err(dev, "failed to enable phy\n"); + dev_err(dev, "failed to enable PHY\n"); return ret; } From d19ea320d3029a12993c671f4815bf6c02ae0727 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 11 Nov 2024 09:09:35 +0800 Subject: [PATCH 086/127] PCI: mediatek-gen3: Remove unneeded semicolon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove a surplus semicolon and fix the following warning: ./drivers/pci/controller/pcie-mediatek-gen3.c:414:2-3: Unneeded semicolon Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=11789 Link: https://lore.kernel.org/r/20241111010935.20208-1-yang.lee@linux.alibaba.com Reported-by: Abaci Robot Signed-off-by: Yang Li [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno --- drivers/pci/controller/pcie-mediatek-gen3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index bbfbfe5752e6..64ef8ff71b03 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -411,7 +411,7 @@ static int mtk_pcie_startup_port(struct mtk_gen3_pcie *pcie) if (pcie->num_lanes > 1) val |= FIELD_PREP(PCIE_SETTING_LINK_WIDTH, GENMASK(fls(pcie->num_lanes >> 2), 0)); - }; + } writel_relaxed(val, pcie->base + PCIE_SETTING_REG); /* Set Link Control 2 (LNKCTL2) speed restriction, if any */ From 6168efbebace0db443185d4c6701ca8170a8788d Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 11 Nov 2024 14:11:52 +0530 Subject: [PATCH 087/127] PCI: starfive: Enable controller runtime PM before probing host bridge A PCI controller device, e.g., StarFive, is parent to PCI host bridge device. We must enable runtime PM of the controller before enabling runtime PM of the host bridge, which will happen in pci_host_probe(), to avoid this warning: pcie-starfive 940000000.pcie: Enabling runtime PM for inactive device with active children Fix this issue by enabling StarFive controller device's runtime PM before calling pci_host_probe() in plda_pcie_host_init(). Link: https://lore.kernel.org/r/20241111-runtime_pm-v7-1-9c164eefcd87@quicinc.com Tested-by: Marek Szyprowski Signed-off-by: Mayank Rana [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/plda/pcie-starfive.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/plda/pcie-starfive.c b/drivers/pci/controller/plda/pcie-starfive.c index c9933ecf6833..0564fdce47c2 100644 --- a/drivers/pci/controller/plda/pcie-starfive.c +++ b/drivers/pci/controller/plda/pcie-starfive.c @@ -404,6 +404,9 @@ static int starfive_pcie_probe(struct platform_device *pdev) if (ret) return ret; + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + plda->host_ops = &sf_host_ops; plda->num_events = PLDA_MAX_EVENT_NUM; /* mask doorbell event */ @@ -413,11 +416,12 @@ static int starfive_pcie_probe(struct platform_device *pdev) plda->events_bitmap <<= PLDA_NUM_DMA_EVENTS; ret = plda_pcie_host_init(&pcie->plda, &starfive_pcie_ops, &stf_pcie_event); - if (ret) + if (ret) { + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; + } - pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); platform_set_drvdata(pdev, pcie); return 0; From dc421bb3c0db2aac926b548d259d3b550394908e Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Mon, 11 Nov 2024 14:11:53 +0530 Subject: [PATCH 088/127] PCI: Enable runtime PM of the host bridge The controller driver is the parent device of the PCIe host bridge, PCI-PCI bridge and PCIe endpoint as shown below. PCIe controller (Top level parent & parent of host bridge) | v PCIe Host bridge (Parent of PCI-PCI bridge) | v PCI-PCI bridge (Parent of endpoint driver) | v PCIe endpoint driver Now, when the controller device goes to runtime suspend, PM framework will check the runtime PM state of the child device (host bridge) and will find it to be disabled. So it will allow the parent (controller device) to go to runtime suspend. Only if the child device's state was 'active' it will prevent the parent to get suspended. It is a property of the runtime PM framework that it can only follow continuous dependency chains. That is, if there is a device with runtime PM disabled in a dependency chain, runtime PM cannot be enabled for devices below it and above it in that chain both at the same time. Since runtime PM is disabled for host bridge, the state of the child devices under the host bridge is not taken into account by PM framework for the top level parent, PCIe controller. So the PM framework allows the controller driver to enter runtime PM irrespective of the state of the devices under the host bridge. And this causes the topology breakage and also possible PM issues like controller driver going to runtime suspend while the endpoint driver is doing transfers. Because of the above, in order to enable runtime PM for a PCIe controller device, one needs to ensure that runtime PM is enabled for all devices in every dependency chain between it and any PCIe endpoint (as runtime PM is enabled for PCIe endpoints). This means that runtime PM needs to be enabled for the host bridge device, which is present in all of these dependency chains. After this change, the host bridge device will be runtime-suspended by the runtime PM framework automatically after suspending its last child and it will be runtime-resumed automatically before resuming its first child which will allow the runtime PM framework to track dependencies between the host bridge device and all of its descendants. The PM framework expects parent runtime PM to be enabled before enabling runtime PM of the child. Ensure pm_runtime_enable() is called for the controller drivers before calling pci_host_probe(). Link: https://lore.kernel.org/r/20241111-runtime_pm-v7-2-9c164eefcd87@quicinc.com Signed-off-by: Krishna chaitanya chundru Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/probe.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..8b14d3af057a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -3106,6 +3106,17 @@ int pci_host_probe(struct pci_host_bridge *bridge) pcie_bus_configure_settings(child); pci_bus_add_devices(bus); + + /* + * Ensure pm_runtime_enable() is called for the controller drivers + * before calling pci_host_probe(). The PM framework expects that + * if the parent device supports runtime PM, it will be enabled + * before child runtime PM is enabled. + */ + pm_runtime_set_active(&bridge->dev); + pm_runtime_no_callbacks(&bridge->dev); + devm_pm_runtime_enable(&bridge->dev); + return 0; } EXPORT_SYMBOL_GPL(pci_host_probe); From 2fa046449a82a7d0f6d9721dd83e348816038444 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 25 Oct 2024 15:27:54 -0700 Subject: [PATCH 089/127] PCI: Add 'reset_subordinate' to reset hierarchy below bridge The "bus" and "cxl_bus" reset methods reset a device by asserting Secondary Bus Reset on the bridge leading to the device. These only work if the device is the only device below the bridge. Add a sysfs 'reset_subordinate' attribute on bridges that can assert Secondary Bus Reset regardless of how many devices are below the bridge. This resets all the devices below a bridge in a single command, including the locking and config space save/restore that reset methods normally do. This may be the only way to reset devices that don't support other reset methods (ACPI, FLR, PM reset, etc). Link: https://lore.kernel.org/r/20241025222755.3756162-1-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: commit log, add capable(CAP_SYS_ADMIN) check] Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson Reviewed-by: Amey Narkhede --- Documentation/ABI/testing/sysfs-bus-pci | 11 +++++++++++ drivers/pci/pci-sysfs.c | 26 +++++++++++++++++++++++++ drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 7f63c7e97773..5da6a14dc326 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -163,6 +163,17 @@ Description: will be present in sysfs. Writing 1 to this file will perform reset. +What: /sys/bus/pci/devices/.../reset_subordinate +Date: October 2024 +Contact: linux-pci@vger.kernel.org +Description: + This is visible only for bridge devices. If you want to reset + all devices attached through the subordinate bus of a specific + bridge device, writing 1 to this will try to do it. This will + affect all devices attached to the system through this bridge + similiar to writing 1 to their individual "reset" file, so use + with caution. + What: /sys/bus/pci/devices/.../vpd Date: February 2008 Contact: Ben Hutchings diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5d0f4db1cab7..3e5a117f5b5d 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -521,6 +521,31 @@ static ssize_t bus_rescan_store(struct device *dev, static struct device_attribute dev_attr_bus_rescan = __ATTR(rescan, 0200, NULL, bus_rescan_store); +static ssize_t reset_subordinate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->subordinate; + unsigned long val; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + int ret = __pci_reset_bus(bus); + + if (ret) + return ret; + } + + return count; +} +static DEVICE_ATTR_WO(reset_subordinate); + #if defined(CONFIG_PM) && defined(CONFIG_ACPI) static ssize_t d3cold_allowed_store(struct device *dev, struct device_attribute *attr, @@ -625,6 +650,7 @@ static struct attribute *pci_dev_attrs[] = { static struct attribute *pci_bridge_attrs[] = { &dev_attr_subordinate_bus_number.attr, &dev_attr_secondary_bus_number.attr, + &dev_attr_reset_subordinate.attr, NULL, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..338dfcd983f1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5880,7 +5880,7 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_bus); * * Same as above except return -EAGAIN if the bus cannot be locked */ -static int __pci_reset_bus(struct pci_bus *bus) +int __pci_reset_bus(struct pci_bus *bus) { int rc; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..1cdc2c9547a7 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -104,6 +104,7 @@ bool pci_reset_supported(struct pci_dev *dev); void pci_init_reset_methods(struct pci_dev *dev); int pci_bridge_secondary_bus_reset(struct pci_dev *dev); int pci_bus_error_reset(struct pci_dev *dev); +int __pci_reset_bus(struct pci_bus *bus); struct pci_cap_saved_data { u16 cap_nr; From a3151e6daaec171b7d46ac79170ec420ad874cae Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 25 Oct 2024 15:27:55 -0700 Subject: [PATCH 090/127] PCI: Warn if a running device is unaware of reset If a reset is issued to a running device with a driver that didn't register the notification callbacks, the driver may be unaware of this event and have an inconsistent view of the device's state. Log a warning of this event because there's nothing else indicating the event occured, which could be confusing when debugging such situations. Link: https://lore.kernel.org/r/20241025222755.3756162-2-kbusch@meta.com Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Amey Narkhede Reviewed-by: Alex Williamson --- drivers/pci/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 338dfcd983f1..bbf12d499826 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5158,6 +5158,8 @@ static void pci_dev_save_and_disable(struct pci_dev *dev) */ if (err_handler && err_handler->reset_prepare) err_handler->reset_prepare(dev); + else if (dev->driver) + pci_warn(dev, "resetting"); /* * Wake-up device prior to save. PM registers default to D0 after @@ -5191,6 +5193,8 @@ static void pci_dev_restore(struct pci_dev *dev) */ if (err_handler && err_handler->reset_done) err_handler->reset_done(dev); + else if (dev->driver) + pci_warn(dev, "reset done"); } /* dev->reset_methods[] is a 0-terminated list of indices into this array */ From e434e54d3ffcd17eeadfcf3cf434bc1dff36daff Mon Sep 17 00:00:00 2001 From: Shijith Thotton Date: Mon, 11 Nov 2024 19:15:11 +0530 Subject: [PATCH 091/127] PCI: hotplug: Add OCTEON PCI hotplug controller driver Add a PCI hotplug controller driver for the OCTEON PCIe device. The OCTEON PCIe device is a multi-function device where function 0 serves as the PCI hotplug controller. There is an out-of-band management console interface to firmware running on function 0 whereby an administrator can disable functions to save power or enable them with one of several personalities (virtio-net, virtio-crypto, NVMe, etc) for the other functions. Function 0 initiates hotplug events handled by this driver when the other functions are enabled or disabled. +--------------------------------+ | Root Port | +--------------------------------+ | PCIe | +---------------------------------------------------------------+ | OCTEON PCIe Multifunction Device | +---------------------------------------------------------------+ | | | | | | | | +---------------------+ +----------------+ +-----+ +----------------+ | Function 0 | | Function 1 | | ... | | Function 7 | | (Hotplug controller)| | (Hotplug slot) | | | | (Hotplug slot) | +---------------------+ +----------------+ +-----+ +----------------+ | | +-------------------------+ | Controller Firmware | +-------------------------+ The hotplug controller driver enables hotplugging of non-controller functions within the same device. During probing, the driver removes the non-controller functions and registers them as PCI hotplug slots. These slots are added back by the driver, only upon request from the device firmware. The controller uses MSI-X interrupts to notify the host of hotplug events initiated by the OCTEON firmware. Additionally, the driver allows users to enable or disable individual functions via sysfs slot entries, as provided by the PCI hotplug framework. Link: https://lore.kernel.org/r/20241111134523.2796699-1-sthotton@marvell.com Co-developed-by: Vamsi Attunuru Signed-off-by: Vamsi Attunuru Signed-off-by: Shijith Thotton [bhelgaas: use pci_info() when possible] Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 6 + drivers/pci/hotplug/Kconfig | 10 + drivers/pci/hotplug/Makefile | 1 + drivers/pci/hotplug/octep_hp.c | 427 +++++++++++++++++++++++++++++++++ 4 files changed, 444 insertions(+) create mode 100644 drivers/pci/hotplug/octep_hp.c diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..1e8714bbdea1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13882,6 +13882,12 @@ R: schalla@marvell.com R: vattunuru@marvell.com F: drivers/vdpa/octeon_ep/ +MARVELL OCTEON HOTPLUG DRIVER +R: Shijith Thotton +R: Vamsi Attunuru +S: Supported +F: drivers/pci/hotplug/octep_hp.c + MATROX FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 1472aef0fb81..123c4c7c2ab5 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -118,6 +118,16 @@ config HOTPLUG_PCI_CPCI_GENERIC When in doubt, say N. +config HOTPLUG_PCI_OCTEONEP + bool "Marvell OCTEON PCI Hotplug driver" + depends on HOTPLUG_PCI + help + Say Y here if you have an OCTEON PCIe device with a hotplug + controller. This driver enables the non-controller functions of the + device to be registered as hotplug slots. + + When in doubt, say N. + config HOTPLUG_PCI_SHPC bool "SHPC PCI Hotplug driver" help diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 240c99517d5e..40aaf31fe338 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_ACPI) += acpiphp.o obj-$(CONFIG_HOTPLUG_PCI_S390) += s390_pci_hpc.o +obj-$(CONFIG_HOTPLUG_PCI_OCTEONEP) += octep_hp.o # acpiphp_ibm extends acpiphp, so should be linked afterwards. diff --git a/drivers/pci/hotplug/octep_hp.c b/drivers/pci/hotplug/octep_hp.c new file mode 100644 index 000000000000..2bce7296c050 --- /dev/null +++ b/drivers/pci/hotplug/octep_hp.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Marvell. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OCTEP_HP_INTR_OFFSET(x) (0x20400 + ((x) << 4)) +#define OCTEP_HP_INTR_VECTOR(x) (16 + (x)) +#define OCTEP_HP_DRV_NAME "octep_hp" + +/* + * Type of MSI-X interrupts. OCTEP_HP_INTR_VECTOR() and + * OCTEP_HP_INTR_OFFSET() generate the vector and offset for an interrupt + * type. + */ +enum octep_hp_intr_type { + OCTEP_HP_INTR_INVALID = -1, + OCTEP_HP_INTR_ENA = 0, + OCTEP_HP_INTR_DIS = 1, + OCTEP_HP_INTR_MAX = 2, +}; + +struct octep_hp_cmd { + struct list_head list; + enum octep_hp_intr_type intr_type; + u64 intr_val; +}; + +struct octep_hp_slot { + struct list_head list; + struct hotplug_slot slot; + u16 slot_number; + struct pci_dev *hp_pdev; + unsigned int hp_devfn; + struct octep_hp_controller *ctrl; +}; + +struct octep_hp_intr_info { + enum octep_hp_intr_type type; + int number; + char name[16]; +}; + +struct octep_hp_controller { + void __iomem *base; + struct pci_dev *pdev; + struct octep_hp_intr_info intr[OCTEP_HP_INTR_MAX]; + struct work_struct work; + struct list_head slot_list; + struct mutex slot_lock; /* Protects slot_list */ + struct list_head hp_cmd_list; + spinlock_t hp_cmd_lock; /* Protects hp_cmd_list */ +}; + +static void octep_hp_enable_pdev(struct octep_hp_controller *hp_ctrl, + struct octep_hp_slot *hp_slot) +{ + guard(mutex)(&hp_ctrl->slot_lock); + if (hp_slot->hp_pdev) { + pci_dbg(hp_slot->hp_pdev, "Slot %s is already enabled\n", + hotplug_slot_name(&hp_slot->slot)); + return; + } + + /* Scan the device and add it to the bus */ + hp_slot->hp_pdev = pci_scan_single_device(hp_ctrl->pdev->bus, + hp_slot->hp_devfn); + pci_bus_assign_resources(hp_ctrl->pdev->bus); + pci_bus_add_device(hp_slot->hp_pdev); + + dev_dbg(&hp_slot->hp_pdev->dev, "Enabled slot %s\n", + hotplug_slot_name(&hp_slot->slot)); +} + +static void octep_hp_disable_pdev(struct octep_hp_controller *hp_ctrl, + struct octep_hp_slot *hp_slot) +{ + guard(mutex)(&hp_ctrl->slot_lock); + if (!hp_slot->hp_pdev) { + pci_dbg(hp_ctrl->pdev, "Slot %s is already disabled\n", + hotplug_slot_name(&hp_slot->slot)); + return; + } + + pci_dbg(hp_slot->hp_pdev, "Disabling slot %s\n", + hotplug_slot_name(&hp_slot->slot)); + + /* Remove the device from the bus */ + pci_stop_and_remove_bus_device_locked(hp_slot->hp_pdev); + hp_slot->hp_pdev = NULL; +} + +static int octep_hp_enable_slot(struct hotplug_slot *slot) +{ + struct octep_hp_slot *hp_slot = + container_of(slot, struct octep_hp_slot, slot); + + octep_hp_enable_pdev(hp_slot->ctrl, hp_slot); + return 0; +} + +static int octep_hp_disable_slot(struct hotplug_slot *slot) +{ + struct octep_hp_slot *hp_slot = + container_of(slot, struct octep_hp_slot, slot); + + octep_hp_disable_pdev(hp_slot->ctrl, hp_slot); + return 0; +} + +static struct hotplug_slot_ops octep_hp_slot_ops = { + .enable_slot = octep_hp_enable_slot, + .disable_slot = octep_hp_disable_slot, +}; + +#define SLOT_NAME_SIZE 16 +static struct octep_hp_slot * +octep_hp_register_slot(struct octep_hp_controller *hp_ctrl, + struct pci_dev *pdev, u16 slot_number) +{ + char slot_name[SLOT_NAME_SIZE]; + struct octep_hp_slot *hp_slot; + int ret; + + hp_slot = kzalloc(sizeof(*hp_slot), GFP_KERNEL); + if (!hp_slot) + return ERR_PTR(-ENOMEM); + + hp_slot->ctrl = hp_ctrl; + hp_slot->hp_pdev = pdev; + hp_slot->hp_devfn = pdev->devfn; + hp_slot->slot_number = slot_number; + hp_slot->slot.ops = &octep_hp_slot_ops; + + snprintf(slot_name, sizeof(slot_name), "octep_hp_%u", slot_number); + ret = pci_hp_register(&hp_slot->slot, hp_ctrl->pdev->bus, + PCI_SLOT(pdev->devfn), slot_name); + if (ret) { + kfree(hp_slot); + return ERR_PTR(ret); + } + + pci_info(pdev, "Registered slot %s for device %s\n", + slot_name, pci_name(pdev)); + + list_add_tail(&hp_slot->list, &hp_ctrl->slot_list); + octep_hp_disable_pdev(hp_ctrl, hp_slot); + + return hp_slot; +} + +static void octep_hp_deregister_slot(void *data) +{ + struct octep_hp_slot *hp_slot = data; + struct octep_hp_controller *hp_ctrl = hp_slot->ctrl; + + pci_hp_deregister(&hp_slot->slot); + octep_hp_enable_pdev(hp_ctrl, hp_slot); + list_del(&hp_slot->list); + kfree(hp_slot); +} + +static const char *octep_hp_cmd_name(enum octep_hp_intr_type type) +{ + switch (type) { + case OCTEP_HP_INTR_ENA: + return "hotplug enable"; + case OCTEP_HP_INTR_DIS: + return "hotplug disable"; + default: + return "invalid"; + } +} + +static void octep_hp_cmd_handler(struct octep_hp_controller *hp_ctrl, + struct octep_hp_cmd *hp_cmd) +{ + struct octep_hp_slot *hp_slot; + + /* + * Enable or disable the slots based on the slot mask. + * intr_val is a bit mask where each bit represents a slot. + */ + list_for_each_entry(hp_slot, &hp_ctrl->slot_list, list) { + if (!(hp_cmd->intr_val & BIT(hp_slot->slot_number))) + continue; + + pci_info(hp_ctrl->pdev, "Received %s command for slot %s\n", + octep_hp_cmd_name(hp_cmd->intr_type), + hotplug_slot_name(&hp_slot->slot)); + + switch (hp_cmd->intr_type) { + case OCTEP_HP_INTR_ENA: + octep_hp_enable_pdev(hp_ctrl, hp_slot); + break; + case OCTEP_HP_INTR_DIS: + octep_hp_disable_pdev(hp_ctrl, hp_slot); + break; + default: + break; + } + } +} + +static void octep_hp_work_handler(struct work_struct *work) +{ + struct octep_hp_controller *hp_ctrl; + struct octep_hp_cmd *hp_cmd; + unsigned long flags; + + hp_ctrl = container_of(work, struct octep_hp_controller, work); + + /* Process all the hotplug commands */ + spin_lock_irqsave(&hp_ctrl->hp_cmd_lock, flags); + while (!list_empty(&hp_ctrl->hp_cmd_list)) { + hp_cmd = list_first_entry(&hp_ctrl->hp_cmd_list, + struct octep_hp_cmd, list); + list_del(&hp_cmd->list); + spin_unlock_irqrestore(&hp_ctrl->hp_cmd_lock, flags); + + octep_hp_cmd_handler(hp_ctrl, hp_cmd); + kfree(hp_cmd); + + spin_lock_irqsave(&hp_ctrl->hp_cmd_lock, flags); + } + spin_unlock_irqrestore(&hp_ctrl->hp_cmd_lock, flags); +} + +static enum octep_hp_intr_type octep_hp_intr_type(struct octep_hp_intr_info *intr, + int irq) +{ + enum octep_hp_intr_type type; + + for (type = OCTEP_HP_INTR_ENA; type < OCTEP_HP_INTR_MAX; type++) { + if (intr[type].number == irq) + return type; + } + + return OCTEP_HP_INTR_INVALID; +} + +static irqreturn_t octep_hp_intr_handler(int irq, void *data) +{ + struct octep_hp_controller *hp_ctrl = data; + struct pci_dev *pdev = hp_ctrl->pdev; + enum octep_hp_intr_type type; + struct octep_hp_cmd *hp_cmd; + u64 intr_val; + + type = octep_hp_intr_type(hp_ctrl->intr, irq); + if (type == OCTEP_HP_INTR_INVALID) { + pci_err(pdev, "Invalid interrupt %d\n", irq); + return IRQ_HANDLED; + } + + /* Read and clear the interrupt */ + intr_val = readq(hp_ctrl->base + OCTEP_HP_INTR_OFFSET(type)); + writeq(intr_val, hp_ctrl->base + OCTEP_HP_INTR_OFFSET(type)); + + hp_cmd = kzalloc(sizeof(*hp_cmd), GFP_ATOMIC); + if (!hp_cmd) + return IRQ_HANDLED; + + hp_cmd->intr_val = intr_val; + hp_cmd->intr_type = type; + + /* Add the command to the list and schedule the work */ + spin_lock(&hp_ctrl->hp_cmd_lock); + list_add_tail(&hp_cmd->list, &hp_ctrl->hp_cmd_list); + spin_unlock(&hp_ctrl->hp_cmd_lock); + schedule_work(&hp_ctrl->work); + + return IRQ_HANDLED; +} + +static void octep_hp_irq_cleanup(void *data) +{ + struct octep_hp_controller *hp_ctrl = data; + + pci_free_irq_vectors(hp_ctrl->pdev); + flush_work(&hp_ctrl->work); +} + +static int octep_hp_request_irq(struct octep_hp_controller *hp_ctrl, + enum octep_hp_intr_type type) +{ + struct pci_dev *pdev = hp_ctrl->pdev; + struct octep_hp_intr_info *intr; + int irq; + + irq = pci_irq_vector(pdev, OCTEP_HP_INTR_VECTOR(type)); + if (irq < 0) + return irq; + + intr = &hp_ctrl->intr[type]; + intr->number = irq; + intr->type = type; + snprintf(intr->name, sizeof(intr->name), "octep_hp_%d", type); + + return devm_request_irq(&pdev->dev, irq, octep_hp_intr_handler, + IRQF_SHARED, intr->name, hp_ctrl); +} + +static int octep_hp_controller_setup(struct pci_dev *pdev, + struct octep_hp_controller *hp_ctrl) +{ + struct device *dev = &pdev->dev; + enum octep_hp_intr_type type; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable PCI device\n"); + + hp_ctrl->base = pcim_iomap_region(pdev, 0, OCTEP_HP_DRV_NAME); + if (IS_ERR(hp_ctrl->base)) + return dev_err_probe(dev, PTR_ERR(hp_ctrl->base), + "Failed to map PCI device region\n"); + + pci_set_master(pdev); + pci_set_drvdata(pdev, hp_ctrl); + + INIT_LIST_HEAD(&hp_ctrl->slot_list); + INIT_LIST_HEAD(&hp_ctrl->hp_cmd_list); + mutex_init(&hp_ctrl->slot_lock); + spin_lock_init(&hp_ctrl->hp_cmd_lock); + INIT_WORK(&hp_ctrl->work, octep_hp_work_handler); + hp_ctrl->pdev = pdev; + + ret = pci_alloc_irq_vectors(pdev, 1, + OCTEP_HP_INTR_VECTOR(OCTEP_HP_INTR_MAX), + PCI_IRQ_MSIX); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to alloc MSI-X vectors\n"); + + ret = devm_add_action(&pdev->dev, octep_hp_irq_cleanup, hp_ctrl); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to add IRQ cleanup action\n"); + + for (type = OCTEP_HP_INTR_ENA; type < OCTEP_HP_INTR_MAX; type++) { + ret = octep_hp_request_irq(hp_ctrl, type); + if (ret) + return dev_err_probe(dev, ret, + "Failed to request IRQ for vector %d\n", + OCTEP_HP_INTR_VECTOR(type)); + } + + return 0; +} + +static int octep_hp_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct octep_hp_controller *hp_ctrl; + struct pci_dev *tmp_pdev, *next; + struct octep_hp_slot *hp_slot; + u16 slot_number = 0; + int ret; + + hp_ctrl = devm_kzalloc(&pdev->dev, sizeof(*hp_ctrl), GFP_KERNEL); + if (!hp_ctrl) + return -ENOMEM; + + ret = octep_hp_controller_setup(pdev, hp_ctrl); + if (ret) + return ret; + + /* + * Register all hotplug slots. Hotplug controller is the first function + * of the PCI device. The hotplug slots are the remaining functions of + * the PCI device. The hotplug slot functions are logically removed from + * the bus during probing and are re-enabled by the driver when a + * hotplug event is received. + */ + list_for_each_entry_safe(tmp_pdev, next, &pdev->bus->devices, bus_list) { + if (tmp_pdev == pdev) + continue; + + hp_slot = octep_hp_register_slot(hp_ctrl, tmp_pdev, slot_number); + if (IS_ERR(hp_slot)) + return dev_err_probe(&pdev->dev, PTR_ERR(hp_slot), + "Failed to register hotplug slot %u\n", + slot_number); + + ret = devm_add_action(&pdev->dev, octep_hp_deregister_slot, + hp_slot); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to add action for deregistering slot %u\n", + slot_number); + slot_number++; + } + + return 0; +} + +#define PCI_DEVICE_ID_CAVIUM_OCTEP_HP_CTLR 0xa0e3 +static struct pci_device_id octep_hp_pci_map[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_CAVIUM_OCTEP_HP_CTLR) }, + { }, +}; + +static struct pci_driver octep_hp = { + .name = OCTEP_HP_DRV_NAME, + .id_table = octep_hp_pci_map, + .probe = octep_hp_pci_probe, +}; + +module_pci_driver(octep_hp); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell OCTEON PCI Hotplug driver"); From 5e316d34b53039346e252d0019e2f4167af2c0ef Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Fri, 8 Nov 2024 10:42:56 +0100 Subject: [PATCH 092/127] PCI: of_property: Assign PCI instead of CPU bus address to dynamic PCI nodes When populating "ranges" property for a PCI bridge or endpoint, of_pci_prop_ranges() incorrectly uses the CPU address of the resource. In such PCI nodes, the window should instead be in PCI address space. Call pci_bus_address() on the resource in order to obtain the PCI bus address. [Previous discussion at: https://lore.kernel.org/all/8b4fa91380fc4754ea80f47330c613e4f6b6592c.1724159867.git.andrea.porta@suse.com/] Link: https://lore.kernel.org/r/20241108094256.28933-1-andrea.porta@suse.com Fixes: 407d1a51921e ("PCI: Create device tree node for bridge") Tested-by: Herve Codina Signed-off-by: Andrea della Porta Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/of_property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c index 5a0b98e69795..886c236e5de6 100644 --- a/drivers/pci/of_property.c +++ b/drivers/pci/of_property.c @@ -126,7 +126,7 @@ static int of_pci_prop_ranges(struct pci_dev *pdev, struct of_changeset *ocs, if (of_pci_get_addr_flags(&res[j], &flags)) continue; - val64 = res[j].start; + val64 = pci_bus_address(pdev, &res[j] - pdev->resource); of_pci_set_address(pdev, rp[i].parent_addr, val64, 0, flags, false); if (pci_is_bridge(pdev)) { From 31457d4cea19dc807e3beab15883c912f19acef7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 2 Nov 2024 12:45:37 -0500 Subject: [PATCH 093/127] PCI: Fix typos Fix typos and whitespace errors. Link: https://lore.kernel.org/r/20241102174537.1362183-1-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rcar-host.c | 4 ++-- drivers/pci/pcie/aer.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c index 3dd653f3d784..7c92eada04af 100644 --- a/drivers/pci/controller/pcie-rcar-host.c +++ b/drivers/pci/controller/pcie-rcar-host.c @@ -796,8 +796,8 @@ static int rcar_pcie_enable_msi(struct rcar_pcie_host *host) rcar_pci_write_reg(pcie, 0, PCIEMSIIER); /* - * Setup MSI data target using RC base address address, which - * is guaranteed to be in the low 32bit range on any R-Car HW. + * Setup MSI data target using RC base address, which is guaranteed + * to be in the low 32bit range on any R-Car HW. */ rcar_pci_write_reg(pcie, lower_32_bits(res.start) | MSIFE, PCIEMSIALR); rcar_pci_write_reg(pcie, upper_32_bits(res.start), PCIEMSIAUR); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 13b8586924ea..80c5ba8d8296 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -180,7 +180,8 @@ static int disable_ecrc_checking(struct pci_dev *dev) } /** - * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based + * on global policy * @dev: the PCI device */ void pcie_set_ecrc_checking(struct pci_dev *dev) @@ -1148,14 +1149,16 @@ static void aer_recover_work_func(struct work_struct *work) continue; } pci_print_aer(pdev, entry.severity, entry.regs); + /* - * Memory for aer_capability_regs(entry.regs) is being allocated from the - * ghes_estatus_pool to protect it from overwriting when multiple sections - * are present in the error status. Thus free the same after processing - * the data. + * Memory for aer_capability_regs(entry.regs) is being + * allocated from the ghes_estatus_pool to protect it from + * overwriting when multiple sections are present in the + * error status. Thus free the same after processing the + * data. */ ghes_estatus_pool_region_free((unsigned long)entry.regs, - sizeof(struct aer_capability_regs)); + sizeof(struct aer_capability_regs)); if (entry.severity == AER_NONFATAL) pcie_do_recovery(pdev, pci_channel_io_normal, From aa46a3736afcb7b0793766d22479b8b99fc1b322 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Fri, 15 Nov 2024 10:46:04 +0800 Subject: [PATCH 094/127] PCI: Add ACS quirk for Wangxun FF5xxx NICs Wangxun FF5xxx NICs are similar to SFxxx, RP1000 and RP2000 NICs. They may be multi-function devices, but they do not advertise an ACS capability. But the hardware does isolate FF5xxx functions as though it had an ACS capability and PCI_ACS_RR and PCI_ACS_CR were set in the ACS Control register, i.e., all peer-to-peer traffic is directed upstream instead of being routed internally. Add ACS quirk for FF5xxx NICs in pci_quirk_wangxun_nic_acs() so the functions can be in independent IOMMU groups. Link: https://lore.kernel.org/r/E16053DB2B80E9A5+20241115024604.30493-1-mengyuanlou@net-swift.com Signed-off-by: Mengyuan Lou Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dccb60c1d9cc..8103bc24a54e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4996,18 +4996,21 @@ static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags) } /* - * Wangxun 10G/1G NICs have no ACS capability, and on multi-function - * devices, peer-to-peer transactions are not be used between the functions. - * So add an ACS quirk for below devices to isolate functions. + * Wangxun 40G/25G/10G/1G NICs have no ACS capability, but on + * multi-function devices, the hardware isolates the functions by + * directing all peer-to-peer traffic upstream as though PCI_ACS_RR and + * PCI_ACS_CR were set. * SFxxx 1G NICs(em). * RP1000/RP2000 10G NICs(sp). + * FF5xxx 40G/25G/10G NICs(aml). */ static int pci_quirk_wangxun_nic_acs(struct pci_dev *dev, u16 acs_flags) { switch (dev->device) { - case 0x0100 ... 0x010F: - case 0x1001: - case 0x2001: + case 0x0100 ... 0x010F: /* EM */ + case 0x1001: case 0x2001: /* SP */ + case 0x5010: case 0x5025: case 0x5040: /* AML */ + case 0x5110: case 0x5125: case 0x5140: /* AML */ return pci_acs_ctrl_enabled(acs_flags, PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF); } From 665745f274870c921020f610e2c99a3b1613519b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:52 +0300 Subject: [PATCH 095/127] PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mostly reverts the commit b4c7d2076b4e ("PCI/LINK: Remove bandwidth notification"). An upcoming commit extends this driver building PCIe bandwidth controller on top of it. PCIe bandwidth notifications were first added in the commit e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") but later had to be removed. The significant changes compared with the old bandwidth notification driver include: 1) Don't print the notifications into kernel log, just keep the Link Speed cached in struct pci_bus updated. While somewhat unfortunate, the log spam was the source of complaints that eventually lead to the removal of the bandwidth notifications driver (see the links below for further information). 2) Besides the Link Bandwidth Management Interrupt, also enable Link Autonomous Bandwidth Interrupt to cover the other source of bandwidth changes. 3) Handle Link Speed updates robustly. Refresh the cached Link Speed when enabling Bandwidth Notification Interrupts, and solve the race between Link Speed read and LBMS/LABS update in pcie_bwnotif_irq_thread(). 4) Use concurrency safe LNKCTL RMW operations. 5) The driver is now called PCIe bwctrl (bandwidth controller) instead of just bandwidth notifications because of increased scope and functionality within the driver. 6) Coexist with the Target Link Speed quirk in pcie_failed_link_retrain(). Provide LBMS counting API for it. 7) Tweaks to variable/functions names for consistency and length reasons. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. [bhelgaas: This is based on previous work by Alexandru Gagniuc ; see e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification")] Link: https://lore.kernel.org/r/20241018144755.7875-7-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/all/20190429185611.121751-1-helgaas@kernel.org/ Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com/ Link: https://lore.kernel.org/linux-pci/20200115221008.GA191037@google.com/ Suggested-by: Lukas Wunner # Building bwctrl on top of bwnotif Signed-off-by: Ilpo Järvinen [bhelgaas: squash fix to drop IRQF_ONESHOT and convert to hardirq handler: https://lore.kernel.org/r/20241115165717.15233-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Bjorn Helgaas Tested-by: Stefan Wahren Reviewed-by: Jonathan Cameron --- MAINTAINERS | 6 + drivers/pci/hotplug/pciehp_ctrl.c | 5 + drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 11 ++ drivers/pci/pcie/Makefile | 2 +- drivers/pci/pcie/bwctrl.c | 186 ++++++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.c | 9 +- drivers/pci/pcie/portdrv.h | 6 +- drivers/pci/quirks.c | 9 +- include/linux/pci.h | 2 + 10 files changed, 229 insertions(+), 9 deletions(-) create mode 100644 drivers/pci/pcie/bwctrl.c diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..8c555b3325d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17933,6 +17933,12 @@ F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* +PCIE BANDWIDTH CONTROLLER +M: Ilpo Järvinen +L: linux-pci@vger.kernel.org +S: Supported +F: drivers/pci/pcie/bwctrl.c + PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron L: linux-pci@vger.kernel.org diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index dcdbfcf404dd..d603a7aa7483 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -19,6 +19,8 @@ #include #include #include + +#include "../pci.h" #include "pciehp.h" /* The following routines constitute the bulk of the @@ -127,6 +129,9 @@ static void remove_board(struct controller *ctrl, bool safe_removal) pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, INDICATOR_NOOP); + + /* Don't carry LBMS indications across */ + pcie_reset_lbms_count(ctrl->pcie->port); } static int pciehp_enable_slot(struct controller *ctrl); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3d67e8b50ba2..f85f380cdb9b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4740,7 +4740,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * to track link speed or width changes made by hardware itself * in attempt to correct unreliable link operation. */ - pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); + pcie_reset_lbms_count(pdev); return rc; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 8137ad2fead8..8ec189c8f3f5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -698,6 +698,17 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif +#ifdef CONFIG_PCIEPORTBUS +void pcie_reset_lbms_count(struct pci_dev *port); +int pcie_lbms_count(struct pci_dev *port, unsigned long *val); +#else +static inline void pcie_reset_lbms_count(struct pci_dev *port) {} +static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + return -EOPNOTSUPP; +} +#endif + struct pci_dev_reset_methods { u16 vendor; u16 device; diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 6461aa93fe76..53ccab62314d 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -4,7 +4,7 @@ pcieportdrv-y := portdrv.o rcec.o -obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c new file mode 100644 index 000000000000..b3da24900271 --- /dev/null +++ b/drivers/pci/pcie/bwctrl.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PCIe bandwidth controller + * + * Author: Alexandru Gagniuc + * + * Copyright (C) 2019 Dell Inc + * Copyright (C) 2023-2024 Intel Corporation + * + * This service port driver hooks into the Bandwidth Notification interrupt + * watching for changes or links becoming degraded in operation. It updates + * the cached Current Link Speed that is exposed to user space through sysfs. + */ + +#define dev_fmt(fmt) "bwctrl: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +/** + * struct pcie_bwctrl_data - PCIe bandwidth controller + * @lbms_count: Count for LBMS (since last reset) + */ +struct pcie_bwctrl_data { + atomic_t lbms_count; +}; + +/* Prevents port removal during LBMS count accessors */ +static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); + +static void pcie_bwnotif_enable(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status; + int ret; + + /* Count LBMS seen so far as one */ + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_set_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + + /* + * Update after enabling notifications & clearing status bits ensures + * link speed is up to date. + */ + pcie_update_link_speed(port->subordinate); +} + +static void pcie_bwnotif_disable(struct pci_dev *port) +{ + pcie_capability_clear_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); +} + +static irqreturn_t pcie_bwnotif_irq(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status, events; + int ret; + + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret != PCIBIOS_SUCCESSFUL) + return IRQ_NONE; + + events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + if (!events) + return IRQ_NONE; + + if (events & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + + /* + * Interrupts will not be triggered from any further Link Speed + * change until LBMS is cleared by the write. Therefore, re-read the + * speed (inside pcie_update_link_speed()) after LBMS has been + * cleared to avoid missing link speed changes. + */ + pcie_update_link_speed(port->subordinate); + + return IRQ_HANDLED; +} + +void pcie_reset_lbms_count(struct pci_dev *port) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (data) + atomic_set(&data->lbms_count, 0); + else + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS); +} + +int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (!data) + return -ENOTTY; + + *val = atomic_read(&data->lbms_count); + + return 0; +} + +static int pcie_bwnotif_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + int ret; + + struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, + sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) + return ret; + + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = no_free_ptr(data); + pcie_bwnotif_enable(srv); + } + + pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + + return 0; +} + +static void pcie_bwnotif_remove(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) + srv->port->link_bwctrl = NULL; +} + +static int pcie_bwnotif_suspend(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + return 0; +} + +static int pcie_bwnotif_resume(struct pcie_device *srv) +{ + pcie_bwnotif_enable(srv); + return 0; +} + +static struct pcie_port_service_driver pcie_bwctrl_driver = { + .name = "pcie_bwctrl", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_BWCTRL, + .probe = pcie_bwnotif_probe, + .suspend = pcie_bwnotif_suspend, + .resume = pcie_bwnotif_resume, + .remove = pcie_bwnotif_remove, +}; + +int __init pcie_bwctrl_init(void) +{ + return pcie_port_service_register(&pcie_bwctrl_driver); +} diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index 6af5e0425872..5e10306b6308 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16); nvec = *pme + 1; @@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) /* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_irq = pci_irq_vector(dev, pme); irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq; irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq; - irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq; } if (mask & PCIE_PORT_SERVICE_AER) @@ -271,7 +271,7 @@ static int get_port_device_capability(struct pci_dev *dev) pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); if (linkcap & PCI_EXP_LNKCAP_LBNC) - services |= PCIE_PORT_SERVICE_BWNOTIF; + services |= PCIE_PORT_SERVICE_BWCTRL; } return services; @@ -828,6 +828,7 @@ static void __init pcie_init_services(void) pcie_aer_init(); pcie_pme_init(); pcie_dpc_init(); + pcie_bwctrl_init(); pcie_hp_init(); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 12c89ea0313b..bd29d1cc7b8b 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -20,8 +20,8 @@ #define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) #define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ #define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) -#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */ -#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT) +#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */ +#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT) #define PCIE_PORT_DEVICE_MAXSERVICES 5 @@ -51,6 +51,8 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +int pcie_bwctrl_init(void); + /* Port Type */ #define PCIE_ANY_PORT (~0) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a560ea403b8e..e6d502dca939 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -35,7 +35,14 @@ static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) { - return lnksta & PCI_EXP_LNKSTA_LBMS; + unsigned long count; + int ret; + + ret = pcie_lbms_count(dev, &count); + if (ret < 0) + return lnksta & PCI_EXP_LNKSTA_LBMS; + + return count > 0; } /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 99c6fa30d25b..579b28833429 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -313,6 +313,7 @@ struct pci_vpd { }; struct irq_affinity; +struct pcie_bwctrl_data; struct pcie_link_state; struct pci_sriov; struct pci_p2pdma; @@ -502,6 +503,7 @@ struct pci_dev { unsigned int dpc_rp_extensions:1; u8 dpc_rp_log_size; #endif + struct pcie_bwctrl_data *link_bwctrl; #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */ From de9a6c8d5dbfedb5eb3722c822da0490f6a59a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:53 +0300 Subject: [PATCH 096/127] PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, PCIe Link Speeds are adjusted by custom code rather than in a common function provided in PCI core. The PCIe bandwidth controller (bwctrl) introduces an in-kernel API, pcie_set_target_speed(), to set PCIe Link Speed. Convert Target Speed quirk to use the new API. The Target Speed quirk runs very early when bwctrl is not yet probed for a Port and can also run later when bwctrl is already setup for the Port, which requires the per port mutex (set_speed_mutex) to be only taken if the bwctrl setup is already complete. The new API is also intended to be used in an upcoming commit that adds a thermal cooling device to throttle PCIe bandwidth when thermal thresholds are reached. The PCIe bandwidth control procedure is as follows. The highest speed supported by the Port and the PCIe device which is not higher than the requested speed is selected and written into the Target Link Speed in the Link Control 2 Register. Then bandwidth controller retrains the PCIe Link. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. While Bandwidth Notifications should also be generated when bandwidth controller alters the PCIe Link Speed, a few platforms do not deliver LMBS interrupt after Link Training as expected. Thus, after changing the Link Speed, bandwidth controller makes additional read for the Link Status Register to ensure cur_bus_speed is consistent with the new PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: squash devm_mutex_init() error checking from https://lore.kernel.org/r/20241030163139.2111689-1-andriy.shevchenko@linux.intel.com, drop export of pcie_set_target_speed()] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 20 +++++ drivers/pci/pcie/bwctrl.c | 180 ++++++++++++++++++++++++++++++++++++-- drivers/pci/quirks.c | 17 +--- include/linux/pci.h | 10 +++ 4 files changed, 208 insertions(+), 19 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 8ec189c8f3f5..7573f81f58c4 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -331,6 +331,17 @@ void pci_disable_bridge_window(struct pci_dev *dev); struct pci_bus *pci_bus_get(struct pci_bus *bus); void pci_bus_put(struct pci_bus *bus); +#define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ +({ \ + ((lnkcap) == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN); \ +}) + /* PCIe link information from Link Capabilities 2 */ #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ @@ -341,6 +352,15 @@ void pci_bus_put(struct pci_bus *bus); (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) +#define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ + ((lnkctl2) == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN) + /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index b3da24900271..3cd3e2d066c9 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -7,6 +7,11 @@ * Copyright (C) 2019 Dell Inc * Copyright (C) 2023-2024 Intel Corporation * + * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds + * and notify the operating system when the Link Width or Speed changes. The + * notification capability is required for all Root Ports and Downstream + * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. + * * This service port driver hooks into the Bandwidth Notification interrupt * watching for changes or links becoming degraded in operation. It updates * the cached Current Link Speed that is exposed to user space through sysfs. @@ -15,9 +20,12 @@ #define dev_fmt(fmt) "bwctrl: " fmt #include +#include +#include #include #include #include +#include #include #include #include @@ -28,14 +36,166 @@ /** * struct pcie_bwctrl_data - PCIe bandwidth controller + * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) */ struct pcie_bwctrl_data { + struct mutex set_speed_mutex; atomic_t lbms_count; }; -/* Prevents port removal during LBMS count accessors */ +/* + * Prevent port removal during LBMS count accessors and Link Speed changes. + * + * These have to be differentiated because pcie_bwctrl_change_speed() calls + * pcie_retrain_link() which uses LBMS count reset accessor on success + * (using just one rwsem triggers "possible recursive locking detected" + * warning). + */ static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); + +static bool pcie_valid_speed(enum pci_bus_speed speed) +{ + return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); +} + +static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) +{ + static const u8 speed_conv[] = { + [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, + [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, + [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, + [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, + [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, + [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, + }; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed))) + return 0; + + return speed_conv[speed]; +} + +static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) +{ + return __fls(supported_speeds); +} + +/** + * pcie_bwctrl_select_speed - Select Target Link Speed + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * + * Select Target Link Speed by take into account Supported Link Speeds of + * both the Root Port and the Endpoint. + * + * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) + */ +static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) +{ + struct pci_bus *bus = port->subordinate; + u8 desired_speeds, supported_speeds; + struct pci_dev *dev; + + desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), + __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); + + supported_speeds = port->supported_speeds; + if (bus) { + down_read(&pci_bus_sem); + dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); + if (dev) + supported_speeds &= dev->supported_speeds; + up_read(&pci_bus_sem); + } + if (!supported_speeds) + return PCI_EXP_LNKCAP2_SLS_2_5GB; + + return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); +} + +static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) +{ + int ret; + + ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, target_speed); + if (ret != PCIBIOS_SUCCESSFUL) + return pcibios_err_to_errno(ret); + + ret = pcie_retrain_link(port, use_lt); + if (ret < 0) + return ret; + + /* + * Ensure link speed updates also with platforms that have problems + * with notifications. + */ + if (port->subordinate) + pcie_update_link_speed(port->subordinate); + + return 0; +} + +/** + * pcie_set_target_speed - Set downstream Link Speed for PCIe Port + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training + * + * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be + * adjusted downwards to the best speed supported by both the Port and PCIe + * Device underneath it. + * + * Return: + * * 0 - on success + * * -EINVAL - @speed_req is not a PCIe Link Speed + * * -ENODEV - @port is not controllable + * * -ETIMEDOUT - changing Link Speed took too long + * * -EAGAIN - Link Speed was changed but @speed_req was not achieved + */ +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt) +{ + struct pci_bus *bus = port->subordinate; + u16 target_speed; + int ret; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) + return -EINVAL; + + if (bus && bus->cur_bus_speed == speed_req) + return 0; + + target_speed = pcie_bwctrl_select_speed(port, speed_req); + + scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { + struct pcie_bwctrl_data *data = port->link_bwctrl; + + /* + * port->link_bwctrl is NULL during initial scan when called + * e.g. from the Target Speed quirk. + */ + if (data) + mutex_lock(&data->set_speed_mutex); + + ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + + if (data) + mutex_unlock(&data->set_speed_mutex); + } + + /* + * Despite setting higher speed into the Target Link Speed, empty + * bus won't train to 5GT+ speeds. + */ + if (!ret && bus && bus->cur_bus_speed != speed_req && + !list_empty(&bus->devices)) + ret = -EAGAIN; + + return ret; +} static void pcie_bwnotif_enable(struct pcie_device *srv) { @@ -136,14 +296,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) if (!data) return -ENOMEM; + ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); + if (ret) + return ret; + ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, IRQF_SHARED, "PCIe bwctrl", srv); if (ret) return ret; - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - port->link_bwctrl = no_free_ptr(data); - pcie_bwnotif_enable(srv); + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = no_free_ptr(data); + pcie_bwnotif_enable(srv); + } } pci_dbg(port, "enabled with IRQ %d\n", srv->irq); @@ -154,8 +320,10 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) static void pcie_bwnotif_remove(struct pcie_device *srv) { pcie_bwnotif_disable(srv->port); - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) - srv->port->link_bwctrl = NULL; + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) + srv->port->link_bwctrl = NULL; } static int pcie_bwnotif_suspend(struct pcie_device *srv) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e6d502dca939..dcf1c86a5488 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -113,16 +113,11 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - - ret = pcie_retrain_link(dev, false); + ret = pcie_set_target_speed(dev, PCIE_SPEED_2_5GT, false); if (ret) { pci_info(dev, "retraining failed\n"); - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, - oldlnkctl2); - pcie_retrain_link(dev, true); + pcie_set_target_speed(dev, PCIE_LNKCTL2_TLS2SPEED(oldlnkctl2), + true); return ret; } @@ -136,11 +131,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n"); pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - - ret = pcie_retrain_link(dev, false); + ret = pcie_set_target_speed(dev, PCIE_LNKCAP_SLS2SPEED(lnkcap), false); if (ret) { pci_info(dev, "retraining failed\n"); return ret; diff --git a/include/linux/pci.h b/include/linux/pci.h index 579b28833429..4f02d9f91399 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1797,9 +1797,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_native; + +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt); #else #define pcie_ports_disabled true #define pcie_ports_native false + +static inline int pcie_set_target_speed(struct pci_dev *port, + enum pci_bus_speed speed_req, + bool use_lt) +{ + return -EOPNOTSUPP; +} #endif #define PCIE_LINK_STATE_L0S (BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */ From d278b098282d1327f6e1be82aacb18457a4d244d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:54 +0300 Subject: [PATCH 097/127] thermal: Add PCIe cooling driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a thermal cooling driver to provide path to access PCIe bandwidth controller using the usual thermal interfaces. A cooling device is instantiated for controllable PCIe Ports from the bwctrl service driver. If registering the cooling device fails, allow bwctrl's probe to succeed regardless. As cdev in that case contains IS_ERR() pseudo "pointer", clean that up inside the probe function so the remove side doesn't need to suddenly make an odd looking IS_ERR() check. The thermal side state 0 means no throttling, i.e., maximum supported PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: dropped data->cdev test per https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Acked-by: Rafael J. Wysocki # From the cooling device interface perspective --- MAINTAINERS | 2 + drivers/pci/pcie/bwctrl.c | 12 +++++ drivers/thermal/Kconfig | 9 ++++ drivers/thermal/Makefile | 2 + drivers/thermal/pcie_cooling.c | 80 ++++++++++++++++++++++++++++++++++ include/linux/pci-bwctrl.h | 28 ++++++++++++ 6 files changed, 133 insertions(+) create mode 100644 drivers/thermal/pcie_cooling.c create mode 100644 include/linux/pci-bwctrl.h diff --git a/MAINTAINERS b/MAINTAINERS index 8c555b3325d6..393ed7ce5ea1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17938,6 +17938,8 @@ M: Ilpo Järvinen L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/pcie/bwctrl.c +F: drivers/thermal/pcie_cooling.c +F: include/linux/pci-bwctrl.h PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index 3cd3e2d066c9..b59cacc740fa 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,10 +39,12 @@ * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) + * @cdev: Thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; atomic_t lbms_count; + struct thermal_cooling_device *cdev; }; /* @@ -314,11 +317,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + if (IS_ERR(port->link_bwctrl->cdev)) + port->link_bwctrl->cdev = NULL; + return 0; } static void pcie_bwnotif_remove(struct pcie_device *srv) { + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + + pcie_cooling_device_unregister(data->cdev); + pcie_bwnotif_disable(srv->port); scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 61e7ae524b1f..d3f9686e26e7 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -220,6 +220,15 @@ config DEVFREQ_THERMAL If you want this support, you should say Y here. +config PCIE_THERMAL + bool "PCIe cooling support" + depends on PCIEPORTBUS + help + This implements PCIe cooling mechanism through bandwidth reduction + for PCIe devices. + + If you want this support, you should say Y here. + config THERMAL_EMULATION bool "Thermal emulation mode support" help diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 41c4d56beb40..210c16c91461 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -31,6 +31,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL) += cpuidle_cooling.o # devfreq cooling thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o +thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o + obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o k3_j72xx_bandgap.o # platform thermal drivers obj-y += broadcom/ diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c new file mode 100644 index 000000000000..a876d64f1582 --- /dev/null +++ b/drivers/thermal/pcie_cooling.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PCIe cooling device + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COOLING_DEV_TYPE_PREFIX "PCIe_Port_Link_Speed_" + +static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT; + + return 0; +} + +static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = cdev->max_state - (port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT); + + return 0; +} + +static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct pci_dev *port = cdev->devdata; + enum pci_bus_speed speed; + + /* cooling state 0 is same as the maximum PCIe speed */ + speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT; + + return pcie_set_target_speed(port, speed, true); +} + +static struct thermal_cooling_device_ops pcie_cooling_ops = { + .get_max_state = pcie_cooling_get_max_level, + .get_cur_state = pcie_cooling_get_cur_level, + .set_cur_state = pcie_cooling_set_cur_level, +}; + +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + char *name __free(kfree) = + kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port)); + if (!name) + return ERR_PTR(-ENOMEM); + + return thermal_cooling_device_register(name, port, &pcie_cooling_ops); +} + +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ + thermal_cooling_device_unregister(cdev); +} + +/* For bus_speed <-> state arithmetic */ +static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT); +static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT); +static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT); +static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT); +static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT); + +MODULE_AUTHOR("Ilpo Järvinen "); +MODULE_DESCRIPTION("PCIe cooling driver"); diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h new file mode 100644 index 000000000000..cee07127455b --- /dev/null +++ b/include/linux/pci-bwctrl.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PCIe bandwidth controller + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#ifndef LINUX_PCI_BWCTRL_H +#define LINUX_PCI_BWCTRL_H + +#include + +struct thermal_cooling_device; + +#ifdef CONFIG_PCIE_THERMAL +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port); +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev); +#else +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + return NULL; +} +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ +} +#endif + +#endif From 838f12c3d551f8941295ed7085ad360c3d3ad665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:55 +0300 Subject: [PATCH 098/127] selftests/pcie_bwctrl: Create selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create selftests for PCIe BW control through the PCIe cooling device sysfs interface. First, the BW control selftest finds the PCIe Port to test with. By default, the PCIe Port with the highest Link Speed is selected but another PCIe Port can be provided with -d parameter. The actual test steps the cur_state of the cooling device one-by-one from max_state to what the cur_state was initially. The speed change is confirmed by observing the current_link_speed for the corresponding PCIe Port. Link: https://lore.kernel.org/r/20241018144755.7875-10-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/pcie_bwctrl/Makefile | 2 + .../pcie_bwctrl/set_pcie_cooling_state.sh | 122 ++++++++++++++++++ .../selftests/pcie_bwctrl/set_pcie_speed.sh | 67 ++++++++++ 5 files changed, 193 insertions(+) create mode 100644 tools/testing/selftests/pcie_bwctrl/Makefile create mode 100755 tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh create mode 100755 tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh diff --git a/MAINTAINERS b/MAINTAINERS index 393ed7ce5ea1..d7ffef4382df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17940,6 +17940,7 @@ S: Supported F: drivers/pci/pcie/bwctrl.c F: drivers/thermal/pcie_cooling.c F: include/linux/pci-bwctrl.h +F: tools/testing/selftests/pcie_bwctrl/ PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b38199965f99..7181756f47ff 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -72,6 +72,7 @@ TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nsfs +TARGETS += pcie_bwctrl TARGETS += perf_events TARGETS += pidfd TARGETS += pid_namespace diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile new file mode 100644 index 000000000000..3e84e26341d1 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -0,0 +1,2 @@ +TEST_PROGS = set_pcie_cooling_state.sh +include ../lib.mk diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh new file mode 100755 index 000000000000..9df606552af3 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 +skipmsg="skip all tests:" + +PCIEPORTTYPE="PCIe_Port_Link_Speed" + +prerequisite() +{ + local ports + + if [ $UID != 0 ]; then + echo $skipmsg must be run as root >&2 + exit $ksft_skip + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $skipmsg sysfs is not mounted >&2 + exit $ksft_skip + fi + + if ! ls $SYSFS/class/thermal/cooling_device* > /dev/null 2>&1; then + echo $skipmsg thermal cooling devices missing >&2 + exit $ksft_skip + fi + + ports=`grep -e "^$PCIEPORTTYPE" $SYSFS/class/thermal/cooling_device*/type | wc -l` + if [ $ports -eq 0 ]; then + echo $skipmsg pcie cooling devices missing >&2 + exit $ksft_skip + fi +} + +testport= +find_pcie_port() +{ + local patt="$1" + local pcieports + local max + local cur + local delta + local bestdelta=-1 + + pcieports=`grep -l -F -e "$patt" /sys/class/thermal/cooling_device*/type` + if [ -z "$pcieports" ]; then + return + fi + pcieports=${pcieports//\/type/} + # Find the port with the highest PCIe Link Speed + for port in $pcieports; do + max=`cat $port/max_state` + cur=`cat $port/cur_state` + delta=$((max-cur)) + if [ $delta -gt $bestdelta ]; then + testport="$port" + bestdelta=$delta + fi + done +} + +sysfspcidev= +find_sysfs_pci_dev() +{ + local typefile="$1/type" + local pcidir + + pcidir="$SYSFS/bus/pci/devices/`sed -e "s|^${PCIEPORTTYPE}_||g" $typefile`" + + if [ -r "$pcidir/current_link_speed" ]; then + sysfspcidev="$pcidir/current_link_speed" + fi +} + +usage() +{ + echo "Usage $0 [ -d dev ]" + echo -e "\t-d: PCIe port BDF string (e.g., 0000:00:04.0)" +} + +pattern="$PCIEPORTTYPE" +parse_arguments() +{ + while getopts d:h opt; do + case $opt in + h) + usage "$0" + exit 0 + ;; + d) + pattern="$PCIEPORTTYPE_$OPTARG" + ;; + *) + usage "$0" + exit 0 + ;; + esac + done +} + +parse_arguments "$@" +prerequisite +find_pcie_port "$pattern" +if [ -z "$testport" ]; then + echo $skipmsg "pcie cooling device not found from sysfs" >&2 + exit $ksft_skip +fi +find_sysfs_pci_dev "$testport" +if [ -z "$sysfspcidev" ]; then + echo $skipmsg "PCIe port device not found from sysfs" >&2 + exit $ksft_skip +fi + +./set_pcie_speed.sh "$testport" "$sysfspcidev" +retval=$? + +exit $retval diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh new file mode 100755 index 000000000000..584596949312 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +set -e + +TESTNAME=set_pcie_speed + +declare -a PCIELINKSPEED=( + "2.5 GT/s PCIe" + "5.0 GT/s PCIe" + "8.0 GT/s PCIe" + "16.0 GT/s PCIe" + "32.0 GT/s PCIe" + "64.0 GT/s PCIe" +) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 + +coolingdev="$1" +statefile="$coolingdev/cur_state" +maxfile="$coolingdev/max_state" +linkspeedfile="$2" + +oldstate=`cat $statefile` +maxstate=`cat $maxfile` + +set_state() +{ + local state=$1 + local linkspeed + local expected_linkspeed + + echo $state > $statefile + + sleep 1 + + linkspeed="`cat $linkspeedfile`" + expected_linkspeed=$((maxstate-state)) + expected_str="${PCIELINKSPEED[$expected_linkspeed]}" + if [ ! "${expected_str}" = "${linkspeed}" ]; then + echo "$TESTNAME failed: expected: ${expected_str}; got ${linkspeed}" + retval=1 + fi +} + +cleanup_skip () +{ + set_state $oldstate + exit $ksft_skip +} + +trap cleanup_skip EXIT + +echo "$TESTNAME: testing states $maxstate .. $oldstate with $coolingdev" +for i in $(seq $maxstate -1 $oldstate); do + set_state "$i" +done + +trap EXIT +if [ $retval -eq 0 ]; then + echo "$TESTNAME [PASS]" +else + echo "$TESTNAME [FAIL]" +fi +exit $retval From ba58eee1c57b2ad45c36f782861c18faef170a55 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 11 Nov 2024 14:21:33 -0600 Subject: [PATCH 099/127] PCI: Drop duplicate pcie_get_speed_cap(), pcie_get_width_cap() declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 6cf57be0f78e ("PCI: Add pcie_get_speed_cap() to find max supported link speed") and c70b65fb7f12 ("PCI: Add pcie_get_width_cap() to find max supported link width") added declarations to drivers/pci/pci.h. 576c7218a154 ("PCI: Export pcie_get_speed_cap and pcie_get_width_cap") subsequently added duplicates to include/linux/pci.h. Remove the originals from drivers/pci/pci.h. Both interfaces are used by amdgpu, so they must be in include/linux/pci.h. Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher Acked-by: Krzysztof Wilczyński --- drivers/pci/pci.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 7573f81f58c4..1d5c519e19b1 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -395,8 +395,6 @@ static inline int pcie_dev_speed_mbps(enum pci_bus_speed speed) u8 pcie_get_supported_speeds(struct pci_dev *dev); const char *pci_speed_string(enum pci_bus_speed speed); -enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); -enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); From 3fafc38b77bebeeea5faa2a588b92353775bb390 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 17 Oct 2024 15:20:55 +0200 Subject: [PATCH 100/127] PCI: dwc: ep: Use align addr function for dw_pcie_ep_raise_{msi,msix}_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the dw_pcie_ep_align_addr() function to calculate the alignment in dw_pcie_ep_raise_{msi,msix}_irq() instead of open coding the same. Link: https://lore.kernel.org/r/20241017132052.4014605-6-cassel@kernel.org Link: https://lore.kernel.org/r/20241104205144.409236-2-cassel@kernel.org Tested-by: Damien Le Moal Signed-off-by: Niklas Cassel [kwilczynski: squashed patch that fixes memory map sizes] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Damien Le Moal Reviewed-by: Frank Li Reviewed-by: Manivannan Sadhasivam --- .../pci/controller/dwc/pcie-designware-ep.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 20f67fd85e83..507e40bd18c8 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -503,7 +503,8 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, u32 msg_addr_lower, msg_addr_upper, reg; struct dw_pcie_ep_func *ep_func; struct pci_epc *epc = ep->epc; - unsigned int aligned_offset; + size_t map_size = sizeof(u32); + size_t offset; u16 msg_ctrl, msg_data; bool has_upper; u64 msg_addr; @@ -531,14 +532,13 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, } msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower; - aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); + msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, - epc->mem->window.page_size); + map_size); if (ret) return ret; - writel(msg_data | (interrupt_num - 1), ep->msi_mem + aligned_offset); + writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset); dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); @@ -589,8 +589,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, struct pci_epf_msix_tbl *msix_tbl; struct dw_pcie_ep_func *ep_func; struct pci_epc *epc = ep->epc; + size_t map_size = sizeof(u32); + size_t offset; u32 reg, msg_data, vec_ctrl; - unsigned int aligned_offset; u32 tbl_offset; u64 msg_addr; int ret; @@ -615,14 +616,13 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, return -EPERM; } - aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); + msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, - epc->mem->window.page_size); + map_size); if (ret) return ret; - writel(msg_data, ep->msi_mem + aligned_offset); + writel(msg_data, ep->msi_mem + offset); dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); From 9b80bdb10aee04ce7289896e6bdad13e33972636 Mon Sep 17 00:00:00 2001 From: Wang Jiang Date: Mon, 4 Nov 2024 18:05:14 +0800 Subject: [PATCH 101/127] PCI: endpoint: Remove surplus return statement from pci_epf_test_clean_dma_chan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove a surplus return statement from the void function that has been added in the commit commit 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities"). Especially, as an empty return statements at the end of a void functions serve little purpose. This fixes the following checkpatch.pl script warning: WARNING: void function return statements are not generally useful #296: FILE: drivers/pci/endpoint/functions/pci-epf-test.c:296: + return; +} Link: https://lore.kernel.org/r/tencent_F250BEE2A65745A524E2EFE70CF615CA8F06@qq.com Signed-off-by: Wang Jiang [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński --- drivers/pci/endpoint/functions/pci-epf-test.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index c2e7f67e5107..ef6677f34116 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -291,8 +291,6 @@ static void pci_epf_test_clean_dma_chan(struct pci_epf_test *epf_test) dma_release_channel(epf_test->dma_chan_rx); epf_test->dma_chan_rx = NULL; - - return; } static void pci_epf_test_print_rate(struct pci_epf_test *epf_test, From 5089b3d874e9933d9842e90410d3af1520494757 Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Tue, 5 Nov 2024 20:07:35 +0800 Subject: [PATCH 102/127] PCI: endpoint: epf-mhi: Avoid NULL dereference if DT lacks 'mmio' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If platform_get_resource_byname() fails and returns NULL because DT lacks an 'mmio' property for the MHI endpoint, dereferencing res->start will cause a NULL pointer access. Add a check to prevent it. Fixes: 1bf5f25324f7 ("PCI: endpoint: Add PCI Endpoint function driver for MHI bus") Link: https://lore.kernel.org/r/20241105120735.1240728-1-quic_zhonhan@quicinc.com Signed-off-by: Zhongqiu Han [kwilczynski: error message update per the review feedback] Signed-off-by: Krzysztof Wilczyński [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel --- drivers/pci/endpoint/functions/pci-epf-mhi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c index 7d070b1def11..54286a40bdfb 100644 --- a/drivers/pci/endpoint/functions/pci-epf-mhi.c +++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c @@ -867,12 +867,18 @@ static int pci_epf_mhi_bind(struct pci_epf *epf) { struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf); struct pci_epc *epc = epf->epc; + struct device *dev = &epf->dev; struct platform_device *pdev = to_platform_device(epc->dev.parent); struct resource *res; int ret; /* Get MMIO base address from Endpoint controller */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mmio"); + if (!res) { + dev_err(dev, "Failed to get \"mmio\" resource\n"); + return -ENODEV; + } + epf_mhi->mmio_phys = res->start; epf_mhi->mmio_size = resource_size(res); From 154fc1f6420840e8aa65e5b43a456ae8f34816dd Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 4 Nov 2024 13:07:13 -0600 Subject: [PATCH 103/127] PCI: dwc: Use of_property_present() for non-boolean properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of of_property_read_bool() for non-boolean properties is deprecated in favor of of_property_present() when testing for property presence. Link: https://lore.kernel.org/r/20241104190714.275977-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware-host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 3e41865c7290..d2291c3ceb8b 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -474,8 +474,8 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) if (pci_msi_enabled()) { pp->has_msi_ctrl = !(pp->ops->msi_init || - of_property_read_bool(np, "msi-parent") || - of_property_read_bool(np, "msi-map")); + of_property_present(np, "msi-parent") || + of_property_present(np, "msi-map")); /* * For the has_msi_ctrl case the default assignment is handled From 118397c9baaac0b7ec81896f8d755d09aa82c485 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 16 Nov 2024 01:59:51 +0100 Subject: [PATCH 104/127] PCI: dwc: ep: Fix advertised resizable BAR size regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The advertised resizable BAR size was fixed in commit 72e34b8593e0 ("PCI: dwc: endpoint: Fix advertised resizable BAR size"). Commit 867ab111b242 ("PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle Link Down event") was included shortly after this, and moved the code to another function. When the code was moved, this fix was mistakenly lost. According to the spec, it is illegal to not have a bit set in PCI_REBAR_CAP, and 1 MB is the smallest size allowed. So, set bit 4 in PCI_REBAR_CAP, so that we actually advertise support for a 1 MB BAR size. Fixes: 867ab111b242 ("PCI: dwc: ep: Add a generic dw_pcie_ep_linkdown() API to handle Link Down event") Link: https://lore.kernel.org/r/20241116005950.2480427-2-cassel@kernel.org Link: https://lore.kernel.org/r/20240606-pci-deinit-v1-3-4395534520dc@linaro.org Link: https://lore.kernel.org/r/20240307111520.3303774-1-cassel@kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org --- drivers/pci/controller/dwc/pcie-designware-ep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index b07ace7dc92e..b11cca6298f2 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -690,7 +690,7 @@ static void dw_pcie_ep_init_non_sticky_registers(struct dw_pcie *pci) * for 1 MB BAR size only. */ for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) - dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, BIT(4)); } dw_pcie_setup(pci); From 4acc902ed3743edd4ac2d3846604a99d17104359 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 7 Nov 2024 08:53:08 +0800 Subject: [PATCH 105/127] PCI: endpoint: Fix PCI domain ID release in pci_epc_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_epc_destroy() invokes pci_bus_release_domain_nr() to release the PCI domain ID, but there are two issues: - 'epc->dev' is passed to pci_bus_release_domain_nr() which was already freed by device_unregister(), leading to a use-after-free issue. - Domain ID corresponds to the EPC device parent, so passing 'epc->dev' is also wrong. Fix these issues by passing 'epc->dev.parent' to pci_bus_release_domain_nr() and also do it before device_unregister(). Fixes: 0328947c5032 ("PCI: endpoint: Assign PCI domain number for endpoint controllers") Signed-off-by: Zijun Hu Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241107-epc_rfc-v2-1-da5b6a99a66f@quicinc.com [mani: reworded subject and description] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org --- drivers/pci/endpoint/pci-epc-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 04a85d2f7e2a..21af2dbacc33 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -923,11 +923,10 @@ EXPORT_SYMBOL_GPL(pci_epc_bus_master_enable_notify); void pci_epc_destroy(struct pci_epc *epc) { pci_ep_cfs_remove_epc_group(epc->group); - device_unregister(&epc->dev); - #ifdef CONFIG_PCI_DOMAINS_GENERIC - pci_bus_release_domain_nr(&epc->dev, epc->domain_nr); + pci_bus_release_domain_nr(epc->dev.parent, epc->domain_nr); #endif + device_unregister(&epc->dev); } EXPORT_SYMBOL_GPL(pci_epc_destroy); From 688d2eb4c6fcfdcdaed0592f9df9196573ff5ce2 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 7 Nov 2024 08:53:09 +0800 Subject: [PATCH 106/127] PCI: endpoint: Clear secondary (not primary) EPC in pci_epc_remove_epf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In addition to a primary endpoint controller, an endpoint function may be associated with a secondary endpoint controller, epf->sec_epc, to provide NTB (non-transparent bridge) functionality. Previously, pci_epc_remove_epf() incorrectly cleared epf->epc instead of epf->sec_epc when removing from the secondary endpoint controller. Extend the epc->list_lock coverage and clear either epf->epc or epf->sec_epc as indicated. Link: https://lore.kernel.org/r/20241107-epc_rfc-v2-2-da5b6a99a66f@quicinc.com Fixes: 63840ff53223 ("PCI: endpoint: Add support to associate secondary EPC with EPF") Signed-off-by: Zijun Hu Reviewed-by: Manivannan Sadhasivam [mani: reworded subject and description] Signed-off-by: Manivannan Sadhasivam [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org --- drivers/pci/endpoint/pci-epc-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 21af2dbacc33..bed7c7d1fe3c 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -746,18 +746,18 @@ void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, if (IS_ERR_OR_NULL(epc) || !epf) return; + mutex_lock(&epc->list_lock); if (type == PRIMARY_INTERFACE) { func_no = epf->func_no; list = &epf->list; + epf->epc = NULL; } else { func_no = epf->sec_epc_func_no; list = &epf->sec_epc_list; + epf->sec_epc = NULL; } - - mutex_lock(&epc->list_lock); clear_bit(func_no, &epc->function_num_map); list_del(list); - epf->epc = NULL; mutex_unlock(&epc->list_lock); } EXPORT_SYMBOL_GPL(pci_epc_remove_epf); From 28b6acd75e3cefbe746ec7402c7ff4fdb114f327 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Thu, 14 Nov 2024 17:10:32 +0100 Subject: [PATCH 107/127] PCI: endpoint: Fix pci_epc_map map_size kerneldoc string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region, the map PCI start address is not necessarily the desired PCI base address to be mapped. This can result in map_pci_addr being lower than pci_addr as documented. This results in map_size covering the range map_pci_addr..pci_addr+pci_size. The old text had the pci_addr twice instead of map_pci_addr..pci_addr, so replace the erroneous kerneldoc string to reflect the actual range. Link: https://lore.kernel.org/r/20241114161032.3046202-1-rick.wertenbroek@gmail.com Signed-off-by: Rick Wertenbroek Signed-off-by: Krzysztof Wilczyński --- include/linux/pci-epc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index de8cc3658220..e818e3fdcded 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -40,7 +40,7 @@ pci_epc_interface_string(enum pci_epc_interface_type type) * @map_pci_addr: RC PCI address used as the first address mapped (may be lower * than @pci_addr) * @map_size: size of the controller memory needed for mapping the RC PCI address - * range @pci_addr..@pci_addr+@pci_size + * range @map_pci_addr..@pci_addr+@pci_size * @phys_base: base physical address of the allocated EPC memory for mapping the * RC PCI address range * @phys_addr: physical address at which @pci_addr is mapped From d9db393f2b9ed674f965dd823c1692b41bad7bfb Mon Sep 17 00:00:00 2001 From: Luo Yifan Date: Tue, 12 Nov 2024 17:09:24 +0800 Subject: [PATCH 108/127] tools: PCI: Fix incorrect printf format specifiers Fix several incorrect printf format specifiers that misused signed and unsigned versions. Link: https://lore.kernel.org/r/20241112090924.287056-1-luoyifan@cmss.chinamobile.com Signed-off-by: Luo Yifan Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- tools/pci/pcitest.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 470258009ddc..7b530d838d40 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -95,7 +95,7 @@ static int run_test(struct pci_test *test) if (test->msinum > 0 && test->msinum <= 32) { ret = ioctl(fd, PCITEST_MSI, test->msinum); - fprintf(stdout, "MSI%d:\t\t", test->msinum); + fprintf(stdout, "MSI%u:\t\t", test->msinum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -104,7 +104,7 @@ static int run_test(struct pci_test *test) if (test->msixnum > 0 && test->msixnum <= 2048) { ret = ioctl(fd, PCITEST_MSIX, test->msixnum); - fprintf(stdout, "MSI-X%d:\t\t", test->msixnum); + fprintf(stdout, "MSI-X%u:\t\t", test->msixnum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -116,7 +116,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_WRITE, ¶m); - fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); + fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -128,7 +128,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_READ, ¶m); - fprintf(stdout, "READ (%7ld bytes):\t\t", test->size); + fprintf(stdout, "READ (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -140,7 +140,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_COPY, ¶m); - fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size); + fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else From 7582fe07f4ca4c560eb47800b640997f06a8baa2 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 25 Oct 2024 13:24:51 +0530 Subject: [PATCH 109/127] PCI/pwrctl: Use of_platform_device_create() to create pwrctl devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The of_platform_populate() API creates platform devices by descending through the children of the parent node. But it provides no control over the child nodes, which makes it difficult to add checks for the child nodes in the future. Use of_platform_device_create() and for_each_child_of_node_scoped() to make it possible to add checks for each node before creating the platform device. Link: https://lore.kernel.org/r/20241025-pci-pwrctl-rework-v2-1-568756156cbe@linaro.org Tested-by: Bartosz Golaszewski Tested-by: Krishna chaitanya chundru Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Bartosz Golaszewski --- drivers/pci/bus.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 55c853686051..9d278d3a19ff 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -329,6 +330,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } void pci_bus_add_device(struct pci_dev *dev) { struct device_node *dn = dev->dev.of_node; + struct platform_device *pdev; int retval; /* @@ -351,11 +353,11 @@ void pci_bus_add_device(struct pci_dev *dev) pci_dev_assign_added(dev, true); if (dev_of_node(&dev->dev) && pci_is_bridge(dev)) { - retval = of_platform_populate(dev_of_node(&dev->dev), NULL, NULL, - &dev->dev); - if (retval) - pci_err(dev, "failed to populate child OF nodes (%d)\n", - retval); + for_each_available_child_of_node_scoped(dn, child) { + pdev = of_platform_device_create(child, NULL, &dev->dev); + if (!pdev) + pci_err(dev, "failed to create OF node: %s\n", child->name); + } } } EXPORT_SYMBOL_GPL(pci_bus_add_device); From 278dd091e95d428eea0a2c3c517d895b052de5ff Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 25 Oct 2024 13:24:52 +0530 Subject: [PATCH 110/127] PCI/pwrctl: Create pwrctl device only if at least one power supply is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, pwrctl devices are created if the corresponding PCI nodes are defined in devicetree. But this is not correct, because not all PCI nodes require pwrctl support. Pwrctl comes into the picture only when the device requires kernel to manage its power state. This can be determined using the power supply properties present in the devicetree node of the device. Add of_pci_supply_present() to check whether the devicetree contains at least one power supply property for a device. If one is present, create a pwrctl device for that PCI node. Suggested-by: Dmitry Baryshkov Fixes: 8fb18619d910 ("PCI/pwrctl: Create platform devices for child OF nodes of the port node") Link: https://lore.kernel.org/r/20241025-pci-pwrctl-rework-v2-2-568756156cbe@linaro.org Tested-by: Bartosz Golaszewski Tested-by: Krishna chaitanya chundru Signed-off-by: Manivannan Sadhasivam [bhelgaas: rename of_pci_is_supply_present() to of_pci_supply_present() for readability] Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Bartosz Golaszewski Cc: stable+noautosel@kernel.org # Depends on of_platform_device_create() rework --- drivers/pci/bus.c | 11 +++++++++++ drivers/pci/of.c | 27 +++++++++++++++++++++++++++ drivers/pci/pci.h | 5 +++++ 3 files changed, 43 insertions(+) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9d278d3a19ff..93bc42170122 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -354,6 +354,17 @@ void pci_bus_add_device(struct pci_dev *dev) if (dev_of_node(&dev->dev) && pci_is_bridge(dev)) { for_each_available_child_of_node_scoped(dn, child) { + /* + * First check whether the pwrctl device needs to be + * created or not. This is decided based on at least + * one of the power supplies being defined in the + * devicetree node of the device. + */ + if (!of_pci_supply_present(child)) { + pci_dbg(dev, "skipping OF node: %s\n", child->name); + continue; + } + pdev = of_platform_device_create(child, NULL, &dev->dev); if (!pdev) pci_err(dev, "failed to create OF node: %s\n", child->name); diff --git a/drivers/pci/of.c b/drivers/pci/of.c index dacea3fc5128..52f770bcc481 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -728,6 +728,33 @@ out_free_name: } #endif +/** + * of_pci_supply_present() - Check if the power supply is present for the PCI + * device + * @np: Device tree node + * + * Check if the power supply for the PCI device is present in the device tree + * node or not. + * + * Return: true if at least one power supply exists; false otherwise. + */ +bool of_pci_supply_present(struct device_node *np) +{ + struct property *prop; + char *supply; + + if (!np) + return false; + + for_each_property_of_node(np, prop) { + supply = strrchr(prop->name, '-'); + if (supply && !strcmp(supply, "-supply")) + return true; + } + + return false; +} + #endif /* CONFIG_PCI */ /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..64c6ca6de802 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -746,6 +746,7 @@ void pci_set_bus_of_node(struct pci_bus *bus); void pci_release_bus_of_node(struct pci_bus *bus); int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge); +bool of_pci_supply_present(struct device_node *np); #else static inline int @@ -793,6 +794,10 @@ static inline int devm_of_pci_bridge_init(struct device *dev, struct pci_host_br return 0; } +static inline bool of_pci_supply_present(struct device_node *np) +{ + return false; +} #endif /* CONFIG_OF */ struct of_changeset; From b458ff7e8176523b9d5a8d33f2487f29d7096eb1 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 25 Oct 2024 13:24:53 +0530 Subject: [PATCH 111/127] PCI/pwrctl: Ensure that pwrctl drivers are probed before PCI client drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the kernel device driver model, a pwrctl device is the supplier for the PCI device, but the device link that enforces the supplier-consumer relationship was previously created by the pwrctl driver. Therefore, the driver model didn't prevent probing PCI client drivers before probing the corresponding pwrctl drivers. This may lead to a race condition if the PCI device was already powered on by the bootloader (before the pwrctl driver). If the bootloader did not power on the PCI device, this wouldn't create any problem as the pwrctl driver will be the one powering on the device, so the PCI client driver always gets probed afterward. But if the device was already powered on, then the device will be seen by the PCI core and the PCI client driver may get probed before its pwrctl driver. This creates a race condition as the pwrctl driver may change the device power state while the device is being accessed by the client driver. One such issue was already reported on the Qcom X13s platform with the WLAN device and fixed with a hack in the WCN pwrseq driver by a9aaf1ff88a8 ("power: sequencing: request the WLAN enable GPIO as-is"). A cleaner way to fix the above mentioned race condition is to ensure that the pwrctl drivers are always probed before the client drivers. If the PCI device is associated with a pwrctl platform device with a power supply, add a device link between the PCI device and the pwrctl device before device_attach() in pci_bus_add_device(). Note that there is no need to explicitly remove the device link as that will be taken care of by the driver core when the PCI device gets removed. Fixes: 4565d2652a37 ("PCI/pwrctl: Add PCI power control core code") Fixes: 8fb18619d910 ("PCI/pwrctl: Create platform devices for child OF nodes of the port node") Link: https://lore.kernel.org/r/20241025-pci-pwrctl-rework-v2-3-568756156cbe@linaro.org Tested-by: Bartosz Golaszewski Tested-by: Krishna chaitanya chundru Signed-off-by: Manivannan Sadhasivam [bhelgaas: squash fix from https://lore.kernel.org/r/20241120062459.6371-1-manivannan.sadhasivam@linaro.org for SPARCv9 issue reported by Jonathan Currier ] Signed-off-by: Bjorn Helgaas [kwilczynski: wrap code to 80 columns] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Bartosz Golaszewski Cc: stable+noautosel@kernel.org # Depends on power supply check --- drivers/pci/bus.c | 75 +++++++++++++++++++++++++++++---------- drivers/pci/pwrctl/core.c | 10 ------ 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 93bc42170122..90d775601789 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -321,6 +321,46 @@ void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } +/* + * Create pwrctl devices (if required) for the PCI devices to handle the power + * state. + */ +static void pci_pwrctl_create_devices(struct pci_dev *dev) +{ + struct device_node *np = dev_of_node(&dev->dev); + struct device *parent = &dev->dev; + struct platform_device *pdev; + + /* + * First ensure that we are starting from a PCI bridge and it has a + * corresponding devicetree node. + */ + if (np && pci_is_bridge(dev)) { + /* + * Now look for the child PCI device nodes and create pwrctl + * devices for them. The pwrctl device drivers will manage the + * power state of the devices. + */ + for_each_available_child_of_node_scoped(np, child) { + /* + * First check whether the pwrctl device really needs to + * be created or not. This is decided based on at least + * one of the power supplies being defined in the + * devicetree node of the device. + */ + if (!of_pci_supply_present(child)) { + pci_dbg(dev, "skipping OF node: %s\n", child->name); + return; + } + + /* Now create the pwrctl device */ + pdev = of_platform_device_create(child, NULL, parent); + if (!pdev) + pci_err(dev, "failed to create OF node: %s\n", child->name); + } + } +} + /** * pci_bus_add_device - start driver for a single device * @dev: device to add @@ -345,31 +385,28 @@ void pci_bus_add_device(struct pci_dev *dev) pci_proc_attach_device(dev); pci_bridge_d3_update(dev); + pci_pwrctl_create_devices(dev); + + /* + * If the PCI device is associated with a pwrctl device with a + * power supply, create a device link between the PCI device and + * pwrctl device. This ensures that pwrctl drivers are probed + * before PCI client drivers. + */ + pdev = of_find_device_by_node(dn); + if (pdev && of_pci_supply_present(dn)) { + if (!device_link_add(&dev->dev, &pdev->dev, + DL_FLAG_AUTOREMOVE_CONSUMER)) + pci_err(dev, "failed to add device link to power control device %s\n", + pdev->name); + } + dev->match_driver = !dn || of_device_is_available(dn); retval = device_attach(&dev->dev); if (retval < 0 && retval != -EPROBE_DEFER) pci_warn(dev, "device attach failed (%d)\n", retval); pci_dev_assign_added(dev, true); - - if (dev_of_node(&dev->dev) && pci_is_bridge(dev)) { - for_each_available_child_of_node_scoped(dn, child) { - /* - * First check whether the pwrctl device needs to be - * created or not. This is decided based on at least - * one of the power supplies being defined in the - * devicetree node of the device. - */ - if (!of_pci_supply_present(child)) { - pci_dbg(dev, "skipping OF node: %s\n", child->name); - continue; - } - - pdev = of_platform_device_create(child, NULL, &dev->dev); - if (!pdev) - pci_err(dev, "failed to create OF node: %s\n", child->name); - } - } } EXPORT_SYMBOL_GPL(pci_bus_add_device); diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c index 01d913b60316..bb5a23712434 100644 --- a/drivers/pci/pwrctl/core.c +++ b/drivers/pci/pwrctl/core.c @@ -33,16 +33,6 @@ static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, */ dev->of_node_reused = true; break; - case BUS_NOTIFY_BOUND_DRIVER: - pwrctl->link = device_link_add(dev, pwrctl->dev, - DL_FLAG_AUTOREMOVE_CONSUMER); - if (!pwrctl->link) - dev_err(pwrctl->dev, "Failed to add device link\n"); - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - if (pwrctl->link) - device_link_remove(dev, pwrctl->dev); - break; } return NOTIFY_DONE; From 681725afb6b91dfa581f4eba5f256ac5e953b463 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 25 Oct 2024 13:24:55 +0530 Subject: [PATCH 112/127] PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to iterate over all children of the pwrctl device parent to remove the pwrctl device. Since the pwrctl device associated with the PCI device can be found using of_find_device_by_node() API, use it directly instead. Any pwrctl devices lying around without getting associated with the PCI devices will be removed once their parent device gets removed. Link: https://lore.kernel.org/r/20241025-pci-pwrctl-rework-v2-5-568756156cbe@linaro.org Tested-by: Bartosz Golaszewski Tested-by: Krishna chaitanya chundru Signed-off-by: Manivannan Sadhasivam [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Reviewed-by: Bartosz Golaszewski --- drivers/pci/remove.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index e4ce1145aa3e..3dd9b3024956 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -17,16 +17,16 @@ static void pci_free_resources(struct pci_dev *dev) } } -static int pci_pwrctl_unregister(struct device *dev, void *data) +static void pci_pwrctl_unregister(struct device *dev) { - struct device_node *pci_node = data, *plat_node = dev_of_node(dev); + struct platform_device *pdev; - if (dev_is_platform(dev) && plat_node && plat_node == pci_node) { - of_device_unregister(to_platform_device(dev)); - of_node_clear_flag(plat_node, OF_POPULATED); - } + pdev = of_find_device_by_node(dev_of_node(dev)); + if (!pdev) + return; - return 0; + of_device_unregister(pdev); + of_node_clear_flag(dev_of_node(dev), OF_POPULATED); } static void pci_stop_dev(struct pci_dev *dev) @@ -34,8 +34,7 @@ static void pci_stop_dev(struct pci_dev *dev) pci_pme_active(dev, false); if (pci_dev_is_added(dev)) { - device_for_each_child(dev->dev.parent, dev_of_node(&dev->dev), - pci_pwrctl_unregister); + pci_pwrctl_unregister(&dev->dev); device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); From b88cbaaa6fa109c2eb455d8fe2a318de0d197ea2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:27 -0600 Subject: [PATCH 113/127] PCI/pwrctrl: Rename pwrctl files to pwrctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To slightly reduce confusion between "pwrctl" (the power controller and power sequencing framework) and "bwctrl" (the bandwidth controller), rename "pwrctl" to "pwrctrl" so they use the same "ctrl" suffix. Rename drivers/pci/pwrctl/ to drivers/pci/pwrctrl/, including the related MAINTAINERS, include file (include/linux/pci-pwrctl.h), Makefile, and Kconfig changes. This is the minimal rename of files only. A subsequent commit will rename functions and data structures. Link: https://lore.kernel.org/r/20241115214428.2061153-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- MAINTAINERS | 4 ++-- drivers/pci/Kconfig | 2 +- drivers/pci/Makefile | 2 +- drivers/pci/pwrctl/Makefile | 6 ------ drivers/pci/{pwrctl => pwrctrl}/Kconfig | 0 drivers/pci/pwrctrl/Makefile | 6 ++++++ drivers/pci/{pwrctl => pwrctrl}/core.c | 2 +- .../pci-pwrctl-pwrseq.c => pwrctrl/pci-pwrctrl-pwrseq.c} | 2 +- include/linux/{pci-pwrctl.h => pci-pwrctrl.h} | 0 9 files changed, 12 insertions(+), 12 deletions(-) delete mode 100644 drivers/pci/pwrctl/Makefile rename drivers/pci/{pwrctl => pwrctrl}/Kconfig (100%) create mode 100644 drivers/pci/pwrctrl/Makefile rename drivers/pci/{pwrctl => pwrctrl}/core.c (99%) rename drivers/pci/{pwrctl/pci-pwrctl-pwrseq.c => pwrctrl/pci-pwrctrl-pwrseq.c} (98%) rename include/linux/{pci-pwrctl.h => pci-pwrctrl.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..1a6096135424 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17910,8 +17910,8 @@ M: Bartosz Golaszewski L: linux-pci@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git -F: drivers/pci/pwrctl/* -F: include/linux/pci-pwrctl.h +F: drivers/pci/pwrctrl/* +F: include/linux/pci-pwrctrl.h PCI SUBSYSTEM M: Bjorn Helgaas diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0d94e4a967d8..e1c025698a28 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -305,6 +305,6 @@ source "drivers/pci/hotplug/Kconfig" source "drivers/pci/controller/Kconfig" source "drivers/pci/endpoint/Kconfig" source "drivers/pci/switch/Kconfig" -source "drivers/pci/pwrctl/Kconfig" +source "drivers/pci/pwrctrl/Kconfig" endif diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 374c5c06d92f..39a07890abd1 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PCI) += access.o bus.o probe.o host-bridge.o \ obj-$(CONFIG_PCI) += msi/ obj-$(CONFIG_PCI) += pcie/ -obj-$(CONFIG_PCI) += pwrctl/ +obj-$(CONFIG_PCI) += pwrctrl/ ifdef CONFIG_PCI obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/pci/pwrctl/Makefile b/drivers/pci/pwrctl/Makefile deleted file mode 100644 index d308aae4800c..000000000000 --- a/drivers/pci/pwrctl/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_PCI_PWRCTL) += pci-pwrctl-core.o -pci-pwrctl-core-y := core.o - -obj-$(CONFIG_PCI_PWRCTL_PWRSEQ) += pci-pwrctl-pwrseq.o diff --git a/drivers/pci/pwrctl/Kconfig b/drivers/pci/pwrctrl/Kconfig similarity index 100% rename from drivers/pci/pwrctl/Kconfig rename to drivers/pci/pwrctrl/Kconfig diff --git a/drivers/pci/pwrctrl/Makefile b/drivers/pci/pwrctrl/Makefile new file mode 100644 index 000000000000..75c7ce531c7e --- /dev/null +++ b/drivers/pci/pwrctrl/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_PCI_PWRCTL) += pci-pwrctrl-core.o +pci-pwrctrl-core-y := core.o + +obj-$(CONFIG_PCI_PWRCTL_PWRSEQ) += pci-pwrctrl-pwrseq.o diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctrl/core.c similarity index 99% rename from drivers/pci/pwrctl/core.c rename to drivers/pci/pwrctrl/core.c index bb5a23712434..88f06e9271ba 100644 --- a/drivers/pci/pwrctl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/pci/pwrctl/pci-pwrctl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c similarity index 98% rename from drivers/pci/pwrctl/pci-pwrctl-pwrseq.c rename to drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index a23a4312574b..cc19ad61dd6e 100644 --- a/drivers/pci/pwrctl/pci-pwrctl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctrl.h similarity index 100% rename from include/linux/pci-pwrctl.h rename to include/linux/pci-pwrctrl.h From 3f925cd6287401bbc9d568f56d796a69c8bd292a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:28 -0600 Subject: [PATCH 114/127] PCI/pwrctrl: Rename pwrctrl functions and structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename pwrctrl functions and structures from "pwrctl" to "pwrctrl" to match the similar file renames. Link: https://lore.kernel.org/r/20241115214428.2061153-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- drivers/pci/bus.c | 25 ++++---- drivers/pci/pwrctrl/core.c | 77 ++++++++++++------------ drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c | 32 +++++----- drivers/pci/remove.c | 4 +- include/linux/pci-pwrctrl.h | 22 +++---- 5 files changed, 81 insertions(+), 79 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 90d775601789..0d27809746df 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -322,10 +322,10 @@ void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } /* - * Create pwrctl devices (if required) for the PCI devices to handle the power + * Create pwrctrl devices (if required) for the PCI devices to handle the power * state. */ -static void pci_pwrctl_create_devices(struct pci_dev *dev) +static void pci_pwrctrl_create_devices(struct pci_dev *dev) { struct device_node *np = dev_of_node(&dev->dev); struct device *parent = &dev->dev; @@ -337,23 +337,24 @@ static void pci_pwrctl_create_devices(struct pci_dev *dev) */ if (np && pci_is_bridge(dev)) { /* - * Now look for the child PCI device nodes and create pwrctl - * devices for them. The pwrctl device drivers will manage the + * Now look for the child PCI device nodes and create pwrctrl + * devices for them. The pwrctrl device drivers will manage the * power state of the devices. */ for_each_available_child_of_node_scoped(np, child) { /* - * First check whether the pwrctl device really needs to - * be created or not. This is decided based on at least - * one of the power supplies being defined in the - * devicetree node of the device. + * First check whether the pwrctrl device really + * needs to be created or not. This is decided + * based on at least one of the power supplies + * being defined in the devicetree node of the + * device. */ if (!of_pci_supply_present(child)) { pci_dbg(dev, "skipping OF node: %s\n", child->name); return; } - /* Now create the pwrctl device */ + /* Now create the pwrctrl device */ pdev = of_platform_device_create(child, NULL, parent); if (!pdev) pci_err(dev, "failed to create OF node: %s\n", child->name); @@ -385,12 +386,12 @@ void pci_bus_add_device(struct pci_dev *dev) pci_proc_attach_device(dev); pci_bridge_d3_update(dev); - pci_pwrctl_create_devices(dev); + pci_pwrctrl_create_devices(dev); /* - * If the PCI device is associated with a pwrctl device with a + * If the PCI device is associated with a pwrctrl device with a * power supply, create a device link between the PCI device and - * pwrctl device. This ensures that pwrctl drivers are probed + * pwrctrl device. This ensures that pwrctrl drivers are probed * before PCI client drivers. */ pdev = of_find_device_by_node(dn); diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 88f06e9271ba..2fb174db91e5 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -11,13 +11,13 @@ #include #include -static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, - void *data) +static int pci_pwrctrl_notify(struct notifier_block *nb, unsigned long action, + void *data) { - struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb); + struct pci_pwrctrl *pwrctrl = container_of(nb, struct pci_pwrctrl, nb); struct device *dev = data; - if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev)) + if (dev_fwnode(dev) != dev_fwnode(pwrctrl->dev)) return NOTIFY_DONE; switch (action) { @@ -40,31 +40,32 @@ static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, static void rescan_work_func(struct work_struct *work) { - struct pci_pwrctl *pwrctl = container_of(work, struct pci_pwrctl, work); + struct pci_pwrctrl *pwrctrl = container_of(work, + struct pci_pwrctrl, work); pci_lock_rescan_remove(); - pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus); + pci_rescan_bus(to_pci_dev(pwrctrl->dev->parent)->bus); pci_unlock_rescan_remove(); } /** - * pci_pwrctl_init() - Initialize the PCI power control context struct + * pci_pwrctrl_init() - Initialize the PCI power control context struct * - * @pwrctl: PCI power control data + * @pwrctrl: PCI power control data * @dev: Parent device */ -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev) +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev) { - pwrctl->dev = dev; - INIT_WORK(&pwrctl->work, rescan_work_func); + pwrctrl->dev = dev; + INIT_WORK(&pwrctrl->work, rescan_work_func); } -EXPORT_SYMBOL_GPL(pci_pwrctl_init); +EXPORT_SYMBOL_GPL(pci_pwrctrl_init); /** - * pci_pwrctl_device_set_ready() - Notify the pwrctl subsystem that the PCI + * pci_pwrctrl_device_set_ready() - Notify the pwrctrl subsystem that the PCI * device is powered-up and ready to be detected. * - * @pwrctl: PCI power control data. + * @pwrctrl: PCI power control data. * * Returns: * 0 on success, negative error number on error. @@ -74,31 +75,31 @@ EXPORT_SYMBOL_GPL(pci_pwrctl_init); * that the bus rescan was successfully started. The device will get bound to * its PCI driver asynchronously. */ -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl) +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl) { int ret; - if (!pwrctl->dev) + if (!pwrctrl->dev) return -ENODEV; - pwrctl->nb.notifier_call = pci_pwrctl_notify; - ret = bus_register_notifier(&pci_bus_type, &pwrctl->nb); + pwrctrl->nb.notifier_call = pci_pwrctrl_notify; + ret = bus_register_notifier(&pci_bus_type, &pwrctrl->nb); if (ret) return ret; - schedule_work(&pwrctl->work); + schedule_work(&pwrctrl->work); return 0; } -EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); +EXPORT_SYMBOL_GPL(pci_pwrctrl_device_set_ready); /** - * pci_pwrctl_device_unset_ready() - Notify the pwrctl subsystem that the PCI + * pci_pwrctrl_device_unset_ready() - Notify the pwrctrl subsystem that the PCI * device is about to be powered-down. * - * @pwrctl: PCI power control data. + * @pwrctrl: PCI power control data. */ -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl) { /* * We don't have to delete the link here. Typically, this function @@ -106,41 +107,41 @@ void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) * it is being detached then the child PCI device must have already * been unbound too or the device core wouldn't let us unbind. */ - bus_unregister_notifier(&pci_bus_type, &pwrctl->nb); + bus_unregister_notifier(&pci_bus_type, &pwrctrl->nb); } -EXPORT_SYMBOL_GPL(pci_pwrctl_device_unset_ready); +EXPORT_SYMBOL_GPL(pci_pwrctrl_device_unset_ready); -static void devm_pci_pwrctl_device_unset_ready(void *data) +static void devm_pci_pwrctrl_device_unset_ready(void *data) { - struct pci_pwrctl *pwrctl = data; + struct pci_pwrctrl *pwrctrl = data; - pci_pwrctl_device_unset_ready(pwrctl); + pci_pwrctrl_device_unset_ready(pwrctrl); } /** - * devm_pci_pwrctl_device_set_ready - Managed variant of - * pci_pwrctl_device_set_ready(). + * devm_pci_pwrctrl_device_set_ready - Managed variant of + * pci_pwrctrl_device_set_ready(). * - * @dev: Device managing this pwrctl provider. - * @pwrctl: PCI power control data. + * @dev: Device managing this pwrctrl provider. + * @pwrctrl: PCI power control data. * * Returns: * 0 on success, negative error number on error. */ -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl) +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl) { int ret; - ret = pci_pwrctl_device_set_ready(pwrctl); + ret = pci_pwrctrl_device_set_ready(pwrctrl); if (ret) return ret; return devm_add_action_or_reset(dev, - devm_pci_pwrctl_device_unset_ready, - pwrctl); + devm_pci_pwrctrl_device_unset_ready, + pwrctrl); } -EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_set_ready); +EXPORT_SYMBOL_GPL(devm_pci_pwrctrl_device_set_ready); MODULE_AUTHOR("Bartosz Golaszewski "); MODULE_DESCRIPTION("PCI Device Power Control core driver"); diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index cc19ad61dd6e..e9f89866b7c2 100644 --- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -13,21 +13,21 @@ #include #include -struct pci_pwrctl_pwrseq_data { - struct pci_pwrctl ctx; +struct pci_pwrctrl_pwrseq_data { + struct pci_pwrctrl ctx; struct pwrseq_desc *pwrseq; }; -static void devm_pci_pwrctl_pwrseq_power_off(void *data) +static void devm_pci_pwrctrl_pwrseq_power_off(void *data) { struct pwrseq_desc *pwrseq = data; pwrseq_power_off(pwrseq); } -static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) +static int pci_pwrctrl_pwrseq_probe(struct platform_device *pdev) { - struct pci_pwrctl_pwrseq_data *data; + struct pci_pwrctrl_pwrseq_data *data; struct device *dev = &pdev->dev; int ret; @@ -45,22 +45,22 @@ static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "Failed to power-on the device\n"); - ret = devm_add_action_or_reset(dev, devm_pci_pwrctl_pwrseq_power_off, + ret = devm_add_action_or_reset(dev, devm_pci_pwrctrl_pwrseq_power_off, data->pwrseq); if (ret) return ret; - pci_pwrctl_init(&data->ctx, dev); + pci_pwrctrl_init(&data->ctx, dev); - ret = devm_pci_pwrctl_device_set_ready(dev, &data->ctx); + ret = devm_pci_pwrctrl_device_set_ready(dev, &data->ctx); if (ret) return dev_err_probe(dev, ret, - "Failed to register the pwrctl wrapper\n"); + "Failed to register the pwrctrl wrapper\n"); return 0; } -static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { +static const struct of_device_id pci_pwrctrl_pwrseq_of_match[] = { { /* ATH11K in QCA6390 package. */ .compatible = "pci17cb,1101", @@ -78,16 +78,16 @@ static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { }, { } }; -MODULE_DEVICE_TABLE(of, pci_pwrctl_pwrseq_of_match); +MODULE_DEVICE_TABLE(of, pci_pwrctrl_pwrseq_of_match); -static struct platform_driver pci_pwrctl_pwrseq_driver = { +static struct platform_driver pci_pwrctrl_pwrseq_driver = { .driver = { - .name = "pci-pwrctl-pwrseq", - .of_match_table = pci_pwrctl_pwrseq_of_match, + .name = "pci-pwrctrl-pwrseq", + .of_match_table = pci_pwrctrl_pwrseq_of_match, }, - .probe = pci_pwrctl_pwrseq_probe, + .probe = pci_pwrctrl_pwrseq_probe, }; -module_platform_driver(pci_pwrctl_pwrseq_driver); +module_platform_driver(pci_pwrctrl_pwrseq_driver); MODULE_AUTHOR("Bartosz Golaszewski "); MODULE_DESCRIPTION("Generic PCI Power Control module for power sequenced devices"); diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 3dd9b3024956..27ef371a8443 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -17,7 +17,7 @@ static void pci_free_resources(struct pci_dev *dev) } } -static void pci_pwrctl_unregister(struct device *dev) +static void pci_pwrctrl_unregister(struct device *dev) { struct platform_device *pdev; @@ -34,7 +34,7 @@ static void pci_stop_dev(struct pci_dev *dev) pci_pme_active(dev, false); if (pci_dev_is_added(dev)) { - pci_pwrctl_unregister(&dev->dev); + pci_pwrctrl_unregister(&dev->dev); device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 0d23dddf59ec..7d439b0675e9 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -3,8 +3,8 @@ * Copyright (C) 2024 Linaro Ltd. */ -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ +#ifndef __PCI_PWRCTRL_H__ +#define __PCI_PWRCTRL_H__ #include #include @@ -29,14 +29,14 @@ struct device_link; */ /** - * struct pci_pwrctl - PCI device power control context. + * struct pci_pwrctrl - PCI device power control context. * @dev: Address of the power controlling device. * * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * passed to the pwrctrl subsystem to trigger a bus rescan and setup a device * link with the device once it's up. */ -struct pci_pwrctl { +struct pci_pwrctrl { struct device *dev; /* Private: don't use. */ @@ -45,10 +45,10 @@ struct pci_pwrctl { struct work_struct work; }; -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev); +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl); +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl); +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl); -#endif /* __PCI_PWRCTL_H__ */ +#endif /* __PCI_PWRCTRL_H__ */ From 64f093c4d99d797b68b407a9d8767aadc3e3ea7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:36 +0900 Subject: [PATCH 115/127] PCI: rockchip-ep: Fix address translation unit programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rockchip PCIe endpoint controller handles PCIe transfers addresses by masking the lower bits of the programmed PCI address and using the same number of lower bits masked from the CPU address space used for the mapping. For a PCI mapping of bytes starting from , the number of bits masked is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1]. However, rockchip_pcie_prog_ep_ob_atu() calculates num_pass_bits only using the size of the mapping, resulting in an incorrect number of mask bits depending on the value of the PCI address to map. Fix this by introducing the helper function rockchip_pcie_ep_ob_atu_num_bits() to correctly calculate the number of mask bits to use to program the address translation unit. The number of mask bits is calculated depending on both the PCI address and size of the mapping, and clamped between 8 and 20 using the macros ROCKCHIP_PCIE_AT_MIN_NUM_BITS and ROCKCHIP_PCIE_AT_MAX_NUM_BITS. As defined in the Rockchip RK3399 TRM V1.3 Part2, Sections 17.5.5.1.1 and 17.6.8.2.1, this clamping is necessary because: 1) The lower 8 bits of the PCI address to be mapped by the outbound region are ignored. So a minimum of 8 address bits are needed and imply that the PCI address must be aligned to 256. 2) The outbound memory regions are 1MB in size. So while we can specify up to 63-bits for the PCI address (num_bits filed uses bits 0 to 5 of the outbound address region 0 register), we must limit the number of valid address bits to 20 to match the memory window maximum size (1 << 20 = 1MB). Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Link: https://lore.kernel.org/r/20241017015849.190271-2-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/controller/pcie-rockchip-ep.c | 16 +++++++++++++--- drivers/pci/controller/pcie-rockchip.h | 4 ++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 136274533656..a6805b005798 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -63,15 +63,25 @@ static void rockchip_pcie_clear_ep_ob_atu(struct rockchip_pcie *rockchip, ROCKCHIP_PCIE_AT_OB_REGION_DESC1(region)); } +static int rockchip_pcie_ep_ob_atu_num_bits(struct rockchip_pcie *rockchip, + u64 pci_addr, size_t size) +{ + int num_pass_bits = fls64(pci_addr ^ (pci_addr + size - 1)); + + return clamp(num_pass_bits, + ROCKCHIP_PCIE_AT_MIN_NUM_BITS, + ROCKCHIP_PCIE_AT_MAX_NUM_BITS); +} + static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn, u32 r, u64 cpu_addr, u64 pci_addr, size_t size) { - int num_pass_bits = fls64(size - 1); + int num_pass_bits; u32 addr0, addr1, desc0; - if (num_pass_bits < 8) - num_pass_bits = 8; + num_pass_bits = rockchip_pcie_ep_ob_atu_num_bits(rockchip, + pci_addr, size); addr0 = ((num_pass_bits - 1) & PCIE_CORE_OB_REGION_ADDR0_NUM_BITS) | (lower_32_bits(pci_addr) & PCIE_CORE_OB_REGION_ADDR0_LO_ADDR); diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 6111de35f84c..15ee949f2485 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -245,6 +245,10 @@ (PCIE_EP_PF_CONFIG_REGS_BASE + (((fn) << 12) & GENMASK(19, 12))) #define ROCKCHIP_PCIE_EP_VIRT_FUNC_BASE(fn) \ (PCIE_EP_PF_CONFIG_REGS_BASE + 0x10000 + (((fn) << 12) & GENMASK(19, 12))) + +#define ROCKCHIP_PCIE_AT_MIN_NUM_BITS 8 +#define ROCKCHIP_PCIE_AT_MAX_NUM_BITS 20 + #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \ (PCIE_CORE_AXI_CONF_BASE + 0x0828 + (fn) * 0x0040 + (bar) * 0x0008) #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar) \ From 739e25f51aedaebf20496fbc99c6ef43fc6d6ca1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:37 +0900 Subject: [PATCH 116/127] PCI: rockchip-ep: Use a macro to define EP controller .align feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the macro ROCKCHIP_PCIE_AT_SIZE_ALIGN to initialize the .align field of the controller epc_features structure to 256. This is defined as a shift using the macro ROCKCHIP_PCIE_AT_MIN_NUM_BITS (to avoid using the "magic" value 8 directly). Link: https://lore.kernel.org/r/20241017015849.190271-3-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 2 +- drivers/pci/controller/pcie-rockchip.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index a6805b005798..65014681d859 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -449,7 +449,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = false, .msi_capable = true, .msix_capable = false, - .align = 256, + .align = ROCKCHIP_PCIE_AT_SIZE_ALIGN, }; static const struct pci_epc_features* diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 15ee949f2485..02368ce9bd54 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -248,6 +248,7 @@ #define ROCKCHIP_PCIE_AT_MIN_NUM_BITS 8 #define ROCKCHIP_PCIE_AT_MAX_NUM_BITS 20 +#define ROCKCHIP_PCIE_AT_SIZE_ALIGN (1UL << ROCKCHIP_PCIE_AT_MIN_NUM_BITS) #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \ (PCIE_CORE_AXI_CONF_BASE + 0x0828 + (fn) * 0x0040 + (bar) * 0x0008) From 57ed93fe799b3564388fd61279fc274ae2716508 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:38 +0900 Subject: [PATCH 117/127] PCI: rockchip-ep: Improve rockchip_pcie_ep_unmap_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to loop over all regions to find the memory window used to map an address. We can use rockchip_ob_region() to determine the region index, together with a check that the address passed as argument is the address used to create the mapping. Furthermore, the ob_region_map bitmap should also be checked to ensure that we are not attempting to unmap an address that is not mapped. Link: https://lore.kernel.org/r/20241017015849.190271-4-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 65014681d859..b4db5459727f 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -259,13 +259,9 @@ static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn, { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u32 r; + u32 r = rockchip_ob_region(addr); - for (r = 0; r < ep->max_regions; r++) - if (ep->ob_addr[r] == addr) - break; - - if (r == ep->max_regions) + if (addr != ep->ob_addr[r] || !test_bit(r, &ep->ob_region_map)) return; rockchip_pcie_clear_ep_ob_atu(rockchip, r); From d8dbd21cfafd653b02ec187e59e0bcfaebd9a884 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:39 +0900 Subject: [PATCH 118/127] PCI: rockchip-ep: Improve rockchip_pcie_ep_map_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a check to verify that the outbound region to be used for mapping an address is not already in use. Link: https://lore.kernel.org/r/20241017015849.190271-5-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index b4db5459727f..24ee1df17a49 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -246,6 +246,9 @@ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn, struct rockchip_pcie *pcie = &ep->rockchip; u32 r = rockchip_ob_region(addr); + if (test_bit(r, &ep->ob_region_map)) + return -EBUSY; + rockchip_pcie_prog_ep_ob_atu(pcie, fn, r, addr, pci_addr, size); set_bit(r, &ep->ob_region_map); From b21255326db2d736089b3df26c3cd4e4a131d75f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:40 +0900 Subject: [PATCH 119/127] PCI: rockchip-ep: Implement the pci_epc_ops::align_addr() operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rockchip PCIe endpoint controller handles PCIe transfers addresses by masking the lower bits of the programmed PCI address and using the same number of lower bits from the CPU address space used for the mapping. For a PCI mapping of size bytes starting from pci_addr, the number of bits masked is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1], up to 20 bits, that is, up to 1MB mappings. This means that when preparing a PCI address mapping, an endpoint function driver must use an offset into the allocated controller memory region that is equal to the mask of the starting PCI address over rockchip_pcie_ep_ob_atu_num_bits() bits. This offset also determines the maximum size of the mapping given the starting PCI address and the fixed 1MB controller memory window size. Implement the ->align_addr() endpoint controller operation to allow the mapping alignment to be transparently handled by endpoint function drivers through the function pci_epc_mem_map(). Link: https://lore.kernel.org/r/20241017015849.190271-6-dlemoal@kernel.org Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal [kwilczynski: change local variable name for address offset] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 23 +++++++++++++++++++++++ drivers/pci/controller/pcie-rockchip.h | 5 +++++ 2 files changed, 28 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 24ee1df17a49..5a4279591f79 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -238,6 +238,28 @@ static inline u32 rockchip_ob_region(phys_addr_t addr) return (addr >> ilog2(SZ_1M)) & 0x1f; } +static u64 rockchip_pcie_ep_align_addr(struct pci_epc *epc, u64 pci_addr, + size_t *pci_size, size_t *addr_offset) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + size_t size = *pci_size; + u64 offset, mask; + int num_bits; + + num_bits = rockchip_pcie_ep_ob_atu_num_bits(&ep->rockchip, + pci_addr, size); + mask = (1ULL << num_bits) - 1; + + offset = pci_addr & mask; + if (size + offset > SZ_1M) + size = SZ_1M - offset; + + *pci_size = ALIGN(offset + size, ROCKCHIP_PCIE_AT_SIZE_ALIGN); + *addr_offset = offset; + + return pci_addr & ~mask; +} + static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn, phys_addr_t addr, u64 pci_addr, size_t size) @@ -461,6 +483,7 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .write_header = rockchip_pcie_ep_write_header, .set_bar = rockchip_pcie_ep_set_bar, .clear_bar = rockchip_pcie_ep_clear_bar, + .align_addr = rockchip_pcie_ep_align_addr, .map_addr = rockchip_pcie_ep_map_addr, .unmap_addr = rockchip_pcie_ep_unmap_addr, .set_msi = rockchip_pcie_ep_set_msi, diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 02368ce9bd54..30398156095f 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -241,6 +241,11 @@ #define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK GENMASK(15, 8) #define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1 #define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3 + +#define ROCKCHIP_PCIE_AT_MIN_NUM_BITS 8 +#define ROCKCHIP_PCIE_AT_MAX_NUM_BITS 20 +#define ROCKCHIP_PCIE_AT_SIZE_ALIGN (1UL << ROCKCHIP_PCIE_AT_MIN_NUM_BITS) + #define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) \ (PCIE_EP_PF_CONFIG_REGS_BASE + (((fn) << 12) & GENMASK(19, 12))) #define ROCKCHIP_PCIE_EP_VIRT_FUNC_BASE(fn) \ From 9f737cca6c54edd0af0a29bb0a702837f1fc9645 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:41 +0900 Subject: [PATCH 120/127] PCI: rockchip-ep: Fix MSI IRQ data mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to rockchip_pcie_prog_ep_ob_atu() used to map the PCI address of MSI data to the memory window allocated on probe for IRQs is done in rockchip_pcie_ep_send_msi_irq() assuming a fixed alignment to a 256B boundary of the PCI address. This is not correct as the alignment constraint for the RK3399 PCI mapping depends on the number of bits of address changing in the mapped region. This leads to an unstable system which sometimes work and sometimes does not (crashing on paging faults when memcpy_toio() or memcpy_fromio() are used). Similar to regular data mapping, the MSI data mapping must thus be handled according to the information provided by rockchip_pcie_ep_align_addr(). Modify rockchip_pcie_ep_send_msi_irq() to use rockchip_pcie_ep_align_addr() to correctly program entry 0 of the ATU for sending MSI IRQs. Link: https://lore.kernel.org/r/20241017015849.190271-7-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 5a4279591f79..299a8a95aab7 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -382,9 +382,10 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, { struct rockchip_pcie *rockchip = &ep->rockchip; u32 flags, mme, data, data_mask; + size_t irq_pci_size, offset; + u64 irq_pci_addr; u8 msi_count; u64 pci_addr; - u32 r; /* Check MSI enable bit */ flags = rockchip_pcie_read(&ep->rockchip, @@ -420,18 +421,21 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, PCI_MSI_ADDRESS_LO); /* Set the outbound region if needed. */ - if (unlikely(ep->irq_pci_addr != (pci_addr & PCIE_ADDR_MASK) || + irq_pci_size = ~PCIE_ADDR_MASK + 1; + irq_pci_addr = rockchip_pcie_ep_align_addr(ep->epc, + pci_addr & PCIE_ADDR_MASK, + &irq_pci_size, &offset); + if (unlikely(ep->irq_pci_addr != irq_pci_addr || ep->irq_pci_fn != fn)) { - r = rockchip_ob_region(ep->irq_phys_addr); - rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r, - ep->irq_phys_addr, - pci_addr & PCIE_ADDR_MASK, - ~PCIE_ADDR_MASK + 1); - ep->irq_pci_addr = (pci_addr & PCIE_ADDR_MASK); + rockchip_pcie_prog_ep_ob_atu(rockchip, fn, + rockchip_ob_region(ep->irq_phys_addr), + ep->irq_phys_addr, + irq_pci_addr, irq_pci_size); + ep->irq_pci_addr = irq_pci_addr; ep->irq_pci_fn = fn; } - writew(data, ep->irq_cpu_addr + (pci_addr & ~PCIE_ADDR_MASK)); + writew(data, ep->irq_cpu_addr + offset + (pci_addr & ~PCIE_ADDR_MASK)); return 0; } From 2968534e63e5bbe3d3f3c317ba32012393ee6637 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:42 +0900 Subject: [PATCH 121/127] PCI: rockchip-ep: Rename rockchip_pcie_parse_ep_dt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be consistent with the usual "get_resources" naming of driver functions that acquire controller resources like clocks, PHY etc, rename the function rockchip_pcie_parse_ep_dt() to rockchip_pcie_ep_get_resources(). No functional changes. Link: https://lore.kernel.org/r/20241017015849.190271-8-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 299a8a95aab7..c7c4afe1ef69 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -497,8 +497,8 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .get_features = rockchip_pcie_ep_get_features, }; -static int rockchip_pcie_parse_ep_dt(struct rockchip_pcie *rockchip, - struct rockchip_pcie_ep *ep) +static int rockchip_pcie_ep_get_resources(struct rockchip_pcie *rockchip, + struct rockchip_pcie_ep *ep) { struct device *dev = rockchip->dev; int err; @@ -560,7 +560,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) ep->epc = epc; epc_set_drvdata(epc, ep); - err = rockchip_pcie_parse_ep_dt(rockchip, ep); + err = rockchip_pcie_ep_get_resources(rockchip, ep); if (err) return err; From 945648019466db06dca189a639af913f2834ee9b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:43 +0900 Subject: [PATCH 122/127] PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() memory allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the function rockchip_pcie_ep_init_ob_mem() allocate the outbound memory regions and memory needed for IRQ handling. These changes tidy up rockchip_pcie_ep_probe(). No functional change. Link: https://lore.kernel.org/r/20241017015849.190271-9-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 107 ++++++++++++---------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index c7c4afe1ef69..d497f880eb2c 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -532,15 +532,66 @@ static const struct of_device_id rockchip_pcie_ep_of_match[] = { {}, }; +static int rockchip_pcie_ep_init_ob_mem(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + struct pci_epc_mem_window *windows = NULL; + int err, i; + + ep->ob_addr = devm_kcalloc(dev, ep->max_regions, sizeof(*ep->ob_addr), + GFP_KERNEL); + + if (!ep->ob_addr) + return -ENOMEM; + + windows = devm_kcalloc(dev, ep->max_regions, + sizeof(struct pci_epc_mem_window), GFP_KERNEL); + if (!windows) + return -ENOMEM; + + for (i = 0; i < ep->max_regions; i++) { + windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i); + windows[i].size = SZ_1M; + windows[i].page_size = SZ_1M; + } + err = pci_epc_multi_mem_init(ep->epc, windows, ep->max_regions); + devm_kfree(dev, windows); + + if (err < 0) { + dev_err(dev, "failed to initialize the memory space\n"); + return err; + } + + ep->irq_cpu_addr = pci_epc_mem_alloc_addr(ep->epc, &ep->irq_phys_addr, + SZ_1M); + if (!ep->irq_cpu_addr) { + dev_err(dev, "failed to reserve memory space for MSI\n"); + goto err_epc_mem_exit; + } + + ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; + + return 0; + +err_epc_mem_exit: + pci_epc_mem_exit(ep->epc); + + return err; +} + +static void rockchip_pcie_ep_exit_ob_mem(struct rockchip_pcie_ep *ep) +{ + pci_epc_mem_exit(ep->epc); +} + static int rockchip_pcie_ep_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct rockchip_pcie_ep *ep; struct rockchip_pcie *rockchip; struct pci_epc *epc; - size_t max_regions; - struct pci_epc_mem_window *windows = NULL; - int err, i; + int err; u32 cfg_msi, cfg_msix_cp; ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL); @@ -564,10 +615,14 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) if (err) return err; - err = rockchip_pcie_enable_clocks(rockchip); + err = rockchip_pcie_ep_init_ob_mem(ep); if (err) return err; + err = rockchip_pcie_enable_clocks(rockchip); + if (err) + goto err_exit_ob_mem; + err = rockchip_pcie_init_port(rockchip); if (err) goto err_disable_clocks; @@ -576,47 +631,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, PCIE_CLIENT_CONFIG); - max_regions = ep->max_regions; - ep->ob_addr = devm_kcalloc(dev, max_regions, sizeof(*ep->ob_addr), - GFP_KERNEL); - - if (!ep->ob_addr) { - err = -ENOMEM; - goto err_uninit_port; - } - /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - windows = devm_kcalloc(dev, ep->max_regions, - sizeof(struct pci_epc_mem_window), GFP_KERNEL); - if (!windows) { - err = -ENOMEM; - goto err_uninit_port; - } - for (i = 0; i < ep->max_regions; i++) { - windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i); - windows[i].size = SZ_1M; - windows[i].page_size = SZ_1M; - } - err = pci_epc_multi_mem_init(epc, windows, ep->max_regions); - devm_kfree(dev, windows); - - if (err < 0) { - dev_err(dev, "failed to initialize the memory space\n"); - goto err_uninit_port; - } - - ep->irq_cpu_addr = pci_epc_mem_alloc_addr(epc, &ep->irq_phys_addr, - SZ_1M); - if (!ep->irq_cpu_addr) { - dev_err(dev, "failed to reserve memory space for MSI\n"); - err = -ENOMEM; - goto err_epc_mem_exit; - } - - ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; - /* * MSI-X is not supported but the controller still advertises the MSI-X * capability by default, which can lead to the Root Complex side @@ -646,10 +663,8 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) pci_epc_init_notify(epc); return 0; -err_epc_mem_exit: - pci_epc_mem_exit(epc); -err_uninit_port: - rockchip_pcie_deinit_phys(rockchip); +err_exit_ob_mem: + rockchip_pcie_ep_exit_ob_mem(ep); err_disable_clocks: rockchip_pcie_disable_clocks(rockchip); return err; From 8efda8aebeed59bab984489407a94008f65f4d1e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:44 +0900 Subject: [PATCH 123/127] PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() MSI-X hiding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the code in rockchip_pcie_ep_probe() to hide the MSI-X capability to its own function, rockchip_pcie_ep_hide_broken_msix_cap(). No functional changes. Link: https://lore.kernel.org/r/20241017015849.190271-10-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 54 +++++++++++++---------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index d497f880eb2c..a53ff16b1661 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -585,6 +585,34 @@ static void rockchip_pcie_ep_exit_ob_mem(struct rockchip_pcie_ep *ep) pci_epc_mem_exit(ep->epc); } +static void rockchip_pcie_ep_hide_broken_msix_cap(struct rockchip_pcie *rockchip) +{ + u32 cfg_msi, cfg_msix_cp; + + /* + * MSI-X is not supported but the controller still advertises the MSI-X + * capability by default, which can lead to the Root Complex side + * allocating MSI-X vectors which cannot be used. Avoid this by skipping + * the MSI-X capability entry in the PCIe capabilities linked-list: get + * the next pointer from the MSI-X entry and set that in the MSI + * capability entry (which is the previous entry). This way the MSI-X + * entry is skipped (left out of the linked-list) and not advertised. + */ + cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); + + cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK; + + cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + + ROCKCHIP_PCIE_EP_MSIX_CAP_REG) & + ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK; + + cfg_msi |= cfg_msix_cp; + + rockchip_pcie_write(rockchip, cfg_msi, + PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); +} + static int rockchip_pcie_ep_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -592,7 +620,6 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) struct rockchip_pcie *rockchip; struct pci_epc *epc; int err; - u32 cfg_msi, cfg_msix_cp; ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL); if (!ep) @@ -627,6 +654,8 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) if (err) goto err_disable_clocks; + rockchip_pcie_ep_hide_broken_msix_cap(rockchip); + /* Establish the link automatically */ rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, PCIE_CLIENT_CONFIG); @@ -634,29 +663,6 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - /* - * MSI-X is not supported but the controller still advertises the MSI-X - * capability by default, which can lead to the Root Complex side - * allocating MSI-X vectors which cannot be used. Avoid this by skipping - * the MSI-X capability entry in the PCIe capabilities linked-list: get - * the next pointer from the MSI-X entry and set that in the MSI - * capability entry (which is the previous entry). This way the MSI-X - * entry is skipped (left out of the linked-list) and not advertised. - */ - cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + - ROCKCHIP_PCIE_EP_MSI_CTRL_REG); - - cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK; - - cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE + - ROCKCHIP_PCIE_EP_MSIX_CAP_REG) & - ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK; - - cfg_msi |= cfg_msix_cp; - - rockchip_pcie_write(rockchip, cfg_msi, - PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); - rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); From 091022f5f94513e5f11aa9e2f1cd9fdb4d00f83c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:45 +0900 Subject: [PATCH 124/127] PCI: rockchip-ep: Refactor endpoint link training enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function rockchip_pcie_init_port() enables link training for a controller configured in EP mode. Enabling link training is again done in rockchip_pcie_ep_probe() after that function executed rockchip_pcie_init_port(). Enabling link training only needs to be done once, and doing so at the probe stage before the controller is actually started by the user serves no purpose. Refactor this by removing the link training enablement from both rockchip_pcie_init_port() and rockchip_pcie_ep_probe() and moving it to the endpoint start operation defined with rockchip_pcie_ep_start(). Enabling the controller configuration using the PCIE_CLIENT_CONF_ENABLE bit in the same PCIE_CLIENT_CONFIG register is also moved to rockchip_pcie_ep_start() and both the controller configuration and link training enable bits are set with a single call to rockchip_pcie_write(). Link: https://lore.kernel.org/r/20241017015849.190271-11-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 13 ++++++------- drivers/pci/controller/pcie-rockchip.c | 5 +++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index a53ff16b1661..6c4169b7930e 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -467,6 +467,12 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) rockchip_pcie_write(rockchip, cfg, PCIE_CORE_PHY_FUNC_CFG); + /* Enable configuration and start link training */ + rockchip_pcie_write(rockchip, + PCIE_CLIENT_LINK_TRAIN_ENABLE | + PCIE_CLIENT_CONF_ENABLE, + PCIE_CLIENT_CONFIG); + return 0; } @@ -656,16 +662,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip_pcie_ep_hide_broken_msix_cap(rockchip); - /* Establish the link automatically */ - rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, - PCIE_CLIENT_CONFIG); - /* Only enable function 0 by default */ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG); - rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, - PCIE_CLIENT_CONFIG); - pci_epc_init_notify(epc); return 0; diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index c07d7129f1c7..154e78819e6e 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -244,11 +244,12 @@ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_write(rockchip, PCIE_CLIENT_GEN_SEL_1, PCIE_CLIENT_CONFIG); - regs = PCIE_CLIENT_LINK_TRAIN_ENABLE | PCIE_CLIENT_ARI_ENABLE | + regs = PCIE_CLIENT_ARI_ENABLE | PCIE_CLIENT_CONF_LANE_NUM(rockchip->lanes); if (rockchip->is_rc) - regs |= PCIE_CLIENT_CONF_ENABLE | PCIE_CLIENT_MODE_RC; + regs |= PCIE_CLIENT_LINK_TRAIN_ENABLE | + PCIE_CLIENT_CONF_ENABLE | PCIE_CLIENT_MODE_RC; else regs |= PCIE_CLIENT_CONF_DISABLE | PCIE_CLIENT_MODE_EP; From 00080d0887df8c197c5b89e38215c14980eae544 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:46 +0900 Subject: [PATCH 125/127] PCI: rockship-ep: Implement the pci_epc_ops::stop_link() operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the EPC operation ->stop() for the Rockchip endpoint driver with the function rockchip_pcie_ep_stop(). This function disables link training and the controller configuration, as the reverse to what the start operation defined with rockchip_pcie_ep_start() does. Link: https://lore.kernel.org/r/20241017015849.190271-12-dlemoal@kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/pcie-rockchip-ep.c | 13 +++++++++++++ drivers/pci/controller/pcie-rockchip.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 6c4169b7930e..13755e4ed59d 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -476,6 +476,18 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) return 0; } +static void rockchip_pcie_ep_stop(struct pci_epc *epc) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + + /* Stop link training and disable configuration */ + rockchip_pcie_write(rockchip, + PCIE_CLIENT_CONF_DISABLE | + PCIE_CLIENT_LINK_TRAIN_DISABLE, + PCIE_CLIENT_CONFIG); +} + static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = false, .msi_capable = true, @@ -500,6 +512,7 @@ static const struct pci_epc_ops rockchip_pcie_epc_ops = { .get_msi = rockchip_pcie_ep_get_msi, .raise_irq = rockchip_pcie_ep_raise_irq, .start = rockchip_pcie_ep_start, + .stop = rockchip_pcie_ep_stop, .get_features = rockchip_pcie_ep_get_features, }; diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 30398156095f..0263f158ee8d 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -32,6 +32,7 @@ #define PCIE_CLIENT_CONF_ENABLE HIWORD_UPDATE_BIT(0x0001) #define PCIE_CLIENT_CONF_DISABLE HIWORD_UPDATE(0x0001, 0) #define PCIE_CLIENT_LINK_TRAIN_ENABLE HIWORD_UPDATE_BIT(0x0002) +#define PCIE_CLIENT_LINK_TRAIN_DISABLE HIWORD_UPDATE(0x0002, 0) #define PCIE_CLIENT_ARI_ENABLE HIWORD_UPDATE_BIT(0x0008) #define PCIE_CLIENT_CONF_LANE_NUM(x) HIWORD_UPDATE(0x0030, ENCODE_LANES(x)) #define PCIE_CLIENT_MODE_RC HIWORD_UPDATE_BIT(0x0040) From bd6e61df4b2e69985daa312ce28b6af629b30870 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:47 +0900 Subject: [PATCH 126/127] PCI: rockchip-ep: Improve link training MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rockchip RK3399 TRM V1.3 Part2, Section 17.5.8.1.2, step 7, describes the endpoint mode link training process clearly and states that: Insure link training completion and success by observing link_st field in PCIe Client BASIC_STATUS1 register change to 2'b11. If both side support PCIe Gen2 speed, re-train can be Initiated by asserting the Retrain Link field in Link Control and Status Register. The software should insure the BASIC_STATUS0[negotiated_speed] changes to "1", that indicates re-train to Gen2 successfully. This procedure is very similar to what is done for the root-port mode in rockchip_pcie_host_init_port(). Implement this link training procedure for the endpoint mode as well. Given that the RK3399 SoC does not have an interrupt signaling link status changes, training is implemented as a delayed work which is rescheduled until the link training completes or the endpoint controller is stopped. The link training work is first scheduled in rockchip_pcie_ep_start() when the endpoint function is started. Link training completion is signaled to the function using pci_epc_linkup(). Accordingly, the linkup_notifier field of the Rockchip pci_epc_features structure is changed to true. Link: https://lore.kernel.org/r/20241017015849.190271-13-dlemoal@kernel.org Signed-off-by: Damien Le Moal [kwilczynski: update log messages to make them consistent] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 82 ++++++++++++++++++++++- drivers/pci/controller/pcie-rockchip.h | 11 +++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 13755e4ed59d..b4d405e4c954 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -10,12 +10,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include "pcie-rockchip.h" @@ -48,6 +50,7 @@ struct rockchip_pcie_ep { u64 irq_pci_addr; u8 irq_pci_fn; u8 irq_pending; + struct delayed_work link_training; }; static void rockchip_pcie_clear_ep_ob_atu(struct rockchip_pcie *rockchip, @@ -473,6 +476,8 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); + schedule_delayed_work(&ep->link_training, 0); + return 0; } @@ -481,6 +486,8 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; + cancel_delayed_work_sync(&ep->link_training); + /* Stop link training and disable configuration */ rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_DISABLE | @@ -488,8 +495,80 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) PCIE_CLIENT_CONFIG); } +static void rockchip_pcie_ep_retrain_link(struct rockchip_pcie *rockchip) +{ + u32 status; + + status = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_LCS); + status |= PCI_EXP_LNKCTL_RL; + rockchip_pcie_write(rockchip, status, PCIE_EP_CONFIG_LCS); +} + +static bool rockchip_pcie_ep_link_up(struct rockchip_pcie *rockchip) +{ + u32 val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS1); + + return PCIE_LINK_UP(val); +} + +static void rockchip_pcie_ep_link_training(struct work_struct *work) +{ + struct rockchip_pcie_ep *ep = + container_of(work, struct rockchip_pcie_ep, link_training.work); + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + u32 val; + int ret; + + /* Enable Gen1 training and wait for its completion */ + ret = readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL, + val, PCIE_LINK_TRAINING_DONE(val), 50, + LINK_TRAIN_TIMEOUT); + if (ret) + goto again; + + /* Make sure that the link is up */ + ret = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1, + val, PCIE_LINK_UP(val), 50, + LINK_TRAIN_TIMEOUT); + if (ret) + goto again; + + /* + * Check the current speed: if gen2 speed was requested and we are not + * at gen2 speed yet, retrain again for gen2. + */ + val = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL); + if (!PCIE_LINK_IS_GEN2(val) && rockchip->link_gen == 2) { + /* Enable retrain for gen2 */ + rockchip_pcie_ep_retrain_link(rockchip); + readl_poll_timeout(rockchip->apb_base + PCIE_CORE_CTRL, + val, PCIE_LINK_IS_GEN2(val), 50, + LINK_TRAIN_TIMEOUT); + } + + /* Check again that the link is up */ + if (!rockchip_pcie_ep_link_up(rockchip)) + goto again; + + val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS0); + dev_info(dev, + "link up (negotiated speed: %sGT/s, width: x%lu)\n", + (val & PCIE_CLIENT_NEG_LINK_SPEED) ? "5" : "2.5", + ((val & PCIE_CLIENT_NEG_LINK_WIDTH_MASK) >> + PCIE_CLIENT_NEG_LINK_WIDTH_SHIFT) << 1); + + /* Notify the function */ + pci_epc_linkup(ep->epc); + + return; + +again: + schedule_delayed_work(&ep->link_training, msecs_to_jiffies(5)); +} + static const struct pci_epc_features rockchip_pcie_epc_features = { - .linkup_notifier = false, + .linkup_notifier = true, .msi_capable = true, .msix_capable = false, .align = ROCKCHIP_PCIE_AT_SIZE_ALIGN, @@ -647,6 +726,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) rockchip = &ep->rockchip; rockchip->is_rc = false; rockchip->dev = dev; + INIT_DELAYED_WORK(&ep->link_training, rockchip_pcie_ep_link_training); epc = devm_pci_epc_create(dev, &rockchip_pcie_epc_ops); if (IS_ERR(epc)) { diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 0263f158ee8d..24796176f658 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -26,6 +26,7 @@ #define MAX_LANE_NUM 4 #define MAX_REGION_LIMIT 32 #define MIN_EP_APERTURE 28 +#define LINK_TRAIN_TIMEOUT (500 * USEC_PER_MSEC) #define PCIE_CLIENT_BASE 0x0 #define PCIE_CLIENT_CONFIG (PCIE_CLIENT_BASE + 0x00) @@ -50,6 +51,10 @@ #define PCIE_CLIENT_DEBUG_LTSSM_MASK GENMASK(5, 0) #define PCIE_CLIENT_DEBUG_LTSSM_L1 0x18 #define PCIE_CLIENT_DEBUG_LTSSM_L2 0x19 +#define PCIE_CLIENT_BASIC_STATUS0 (PCIE_CLIENT_BASE + 0x44) +#define PCIE_CLIENT_NEG_LINK_WIDTH_MASK GENMASK(7, 6) +#define PCIE_CLIENT_NEG_LINK_WIDTH_SHIFT 6 +#define PCIE_CLIENT_NEG_LINK_SPEED BIT(5) #define PCIE_CLIENT_BASIC_STATUS1 (PCIE_CLIENT_BASE + 0x48) #define PCIE_CLIENT_LINK_STATUS_UP 0x00300000 #define PCIE_CLIENT_LINK_STATUS_MASK 0x00300000 @@ -87,6 +92,8 @@ #define PCIE_CORE_CTRL_MGMT_BASE 0x900000 #define PCIE_CORE_CTRL (PCIE_CORE_CTRL_MGMT_BASE + 0x000) +#define PCIE_CORE_PL_CONF_LS_MASK 0x00000001 +#define PCIE_CORE_PL_CONF_LS_READY 0x00000001 #define PCIE_CORE_PL_CONF_SPEED_5G 0x00000008 #define PCIE_CORE_PL_CONF_SPEED_MASK 0x00000018 #define PCIE_CORE_PL_CONF_LANE_MASK 0x00000006 @@ -144,6 +151,7 @@ #define PCIE_RC_CONFIG_BASE 0xa00000 #define PCIE_EP_CONFIG_BASE 0xa00000 #define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00) +#define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) #define PCIE_RC_CONFIG_DCR_CSPL_SHIFT 18 @@ -155,6 +163,7 @@ #define PCIE_RC_CONFIG_LINK_CAP (PCIE_RC_CONFIG_BASE + 0xcc) #define PCIE_RC_CONFIG_LINK_CAP_L0S BIT(10) #define PCIE_RC_CONFIG_LCS (PCIE_RC_CONFIG_BASE + 0xd0) +#define PCIE_EP_CONFIG_LCS (PCIE_EP_CONFIG_BASE + 0xd0) #define PCIE_RC_CONFIG_L1_SUBSTATE_CTRL2 (PCIE_RC_CONFIG_BASE + 0x90c) #define PCIE_RC_CONFIG_THP_CAP (PCIE_RC_CONFIG_BASE + 0x274) #define PCIE_RC_CONFIG_THP_CAP_NEXT_MASK GENMASK(31, 20) @@ -192,6 +201,8 @@ #define ROCKCHIP_VENDOR_ID 0x1d87 #define PCIE_LINK_IS_L2(x) \ (((x) & PCIE_CLIENT_DEBUG_LTSSM_MASK) == PCIE_CLIENT_DEBUG_LTSSM_L2) +#define PCIE_LINK_TRAINING_DONE(x) \ + (((x) & PCIE_CORE_PL_CONF_LS_MASK) == PCIE_CORE_PL_CONF_LS_READY) #define PCIE_LINK_UP(x) \ (((x) & PCIE_CLIENT_LINK_STATUS_MASK) == PCIE_CLIENT_LINK_STATUS_UP) #define PCIE_LINK_IS_GEN2(x) \ From a7137cbf6bd53a9f9c40c64fc8b12b88289b3d4a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 17 Oct 2024 10:58:48 +0900 Subject: [PATCH 127/127] PCI: rockchip-ep: Handle PERST# signal in EP mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the Rockchip PCIe endpoint controller driver does not handle the PERST# signal, which prevents detecting when link training should actually be started or if the host resets the device. This however can be supported using the controller reset_gpios property set as an input GPIO for endpoint mode. Modify the Rockchip PCI endpoint controller driver to get the reset_gpio and its associated interrupt which is serviced using a threaded IRQ with the function rockchip_pcie_ep_perst_irq_thread() as handler. This handler function notifies a link down event corresponding to the RC side asserting the PERST# signal using pci_epc_linkdown() when the gpio is high. Once the gpio value goes down, corresponding to the RC de-asserting the PERST# signal, link training is started. The polarity of the gpio interrupt trigger is changed from high to low after the RC asserted PERST#, and conversely changed from low to high after the RC de-asserts PERST#. Also, given that the host mode controller and the endpoint mode controller use two different property names for the same PERST# signal (ep_gpios property and reset_gpios property respectively), for clarity, rename the ep_gpio field of struct rockchip_pcie to perst_gpio. Link: https://lore.kernel.org/r/20241017015849.190271-14-dlemoal@kernel.org Signed-off-by: Damien Le Moal [kwilczynski: make log messages consistent, add missing include] Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-rockchip-ep.c | 133 +++++++++++++++++++- drivers/pci/controller/pcie-rockchip-host.c | 4 +- drivers/pci/controller/pcie-rockchip.c | 16 +-- drivers/pci/controller/pcie-rockchip.h | 2 +- 4 files changed, 141 insertions(+), 14 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index b4d405e4c954..1064b7b06cef 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -10,8 +10,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -50,6 +52,9 @@ struct rockchip_pcie_ep { u64 irq_pci_addr; u8 irq_pci_fn; u8 irq_pending; + int perst_irq; + bool perst_asserted; + bool link_up; struct delayed_work link_training; }; @@ -470,13 +475,17 @@ static int rockchip_pcie_ep_start(struct pci_epc *epc) rockchip_pcie_write(rockchip, cfg, PCIE_CORE_PHY_FUNC_CFG); + if (rockchip->perst_gpio) + enable_irq(ep->perst_irq); + /* Enable configuration and start link training */ rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE | PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG); - schedule_delayed_work(&ep->link_training, 0); + if (!rockchip->perst_gpio) + schedule_delayed_work(&ep->link_training, 0); return 0; } @@ -486,6 +495,11 @@ static void rockchip_pcie_ep_stop(struct pci_epc *epc) struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; + if (rockchip->perst_gpio) { + ep->perst_asserted = true; + disable_irq(ep->perst_irq); + } + cancel_delayed_work_sync(&ep->link_training); /* Stop link training and disable configuration */ @@ -551,6 +565,13 @@ static void rockchip_pcie_ep_link_training(struct work_struct *work) if (!rockchip_pcie_ep_link_up(rockchip)) goto again; + /* + * If PERST# was asserted while polling the link, do not notify + * the function. + */ + if (ep->perst_asserted) + return; + val = rockchip_pcie_read(rockchip, PCIE_CLIENT_BASIC_STATUS0); dev_info(dev, "link up (negotiated speed: %sGT/s, width: x%lu)\n", @@ -560,6 +581,7 @@ static void rockchip_pcie_ep_link_training(struct work_struct *work) /* Notify the function */ pci_epc_linkup(ep->epc); + ep->link_up = true; return; @@ -567,6 +589,103 @@ again: schedule_delayed_work(&ep->link_training, msecs_to_jiffies(5)); } +static void rockchip_pcie_ep_perst_assert(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + + dev_dbg(rockchip->dev, "PERST# asserted, link down\n"); + + if (ep->perst_asserted) + return; + + ep->perst_asserted = true; + + cancel_delayed_work_sync(&ep->link_training); + + if (ep->link_up) { + pci_epc_linkdown(ep->epc); + ep->link_up = false; + } +} + +static void rockchip_pcie_ep_perst_deassert(struct rockchip_pcie_ep *ep) +{ + struct rockchip_pcie *rockchip = &ep->rockchip; + + dev_dbg(rockchip->dev, "PERST# de-asserted, starting link training\n"); + + if (!ep->perst_asserted) + return; + + ep->perst_asserted = false; + + /* Enable link re-training */ + rockchip_pcie_ep_retrain_link(rockchip); + + /* Start link training */ + schedule_delayed_work(&ep->link_training, 0); +} + +static irqreturn_t rockchip_pcie_ep_perst_irq_thread(int irq, void *data) +{ + struct pci_epc *epc = data; + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + u32 perst = gpiod_get_value(rockchip->perst_gpio); + + if (perst) + rockchip_pcie_ep_perst_assert(ep); + else + rockchip_pcie_ep_perst_deassert(ep); + + irq_set_irq_type(ep->perst_irq, + (perst ? IRQF_TRIGGER_HIGH : IRQF_TRIGGER_LOW)); + + return IRQ_HANDLED; +} + +static int rockchip_pcie_ep_setup_irq(struct pci_epc *epc) +{ + struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); + struct rockchip_pcie *rockchip = &ep->rockchip; + struct device *dev = rockchip->dev; + int ret; + + if (!rockchip->perst_gpio) + return 0; + + /* PCIe reset interrupt */ + ep->perst_irq = gpiod_to_irq(rockchip->perst_gpio); + if (ep->perst_irq < 0) { + dev_err(dev, + "failed to get IRQ for PERST# GPIO: %d\n", + ep->perst_irq); + + return ep->perst_irq; + } + + /* + * The perst_gpio is active low, so when it is inactive on start, it + * is high and will trigger the perst_irq handler. So treat this initial + * IRQ as a dummy one by faking the host asserting PERST#. + */ + ep->perst_asserted = true; + irq_set_status_flags(ep->perst_irq, IRQ_NOAUTOEN); + ret = devm_request_threaded_irq(dev, ep->perst_irq, NULL, + rockchip_pcie_ep_perst_irq_thread, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "pcie-ep-perst", epc); + if (ret) { + dev_err(dev, + "failed to request IRQ for PERST# GPIO: %d\n", + ret); + + return ret; + } + + return 0; +} + static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = true, .msi_capable = true, @@ -730,7 +849,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) epc = devm_pci_epc_create(dev, &rockchip_pcie_epc_ops); if (IS_ERR(epc)) { - dev_err(dev, "failed to create epc device\n"); + dev_err(dev, "failed to create EPC device\n"); return PTR_ERR(epc); } @@ -760,11 +879,17 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) pci_epc_init_notify(epc); + err = rockchip_pcie_ep_setup_irq(epc); + if (err < 0) + goto err_uninit_port; + return 0; -err_exit_ob_mem: - rockchip_pcie_ep_exit_ob_mem(ep); +err_uninit_port: + rockchip_pcie_deinit_phys(rockchip); err_disable_clocks: rockchip_pcie_disable_clocks(rockchip); +err_exit_ob_mem: + rockchip_pcie_ep_exit_ob_mem(ep); return err; } diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index cbec71114825..7471d9fd18bc 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -294,7 +294,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) int err, i = MAX_LANE_NUM; u32 status; - gpiod_set_value_cansleep(rockchip->ep_gpio, 0); + gpiod_set_value_cansleep(rockchip->perst_gpio, 0); err = rockchip_pcie_init_port(rockchip); if (err) @@ -323,7 +323,7 @@ static int rockchip_pcie_host_init_port(struct rockchip_pcie *rockchip) PCIE_CLIENT_CONFIG); msleep(PCIE_T_PVPERL_MS); - gpiod_set_value_cansleep(rockchip->ep_gpio, 1); + gpiod_set_value_cansleep(rockchip->perst_gpio, 1); msleep(PCIE_T_RRS_READY_MS); diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index 154e78819e6e..b9ade7632e11 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -119,13 +119,15 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) return PTR_ERR(rockchip->aclk_rst); } - if (rockchip->is_rc) { - rockchip->ep_gpio = devm_gpiod_get_optional(dev, "ep", - GPIOD_OUT_LOW); - if (IS_ERR(rockchip->ep_gpio)) - return dev_err_probe(dev, PTR_ERR(rockchip->ep_gpio), - "failed to get ep GPIO\n"); - } + if (rockchip->is_rc) + rockchip->perst_gpio = devm_gpiod_get_optional(dev, "ep", + GPIOD_OUT_LOW); + else + rockchip->perst_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_IN); + if (IS_ERR(rockchip->perst_gpio)) + return dev_err_probe(dev, PTR_ERR(rockchip->perst_gpio), + "failed to get PERST# GPIO\n"); rockchip->aclk_pcie = devm_clk_get(dev, "aclk"); if (IS_ERR(rockchip->aclk_pcie)) { diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 24796176f658..a51b087ce878 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -329,7 +329,7 @@ struct rockchip_pcie { struct regulator *vpcie3v3; /* 3.3V power supply */ struct regulator *vpcie1v8; /* 1.8V power supply */ struct regulator *vpcie0v9; /* 0.9V power supply */ - struct gpio_desc *ep_gpio; + struct gpio_desc *perst_gpio; u32 lanes; u8 lanes_map; int link_gen;