From 0ecee66990644c3482209ff7c12faa7bc40449b1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Oct 2019 19:48:14 -0700 Subject: [PATCH 001/146] fs/namespace.c: fix use-after-free of mount in mnt_warn_timestamp_expiry() After do_add_mount() returns success, the caller doesn't hold a reference to the 'struct mount' anymore. So it's invalid to access it in mnt_warn_timestamp_expiry(). Fix it by calling mnt_warn_timestamp_expiry() before do_add_mount() rather than after, and adjusting the warning message accordingly. Reported-by: syzbot+da4f525235510683d855@syzkaller.appspotmail.com Fixes: f8b92ba67c5d ("mount: Add mount warning for impending timestamp expiry") Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/namespace.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index fe0e9e1410fe..2adfe7b166a3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2478,8 +2478,10 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * time64_to_tm(sb->s_time_max, 0, &tm); - pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n", - sb->s_type->name, mntpath, + pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n", + sb->s_type->name, + is_mounted(mnt) ? "remounted" : "mounted", + mntpath, tm.tm_year+1900, (unsigned long long)sb->s_time_max); free_page((unsigned long)buf); @@ -2764,14 +2766,11 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, if (IS_ERR(mnt)) return PTR_ERR(mnt); - error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); - if (error < 0) { - mntput(mnt); - return error; - } - mnt_warn_timestamp_expiry(mountpoint, mnt); + error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); + if (error < 0) + mntput(mnt); return error; } From 97eba80fcca754856d09e048f469db22773bec68 Mon Sep 17 00:00:00 2001 From: Guillem Jover Date: Wed, 21 Aug 2019 05:38:20 +0200 Subject: [PATCH 002/146] aio: Fix io_pgetevents() struct __compat_aio_sigset layout This type is used to pass the sigset_t from userland to the kernel, but it was using the kernel native pointer type for the member representing the compat userland pointer to the userland sigset_t. This messes up the layout, and makes the kernel eat up both the userland pointer and the size members into the kernel pointer, and then reads garbage into the kernel sigsetsize. Which makes the sigset_t size consistency check fail, and consequently the syscall always returns -EINVAL. This breaks both libaio and strace on 32-bit userland running on 64-bit kernels. And there are apparently no users in the wild of the current broken layout (at least according to codesearch.debian.org and a brief check over github.com search). So it looks safe to fix this directly in the kernel, instead of either letting userland deal with this permanently with the additional overhead or trying to make the syscall infer what layout userland used, even though this is also being worked around in libaio to temporarily cope with kernels that have not yet been fixed. We use a proper compat_uptr_t instead of a compat_sigset_t pointer. Fixes: 7a074e96dee6 ("aio: implement io_pgetevents") Signed-off-by: Guillem Jover Signed-off-by: Al Viro --- fs/aio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 01e0fb9ae45a..0d9a559d488c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2179,7 +2179,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, #ifdef CONFIG_COMPAT struct __compat_aio_sigset { - compat_sigset_t __user *sigmask; + compat_uptr_t sigmask; compat_size_t sigsetsize; }; @@ -2193,7 +2193,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct old_timespec32 __user *, timeout, const struct __compat_aio_sigset __user *, usig) { - struct __compat_aio_sigset ksig = { NULL, }; + struct __compat_aio_sigset ksig = { 0, }; struct timespec64 t; bool interrupted; int ret; @@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); if (ret) return ret; @@ -2228,7 +2228,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __kernel_timespec __user *, timeout, const struct __compat_aio_sigset __user *, usig) { - struct __compat_aio_sigset ksig = { NULL, }; + struct __compat_aio_sigset ksig = { 0, }; struct timespec64 t; bool interrupted; int ret; @@ -2239,7 +2239,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); if (ret) return ret; From 03ad0d703df75c43f78bd72e16124b5b94a95188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Oct 2019 00:03:11 -0400 Subject: [PATCH 003/146] autofs: fix a leak in autofs_expire_indirect() if the second call of should_expire() in there ends up grabbing and returning a new reference to dentry, we need to drop it before continuing. Signed-off-by: Al Viro --- fs/autofs/expire.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index 2866fabf497f..91f5787dae7c 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -459,9 +459,10 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb, */ how &= ~AUTOFS_EXP_LEAVES; found = should_expire(expired, mnt, timeout, how); - if (!found || found != expired) - /* Something has changed, continue */ + if (found != expired) { // something has changed, continue + dput(found); goto next; + } if (expired != dentry) dput(dentry); From 637346748245e94c877aa746e6fe0d7079b7736a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 31 Oct 2019 10:46:04 +0100 Subject: [PATCH 004/146] MIPS: SGI-IP27: fix exception handler replication Commit 775b089aeffa ("MIPS: tlbex: Remove cpu_has_local_ebase") removed generating tlb refill handlers for every CPU, which was needed for generating per node exception handlers on IP27. Instead of resurrecting (and fixing) refill handler generation, we simply copy all exception vectors from the boot node to the other nodes. Also remove the config option since the memory tradeoff for expection handler replication is just 8k per node. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/sgi-ip27/Kconfig | 7 ------- arch/mips/sgi-ip27/ip27-init.c | 21 ++++++--------------- arch/mips/sgi-ip27/ip27-memory.c | 4 ---- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig index ef3847e7aee0..e5b6cadbec85 100644 --- a/arch/mips/sgi-ip27/Kconfig +++ b/arch/mips/sgi-ip27/Kconfig @@ -38,10 +38,3 @@ config REPLICATE_KTEXT Say Y here to enable replicating the kernel text across multiple nodes in a NUMA cluster. This trades memory for speed. -config REPLICATE_EXHANDLERS - bool "Exception handler replication support" - depends on SGI_IP27 - help - Say Y here to enable replicating the kernel exception handlers - across multiple nodes in a NUMA cluster. This trades memory for - speed. diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 59d5375c9021..79a52c472782 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode) hub_rtc_init(cnode); -#ifdef CONFIG_REPLICATE_EXHANDLERS - /* - * If this is not a headless node initialization, - * copy over the caliased exception handlers. - */ - if (get_compact_nodeid() == cnode) { - extern char except_vec2_generic, except_vec3_generic; - extern void build_tlb_refill_handler(void); - - memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80); - build_tlb_refill_handler(); - memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100); + if (nasid) { + /* copy exception handlers from first node to current node */ + memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0), + (void *)CKSEG0, 0x200); __flush_cache_all(); + /* switch to node local exception handlers */ + REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); } -#endif } void per_cpu_init(void) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index fb077a947575..8624a885d95b 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -332,11 +332,7 @@ static void __init mlreset(void) * thinks it is a node 0 address. */ REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1)); -#ifdef CONFIG_REPLICATE_EXHANDLERS - REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); -#else REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0); -#endif #ifdef LATER /* From f6929c92e283a35b183c293574adcbca409bf144 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 18 Oct 2019 10:16:51 -0700 Subject: [PATCH 005/146] MAINTAINERS: Remove Kevin as maintainer of BMIPS generic platforms The last time Kevin did a review was sometime around 2014, since then, he has not been active for the BMIPS generic platform changes. Signed-off-by: Florian Fainelli [paulburton@kernel.org: Drop the non-technical commit message content; Kevin's absence from the role is ample reasoning for this change.] Signed-off-by: Paul Burton --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c6c34d04ce95..5eb8a9ba7015 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3260,7 +3260,6 @@ S: Maintained F: drivers/cpufreq/bmips-cpufreq.c BROADCOM BMIPS MIPS ARCHITECTURE -M: Kevin Cernekee M: Florian Fainelli L: bcm-kernel-feedback-list@broadcom.com L: linux-mips@vger.kernel.org From 003f01c780020daa9a06dea1db495b553a868c29 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 4 Nov 2019 15:58:34 -0800 Subject: [PATCH 006/146] Input: synaptics-rmi4 - fix video buffer size The video buffer used by the queue is a vb2_v4l2_buffer, not a plain vb2_buffer. Using the wrong type causes the allocation of the buffer storage to be too small, causing a out of bounds write when __init_vb2_v4l2_buffer initializes the buffer. Signed-off-by: Lucas Stach Fixes: 3a762dbd5347 ("[media] Input: synaptics-rmi4 - add support for F54 diagnostics") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191104114454.10500-1-l.stach@pengutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f54.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 710b02595486..4841354af0d7 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -359,7 +359,7 @@ static const struct vb2_ops rmi_f54_queue_ops = { static const struct vb2_queue rmi_f54_queue = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, .io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ, - .buf_struct_size = sizeof(struct vb2_buffer), + .buf_struct_size = sizeof(struct vb2_v4l2_buffer), .ops = &rmi_f54_queue_ops, .mem_ops = &vb2_vmalloc_memops, .timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, From f6aabe1ff1d9d7bad0879253011216438bdb2530 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 4 Nov 2019 16:06:44 -0800 Subject: [PATCH 007/146] Input: synaptics-rmi4 - disable the relative position IRQ in the F12 driver This patch fixes an issue seen on HID touchpads which report finger positions using RMI4 Function 12. The issue manifests itself as spurious button presses as described in: https://www.spinics.net/lists/linux-input/msg58618.html Commit 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain") switched the RMI4 driver to using an irq_domain to handle RMI4 function interrupts. Functions with more then one interrupt now have each interrupt mapped to their own IRQ and IRQ handler. The result of this change is that the F12 IRQ handler was now getting called twice. Once for the absolute data interrupt and once for the relative data interrupt. For HID devices, calling rmi_f12_attention() a second time causes the attn_data data pointer and size to be set incorrectly. When the touchpad button is pressed, F30 will generate an interrupt and attempt to read the F30 data from the invalid attn_data data pointer and report incorrect button events. This patch disables the F12 relative interrupt which prevents rmi_f12_attention() from being called twice. Signed-off-by: Andrew Duggan Reported-by: Simon Wood Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191025002527.3189-2-aduggan@synaptics.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f12.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index d20a5d6780d1..734077f2c40b 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -55,6 +55,9 @@ struct f12_data { const struct rmi_register_desc_item *data15; u16 data15_offset; + + unsigned long *abs_mask; + unsigned long *rel_mask; }; static int rmi_f12_read_sensor_tuning(struct f12_data *f12) @@ -291,9 +294,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn) static int rmi_f12_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; + struct f12_data *f12 = dev_get_drvdata(&fn->dev); + struct rmi_2d_sensor *sensor; int ret; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + sensor = &f12->sensor; + + if (!sensor->report_abs) + drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask); + else + drv->set_irq_bits(fn->rmi_dev, f12->abs_mask); + + drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask); ret = rmi_f12_write_control_regs(fn); if (ret) @@ -315,9 +327,12 @@ static int rmi_f12_probe(struct rmi_function *fn) struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); u16 data_offset = 0; + int mask_size; rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__); + mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long); + ret = rmi_read(fn->rmi_dev, query_addr, &buf); if (ret < 0) { dev_err(&fn->dev, "Failed to read general info register: %d\n", @@ -332,10 +347,19 @@ static int rmi_f12_probe(struct rmi_function *fn) return -ENODEV; } - f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL); + f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2, + GFP_KERNEL); if (!f12) return -ENOMEM; + f12->abs_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data)); + f12->rel_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data) + mask_size); + + set_bit(fn->irq_pos, f12->abs_mask); + set_bit(fn->irq_pos + 1, f12->rel_mask); + f12->has_dribble = !!(buf & BIT(3)); if (fn->dev.of_node) { From 5d40d95e7e64756cc30606c2ba169271704d47cb Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 4 Nov 2019 16:07:30 -0800 Subject: [PATCH 008/146] Input: synaptics-rmi4 - do not consume more data than we have (F11, F12) Currently, rmi_f11_attention() and rmi_f12_attention() functions update the attn_data data pointer and size based on the size of the expected size of the attention data. However, if the actual valid data in the attn buffer is less then the expected value then the updated data pointer will point to memory beyond the end of the attn buffer. Using the calculated valid_bytes instead will prevent this from happening. Signed-off-by: Andrew Duggan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191025002527.3189-3-aduggan@synaptics.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f11.c | 4 ++-- drivers/input/rmi4/rmi_f12.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index f28a7158b2ef..26c239325f95 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1284,8 +1284,8 @@ static irqreturn_t rmi_f11_attention(int irq, void *ctx) valid_bytes = f11->sensor.attn_size; memcpy(f11->sensor.data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += f11->sensor.attn_size; - drvdata->attn_data.size -= f11->sensor.attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { error = rmi_read_block(rmi_dev, data_base_addr, f11->sensor.data_pkt, diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 734077f2c40b..7e97944f7616 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -212,8 +212,8 @@ static irqreturn_t rmi_f12_attention(int irq, void *ctx) valid_bytes = sensor->attn_size; memcpy(sensor->data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += sensor->attn_size; - drvdata->attn_data.size -= sensor->attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { retval = rmi_read_block(rmi_dev, f12->data_addr, sensor->data_pkt, sensor->pkt_size); From 310ca2a61c410b161ae64fd19a3475500ef9eafb Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 4 Nov 2019 16:09:22 -0800 Subject: [PATCH 009/146] Input: synaptics-rmi4 - remove unused result_bits mask The result_bits mask is no longer used by the driver and should be removed. Signed-off-by: Andrew Duggan Link: https://lore.kernel.org/r/20191025002527.3189-4-aduggan@synaptics.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f11.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index 26c239325f95..bbf9ae9f3f0c 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -510,7 +510,6 @@ struct f11_data { struct rmi_2d_sensor_platform_data sensor_pdata; unsigned long *abs_mask; unsigned long *rel_mask; - unsigned long *result_bits; }; enum f11_finger_state { @@ -1057,7 +1056,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) /* ** init instance data, fill in values and create any sysfs files */ - f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 3, + f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 2, GFP_KERNEL); if (!f11) return -ENOMEM; @@ -1076,8 +1075,6 @@ static int rmi_f11_initialize(struct rmi_function *fn) + sizeof(struct f11_data)); f11->rel_mask = (unsigned long *)((char *)f11 + sizeof(struct f11_data) + mask_size); - f11->result_bits = (unsigned long *)((char *)f11 - + sizeof(struct f11_data) + mask_size * 2); set_bit(fn->irq_pos, f11->abs_mask); set_bit(fn->irq_pos + 1, f11->rel_mask); From a9c3c4c597704b3a1a2b9bef990e7d8a881f6533 Mon Sep 17 00:00:00 2001 From: James Erwin Date: Fri, 1 Nov 2019 15:20:59 -0400 Subject: [PATCH 010/146] IB/hfi1: Ensure full Gen3 speed in a Gen4 system If an hfi1 card is inserted in a Gen4 systems, the driver will avoid the gen3 speed bump and the card will operate at half speed. This is because the driver avoids the gen3 speed bump when the parent bus speed isn't identical to gen3, 8.0GT/s. This is not compatible with gen4 and newer speeds. Fix by relaxing the test to explicitly look for the lower capability speeds which inherently allows for gen4 and all future speeds. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20191101192059.106248.1699.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Dennis Dalessandro Reviewed-by: Kaike Wan Signed-off-by: James Erwin Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pcie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 61aa5504d7c3..61362bd6d3ce 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } From c1abd865bd125015783286b353abb8da51644f59 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 25 Oct 2019 15:58:30 -0400 Subject: [PATCH 011/146] IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet The index r_tid_ack is used to indicate the next TID RDMA WRITE request to acknowledge in the ring s_ack_queue[] on the responder side and should be set to a valid index other than its initial value before r_tid_tail is advanced to the next TID RDMA WRITE request and particularly before a TID RDMA ACK is built. Otherwise, a NULL pointer dereference may result: BUG: unable to handle kernel paging request at ffff9a32d27abff8 IP: [] hfi1_make_tid_rdma_pkt+0x476/0xcb0 [hfi1] PGD 2749032067 PUD 0 Oops: 0000 1 SMP Modules linked in: osp(OE) ofd(OE) lfsck(OE) ost(OE) mgc(OE) osd_zfs(OE) lquota(OE) lustre(OE) lmv(OE) mdc(OE) lov(OE) fid(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ib_ipoib(OE) hfi1(OE) rdmavt(OE) nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache ib_isert iscsi_target_mod target_core_mod ib_ucm dm_mirror dm_region_hash dm_log mlx5_ib dm_mod zfs(POE) rpcrdma sunrpc rdma_ucm ib_uverbs opa_vnic ib_iser zunicode(POE) ib_umad zavl(POE) icp(POE) sb_edac intel_powerclamp coretemp rdma_cm intel_rapl iosf_mbi iw_cm libiscsi scsi_transport_iscsi kvm ib_cm iTCO_wdt mxm_wmi iTCO_vendor_support irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd zcommon(POE) znvpair(POE) pcspkr spl(OE) mei_me sg mei ioatdma lpc_ich joydev i2c_i801 shpchp ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_power_meter ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 mlx5_core drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ixgbe ahci ttm mlxfw ib_core libahci devlink mdio crct10dif_pclmul crct10dif_common drm ptp libata megaraid_sas crc32c_intel i2c_algo_bit pps_core i2c_core dca [last unloaded: rdmavt] CPU: 15 PID: 68691 Comm: kworker/15:2H Kdump: loaded Tainted: P W OE ------------ 3.10.0-862.2.3.el7_lustre.x86_64 #1 Hardware name: Intel Corporation S2600WTT/S2600WTT, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016 Workqueue: hfi0_0 _hfi1_do_tid_send [hfi1] task: ffff9a01f47faf70 ti: ffff9a11776a8000 task.ti: ffff9a11776a8000 RIP: 0010:[] [] hfi1_make_tid_rdma_pkt+0x476/0xcb0 [hfi1] RSP: 0018:ffff9a11776abd08 EFLAGS: 00010002 RAX: ffff9a32d27abfc0 RBX: ffff99f2d27aa000 RCX: 00000000ffffffff RDX: 0000000000000000 RSI: 0000000000000220 RDI: ffff99f2ffc05300 RBP: ffff9a11776abd88 R08: 000000000001c310 R09: ffffffffc0d87ad4 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9a117a423c00 R13: ffff9a117a423c00 R14: ffff9a03500c0000 R15: ffff9a117a423cb8 FS: 0000000000000000(0000) GS:ffff9a117e9c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff9a32d27abff8 CR3: 0000002748a0e000 CR4: 00000000001607e0 Call Trace: [] _hfi1_do_tid_send+0x194/0x320 [hfi1] [] process_one_work+0x17f/0x440 [] worker_thread+0x126/0x3c0 [] ? manage_workers.isra.24+0x2a0/0x2a0 [] kthread+0xd1/0xe0 [] ? insert_kthread_work+0x40/0x40 [] ret_from_fork_nospec_begin+0x21/0x21 [] ? insert_kthread_work+0x40/0x40 hfi1 0000:05:00.0: hfi1_0: reserved_op: opcode 0xf2, slot 2, rsv_used 1, rsv_ops 1 Code: 00 00 41 8b 8d d8 02 00 00 89 c8 48 89 45 b0 48 c1 65 b0 06 48 8b 83 a0 01 00 00 48 01 45 b0 48 8b 45 b0 41 80 bd 10 03 00 00 00 <48> 8b 50 38 4c 8d 7a 50 74 45 8b b2 d0 00 00 00 85 f6 0f 85 72 RIP [] hfi1_make_tid_rdma_pkt+0x476/0xcb0 [hfi1] RSP CR2: ffff9a32d27abff8 This problem can happen if a RESYNC request is received before r_tid_ack is modified. This patch fixes the issue by making sure that r_tid_ack is set to a valid value before a TID RDMA ACK is built. Functions are defined to simplify the code. Fixes: 07b923701e38 ("IB/hfi1: Add functions to receive TID RDMA WRITE request") Fixes: 7cf0ad679de4 ("IB/hfi1: Add a function to receive TID RDMA RESYNC packet") Link: https://lore.kernel.org/r/20191025195830.106825.44022.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 44 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index f21fca3617d5..73bc78ba288e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -136,6 +136,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet, struct tid_rdma_flow *flow, bool fecn); +static void validate_r_tid_ack(struct hfi1_qp_priv *priv) +{ + if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) + priv->r_tid_ack = priv->r_tid_tail; +} + +static void tid_rdma_schedule_ack(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + priv->s_flags |= RVT_S_ACK_PENDING; + hfi1_schedule_tid_send(qp); +} + +static void tid_rdma_trigger_ack(struct rvt_qp *qp) +{ + validate_r_tid_ack(qp->priv); + tid_rdma_schedule_ack(qp); +} + static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { return @@ -3005,10 +3025,7 @@ nak_psn: qpriv->s_nak_state = IB_NAK_PSN_ERROR; /* We are NAK'ing the next expected PSN */ qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); - qpriv->s_flags |= RVT_S_ACK_PENDING; - if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID) - qpriv->r_tid_ack = qpriv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto unlock; } @@ -3526,7 +3543,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) /* * If overtaking req->acked_tail, send an RNR NAK. Because the * QP is not queued in this case, and the issue can only be - * caused due a delay in scheduling the second leg which we + * caused by a delay in scheduling the second leg which we * cannot estimate, we use a rather arbitrary RNR timeout of * (MAX_FLOWS / 2) segments */ @@ -3534,8 +3551,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) MAX_FLOWS)) { ret = -EAGAIN; to_seg = MAX_FLOWS >> 1; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); break; } @@ -4335,8 +4351,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, req); trace_hfi1_tid_write_rsp_rcv_data(qp); - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; + validate_r_tid_ack(priv); if (opcode == TID_OP(WRITE_DATA_LAST)) { release_rdma_sge_mr(e); @@ -4375,8 +4390,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) } done: - priv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_schedule_ack(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; if (fecn) @@ -4388,10 +4402,7 @@ send_nak: if (!priv->s_nak_state) { priv->s_nak_state = IB_NAK_PSN_ERROR; priv->s_nak_psn = flow->flow_state.r_next_psn; - priv->s_flags |= RVT_S_ACK_PENDING; - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto done; } @@ -4939,8 +4950,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->resync = true; /* RESYNC request always gets a TID RDMA ACK. */ qpriv->s_nak_state = 0; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); bail: if (fecn) qp->s_flags |= RVT_S_ECN; From c2be3865a1763c4be39574937e1aae27e917af4d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 25 Oct 2019 15:58:36 -0400 Subject: [PATCH 012/146] IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA For a TID RDMA WRITE request, a QP on the responder side could be put into a queue when a hardware flow is not available. A RNR NAK will be returned to the requester with a RNR timeout value based on the position of the QP in the queue. The tid_rdma_flow_wt variable is used to calculate the timeout value and is determined by using a MTU of 4096 at the module loading time. This could reduce the timeout value by half from the desired value, leading to excessive RNR retries. This patch fixes the issue by calculating the flow weight with the real MTU assigned to the QP. Fixes: 07b923701e38 ("IB/hfi1: Add functions to receive TID RDMA WRITE request") Link: https://lore.kernel.org/r/20191025195836.106825.77769.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 1 - drivers/infiniband/hw/hfi1/tid_rdma.c | 13 +++++-------- drivers/infiniband/hw/hfi1/tid_rdma.h | 3 +-- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 71cb9525c074..26b792bb1027 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void) goto bail_dev; } - hfi1_compute_tid_rdma_flow_wt(); /* * These must be called before the driver is registered with * the PCI subsystem. diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73bc78ba288e..e53f542b60af 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -107,8 +107,6 @@ static u32 mask_generation(u32 a) * C - Capcode */ -static u32 tid_rdma_flow_wt; - static void tid_rdma_trigger_resume(struct work_struct *work); static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, @@ -3388,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); } -void hfi1_compute_tid_rdma_flow_wt(void) +static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp) { /* * Heuristic for computing the RNR timeout when waiting on the flow * queue. Rather than a computationaly expensive exact estimate of when * a flow will be available, we assume that if a QP is at position N in * the flow queue it has to wait approximately (N + 1) * (number of - * segments between two sync points), assuming PMTU of 4K. The rationale - * for this is that flows are released and recycled at each sync point. + * segments between two sync points). The rationale for this is that + * flows are released and recycled at each sync point. */ - tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / - TID_RDMA_MAX_SEGMENT_SIZE; + return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT; } static u32 position_in_queue(struct hfi1_qp_priv *qpriv, @@ -3522,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); if (ret) { - to_seg = tid_rdma_flow_wt * + to_seg = hfi1_compute_tid_rdma_flow_wt(qp) * position_in_queue(qpriv, &rcd->flow_queue); break; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 1c536185261e..6e82df2190b7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -17,6 +17,7 @@ #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) +#define TID_RDMA_SEGMENT_SHIFT 18 /* * Bit definitions for priv->s_flags. @@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); -void hfi1_compute_tid_rdma_flow_wt(void); - void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, From ce8e8087cf3b5b4f19d29248bfc7deef95525490 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 25 Oct 2019 15:58:42 -0400 Subject: [PATCH 013/146] IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR Normal RDMA WRITE request never returns IB_WC_RNR_RETRY_EXC_ERR to ULPs because it does not need post receive buffer on the responder side. Consequently, as an enhancement to normal RDMA WRITE request inside the hfi1 driver, TID RDMA WRITE request should not return such an error status to ULPs, although it does receive RNR NAKs from the responder when TID resources are not available. This behavior is violated when qp->s_rnr_retry_cnt is set in current hfi1 implementation. This patch enforces these semantics by avoiding any reaction to the updates of the RNR QP attributes. Fixes: 3c6cb20a0d17 ("IB/hfi1: Add TID RDMA WRITE functionality into RDMA verbs") Link: https://lore.kernel.org/r/20191025195842.106825.71532.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 513a8aac9ccd..1a3c647675a7 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_flags & RVT_S_WAIT_RNR) goto bail_stop; rdi = ib_to_rvt(qp->ibqp.device); - if (qp->s_rnr_retry == 0 && - !((rdi->post_parms[wqe->wr.opcode].flags & - RVT_OPERATION_IGN_RNR_CNT) && - qp->s_rnr_retry_cnt == 0)) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; + if (!(rdi->post_parms[wqe->wr.opcode].flags & + RVT_OPERATION_IGN_RNR_CNT)) { + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) + qp->s_rnr_retry--; } - if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) - qp->s_rnr_retry--; /* * The last valid PSN is the previous PSN. For TID RDMA WRITE From 531eb45b3da4267fc2a64233ba256c8ffb02edd2 Mon Sep 17 00:00:00 2001 From: Sirong Wang Date: Fri, 1 Nov 2019 10:33:29 +0800 Subject: [PATCH 014/146] RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN Size of pointer to buf field of struct hns_roce_hem_chunk should be considered when calculating HNS_ROCE_HEM_CHUNK_LEN, or sg table size will be larger than expected when allocating hem. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/1572575610-52530-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Sirong Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 86783276fb1f..3bb8f78fb7b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -59,7 +59,7 @@ enum { #define HNS_ROCE_HEM_CHUNK_LEN \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ - (sizeof(struct scatterlist))) + (sizeof(struct scatterlist) + sizeof(void *))) #define check_whether_bt_num_3(type, hop_num) \ (type < HEM_TYPE_MTT && hop_num == 2) From 411c1e6774e2e1f96b1ccce4f119376b94ade3e4 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 1 Nov 2019 10:33:30 +0800 Subject: [PATCH 015/146] RDMA/hns: Correct the value of srq_desc_size srq_desc_size should be rounded up to pow of two before used, or related calculation may cause allocating wrong size of memory for srq buffer. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1572575610-52530-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9591457eb768..43ea2c13b212 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); srq->max_gs = srq_init_attr->attr.max_sge; - srq_desc_size = max(16, 16 * srq->max_gs); + srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); srq->wqe_shift = ilog2(srq_desc_size); From 86c6739eda7d2a03f2db30cbee67a5fb81afa8ba Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 6 Nov 2019 08:13:49 +0100 Subject: [PATCH 016/146] xfrm: Fix memleak on xfrm state destroy We leak the page that we use to create skb page fragments when destroying the xfrm_state. Fix this by dropping a page reference if a page was assigned to the xfrm_state. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Reported-by: JD Reported-by: Paul Wouters Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c6f3c4a1bd99..f3423562d933 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -495,6 +495,8 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + if (x->xfrag.page) + put_page(x->xfrag.page); xfrm_dev_state_free(x); security_xfrm_state_free(x); xfrm_state_free(x); From 41d931459b53e32c67a1f8838d1e6826a69ee745 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 15:31:07 +0800 Subject: [PATCH 017/146] drm/i915/gvt: fix dropping obj reference twice The reference count of obj will be decremented twice if error occurs in dma_buf_fd(). Additionally, attempting to read the reference count of obj after dropping reference may lead to a use after free bug. Here, we drop obj's reference until it is not used. Fixes: e546e281d33d ("drm/i915/gvt: Dmabuf support for GVT-g") Signed-off-by: Pan Bian Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 13044c027f27..4bfaefdf548d 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -498,8 +498,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) goto out_free_gem; } - i915_gem_object_put(obj); - ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR); if (ret < 0) { gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret); @@ -524,6 +522,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) file_count(dmabuf->file), kref_read(&obj->base.refcount)); + i915_gem_object_put(obj); + return dmabuf_fd; out_free_dmabuf: From cb1a4badf59275eb7221dcec621e8154917eabd1 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Thu, 7 Nov 2019 13:51:47 +0200 Subject: [PATCH 018/146] iwlwifi: pcie: don't consider IV len in A-MSDU From gen2 PN is totally offloaded to hardware (also the space for the IV isn't part of the skb). As you can see in mvm/mac80211.c:3545, the MAC for cipher types CCMP/GCMP doesn't set IEEE80211_KEY_FLAG_PUT_IV_SPACE for gen2 NICs. This causes all the AMSDU data to be corrupted with cipher enabled. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 8894027429d6..d80f71f82a6d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -251,27 +251,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - u16 length, iv_len, amsdu_pad; + u16 length, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; struct page **page_ptr; struct tso_t tso; - /* if the packet is protected, then it must be CCMP or GCMP */ - iv_len = ieee80211_has_protected(hdr->frame_control) ? - IEEE80211_CCMP_HDR_LEN : 0; - trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, start_len, 0); ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); - total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len; + total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len; amsdu_pad = 0; /* total amount of header we may need for this A-MSDU */ hdr_room = DIV_ROUND_UP(total_len, mss) * - (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; + (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ hdr_page = get_page_hdr(trans, hdr_room); @@ -282,14 +278,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, start_hdr = hdr_page->pos; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); *page_ptr = hdr_page->page; - memcpy(hdr_page->pos, skb->data + hdr_len, iv_len); - hdr_page->pos += iv_len; /* - * Pull the ieee80211 header + IV to be able to use TSO core, + * Pull the ieee80211 header to be able to use TSO core, * we will restore it for the tx_status flow. */ - skb_pull(skb, hdr_len + iv_len); + skb_pull(skb, hdr_len); /* * Remove the length of all the headers that we don't actually @@ -364,8 +358,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, } } - /* re -add the WiFi header and IV */ - skb_push(skb, hdr_len + iv_len); + /* re -add the WiFi header */ + skb_push(skb, hdr_len); return 0; From 167beb1756791e0806365a3f86a0da10d7a327ee Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 9 Nov 2019 19:16:58 +0100 Subject: [PATCH 019/146] ALSA: usb-audio: Fix missing error check at mixer resolution test A check of the return value from get_cur_mix_raw() is missing at the resolution test code in get_min_max_with_quirks(), which may leave the variable untouched, leading to a random uninitialized value, as detected by syzkaller fuzzer. Add the missing return error check for fixing that. Reported-and-tested-by: syzbot+abe1ab7afc62c6bb6377@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20191109181658.30368-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 3fd1d1749edf..45eee5cc312e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1229,7 +1229,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, if (cval->min + cval->res < cval->max) { int last_valid_res = cval->res; int saved, test, check; - get_cur_mix_raw(cval, minchn, &saved); + if (get_cur_mix_raw(cval, minchn, &saved) < 0) + goto no_res_check; for (;;) { test = saved; if (test < cval->max) @@ -1249,6 +1250,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, snd_usb_set_cur_mix_value(cval, minchn, 0, saved); } +no_res_check: cval->initialized = 1; } From 5a508a254bed9a2e36a5fb96c9065532a6bf1e9c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 9 Nov 2019 11:29:46 +0100 Subject: [PATCH 020/146] devlink: disallow reload operation during device cleanup There is a race between driver code that does setup/cleanup of device and devlink reload operation that in some drivers works with the same code. Use after free could we easily obtained by running: while true; do echo "0000:00:10.0" >/sys/bus/pci/drivers/mlxsw_spectrum2/bind devlink dev reload pci/0000:00:10.0 & echo "0000:00:10.0" >/sys/bus/pci/drivers/mlxsw_spectrum2/unbind done Fix this by enabling reload only after setup of device is complete and disabling it at the beginning of the cleanup process. Reported-by: Ido Schimmel Fixes: 2d8dc5bbf4e7 ("devlink: Add support for reload") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 ++ drivers/net/ethernet/mellanox/mlxsw/core.c | 6 +++- drivers/net/netdevsim/dev.c | 2 ++ include/net/devlink.h | 5 ++- net/core/devlink.c | 39 +++++++++++++++++++++- 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 69bb6bb06e76..d44ac666e730 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4010,6 +4010,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto err_params_unregister; devlink_params_publish(devlink); + devlink_reload_enable(devlink); pci_save_state(pdev); return 0; @@ -4121,6 +4122,8 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + devlink_reload_disable(devlink); + if (mlx4_is_slave(dev)) persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 4421ab22182f..20e9dc46cacd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1186,8 +1186,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) + if (mlxsw_driver->params_register) { devlink_params_publish(devlink); + devlink_reload_enable(devlink); + } return 0; @@ -1249,6 +1251,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); + if (!reload) + devlink_reload_disable(devlink); if (devlink_is_reload_failed(devlink)) { if (!reload) /* Only the parts that were not de-initialized in the diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 54ca6681ba31..44c2d857a7fa 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -708,6 +708,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) goto err_debugfs_exit; devlink_params_publish(devlink); + devlink_reload_enable(devlink); return nsim_dev; err_debugfs_exit: @@ -732,6 +733,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) { struct devlink *devlink = priv_to_devlink(nsim_dev); + devlink_reload_disable(devlink); nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); nsim_dev_traps_exit(devlink); diff --git a/include/net/devlink.h b/include/net/devlink.h index 23e4b65ec9df..2116c88663a1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -38,7 +38,8 @@ struct devlink { struct device *dev; possible_net_t _net; struct mutex lock; - bool reload_failed; + u8 reload_failed:1, + reload_enabled:1; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -774,6 +775,8 @@ struct ib_device; struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); +void devlink_reload_enable(struct devlink *devlink); +void devlink_reload_disable(struct devlink *devlink); void devlink_free(struct devlink *devlink); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index f80151eeaf51..7d64660a72fc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2699,7 +2699,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) struct devlink *devlink = info->user_ptr[0]; int err; - if (!devlink_reload_supported(devlink)) + if (!devlink_reload_supported(devlink) || !devlink->reload_enabled) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -6196,12 +6196,49 @@ EXPORT_SYMBOL_GPL(devlink_register); void devlink_unregister(struct devlink *devlink) { mutex_lock(&devlink_mutex); + WARN_ON(devlink_reload_supported(devlink) && + devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); +/** + * devlink_reload_enable - Enable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at end of device initialization + * process when reload operation is supported. + */ +void devlink_reload_enable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->reload_enabled = true; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_enable); + +/** + * devlink_reload_disable - Disable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at the beginning of device cleanup + * process when reload operation is supported. + */ +void devlink_reload_disable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + /* Mutex is taken which ensures that no reload operation is in + * progress while setting up forbidded flag. + */ + devlink->reload_enabled = false; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_disable); + /** * devlink_free - Free devlink instance resources * From dd3d792def0d4f33bbf319982b1878b0c8aaca34 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Sat, 9 Nov 2019 18:43:06 +0800 Subject: [PATCH 021/146] tcp: remove redundant new line from tcp_event_sk_skb This removes '\n' from trace event class tcp_event_sk_skb to avoid redundant new blank line and make output compact. Fixes: af4325ecc24f ("tcp: expose sk_state in tcp_retransmit_skb tracepoint") Reviewed-by: Eric Dumazet Reviewed-by: Yafang Shao Signed-off-by: Tony Lu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 2bc9960a31aa..cf97f6339acb 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) From 630faf81b3e61bcc90dc6d8b497800657d2752a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Nov 2019 11:53:27 -0500 Subject: [PATCH 022/146] cgroup: don't put ERR_PTR() into fc->root the caller of ->get_tree() expects NULL left there on error... Reported-by: Thibaut Sautereau Signed-off-by: Al Viro --- kernel/cgroup/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 080561bb8a4b..ef4242e5d4bc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc) nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(fc->root); - fc->root = nsdentry; if (IS_ERR(nsdentry)) { - ret = PTR_ERR(nsdentry); deactivate_locked_super(sb); + ret = PTR_ERR(nsdentry); + nsdentry = NULL; } + fc->root = nsdentry; } if (!ctx->kfc.new_sb_created) From a2ece088882666e1dc7113744ac912eb161e3f87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Nov 2019 22:08:29 -0500 Subject: [PATCH 023/146] exportfs_decode_fh(): negative pinned may become positive without the parent locked Signed-off-by: Al Viro --- fs/exportfs/expfs.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 09bc68708d28..2dd55b172d57 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - inode_lock(target_dir->d_inode); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - inode_unlock(target_dir->d_inode); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + inode_lock(target_dir->d_inode); + nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } + inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. From 69924b89687a2923e88cc42144aea27868913d0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Nov 2019 13:11:41 -0400 Subject: [PATCH 024/146] audit_get_nd(): don't unlock parent too early if the child has been negative and just went positive under us, we want coherent d_is_positive() and ->d_inode. Don't unlock the parent until we'd done that work... Signed-off-by: Al Viro --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1f31c2f1e6fc..4508d5e0cf69 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; } From bcf0d9d4b76976f892154efdfc509b256fd898e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 12:07:15 -0500 Subject: [PATCH 025/146] ecryptfs: fix unlink and rmdir in face of underlying fs modifications A problem similar to the one caught in commit 74dd7c97ea2a ("ecryptfs_rename(): verify that lower dentries are still OK after lock_rename()") exists for unlink/rmdir as well. Instead of playing with dget_parent() of underlying dentry of victim and hoping it's the same as underlying dentry of our directory, do the following: * find the underlying dentry of victim * find the underlying directory of victim's parent (stable since the victim is ecryptfs dentry and inode of its parent is held exclusive by the caller). * lock the inode of dentry underlying the victim's parent * check that underlying dentry of victim is still hashed and has the right parent - it can be moved, but it can't be moved to/from the directory we are holding exclusive. So while ->d_parent itself might not be stable, the result of comparison is. If the check passes, everything is fine - underlying directory is locked, underlying victim is still a child of that directory and we can go ahead and feed them to vfs_unlink(). As in the current mainline we need to pin the underlying dentry of victim, so that it wouldn't go negative under us, but that's the only temporary reference that needs to be grabbed there. Underlying dentry of parent won't go away (it's pinned by the parent, which is held by caller), so there's no need to grab it. The same problem (with the same solution) exists for rmdir. Moreover, rename gets simpler and more robust with the same "don't bother with dget_parent()" approach. Fixes: 74dd7c97ea2 "ecryptfs_rename(): verify that lower dentries are still OK after lock_rename()" Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 65 ++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 18426f4855f1..a905d5f4f3b0 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -128,13 +128,20 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, struct inode *inode) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); struct dentry *lower_dir_dentry; + struct inode *lower_dir_inode; int rc; - dget(lower_dentry); - lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + lower_dir_inode = d_inode(lower_dir_dentry); + inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT); + dget(lower_dentry); // don't even try to make the lower negative + if (lower_dentry->d_parent != lower_dir_dentry) + rc = -EINVAL; + else if (d_unhashed(lower_dentry)) + rc = -EINVAL; + else + rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -142,10 +149,11 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_times(dir, lower_dir_inode); set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); inode->i_ctime = dir->i_ctime; - d_drop(dentry); out_unlock: - unlock_dir(lower_dir_dentry); dput(lower_dentry); + inode_unlock(lower_dir_inode); + if (!rc) + d_drop(dentry); return rc; } @@ -512,22 +520,30 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) { struct dentry *lower_dentry; struct dentry *lower_dir_dentry; + struct inode *lower_dir_inode; int rc; lower_dentry = ecryptfs_dentry_to_lower(dentry); - dget(dentry); - lower_dir_dentry = lock_parent(lower_dentry); - dget(lower_dentry); - rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); - dput(lower_dentry); - if (!rc && d_really_is_positive(dentry)) + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + lower_dir_inode = d_inode(lower_dir_dentry); + + inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT); + dget(lower_dentry); // don't even try to make the lower negative + if (lower_dentry->d_parent != lower_dir_dentry) + rc = -EINVAL; + else if (d_unhashed(lower_dentry)) + rc = -EINVAL; + else + rc = vfs_rmdir(lower_dir_inode, lower_dentry); + if (!rc) { clear_nlink(d_inode(dentry)); - fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry)); - set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink); - unlock_dir(lower_dir_dentry); + fsstack_copy_attr_times(dir, lower_dir_inode); + set_nlink(dir, lower_dir_inode->i_nlink); + } + dput(lower_dentry); + inode_unlock(lower_dir_inode); if (!rc) d_drop(dentry); - dput(dentry); return rc; } @@ -565,20 +581,22 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dentry; struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; - struct dentry *trap = NULL; + struct dentry *trap; struct inode *target_inode; if (flags) return -EINVAL; + lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent); + lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); - dget(lower_old_dentry); - dget(lower_new_dentry); - lower_old_dir_dentry = dget_parent(lower_old_dentry); - lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = d_inode(new_dentry); + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dget(lower_new_dentry); rc = -EINVAL; if (lower_old_dentry->d_parent != lower_old_dir_dentry) goto out_lock; @@ -606,11 +624,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); out_lock: - unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); - dput(lower_new_dir_dentry); - dput(lower_old_dir_dentry); dput(lower_new_dentry); - dput(lower_old_dentry); + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); return rc; } From e72b9dd6a5f17d0fb51f16f8685f3004361e83d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:45:04 -0500 Subject: [PATCH 026/146] ecryptfs_lookup_interpose(): lower_dentry->d_inode is not stable lower_dentry can't go from positive to negative (we have it pinned), but it *can* go from negative to positive. So fetching ->d_inode into a local variable, doing a blocking allocation, checking that now ->d_inode is non-NULL and feeding the value we'd fetched earlier to a function that won't accept NULL is not a good idea. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a905d5f4f3b0..3c2298721359 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -319,7 +319,7 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { - struct inode *inode, *lower_inode = d_inode(lower_dentry); + struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; struct vfsmount *lower_mnt; int rc = 0; @@ -339,7 +339,15 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, dentry_info->lower_path.mnt = lower_mnt; dentry_info->lower_path.dentry = lower_dentry; - if (d_really_is_negative(lower_dentry)) { + /* + * negative dentry can go positive under us here - its parent is not + * locked. That's OK and that could happen just as we return from + * ecryptfs_lookup() anyway. Just need to be careful and fetch + * ->d_inode only once - it's not stable here. + */ + lower_inode = READ_ONCE(lower_dentry->d_inode); + + if (!lower_inode) { /* We want to add because we couldn't find in lower */ d_add(dentry, NULL); return NULL; From 762c69685ff7ad5ad7fee0656671e20a0c9c864d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:55:43 -0500 Subject: [PATCH 027/146] ecryptfs_lookup_interpose(): lower_dentry->d_parent is not stable either We need to get the underlying dentry of parent; sure, absent the races it is the parent of underlying dentry, but there's nothing to prevent losing a timeslice to preemtion in the middle of evaluation of lower_dentry->d_parent->d_inode, having another process move lower_dentry around and have its (ex)parent not pinned anymore and freed on memory pressure. Then we regain CPU and try to fetch ->d_inode from memory that is freed by that point. dentry->d_parent *is* stable here - it's an argument of ->lookup() and we are guaranteed that it won't be moved anywhere until we feed it to d_add/d_splice_alias. So we safely go that way to get to its underlying dentry. Cc: stable@vger.kernel.org # since 2009 or so Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 3c2298721359..e23752d9a79f 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -319,9 +319,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { + struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent); struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; - struct vfsmount *lower_mnt; int rc = 0; dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); @@ -330,13 +330,12 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, return ERR_PTR(-ENOMEM); } - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); fsstack_copy_attr_atime(d_inode(dentry->d_parent), - d_inode(lower_dentry->d_parent)); + d_inode(path->dentry)); BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.mnt = mntget(path->mnt); dentry_info->lower_path.dentry = lower_dentry; /* From b73a58549ea37a44434c7afab3c7ad9af210cfd9 Mon Sep 17 00:00:00 2001 From: "Chiou, Cooper" Date: Fri, 8 Nov 2019 15:13:49 +0800 Subject: [PATCH 028/146] ALSA: hda: Add Cometlake-S PCI ID Add HD Audio Device PCI ID for the Intel Cometlake-S platform Signed-off-by: Chiou, Cooper Link: https://lore.kernel.org/r/20191108071349.12840-1-cooper.chiou@intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index cf53fbd872ee..c52419376c74 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2396,6 +2396,9 @@ static const struct pci_device_id azx_ids[] = { /* CometLake-H */ { PCI_DEVICE(0x8086, 0x06C8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* CometLake-S */ + { PCI_DEVICE(0x8086, 0xa3f0), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Icelake */ { PCI_DEVICE(0x8086, 0x34c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, From a7d0358ea3b7f8d7216e663c1ae71cabf7ac24e3 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 11 Nov 2019 15:38:38 +0200 Subject: [PATCH 029/146] ALSA: hda: hdmi - fix pin setup on Tigerlake Apply same logic to pin setup as on previous platforms. Fixes errors in HDMI/DP playback. Tested with both snd-hda-intel and SOF drivers. Fixes: 9a11ba7388f1 ("ALSA: hda: hdmi - add Tigerlake support") Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20191111133838.21213-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 3c720703ebb8..78bd2e3722c7 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -46,10 +46,12 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); ((codec)->core.vendor_id == 0x80862800)) #define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c) #define is_icelake(codec) ((codec)->core.vendor_id == 0x8086280f) +#define is_tigerlake(codec) ((codec)->core.vendor_id == 0x80862812) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ || is_skylake(codec) || is_broxton(codec) \ || is_kabylake(codec) || is_geminilake(codec) \ - || is_cannonlake(codec) || is_icelake(codec)) + || is_cannonlake(codec) || is_icelake(codec) \ + || is_tigerlake(codec)) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec)) From 9059f3c9c02714fcb1093cba281efb1a1bb04258 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 2 Nov 2019 10:53:11 +0800 Subject: [PATCH 030/146] MAINTAINERS: Update for INTEL IOMMU (VT-d) entry Update the INTEL IOMMU (VT-d) entry and add myself as the co-maintainer. I have several years of VT-d development experience and have actively contributed to Intel VT-d driver during recent two years. I volunteer to take this rule. With this role, I can better help review and test patches. Cc: David Woodhouse Cc: Joerg Roedel Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- MAINTAINERS | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eb19fad370d7..2d64b581a35f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8299,11 +8299,14 @@ F: drivers/hid/intel-ish-hid/ INTEL IOMMU (VT-d) M: David Woodhouse +M: Lu Baolu L: iommu@lists.linux-foundation.org -T: git git://git.infradead.org/iommu-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git S: Supported -F: drivers/iommu/intel-iommu.c +F: drivers/iommu/dmar.c +F: drivers/iommu/intel*.[ch] F: include/linux/intel-iommu.h +F: include/linux/intel-svm.h INTEL IOP-ADMA DMA DRIVER R: Dan Williams From 4e7120d79edb31e4ee68e6f8421448e4603be1e9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 8 Nov 2019 16:58:03 +0100 Subject: [PATCH 031/146] iommu/vt-d: Fix QI_DEV_IOTLB_PFSID and QI_DEV_EIOTLB_PFSID macros For both PASID-based-Device-TLB Invalidate Descriptor and Device-TLB Invalidate Descriptor, the Physical Function Source-ID value is split according to this layout: PFSID[3:0] is set at offset 12 and PFSID[15:4] is put at offset 52. Fix the part laid out at offset 52. Fixes: 0f725561e1684 ("iommu/vt-d: Add definitions for PFSID") Signed-off-by: Eric Auger Acked-by: Jacob Pan Cc: stable@vger.kernel.org # v4.19+ Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ed11ef594378..6d8bf4bdf240 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -336,7 +336,8 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -360,7 +361,8 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) -#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 /* Page group response descriptor QW0 */ From e6c617102c7e4ac1398cb0b98ff1f0727755b520 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 Nov 2019 16:11:56 +0000 Subject: [PATCH 032/146] Btrfs: fix log context list corruption after rename exchange operation During rename exchange we might have successfully log the new name in the source root's log tree, in which case we leave our log context (allocated on stack) in the root's list of log contextes. However we might fail to log the new name in the destination root, in which case we fallback to a transaction commit later and never sync the log of the source root, which causes the source root log context to remain in the list of log contextes. This later causes invalid memory accesses because the context was allocated on stack and after rename exchange finishes the stack gets reused and overwritten for other purposes. The kernel's linked list corruption detector (CONFIG_DEBUG_LIST=y) can detect this and report something like the following: [ 691.489929] ------------[ cut here ]------------ [ 691.489947] list_add corruption. prev->next should be next (ffff88819c944530), but was ffff8881c23f7be4. (prev=ffff8881c23f7a38). [ 691.489967] WARNING: CPU: 2 PID: 28933 at lib/list_debug.c:28 __list_add_valid+0x95/0xe0 (...) [ 691.489998] CPU: 2 PID: 28933 Comm: fsstress Not tainted 5.4.0-rc6-btrfs-next-62 #1 [ 691.490001] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 [ 691.490003] RIP: 0010:__list_add_valid+0x95/0xe0 (...) [ 691.490007] RSP: 0018:ffff8881f0b3faf8 EFLAGS: 00010282 [ 691.490010] RAX: 0000000000000000 RBX: ffff88819c944530 RCX: 0000000000000000 [ 691.490011] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffffffffa2c497e0 [ 691.490013] RBP: ffff8881f0b3fe68 R08: ffffed103eaa4115 R09: ffffed103eaa4114 [ 691.490015] R10: ffff88819c944000 R11: ffffed103eaa4115 R12: 7fffffffffffffff [ 691.490016] R13: ffff8881b4035610 R14: ffff8881e7b84728 R15: 1ffff1103e167f7b [ 691.490019] FS: 00007f4b25ea2e80(0000) GS:ffff8881f5500000(0000) knlGS:0000000000000000 [ 691.490021] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 691.490022] CR2: 00007fffbb2d4eec CR3: 00000001f2a4a004 CR4: 00000000003606e0 [ 691.490025] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 691.490027] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 691.490029] Call Trace: [ 691.490058] btrfs_log_inode_parent+0x667/0x2730 [btrfs] [ 691.490083] ? join_transaction+0x24a/0xce0 [btrfs] [ 691.490107] ? btrfs_end_log_trans+0x80/0x80 [btrfs] [ 691.490111] ? dget_parent+0xb8/0x460 [ 691.490116] ? lock_downgrade+0x6b0/0x6b0 [ 691.490121] ? rwlock_bug.part.0+0x90/0x90 [ 691.490127] ? do_raw_spin_unlock+0x142/0x220 [ 691.490151] btrfs_log_dentry_safe+0x65/0x90 [btrfs] [ 691.490172] btrfs_sync_file+0x9f1/0xc00 [btrfs] [ 691.490195] ? btrfs_file_write_iter+0x1800/0x1800 [btrfs] [ 691.490198] ? rcu_read_lock_any_held.part.11+0x20/0x20 [ 691.490204] ? __do_sys_newstat+0x88/0xd0 [ 691.490207] ? cp_new_stat+0x5d0/0x5d0 [ 691.490218] ? do_fsync+0x38/0x60 [ 691.490220] do_fsync+0x38/0x60 [ 691.490224] __x64_sys_fdatasync+0x32/0x40 [ 691.490228] do_syscall_64+0x9f/0x540 [ 691.490233] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 691.490235] RIP: 0033:0x7f4b253ad5f0 (...) [ 691.490239] RSP: 002b:00007fffbb2d6078 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 691.490242] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f4b253ad5f0 [ 691.490244] RDX: 00007fffbb2d5fe0 RSI: 00007fffbb2d5fe0 RDI: 0000000000000003 [ 691.490245] RBP: 000000000000000d R08: 0000000000000001 R09: 00007fffbb2d608c [ 691.490247] R10: 00000000000002e8 R11: 0000000000000246 R12: 00000000000001f4 [ 691.490248] R13: 0000000051eb851f R14: 00007fffbb2d6120 R15: 00005635a498bda0 This started happening recently when running some test cases from fstests like btrfs/004 for example, because support for rename exchange was added last week to fsstress from fstests. So fix this by deleting the log context for the source root from the list if we have logged the new name in the source root. Reported-by: Su Yue Fixes: d4682ba03ef618 ("Btrfs: sync log after logging new name") CC: stable@vger.kernel.org # 4.19+ Tested-by: Su Yue Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6dc4dd16cf7..015910079e73 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9744,6 +9744,18 @@ out_fail: commit_transaction = true; } if (commit_transaction) { + /* + * We may have set commit_transaction when logging the new name + * in the destination root, in which case we left the source + * root context in the list of log contextes. So make sure we + * remove it to avoid invalid memory accesses, since the context + * was allocated in our stack frame. + */ + if (sync_log_root) { + mutex_lock(&root->log_mutex); + list_del_init(&ctx_root.list); + mutex_unlock(&root->log_mutex); + } ret = btrfs_commit_transaction(trans); } else { int ret2; @@ -9757,6 +9769,9 @@ out_notrans: if (old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); + ASSERT(list_empty(&ctx_root.list)); + ASSERT(list_empty(&ctx_dest.list)); + return ret; } From 40a1dcee2d1846a24619fe9ca45c661ca0db7dda Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sun, 10 Nov 2019 11:30:48 +0000 Subject: [PATCH 033/146] net: ethernet: dwmac-sun8i: Use the correct function in exit path When PHY is not powered, the probe function fail and some resource are still unallocated. Furthermore some BUG happens: dwmac-sun8i 5020000.ethernet: EMAC reset timeout ------------[ cut here ]------------ kernel BUG at /linux-next/net/core/dev.c:9844! So let's use the right function (stmmac_pltfr_remove) in the error path. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Cc: # v4.15+ Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index ddcc191febdb..6e47be63a43c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1226,7 +1226,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) dwmac_mux: sun8i_dwmac_unset_syscon(gmac); dwmac_exit: - sun8i_dwmac_exit(pdev, plat_dat->bsp_priv); + stmmac_pltfr_remove(pdev); return ret; } From d279505b723cba058b604ed8cf9cd4c854e2a041 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 10 Nov 2019 14:11:56 +0200 Subject: [PATCH 034/146] devlink: Add method for time-stamp on reporter's dump When setting the dump's time-stamp, use ktime_get_real in addition to jiffies. This simplifies the user space implementation and bypasses some inconsistent behavior with translating jiffies to current time. The time taken is transformed into nsec, to comply with y2038 issue. Fixes: c8e1da0bf923 ("devlink: Add health report functionality") Signed-off-by: Aya Levin Acked-by: Jiri Pirko Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 580b7a2e40e1..a8a2174db030 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -421,6 +421,7 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 7d64660a72fc..93905dc7c179 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4618,6 +4618,7 @@ struct devlink_health_reporter { bool auto_recover; u8 health_state; u64 dump_ts; + u64 dump_real_ts; u64 error_count; u64 recovery_count; u64 last_recovery_ts; @@ -4790,6 +4791,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, goto dump_err; reporter->dump_ts = jiffies; + reporter->dump_real_ts = ktime_get_real_ns(); return 0; @@ -4952,6 +4954,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, jiffies_to_msecs(reporter->dump_ts), DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; nla_nest_end(msg, reporter_attr); genlmsg_end(msg, hdr); From 73a533ecf0af5f73ff72dd7c96d1c8598ca93649 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 Nov 2019 16:31:23 +0100 Subject: [PATCH 035/146] mlxsw: core: Enable devlink reload only on probe Call devlink enable only during probe time and avoid deadlock during reload. Reported-by: Shalom Toledo Fixes: 5a508a254bed ("devlink: disallow reload operation during device cleanup") Signed-off-by: Jiri Pirko Tested-by: Shalom Toledo Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 20e9dc46cacd..0a0884d86d44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1186,10 +1186,11 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) { + if (mlxsw_driver->params_register) devlink_params_publish(devlink); + + if (!reload) devlink_reload_enable(devlink); - } return 0; From a71a29f50de1ef97ab55c151a1598eb12dde379d Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sun, 10 Nov 2019 17:19:15 +0100 Subject: [PATCH 036/146] NFC: nxp-nci: Fix NULL pointer dereference after I2C communication error I2C communication errors (-EREMOTEIO) during the IRQ handler of nxp-nci result in a NULL pointer dereference at the moment: BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 355 Comm: irq/137-nxp-nci Not tainted 5.4.0-rc6 #1 RIP: 0010:skb_queue_tail+0x25/0x50 Call Trace: nci_recv_frame+0x36/0x90 [nci] nxp_nci_i2c_irq_thread_fn+0xd1/0x285 [nxp_nci_i2c] ? preempt_count_add+0x68/0xa0 ? irq_forced_thread_fn+0x80/0x80 irq_thread_fn+0x20/0x60 irq_thread+0xee/0x180 ? wake_threads_waitq+0x30/0x30 kthread+0xfb/0x130 ? irq_thread_check_affinity+0xd0/0xd0 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x40 Afterward the kernel must be rebooted to work properly again. This happens because it attempts to call nci_recv_frame() with skb == NULL. However, unlike nxp_nci_fw_recv_frame(), nci_recv_frame() does not have any NULL checks for skb, causing the NULL pointer dereference. Change the code to call only nxp_nci_fw_recv_frame() in case of an error. Make sure to log it so it is obvious that a communication error occurred. The error above then becomes: nxp-nci_i2c i2c-NXP1001:00: NFC: Read failed with error -121 nci: __nci_request: wait_for_completion_interruptible_timeout failed 0 nxp-nci_i2c i2c-NXP1001:00: NFC: Read failed with error -121 Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Signed-off-by: Stephan Gerhold Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/i2c.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 307bd2afbe05..4d1909aecd6c 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -220,8 +220,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id) if (r == -EREMOTEIO) { phy->hard_fault = r; - skb = NULL; - } else if (r < 0) { + if (info->mode == NXP_NCI_MODE_FW) + nxp_nci_fw_recv_frame(phy->ndev, NULL); + } + if (r < 0) { nfc_err(&client->dev, "Read failed with error %d\n", r); goto exit_irq_handled; } From 1d4639567d970de087a893521f7f50a32740b595 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 11 Nov 2019 15:13:47 +0800 Subject: [PATCH 037/146] mdio_bus: Fix PTR_ERR applied after initialization to constant Fix coccinelle warning: ./drivers/net/phy/mdio_bus.c:67:5-12: ERROR: PTR_ERR applied after initialization to constant on line 62 ./drivers/net/phy/mdio_bus.c:68:5-12: ERROR: PTR_ERR applied after initialization to constant on line 62 Fix this by using IS_ERR before PTR_ERR Reported-by: Hulk Robot Fixes: 71dd6c0dff51 ("net: phy: add support for reset-controller") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2e29ab841b4d..35876562e32a 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -64,11 +64,12 @@ static int mdiobus_register_reset(struct mdio_device *mdiodev) if (mdiodev->dev.of_node) reset = devm_reset_control_get_exclusive(&mdiodev->dev, "phy"); - if (PTR_ERR(reset) == -ENOENT || - PTR_ERR(reset) == -ENOTSUPP) - reset = NULL; - else if (IS_ERR(reset)) - return PTR_ERR(reset); + if (IS_ERR(reset)) { + if (PTR_ERR(reset) == -ENOENT || PTR_ERR(reset) == -ENOSYS) + reset = NULL; + else + return PTR_ERR(reset); + } mdiodev->reset_ctrl = reset; From 2f5841349df281ecf8f81cc82d869b8476f0db0b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 21:34:24 +0100 Subject: [PATCH 038/146] ntp/y2038: Remove incorrect time_t truncation A cast to 'time_t' was accidentally left in place during the conversion of __do_adjtimex() to 64-bit timestamps, so the resulting value is incorrectly truncated. Remove the cast so the 64-bit time gets propagated correctly. Fixes: ead25417f82e ("timex: use __kernel_timex internally") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20191108203435.112759-2-arnd@arndb.de --- kernel/time/ntp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 65eb796610dc..069ca78fb0bf 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -771,7 +771,7 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, /* fill PPS status fields */ pps_fill_timex(txc); - txc->time.tv_sec = (time_t)ts->tv_sec; + txc->time.tv_sec = ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; From 93bd25bb69f46367ba8f82c578e0c05702ceb482 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Nov 2019 23:34:31 -0700 Subject: [PATCH 039/146] io_uring: make timeout sequence == 0 mean no sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we make sequence == 0 be the same as sequence == 1, but that's not super useful if the intent is really to have a timeout that's just a pure timeout. If the user passes in sqe->off == 0, then don't apply any sequence logic to the request, let it purely be driven by the timeout specified. Reported-by: 李通洲 Reviewed-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f9a38998f2fc..87beca4377f7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -326,6 +326,7 @@ struct io_kiocb { #define REQ_F_TIMEOUT 1024 /* timeout request */ #define REQ_F_ISREG 2048 /* regular file */ #define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */ +#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */ u64 user_data; u32 result; u32 sequence; @@ -453,9 +454,13 @@ static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx) struct io_kiocb *req; req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list); - if (req && !__io_sequence_defer(ctx, req)) { - list_del_init(&req->list); - return req; + if (req) { + if (req->flags & REQ_F_TIMEOUT_NOSEQ) + return NULL; + if (!__io_sequence_defer(ctx, req)) { + list_del_init(&req->list); + return req; + } } return NULL; @@ -1941,18 +1946,24 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; + req->flags |= REQ_F_TIMEOUT; + /* * sqe->off holds how many events that need to occur for this - * timeout event to be satisfied. + * timeout event to be satisfied. If it isn't set, then this is + * a pure timeout request, sequence isn't used. */ count = READ_ONCE(sqe->off); - if (!count) - count = 1; + if (!count) { + req->flags |= REQ_F_TIMEOUT_NOSEQ; + spin_lock_irq(&ctx->completion_lock); + entry = ctx->timeout_list.prev; + goto add; + } req->sequence = ctx->cached_sq_head + count - 1; /* reuse it to store the count */ req->submit.sequence = count; - req->flags |= REQ_F_TIMEOUT; /* * Insertion sort, ensuring the first entry in the list is always @@ -1964,6 +1975,9 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned nxt_sq_head; long long tmp, tmp_nxt; + if (nxt->flags & REQ_F_TIMEOUT_NOSEQ) + continue; + /* * Since cached_sq_head + count - 1 can overflow, use type long * long to store it. @@ -1990,6 +2004,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) nxt->sequence++; } req->sequence -= span; +add: list_add(&req->list, entry); spin_unlock_irq(&ctx->completion_lock); From 4944a4b1077f74d89073624bd286219d2fcbfce3 Mon Sep 17 00:00:00 2001 From: Xiaodong Xu Date: Mon, 11 Nov 2019 15:05:46 -0800 Subject: [PATCH 040/146] xfrm: release device reference for invalid state An ESP packet could be decrypted in async mode if the input handler for this packet returns -EINPROGRESS in xfrm_input(). At this moment the device reference in skb is held. Later xfrm_input() will be invoked again to resume the processing. If the transform state is still valid it would continue to release the device reference and there won't be a problem; however if the transform state is not valid when async resumption happens, the packet will be dropped while the device reference is still being held. When the device is deleted for some reason and the reference to this device is not properly released, the kernel will keep logging like: unregister_netdevice: waiting for ppp2 to become free. Usage count = 1 The issue is observed when running IPsec traffic over a PPPoE device based on a bridge interface. By terminating the PPPoE connection on the server end for multiple times, the PPPoE device on the client side will eventually get stuck on the above warning message. This patch will check the async mode first and continue to release device reference in async resumption, before it is dropped due to invalid state. v2: Do not assign address family from outer_mode in the transform if the state is invalid v3: Release device reference in the error path instead of jumping to resume Fixes: 4ce3dbe397d7b ("xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0)") Signed-off-by: Xiaodong Xu Reported-by: Bo Chen Tested-by: Bo Chen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 9b599ed66d97..2c86a2fc3915 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -480,6 +480,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) else XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); + + if (encap_type == -1) + dev_put(skb->dev); goto drop; } From e3a5d8e386c3fb973fa75f2403622a8f3640ec06 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 12 Nov 2019 07:19:58 +0000 Subject: [PATCH 041/146] block: check bi_size overflow before merge __bio_try_merge_page() may merge a page to bio without bio_full() check and cause bi_size overflow. The overflow typically ends up with sd_init_command() warning on zero segment request with call trace like this: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1986 at drivers/scsi/scsi_lib.c:1025 scsi_init_io+0x156/0x180 CPU: 2 PID: 1986 Comm: kworker/2:1H Kdump: loaded Not tainted 5.4.0-rc7 #1 Workqueue: kblockd blk_mq_run_work_fn RIP: 0010:scsi_init_io+0x156/0x180 RSP: 0018:ffffa11487663bf0 EFLAGS: 00010246 RAX: 00000000002be0a0 RBX: ffff8e6e9ff30118 RCX: 0000000000000000 RDX: 00000000ffffffe1 RSI: 0000000000000000 RDI: ffff8e6e9ff30118 RBP: ffffa11487663c18 R08: ffffa11487663d28 R09: ffff8e6e9ff30150 R10: 0000000000000001 R11: 0000000000000000 R12: ffff8e6e9ff30000 R13: 0000000000000001 R14: ffff8e74a1cf1800 R15: ffff8e6e9ff30000 FS: 0000000000000000(0000) GS:ffff8e6ea7680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff18cf0fe8 CR3: 0000000659f0a001 CR4: 00000000001606e0 Call Trace: sd_init_command+0x326/0xb40 [sd_mod] scsi_queue_rq+0x502/0xaa0 ? blk_mq_get_driver_tag+0xe7/0x120 blk_mq_dispatch_rq_list+0x256/0x5a0 ? elv_rb_del+0x24/0x30 ? deadline_remove_request+0x7b/0xc0 blk_mq_do_dispatch_sched+0xa3/0x140 blk_mq_sched_dispatch_requests+0xfb/0x170 __blk_mq_run_hw_queue+0x81/0x130 blk_mq_run_work_fn+0x1b/0x20 process_one_work+0x179/0x390 worker_thread+0x4f/0x3e0 kthread+0x105/0x140 ? max_active_store+0x80/0x80 ? kthread_bind+0x20/0x20 ret_from_fork+0x35/0x40 ---[ end trace f9036abf5af4a4d3 ]--- blk_update_request: I/O error, dev sdd, sector 2875552 op 0x1:(WRITE) flags 0x0 phys_seg 0 prio class 0 XFS (sdd1): writeback error on sector 2875552 __bio_try_merge_page() should check the overflow before actually doing merge. Fixes: 07173c3ec276c ("block: enable multipage bvecs") Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 8f0ed6228fc5..b1170ec18464 100644 --- a/block/bio.c +++ b/block/bio.c @@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return false; - if (bio->bi_vcnt > 0) { + if (bio->bi_vcnt > 0 && !bio_full(bio, len)) { struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (page_is_mergeable(bv, page, len, off, same_page)) { From fc5db58539b49351e76f19817ed1102bf7c712d0 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 16 Oct 2019 18:38:16 +0800 Subject: [PATCH 042/146] x86/quirks: Disable HPET on Intel Coffe Lake platforms Some Coffee Lake platforms have a skewed HPET timer once the SoCs entered PC10, which in consequence marks TSC as unstable because HPET is used as watchdog clocksource for TSC. Harry Pan tried to work around it in the clocksource watchdog code [1] thereby creating a circular dependency between HPET and TSC. This also ignores the fact, that HPET is not only unsuitable as watchdog clocksource on these systems, it becomes unusable in general. Disable HPET on affected platforms. Suggested-by: Feng Tang Signed-off-by: Kai-Heng Feng Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203183 Link: https://lore.kernel.org/lkml/20190516090651.1396-1-harry.pan@intel.com/ [1] Link: https://lkml.kernel.org/r/20191016103816.30650-1-kai.heng.feng@canonical.com --- arch/x86/kernel/early-quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6f6b1d04dadf..4cba91ec8049 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3ec4, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} From 2f216a8507153578efc309c821528a6b81628cd2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 1 Nov 2019 16:20:24 +0200 Subject: [PATCH 043/146] drm/i915: update rawclk also on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since CNP it's possible for rawclk to have two different values, 19.2 and 24 MHz. If the value indicated by SFUSE_STRAP register is different from the power on default for PCH_RAWCLK_FREQ, we'll end up having a mismatch between the rawclk hardware and software states after suspend/resume. On previous platforms this used to work by accident, because the power on defaults worked just fine. Update the rawclk also on resume. The natural place to do this would be intel_modeset_init_hw(), however VLV/CHV need it done before intel_power_domains_init_hw(). Thus put it there even if it feels slightly out of place. v2: Call intel_update_rawclck() in intel_power_domains_init_hw() for all platforms (Ville). Reported-by: Shawn Lee Cc: Shawn Lee Cc: Ville Syrjala Reviewed-by: Ville Syrjälä Tested-by: Shawn Lee Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191101142024.13877-1-jani.nikula@intel.com (cherry picked from commit 59ed05ccdded5eb18ce012eff3d01798ac8535fa) Cc: # v4.15+ Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display_power.c | 3 +++ drivers/gpu/drm/i915/i915_drv.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 12099760d99e..c002f234ff31 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4896,6 +4896,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) power_domains->initializing = true; + /* Must happen before power domain init on VLV/CHV */ + intel_update_rawclk(i915); + if (INTEL_GEN(i915) >= 11) { icl_display_core_init(i915, resume); } else if (IS_CANNONLAKE(i915)) { diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index bb6f86c7067a..916e6ca86a1d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -364,9 +364,6 @@ static int i915_driver_modeset_probe(struct drm_device *dev) if (ret) goto cleanup_vga_client; - /* must happen before intel_power_domains_init_hw() on VLV/CHV */ - intel_update_rawclk(dev_priv); - intel_power_domains_init_hw(dev_priv, false); intel_csr_ucode_init(dev_priv); From 6d6dd528d5af05dc2d0c773951ed68d630a0c3f1 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 12 Nov 2019 16:03:41 +0100 Subject: [PATCH 044/146] net/smc: fix refcount non-blocking connect() -part 2 If an SMC socket is immediately terminated after a non-blocking connect() has been called, a memory leak is possible. Due to the sock_hold move in commit 301428ea3708 ("net/smc: fix refcounting for non-blocking connect()") an extra sock_put() is needed in smc_connect_work(), if the internal TCP socket is aborted and cancels the sk_stream_wait_connect() of the connect worker. Reported-by: syzbot+4b73ad6fc767e576e275@syzkaller.appspotmail.com Fixes: 301428ea3708 ("net/smc: fix refcounting for non-blocking connect()") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 47946f489fd4..8edf1619f0e4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -796,6 +796,7 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = EPIPE; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); + sock_put(&smc->sk); /* passive closing */ goto out; } From 549766ac2ac1f6c8bb85906bbcea759541bb19a2 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 12 Nov 2019 16:47:08 -0800 Subject: [PATCH 045/146] Input: synaptics-rmi4 - clear IRQ enables for F54 The driver for F54 just polls the status and doesn't even have a IRQ handler registered. Make sure to disable all F54 IRQs, so we don't crash the kernel on a nonexistent handler. Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20191105114402.6009-1-l.stach@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f54.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 4841354af0d7..484ae1f97330 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -601,7 +601,7 @@ static int rmi_f54_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask); return 0; } From 79aae6acbef16f720a7949f8fc6ac69816c79d62 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 12 Nov 2019 17:04:54 -0800 Subject: [PATCH 046/146] Input: cyttsp4_core - fix use after free bug The device md->input is used after it is released. Setting the device data to NULL is unnecessary as the device is never used again. Instead, md->input should be assigned NULL to avoid accessing the freed memory accidently. Besides, checking md->si against NULL is superfluous as it points to a variable address, which cannot be NULL. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/1572936379-6423-1-git-send-email-bianpan2016@163.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp4_core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index 4b22d49a0f49..6bcffc930384 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -1990,11 +1990,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) /* get sysinfo */ md->si = &cd->sysinfo; - if (!md->si) { - dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n", - __func__, md->si); - goto error_get_sysinfo; - } rc = cyttsp4_setup_input_device(cd); if (rc) @@ -2004,8 +1999,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) error_init_input: input_free_device(md->input); -error_get_sysinfo: - input_set_drvdata(md->input, NULL); error_alloc_failed: dev_err(dev, "%s failed.\n", __func__); return rc; From 5aa4277d4368c099223bbcd3a9086f3351a12ce9 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Nov 2019 18:21:52 +0200 Subject: [PATCH 047/146] dpaa2-eth: free already allocated channels on probe defer The setup_dpio() function tries to allocate a number of channels equal to the number of CPUs online. When there are not enough DPCON objects already probed, the function will return EPROBE_DEFER. When this happens, the already allocated channels are not freed. This results in the incapacity of properly probing the next time around. Fix this by freeing the channels on the error path. Fixes: d7f5a9d89a55 ("dpaa2-eth: defer probe on object allocate") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 19379bae0144..bf5add954181 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2232,8 +2232,16 @@ err_set_cdan: err_service_reg: free_channel(priv, channel); err_alloc_ch: - if (err == -EPROBE_DEFER) + if (err == -EPROBE_DEFER) { + for (i = 0; i < priv->num_channels; i++) { + channel = priv->channel[i]; + nctx = &channel->nctx; + dpaa2_io_service_deregister(channel->dpio, nctx, dev); + free_channel(priv, channel); + } + priv->num_channels = 0; return err; + } if (cpumask_empty(&priv->dpio_cpumask)) { dev_err(dev, "No cpu with an affine DPIO/DPCON\n"); From ff51ff84d82aea5a889b85f2b9fb3aa2b8691668 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 1 Oct 2019 11:18:37 +0200 Subject: [PATCH 048/146] sched/core: Avoid spurious lock dependencies While seemingly harmless, __sched_fork() does hrtimer_init(), which, when DEBUG_OBJETS, can end up doing allocations. This then results in the following lock order: rq->lock zone->lock.rlock batched_entropy_u64.lock Which in turn causes deadlocks when we do wakeups while holding that batched_entropy lock -- as the random code does. Solve this by moving __sched_fork() out from under rq->lock. This is safe because nothing there relies on rq->lock, as also evident from the other __sched_fork() callsite. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Qian Cai Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: bigeasy@linutronix.de Cc: cl@linux.com Cc: keescook@chromium.org Cc: penberg@kernel.org Cc: rientjes@google.com Cc: thgarnie@google.com Cc: tytso@mit.edu Cc: will@kernel.org Fixes: b7d5dc21072c ("random: add a spinlock_t to struct batched_entropy") Link: https://lkml.kernel.org/r/20191001091837.GK4536@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f2eb3629070..33cd25051f3a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6019,10 +6019,11 @@ void init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; + __sched_fork(0, idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock); - __sched_fork(0, idle); idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; From b90f7c9d2198d789709390280a43e0a46345682b Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 30 Oct 2019 12:18:29 +0100 Subject: [PATCH 049/146] sched/pelt: Fix update of blocked PELT ordering update_cfs_rq_load_avg() can call cpufreq_update_util() to trigger an update of the frequency. Make sure that RT, DL and IRQ PELT signals have been updated before calling cpufreq. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: dsmythies@telus.net Cc: juri.lelli@redhat.com Cc: mgorman@suse.de Cc: rostedt@goodmis.org Fixes: 371bf4273269 ("sched/rt: Add rt_rq utilization tracking") Fixes: 3727e0e16340 ("sched/dl: Add dl_rq utilization tracking") Fixes: 91c27493e78d ("sched/irq: Add IRQ utilization tracking") Link: https://lkml.kernel.org/r/1572434309-32512-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 22a2fed29054..69a81a5709ff 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7547,6 +7547,19 @@ static void update_blocked_averages(int cpu) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); + /* + * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure + * that RT, DL and IRQ signals have been updated before updating CFS. + */ + curr_class = rq->curr->sched_class; + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); + update_irq_load_avg(rq, 0); + + /* Don't need periodic decay once load/util_avg are null */ + if (others_have_blocked(rq)) + done = false; + /* * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. @@ -7574,14 +7587,6 @@ static void update_blocked_averages(int cpu) done = false; } - curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); - update_irq_load_avg(rq, 0); - /* Don't need periodic decay once load/util_avg are null */ - if (others_have_blocked(rq)) - done = false; - update_blocked_load_status(rq, !done); rq_unlock_irqrestore(rq, &rf); } @@ -7642,12 +7647,18 @@ static inline void update_blocked_averages(int cpu) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); + /* + * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure + * that RT, DL and IRQ signals have been updated before updating CFS. + */ curr_class = rq->curr->sched_class; update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); + + update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); + update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq)); rq_unlock_irqrestore(rq, &rf); } From 09f4e8f05d85bfc98fe9227e988a7c1b3ec416ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2019 12:51:04 +0100 Subject: [PATCH 050/146] perf/core: Disallow uncore-cgroup events While discussing uncore event scheduling, I noticed we do not in fact seem to dis-allow making uncore-cgroup events. Such events make no sense what so ever because the cgroup is a CPU local state where uncore counts across a number of CPUs. Disallow them. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- kernel/events/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index aec8dba2bea4..022a34b66e60 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10535,6 +10535,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_ns; } + /* + * Disallow uncore-cgroup events, they don't make sense as the cgroup will + * be different on other CPUs in the uncore mask. + */ + if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) { + err = -EINVAL; + goto err_pmu; + } + if (event->attr.aux_output && !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { err = -EOPNOTSUPP; From 00496fe5e09e8c8bb115540e7e3470553cd07a5c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 1 Nov 2019 17:12:48 +0200 Subject: [PATCH 051/146] perf/aux: Fix the aux_output group inheritance fix Commit f733c6b508bc ("perf/core: Fix inheritance of aux_output groups") adds a NULL pointer dereference in case inherit_group() races with perf_release(), which causes the below crash: > BUG: kernel NULL pointer dereference, address: 000000000000010b > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 3b203b067 P4D 3b203b067 PUD 3b2040067 PMD 0 > Oops: 0000 [#1] SMP KASAN > CPU: 0 PID: 315 Comm: exclusive-group Tainted: G B 5.4.0-rc3-00181-g72e1839403cb-dirty #878 > RIP: 0010:perf_get_aux_event+0x86/0x270 > Call Trace: > ? __perf_read_group_add+0x3b0/0x3b0 > ? __kasan_check_write+0x14/0x20 > ? __perf_event_init_context+0x154/0x170 > inherit_task_group.isra.0.part.0+0x14b/0x170 > perf_event_init_task+0x296/0x4b0 Fix this by skipping over events that are getting closed, in the inheritance path. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: f733c6b508bc ("perf/core: Fix inheritance of aux_output groups") Link: https://lkml.kernel.org/r/20191101151248.47327-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 022a34b66e60..b752bd3aa03b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11899,7 +11899,7 @@ static int inherit_group(struct perf_event *parent_event, if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); - if (sub->aux_event == parent_event && + if (sub->aux_event == parent_event && child_ctr && !perf_get_aux_event(child_ctr, leader)) return -EINVAL; } From f25d8ba9e1b204b90fbf55970ea6e68955006068 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 30 Oct 2019 15:47:30 +0200 Subject: [PATCH 052/146] perf/core: Reattach a misplaced comment A comment is in a wrong place in perf_event_create_kernel_counter(). Fix that. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20191030134731.5437-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b752bd3aa03b..e18d8d34ca77 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11331,10 +11331,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct perf_event *event; int err; - /* - * Get the target context (task or percpu): - */ - event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler, context, -1); if (IS_ERR(event)) { @@ -11345,6 +11341,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, /* Mark owner so we could distinguish it from user events. */ event->owner = TASK_TOMBSTONE; + /* + * Get the target context (task or percpu): + */ ctx = find_get_context(event->pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); From dce5affb94eb54edfff17727a6240a6a5d998666 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 30 Oct 2019 15:47:31 +0200 Subject: [PATCH 053/146] perf/aux: Disallow aux_output for kernel events Commit ab43762ef0109 ("perf: Allow normal events to output AUX data") added 'aux_output' bit to the attribute structure, which relies on AUX events and grouping, neither of which is supported for the kernel events. This notwithstanding, attempts have been made to use it in the kernel code, suggesting the necessity of an explicit hard -EINVAL. Fix this by rejecting attributes with aux_output set for kernel events. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20191030134731.5437-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index e18d8d34ca77..7655441065a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11331,6 +11331,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct perf_event *event; int err; + /* + * Grouping is not supported for kernel events, neither is 'AUX', + * make sure the caller's intentions are adjusted. + */ + if (attr->aux_output) + return ERR_PTR(-EINVAL); + event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler, context, -1); if (IS_ERR(event)) { From 697d877849d4b34ab58d7078d6930bad0ef6fc66 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 5 Nov 2019 09:57:02 +0200 Subject: [PATCH 054/146] perf/core: Consistently fail fork on allocation failures Commit: 313ccb9615948 ("perf: Allocate context task_ctx_data for child event") makes the inherit path skip over the current event in case of task_ctx_data allocation failure. This, however, is inconsistent with allocation failures in perf_event_alloc(), which would abort the fork. Correct this by returning an error code on task_ctx_data allocation failure and failing the fork in that case. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20191105075702.60319-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7655441065a9..466e333db1f3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11802,7 +11802,7 @@ inherit_event(struct perf_event *parent_event, GFP_KERNEL); if (!child_ctx->task_ctx_data) { free_event(child_event); - return NULL; + return ERR_PTR(-ENOMEM); } } From d00dbd29814236ad128ff9517e8f7af6b6ef4ba0 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 6 Nov 2019 13:25:27 +0000 Subject: [PATCH 055/146] perf/core: Fix missing static inline on perf_cgroup_switch() It looks like a "static inline" has been missed in front of the empty definition of perf_cgroup_switch() under certain configurations. Fixes the following sparse warning: kernel/events/core.c:1035:1: warning: symbol 'perf_cgroup_switch' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20191106132527.19977-1-ben.dooks@codethink.co.uk Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 466e333db1f3..00a014670ed0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1031,7 +1031,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, { } -void +static inline void perf_cgroup_switch(struct task_struct *task, struct task_struct *next) { } From 975987e7015bb12a482df7f14fd524417d2c8e8f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 7 Nov 2019 11:55:42 +0100 Subject: [PATCH 056/146] can: af_can: export can_sock_destruct() In j1939 we need our own struct sock::sk_destruct callback. Export the generic af_can can_sock_destruct() that allows us to chain-call it. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- include/linux/can/core.h | 1 + net/can/af_can.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 8339071ab08b..e20a0cd09ba5 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -65,5 +65,6 @@ extern void can_rx_unregister(struct net *net, struct net_device *dev, void *data); extern int can_send(struct sk_buff *skb, int loop); +void can_sock_destruct(struct sock *sk); #endif /* !_CAN_CORE_H */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 5518a7d9eed9..128d37a4c2e0 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -86,11 +86,12 @@ static atomic_t skbcounter = ATOMIC_INIT(0); /* af_can socket functions */ -static void can_sock_destruct(struct sock *sk) +void can_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_error_queue); } +EXPORT_SYMBOL(can_sock_destruct); static const struct can_proto *can_get_proto(int protocol) { From 25fe97cb7620ef2e6b4f44ef0de4e371adf6c1d0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 7 Nov 2019 11:57:36 +0100 Subject: [PATCH 057/146] can: j1939: move j1939_priv_put() into sk_destruct callback This patch delays the j1939_priv_put() until the socket is destroyed via the sk_destruct callback, to avoid use-after-free problems. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/socket.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 4d8ba701e15d..aee94b09ef08 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -78,7 +78,6 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) { jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - jsk->priv = priv; spin_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); @@ -91,7 +90,6 @@ static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) list_del_init(&jsk->list); spin_unlock_bh(&priv->j1939_socks_lock); - jsk->priv = NULL; j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; } @@ -349,6 +347,34 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) spin_unlock_bh(&priv->j1939_socks_lock); } +static void j1939_sk_sock_destruct(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* This function will be call by the generic networking code, when then + * the socket is ultimately closed (sk->sk_destruct). + * + * The race between + * - processing a received CAN frame + * (can_receive -> j1939_can_recv) + * and accessing j1939_priv + * ... and ... + * - closing a socket + * (j1939_can_rx_unregister -> can_rx_unregister) + * and calling the final j1939_priv_put() + * + * is avoided by calling the final j1939_priv_put() from this + * RCU deferred cleanup call. + */ + if (jsk->priv) { + j1939_priv_put(jsk->priv); + jsk->priv = NULL; + } + + /* call generic CAN sock destruct */ + can_sock_destruct(sk); +} + static int j1939_sk_init(struct sock *sk) { struct j1939_sock *jsk = j1939_sk(sk); @@ -371,6 +397,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + sk->sk_destruct = j1939_sk_sock_destruct; return 0; } @@ -443,6 +470,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) } jsk->ifindex = addr->can_ifindex; + + /* the corresponding j1939_priv_put() is called via + * sk->sk_destruct, which points to j1939_sk_sock_destruct() + */ + j1939_priv_get(priv); + jsk->priv = priv; } /* set default transmit pgn */ From c48c8c1e2e81e71a0f13b83cc5124333f3750064 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 5 Nov 2019 11:07:08 +0100 Subject: [PATCH 058/146] can: j1939: main: j1939_ndev_to_priv(): avoid crash if can_ml_priv is NULL This patch avoids a NULL pointer deref crash if ndev->ml_priv is NULL. Reported-by: syzbot+95c8e0d9dffde15b6c5c@syzkaller.appspotmail.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index def2f813ffce..8dc935dc2e54 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -207,6 +207,9 @@ static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev) { struct can_ml_priv *can_ml_priv = ndev->ml_priv; + if (!can_ml_priv) + return NULL; + return can_ml_priv->j1939_priv; } From fd81ebfe7975b9a69494430676d16f7125aac3ee Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 5 Nov 2019 14:31:58 +0100 Subject: [PATCH 059/146] can: j1939: socket: rework socket locking for j1939_sk_release() and j1939_sk_sendmsg() j1939_sk_sendmsg() should be protected by lock_sock() to avoid race with j1939_sk_bind() and j1939_sk_release(). Reported-by: syzbot+afd421337a736d6c1ee6@syzkaller.appspotmail.com Reported-by: syzbot+6d04f6a1b31a0ae12ca9@syzkaller.appspotmail.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/socket.c | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index aee94b09ef08..de09b0a65791 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -593,8 +593,8 @@ static int j1939_sk_release(struct socket *sock) if (!sk) return 0; - jsk = j1939_sk(sk); lock_sock(sk); + jsk = j1939_sk(sk); if (jsk->state & J1939_SOCK_BOUND) { struct j1939_priv *priv = jsk->priv; @@ -1092,51 +1092,72 @@ static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct j1939_sock *jsk = j1939_sk(sk); - struct j1939_priv *priv = jsk->priv; + struct j1939_priv *priv; int ifindex; int ret; + lock_sock(sock->sk); /* various socket state tests */ - if (!(jsk->state & J1939_SOCK_BOUND)) - return -EBADFD; + if (!(jsk->state & J1939_SOCK_BOUND)) { + ret = -EBADFD; + goto sendmsg_done; + } + priv = jsk->priv; ifindex = jsk->ifindex; - if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) + if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) { /* no source address assigned yet */ - return -EBADFD; + ret = -EBADFD; + goto sendmsg_done; + } /* deal with provided destination address info */ if (msg->msg_name) { struct sockaddr_can *addr = msg->msg_name; - if (msg->msg_namelen < J1939_MIN_NAMELEN) - return -EINVAL; + if (msg->msg_namelen < J1939_MIN_NAMELEN) { + ret = -EINVAL; + goto sendmsg_done; + } - if (addr->can_family != AF_CAN) - return -EINVAL; + if (addr->can_family != AF_CAN) { + ret = -EINVAL; + goto sendmsg_done; + } - if (addr->can_ifindex && addr->can_ifindex != ifindex) - return -EBADFD; + if (addr->can_ifindex && addr->can_ifindex != ifindex) { + ret = -EBADFD; + goto sendmsg_done; + } if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && - !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) - return -EINVAL; + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) { + ret = -EINVAL; + goto sendmsg_done; + } if (!addr->can_addr.j1939.name && addr->can_addr.j1939.addr == J1939_NO_ADDR && - !sock_flag(sk, SOCK_BROADCAST)) + !sock_flag(sk, SOCK_BROADCAST)) { /* broadcast, but SO_BROADCAST not set */ - return -EACCES; + ret = -EACCES; + goto sendmsg_done; + } } else { if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR && - !sock_flag(sk, SOCK_BROADCAST)) + !sock_flag(sk, SOCK_BROADCAST)) { /* broadcast, but SO_BROADCAST not set */ - return -EACCES; + ret = -EACCES; + goto sendmsg_done; + } } ret = j1939_sk_send_loop(priv, sk, msg, size); +sendmsg_done: + release_sock(sock->sk); + return ret; } From d966635b384b9571a43bd38c61f280c47eb564ad Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 7 Nov 2019 18:46:38 +0100 Subject: [PATCH 060/146] can: j1939: transport: make sure the aborted session will be deactivated only once j1939_session_cancel() was modifying session->state without protecting it by locks and without checking actual state of the session. This patch moves j1939_tp_set_rxtimeout() into j1939_session_cancel() and adds the missing locking. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/transport.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index e5f1a56994c6..ecdedfc0b10c 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1042,12 +1042,13 @@ j1939_session_deactivate_activate_next(struct j1939_session *session) j1939_sk_queue_activate_next(session); } -static void j1939_session_cancel(struct j1939_session *session, +static void __j1939_session_cancel(struct j1939_session *session, enum j1939_xtp_abort err) { struct j1939_priv *priv = session->priv; WARN_ON_ONCE(!err); + lockdep_assert_held(&session->priv->active_session_list_lock); session->err = j1939_xtp_abort_to_errno(priv, err); /* do not send aborts on incoming broadcasts */ @@ -1062,6 +1063,20 @@ static void j1939_session_cancel(struct j1939_session *session, j1939_sk_send_loop_abort(session->sk, session->err); } +static void j1939_session_cancel(struct j1939_session *session, + enum j1939_xtp_abort err) +{ + j1939_session_list_lock(session->priv); + + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_WAITING_ABORT) { + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + __j1939_session_cancel(session, err); + } + + j1939_session_list_unlock(session->priv); +} + static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) { struct j1939_session *session = @@ -1108,8 +1123,6 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n", __func__, session, ret); if (session->skcb.addr.type != J1939_SIMPLE) { - j1939_tp_set_rxtimeout(session, - J1939_XTP_ABORT_TIMEOUT_MS); j1939_session_cancel(session, J1939_XTP_ABORT_OTHER); } else { session->err = ret; @@ -1169,7 +1182,7 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) hrtimer_start(&session->rxtimer, ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), HRTIMER_MODE_REL_SOFT); - j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); + __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); } j1939_session_list_unlock(session->priv); } @@ -1375,7 +1388,6 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) out_session_cancel: j1939_session_timers_cancel(session); - j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); j1939_session_cancel(session, err); } @@ -1572,7 +1584,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, /* RTS on active session */ j1939_session_timers_cancel(session); - j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); } @@ -1583,7 +1594,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, session->last_cmd); j1939_session_timers_cancel(session); - j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); return -EBUSY; @@ -1785,7 +1795,6 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, out_session_cancel: j1939_session_timers_cancel(session); - j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); j1939_session_put(session); } From 62ebce1dc1fa649a1c54db02f1a3c409bb0529ec Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 7 Nov 2019 18:51:40 +0100 Subject: [PATCH 061/146] can: j1939: make sure socket is held as long as session exists We link the socket to the session to be able provide socket specific notifications. For example messages over error queue. We need to keep the socket held, while we have a reference to it. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index ecdedfc0b10c..afc2adfd97e4 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -255,6 +255,7 @@ static void __j1939_session_drop(struct j1939_session *session) return; j1939_sock_pending_del(session->sk); + sock_put(session->sk); } static void j1939_session_destroy(struct j1939_session *session) @@ -1875,6 +1876,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, return ERR_PTR(-ENOMEM); /* skb is recounted in j1939_session_new() */ + sock_hold(skb->sk); session->sk = skb->sk; session->transmission = true; session->pkt.total = (size + 6) / 7; From 8d7a5f000e235f1dfc61862197d4e8e72c18c6fc Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 8 Nov 2019 14:02:10 +0100 Subject: [PATCH 062/146] can: j1939: transport: j1939_cancel_active_session(): use hrtimer_try_to_cancel() instead of hrtimer_cancel() This part of the code protected by lock used in the hrtimer as well. Using hrtimer_cancel() will trigger dead lock. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/transport.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index afc2adfd97e4..0c62b8fc4b20 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2039,7 +2039,11 @@ int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk) &priv->active_session_list, active_session_list_entry) { if (!sk || sk == session->sk) { - j1939_session_timers_cancel(session); + if (hrtimer_try_to_cancel(&session->txtimer) == 1) + j1939_session_put(session); + if (hrtimer_try_to_cancel(&session->rxtimer) == 1) + j1939_session_put(session); + session->err = ESHUTDOWN; j1939_session_deactivate_locked(session); } From ddeeb7d4822ed06d79fc15e822b70dce3fa77e39 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 9 Nov 2019 16:11:18 +0100 Subject: [PATCH 063/146] can: j1939: j1939_can_recv(): add priv refcounting j1939_can_recv() can be called in parallel with socket release. In this case sk_release and sk_destruct can be done earlier than j1939_can_recv() is processed. Reported-by: syzbot+ca172a0ac477ac90f045@syzkaller.appspotmail.com Reported-by: syzbot+07ca5bce8530070a5650@syzkaller.appspotmail.com Reported-by: syzbot+a47537d3964ef6c874e1@syzkaller.appspotmail.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel --- net/can/j1939/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 8dc935dc2e54..2afcf27c72c8 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -51,6 +51,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) if (!skb) return; + j1939_priv_get(priv); can_skb_set_owner(skb, iskb->sk); /* get a pointer to the header of the skb @@ -104,6 +105,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) j1939_simple_recv(priv, skb); j1939_sk_recv(priv, skb); done: + j1939_priv_put(priv); kfree_skb(skb); } From 4a15d574e68afffbe8d7265e015cda2ac2a248ec Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 8 Nov 2019 13:57:14 +0100 Subject: [PATCH 064/146] can: j1939: warn if resources are still linked on destroy j1939_session_destroy() and __j1939_priv_release() should be called only if session, ecu or socket are not linked or used by any one else. If at least one of these resources is linked, then the reference counting is broken somewhere. This warning will be triggered before KASAN will do, and will make it easier to debug initial issue. This works on platforms without KASAN support. Signed-off-by: Oleksij Rempel --- net/can/j1939/main.c | 4 ++++ net/can/j1939/transport.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 2afcf27c72c8..137054bff9ec 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -152,6 +152,10 @@ static void __j1939_priv_release(struct kref *kref) netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv); + WARN_ON_ONCE(!list_empty(&priv->active_session_list)); + WARN_ON_ONCE(!list_empty(&priv->ecus)); + WARN_ON_ONCE(!list_empty(&priv->j1939_socks)); + dev_put(ndev); kfree(priv); } diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 0c62b8fc4b20..9f99af5b0b11 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -267,6 +267,9 @@ static void j1939_session_destroy(struct j1939_session *session) netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry)); + WARN_ON_ONCE(!list_empty(&session->active_session_list_entry)); + skb_queue_purge(&session->skb_queue); __j1939_session_drop(session); j1939_priv_put(session->priv); From 528699317dd6dc722dccc11b68800cf945109390 Mon Sep 17 00:00:00 2001 From: Henry Lin Date: Wed, 13 Nov 2019 10:14:19 +0800 Subject: [PATCH 065/146] ALSA: usb-audio: not submit urb for stopped endpoint While output urb's snd_complete_urb() is executing, calling prepare_outbound_urb() may cause endpoint stopped before prepare_outbound_urb() returns and result in next urb submitted to stopped endpoint. usb-audio driver cannot re-use it afterwards as the urb is still hold by usb stack. This change checks EP_FLAG_RUNNING flag after prepare_outbound_urb() again to let snd_complete_urb() know the endpoint already stopped and does not submit next urb. Below kind of error will be fixed: [ 213.153103] usb 1-2: timeout: still 1 active urbs on EP #1 [ 213.164121] usb 1-2: cannot submit urb 0, error -16: unknown error Signed-off-by: Henry Lin Cc: Link: https://lore.kernel.org/r/20191113021420.13377-1-henryl@nvidia.com Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a2ab8e8d3a93..4a9a2f6ef5a4 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -388,6 +388,9 @@ static void snd_complete_urb(struct urb *urb) } prepare_outbound_urb(ep, ctx); + /* can be stopped during prepare callback */ + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ From f5cdc9d4003a2f66ea57b3edd3e04acc2b1a4439 Mon Sep 17 00:00:00 2001 From: paulhsia Date: Wed, 13 Nov 2019 01:17:14 +0800 Subject: [PATCH 066/146] ALSA: pcm: Fix stream lock usage in snd_pcm_period_elapsed() If the nullity check for `substream->runtime` is outside of the lock region, it is possible to have a null runtime in the critical section if snd_pcm_detach_substream is called right before the lock. Signed-off-by: paulhsia Link: https://lore.kernel.org/r/20191112171715.128727-2-paulhsia@chromium.org Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index d80041ea4e01..2236b5e0c1f2 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1782,11 +1782,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime; unsigned long flags; - if (PCM_RUNTIME_CHECK(substream)) + if (snd_BUG_ON(!substream)) return; - runtime = substream->runtime; snd_pcm_stream_lock_irqsave(substream, flags); + if (PCM_RUNTIME_CHECK(substream)) + goto _unlock; + runtime = substream->runtime; + if (!snd_pcm_running(substream) || snd_pcm_update_hw_ptr0(substream, 1) < 0) goto _end; @@ -1797,6 +1800,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) #endif _end: kill_fasync(&runtime->fasync, SIGIO, POLL_IN); + _unlock: snd_pcm_stream_unlock_irqrestore(substream, flags); } EXPORT_SYMBOL(snd_pcm_period_elapsed); From 65e1f38d9a2f07d4b81f369864c105880e47bd5a Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Tue, 12 Nov 2019 15:45:20 +0200 Subject: [PATCH 067/146] scripts/tools-support-relr.sh: un-quote variables When the CC variable contains quotes, e.g. when using ccache (make CC="ccache "), this script always fails, so CONFIG_RELR is never enabled, even when the toolchain supports this feature. Removing the /dev/null redirect and invoking the script manually shows the issue: $ CC='/usr/bin/ccache clang' ./scripts/tools-support-relr.sh ./scripts/tools-support-relr.sh: 7: ./scripts/tools-support-relr.sh: /usr/bin/ccache clang: not found Fix this by un-quoting the variables. Before: $ make ARCH=arm64 CC='/usr/bin/ccache clang' LD=ld.lld \ NM=llvm-nm OBJCOPY=llvm-objcopy defconfig $ grep RELR .config CONFIG_ARCH_HAS_RELR=y With this change: $ make ARCH=arm64 CC='/usr/bin/ccache clang' LD=ld.lld \ NM=llvm-nm OBJCOPY=llvm-objcopy defconfig $ grep RELR .config CONFIG_TOOLS_SUPPORT_RELR=y CONFIG_ARCH_HAS_RELR=y CONFIG_RELR=y Fixes: 5cf896fb6be3 ("arm64: Add support for relocating the kernel with RELR relocations") Reported-by: Dmitry Golovin Reviewed-by: Nathan Chancellor Reviewed-by: Masahiro Yamada Link: https://github.com/ClangBuiltLinux/linux/issues/769 Cc: Peter Collingbourne Signed-off-by: Ilie Halip Signed-off-by: Will Deacon --- scripts/tools-support-relr.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh index 97a2c844a95e..45e8aa360b45 100755 --- a/scripts/tools-support-relr.sh +++ b/scripts/tools-support-relr.sh @@ -4,13 +4,13 @@ tmp_file=$(mktemp) trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT -cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1 +cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1 void *p = &p; END -"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file +$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file # Despite printing an error message, GNU nm still exits with exit code 0 if it # sees a relr section. So we need to check that nothing is printed to stderr. -test -z "$("$NM" $tmp_file 2>&1 >/dev/null)" +test -z "$($NM $tmp_file 2>&1 >/dev/null)" -"$OBJCOPY" -O binary $tmp_file $tmp_file.bin +$OBJCOPY -O binary $tmp_file $tmp_file.bin From c8eafe1495303bfd0eedaa8156b1ee9082ee9642 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Thu, 7 Nov 2019 06:36:36 +0800 Subject: [PATCH 068/146] x86/resctrl: Fix potential lockdep warning rdtgroup_cpus_write() and mkdir_rdt_prepare() call rdtgroup_kn_lock_live() -> kernfs_to_rdtgroup() to get 'rdtgrp', and then call the rdt_last_cmd_{clear,puts,...}() functions which will check if rdtgroup_mutex is held/requires its caller to hold rdtgroup_mutex. But if 'rdtgrp' returned from kernfs_to_rdtgroup() is NULL, rdtgroup_mutex is not held and calling rdt_last_cmd_{clear,puts,...}() will result in a self-incurred, potential lockdep warning. Remove the rdt_last_cmd_{clear,puts,...}() calls in these two paths. Just returning error should be sufficient to report to the user that the entry doesn't exist any more. [ bp: Massage. ] Fixes: 94457b36e8a5 ("x86/intel_rdt: Add diagnostics when writing the cpus file") Fixes: cfd0f34e4cd5 ("x86/intel_rdt: Add diagnostics when making directories") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/1573079796-11713-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a46dee8e78db..2e3b06d6bbc6 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); - rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; - rdt_last_cmd_puts("Directory was removed\n"); goto unlock; } @@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); - rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; - rdt_last_cmd_puts("Directory was removed\n"); goto out_unlock; } From 0b8e7bbde5e7e2c419567e1ee29587dae3b78ee3 Mon Sep 17 00:00:00 2001 From: Yunhao Tian Date: Wed, 13 Nov 2019 13:27:25 +0000 Subject: [PATCH 069/146] drm/sun4i: tcon: Set min division of TCON0_DCLK to 1. The datasheet of V3s (and various other chips) wrote that TCON0_DCLK_DIV can be >= 1 if only dclk is used, and must >= 6 if dclk1 or dclk2 is used. As currently neither dclk1 nor dclk2 is used (no writes to these bits), let's set minimal division to 1. If this minimal division is 6, some common dot clock frequencies can't be produced (e.g. 30MHz will not be possible and will fallback to 25MHz), which is obviously not an expected behaviour. Signed-off-by: Yunhao Tian Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/linux-arm-kernel/MN2PR08MB57905AD8A00C08DA219377C989760@MN2PR08MB5790.namprd08.prod.outlook.com/ --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 04c721d0d3b9..b89439ed210d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -488,7 +488,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, WARN_ON(!tcon->quirks->has_channel_0); - tcon->dclk_min_div = 6; + tcon->dclk_min_div = 1; tcon->dclk_max_div = 127; sun4i_tcon0_mode_set_common(tcon, mode); From 13fb59276b47db556370bba53b5b55f3849dd8c9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Nov 2019 15:47:06 +0100 Subject: [PATCH 070/146] kvm: x86: disable shattered huge page recovery for PREEMPT_RT. If a huge page is recovered (and becomes no executable) while another thread is executing it, the resulting contention on mmu_lock can cause latency spikes. Disabling recovery for PREEMPT_RT kernels fixes this issue. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fd6012eef9c9..cf718fa23dff 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -51,7 +51,12 @@ extern bool itlb_multihit_kvm_mitigation; static int __read_mostly nx_huge_pages = -1; +#ifdef CONFIG_PREEMPT_RT +/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ +static uint __read_mostly nx_huge_pages_recovery_ratio = 0; +#else static uint __read_mostly nx_huge_pages_recovery_ratio = 60; +#endif static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); From e37f9f139f62deddff90c7298ae3a85026a71067 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 13 Nov 2019 13:51:15 +0100 Subject: [PATCH 071/146] selftests: kvm: fix build with glibc >= 2.30 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glibc-2.30 gained gettid() wrapper, selftests fail to compile: lib/assert.c:58:14: error: static declaration of ‘gettid’ follows non-static declaration 58 | static pid_t gettid(void) | ^~~~~~ In file included from /usr/include/unistd.h:1170, from include/test_util.h:18, from lib/assert.c:10: /usr/include/bits/unistd_ext.h:34:16: note: previous declaration of ‘gettid’ was here 34 | extern __pid_t gettid (void) __THROW; | ^~~~~~ Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/assert.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 4911fc77d0f6..d1cf9f6e0e6b 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -55,7 +55,7 @@ static void test_dump_stack(void) #pragma GCC diagnostic pop } -static pid_t gettid(void) +static pid_t _gettid(void) { return syscall(SYS_gettid); } @@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" " pid=%d tid=%d - %s\n", - file, line, exp_str, getpid(), gettid(), + file, line, exp_str, getpid(), _gettid(), strerror(errno)); test_dump_stack(); if (fmt) { From 6cbee2b9eccfc1c93a03aaa286ec93331f7c95e7 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 13 Nov 2019 09:15:21 +0800 Subject: [PATCH 072/146] KVM: X86: Reset the three MSR list number variables to 0 in kvm_init_msr_list() When applying commit 7a5ee6edb42e ("KVM: X86: Fix initialization of MSR lists"), it forgot to reset the three MSR lists number varialbes to 0 while removing the useless conditionals. Fixes: 7a5ee6edb42e (KVM: X86: Fix initialization of MSR lists) Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7db5c8ef35dd..5d530521f11d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5130,6 +5130,10 @@ static void kvm_init_msr_list(void) perf_get_x86_pmu_capability(&x86_pmu); + num_msrs_to_save = 0; + num_emulated_msrs = 0; + num_msr_based_features = 0; + for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) { if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0) continue; From b9876e6de123adb52ac693bac08c493e989bd93e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Nov 2019 16:05:23 +0000 Subject: [PATCH 073/146] KVM: Forbid /dev/kvm being opened by a compat task when CONFIG_KVM_COMPAT=n On a system without KVM_COMPAT, we prevent IOCTLs from being issued by a compat task. Although this prevents most silly things from happening, it can still confuse a 32bit userspace that is able to open the kvm device (the qemu test suite seems to be pretty mad with this behaviour). Take a more radical approach and return a -ENODEV to the compat task. Reported-by: Peter Maydell Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 524cff24a68d..6a65ed915c7a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -124,7 +124,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, #else static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } -#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl + +static int kvm_no_compat_open(struct inode *inode, struct file *file) +{ + return is_compat_task() ? -ENODEV : 0; +} +#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ + .open = kvm_no_compat_open #endif static int hardware_enable_all(void); static void hardware_disable_all(void); From 802753cb0b141cf5170ab97fe7e79f5ca10d06b0 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 13 Nov 2019 11:11:10 +0100 Subject: [PATCH 074/146] net: usb: qmi_wwan: add support for Foxconn T77W968 LTE modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are the Foxconn-branded variants of the Dell DW5821e modules, same USB layout as those. The QMI interface is exposed in USB configuration #1: P: Vendor=0489 ProdID=e0b4 Rev=03.18 S: Manufacturer=FII S: Product=T77W968 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 56d334b9ad45..4196c0e32740 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1371,6 +1371,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ + {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From 3b5a39979dafea9d0cd69c7ae06088f7a84cdafa Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 13 Nov 2019 13:45:02 +0200 Subject: [PATCH 075/146] slip: Fix memory leak in slip_open error path Driver/net/can/slcan.c is derived from slip.c. Memory leak was detected by Syzkaller in slcan. Same issue exists in slip.c and this patch is addressing the leak in slip.c. Here is the slcan memory leak trace reported by Syzkaller: BUG: memory leak unreferenced object 0xffff888067f65500 (size 4096): comm "syz-executor043", pid 454, jiffies 4294759719 (age 11.930s) hex dump (first 32 bytes): 73 6c 63 61 6e 30 00 00 00 00 00 00 00 00 00 00 slcan0.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a06eec0d>] __kmalloc+0x18b/0x2c0 [<0000000083306e66>] kvmalloc_node+0x3a/0xc0 [<000000006ac27f87>] alloc_netdev_mqs+0x17a/0x1080 [<0000000061a996c9>] slcan_open+0x3ae/0x9a0 [<000000001226f0f9>] tty_ldisc_open.isra.1+0x76/0xc0 [<0000000019289631>] tty_set_ldisc+0x28c/0x5f0 [<000000004de5a617>] tty_ioctl+0x48d/0x1590 [<00000000daef496f>] do_vfs_ioctl+0x1c7/0x1510 [<0000000059068dbc>] ksys_ioctl+0x99/0xb0 [<000000009a6eb334>] __x64_sys_ioctl+0x78/0xb0 [<0000000053d0332e>] do_syscall_64+0x16f/0x580 [<0000000021b83b99>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000008ea75434>] 0xfffffffffffffff Cc: "David S. Miller" Cc: Oliver Hartkopp Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller --- drivers/net/slip/slip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index cac64b96d545..4d479e3c817d 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -855,6 +855,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + free_netdev(sl->dev); err_exit: rtnl_unlock(); From ed77d88752aea56b33731aee42e7146379b90769 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Nov 2019 14:47:56 -0800 Subject: [PATCH 076/146] Revert "drm/i915/ehl: Update MOCS table for EHL" This reverts commit f4071997f1de016780ec6b79c63d90cd5886ee83. These extra EHL entries won't behave as expected without a bit more work on the kernel side so let's drop them until that kernel work has had a chance to land. Userspace trying to use these new entries won't get the advantage of the new functionality these entries are meant to provide, but at least it won't misbehave. When we do add these back in the future, we'll probably want to explicitly use separate tables for ICL and EHL so that userspace software that mistakenly uses these entries (which are undefined on ICL) sees the same behavior it sees with all the other undefined entries. Cc: Francisco Jerez Cc: Jon Bloomfield Cc: Lucas De Marchi Cc: # v5.3+ Fixes: f4071997f1de ("drm/i915/ehl: Update MOCS table for EHL") Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191112224757.25116-1-matthew.d.roper@intel.com Reviewed-by: Francisco Jerez (cherry picked from commit 046091758b50a5fff79726a31c1391614a3d84c8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_mocs.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 728704bbbe18..55fbbaf4e1bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -199,14 +199,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { MOCS_ENTRY(15, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ L3_3_WB), \ - /* Bypass LLC - Uncached (EHL+) */ \ - MOCS_ENTRY(16, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_1_UC), \ - /* Bypass LLC - L3 (Read-Only) (EHL+) */ \ - MOCS_ENTRY(17, \ - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ - L3_3_WB), \ /* Self-Snoop - L3 + LLC */ \ MOCS_ENTRY(18, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ From 1c602006d1dcb7501ae1c569fdabe1b8e1f082a4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Nov 2019 14:47:57 -0800 Subject: [PATCH 077/146] drm/i915/tgl: MOCS table update The bspec was just updated with a minor correction to entry 61 (it shouldn't have had the SCF bit set). v2: - Add a MOCS_ENTRY_UNUSED() and use it to declare the explicitly-reserved MOCS entries. (Lucas) - Move the warning suppression from the Makefile to a #pragma that only affects the TGL table. (Lucas) v3: - Entries 16 and 17 are identical to ICL now, so no need to explicitly adjust them (or mess with compiler warning overrides). Bspec: 45101 Fixes: 2ddf992179c4 ("drm/i915/tgl: Define MOCS entries for Tigerlake") Cc: Tomasz Lis Cc: Lucas De Marchi Cc: Francisco Jerez Cc: Jon Bloomfield Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191112224757.25116-2-matthew.d.roper@intel.com Reviewed-by: Francisco Jerez Reviewed-by: Lucas De Marchi Reviewed-by: Tomasz Lis (cherry picked from commit bfb0e8e63d865559cc97af235aea583b7dcc235f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 55fbbaf4e1bc..cea184a7dde9 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -262,7 +262,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = { L3_1_UC), /* HW Special Case (Displayable) */ MOCS_ENTRY(61, - LE_1_UC | LE_TC_1_LLC | LE_SCF(1), + LE_1_UC | LE_TC_1_LLC, L3_3_WB), }; From 5683e5406e94ae1bfb0d9516a18fdb281d0f8d1d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 14 Nov 2019 00:59:19 +0300 Subject: [PATCH 078/146] io_uring: Fix getting file for timeout For timeout requests io_uring tries to grab a file with specified fd, which is usually stdin/fd=0. Update io_op_needs_file() Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 87beca4377f7..57ea54d5b0fb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2298,6 +2298,7 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe) switch (op) { case IORING_OP_NOP: case IORING_OP_POLL_REMOVE: + case IORING_OP_TIMEOUT: return false; default: return true; From a56dcc6b455830776899ce3686735f1172e12243 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Nov 2019 21:28:31 +0300 Subject: [PATCH 079/146] net: cdc_ncm: Signedness bug in cdc_ncm_set_dgram_size() This code is supposed to test for negative error codes and partial reads, but because sizeof() is size_t (unsigned) type then negative error codes are type promoted to high positive values and the condition doesn't work as expected. Fixes: 332f989a3b00 ("CDC-NCM: handle incomplete transfer of MTU") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index a245597a3902..c2c82e6391b4 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -579,7 +579,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); - if (err < sizeof(max_datagram_size)) { + if (err != sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } From 5e559561a8d7e6d4adfce6aa8fbf3daa3dec1577 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Nov 2019 16:12:46 -0700 Subject: [PATCH 080/146] io_uring: ensure registered buffer import returns the IO length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A test case was reported where two linked reads with registered buffers failed the second link always. This is because we set the expected value of a request in req->result, and if we don't get this result, then we fail the dependent links. For some reason the registered buffer import returned -ERROR/0, while the normal import returns -ERROR/length. This broke linked commands with registered buffers. Fix this by making io_import_fixed() correctly return the mapped length. Cc: stable@vger.kernel.org # v5.3 Reported-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 57ea54d5b0fb..2c819c3c855d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1230,7 +1230,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, } } - return 0; + return len; } static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, From ed50e1600b4483c049ce76e6bd3b665a6a9300ed Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 13 Nov 2019 12:08:01 +0200 Subject: [PATCH 081/146] slcan: Fix memory leak in error path This patch is fixing memory leak reported by Syzkaller: BUG: memory leak unreferenced object 0xffff888067f65500 (size 4096): comm "syz-executor043", pid 454, jiffies 4294759719 (age 11.930s) hex dump (first 32 bytes): 73 6c 63 61 6e 30 00 00 00 00 00 00 00 00 00 00 slcan0.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a06eec0d>] __kmalloc+0x18b/0x2c0 [<0000000083306e66>] kvmalloc_node+0x3a/0xc0 [<000000006ac27f87>] alloc_netdev_mqs+0x17a/0x1080 [<0000000061a996c9>] slcan_open+0x3ae/0x9a0 [<000000001226f0f9>] tty_ldisc_open.isra.1+0x76/0xc0 [<0000000019289631>] tty_set_ldisc+0x28c/0x5f0 [<000000004de5a617>] tty_ioctl+0x48d/0x1590 [<00000000daef496f>] do_vfs_ioctl+0x1c7/0x1510 [<0000000059068dbc>] ksys_ioctl+0x99/0xb0 [<000000009a6eb334>] __x64_sys_ioctl+0x78/0xb0 [<0000000053d0332e>] do_syscall_64+0x16f/0x580 [<0000000021b83b99>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000008ea75434>] 0xffffffffffffffff Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: Marc Kleine-Budde --- drivers/net/can/slcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index bb6032211043..0a9f42e5fedf 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -617,6 +617,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + free_netdev(sl->dev); err_exit: rtnl_unlock(); From cc9dbfa9707868fb0ca864c05e0c42d3f4d15cf2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Nov 2019 12:12:59 +0100 Subject: [PATCH 082/146] ALSA: usb-audio: Fix incorrect NULL check in create_yamaha_midi_quirk() The commit 60849562a5db ("ALSA: usb-audio: Fix possible NULL dereference at create_yamaha_midi_quirk()") added NULL checks in create_yamaha_midi_quirk(), but there was an overlook. The code allows one of either injd or outjd is NULL, but the second if check made returning -ENODEV if any of them is NULL. Fix it in a proper form. Fixes: 60849562a5db ("ALSA: usb-audio: Fix possible NULL dereference at create_yamaha_midi_quirk()") Reported-by: Pavel Machek Cc: Link: https://lore.kernel.org/r/20191113111259.24123-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 0bbe1201a6ac..349e1e52996d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -248,8 +248,8 @@ static int create_yamaha_midi_quirk(struct snd_usb_audio *chip, NULL, USB_MS_MIDI_OUT_JACK); if (!injd && !outjd) return -ENODEV; - if (!(injd && snd_usb_validate_midi_desc(injd)) || - !(outjd && snd_usb_validate_midi_desc(outjd))) + if ((injd && !snd_usb_validate_midi_desc(injd)) || + (outjd && !snd_usb_validate_midi_desc(outjd))) return -ENODEV; if (injd && (injd->bLength < 5 || (injd->bJackType != USB_MS_EMBEDDED && From fed23c5829ecab4ddc712d7b0046e59610ca3ba4 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 14 Nov 2019 12:59:26 +0000 Subject: [PATCH 083/146] mmc: sdhci-of-at91: fix quirk2 overwrite The quirks2 are parsed and set (e.g. from DT) before the quirk for broken HS200 is set in the driver. The driver needs to enable just this flag, not rewrite the whole quirk set. Fixes: 7871aa60ae00 ("mmc: sdhci-of-at91: add quirk for broken HS200") Signed-off-by: Eugen Hristev Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index e7d1920729fb..0ae986c42bc8 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -358,7 +358,7 @@ static int sdhci_at91_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); /* HS200 is broken at this moment */ - host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200; + host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200; ret = sdhci_add_host(host); if (ret) From 478de3380c1c7dbb0f65f545ee0185848413f3fe Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 14 Nov 2019 10:33:11 +0100 Subject: [PATCH 084/146] block, bfq: deschedule empty bfq_queues not referred by any process Since commit 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging"), to prevent the service guarantees of a bfq_queue from being violated, the bfq_queue may be left busy, i.e., scheduled for service, even if empty (see comments in __bfq_bfqq_expire() for details). But, if no process will send requests to the bfq_queue any longer, then there is no point in keeping the bfq_queue scheduled for service. In addition, keeping the bfq_queue scheduled for service, but with no process reference any longer, may cause the bfq_queue to be freed when descheduled from service. But this is assumed to never happen, and causes a UAF if it happens. This, in turn, caused crashes [1, 2]. This commit fixes this issue by descheduling an empty bfq_queue when it remains with not process reference. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1767539 [2] https://bugzilla.kernel.org/show_bug.cgi?id=205447 Fixes: 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging") Reported-by: Chris Evich Reported-by: Patrick Dung Reported-by: Thorsten Schubert Tested-by: Thorsten Schubert Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0319d6339822..0c6214497fcc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } } + +static +void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + /* + * To prevent bfqq's service guarantees from being violated, + * bfqq may be left busy, i.e., queued for service, even if + * empty (see comments in __bfq_bfqq_expire() for + * details). But, if no process will send requests to bfqq any + * longer, then there is no point in keeping bfqq queued for + * service. In addition, keeping bfqq queued for service, but + * with no process ref any longer, may have caused bfqq to be + * freed when dequeued from service. But this is assumed to + * never happen. + */ + if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) && + bfqq != bfqd->in_service_queue) + bfq_del_bfqq_busy(bfqd, bfqq, false); + + bfq_put_queue(bfqq); +} + static void bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) @@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, */ new_bfqq->pid = -1; bfqq->bic = NULL; - /* release process reference to bfqq */ - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqd, bfqq); } static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, @@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - bfq_put_queue(bfqq); /* release process reference */ + bfq_release_process_ref(bfqd, bfqq); } static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) @@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfqq = bic_to_bfqq(bic, false); if (bfqq) { - /* release process reference on this queue */ - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqd, bfqq); bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); bic_set_bfqq(bic, bfqq, false); } @@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqq->bfqd, bfqq); return NULL; } From 5347291415a33bfa6efa5bb61350b078f200956b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Sep 2019 20:34:36 +0900 Subject: [PATCH 085/146] sparc: vdso: fix build error of vdso32 Since commit 54b8ae66ae1a ("kbuild: change *FLAGS_.o to take the path relative to $(obj)"), sparc allmodconfig fails to build as follows: CC arch/sparc/vdso/vdso32/vclock_gettime.o unrecognized e_machine 18 arch/sparc/vdso/vdso32/vclock_gettime.o arch/sparc/vdso/vdso32/vclock_gettime.o: failed The cause of the breakage is that -pg flag not being dropped. The vdso32 files are located in the vdso32/ subdirectory, but I missed to update the Makefile. I removed the meaningless CFLAGS_REMOVE_vdso-note.o since it is only effective for C file. vdso-note.o is compiled from assembly file: arch/sparc/vdso/vdso-note.S arch/sparc/vdso/vdso32/vdso-note.S Fixes: 54b8ae66ae1a ("kbuild: change *FLAGS_.o to take the path relative to $(obj)") Reported-by: Anatoly Pugachev Reported-by: Guenter Roeck Signed-off-by: Masahiro Yamada Tested-by: Anatoly Pugachev Acked-by: David S. Miller --- arch/sparc/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 324a23947585..997ffe46e953 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. # -CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg +CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) +CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1 #This makes sure the $(obj) subdirectory exists even though vdso32/ From 80591e61a0f7e88deaada69844e4a31280c4a38f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 9 Nov 2019 13:12:16 +0100 Subject: [PATCH 086/146] kbuild: tell sparse about the $ARCH Sparse uses the same executable for all archs and uses flags like -m64, -mbig-endian or -D__arm__ for arch-specific parameters. But Sparse also uses value from the host machine used to build Sparse as default value for the target machine. This works, of course, well for native build but can create problems when cross-compiling, like defining both '__i386__' and '__arm__' when cross-compiling for arm on a x86-64 machine. Fix this by explicitely telling sparse the target architecture. Reported-by: Ben Dooks Signed-off-by: Luc Van Oostenryck Signed-off-by: Masahiro Yamada --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 1d5298356ea8..42bfda209cb8 100644 --- a/Makefile +++ b/Makefile @@ -917,6 +917,9 @@ ifeq ($(CONFIG_RELR),y) LDFLAGS_vmlinux += --pack-dyn-relocs=relr endif +# make the checker run with the right architecture +CHECKFLAGS += --arch=$(ARCH) + # insure the checker run with the right endianness CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian) From ed69a6cb700880d052a0d085ff2e5bfc108ce238 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Nov 2019 11:30:32 -0800 Subject: [PATCH 087/146] KVM: x86/mmu: Take slots_lock when using kvm_mmu_zap_all_fast() Acquire the per-VM slots_lock when zapping all shadow pages as part of toggling nx_huge_pages. The fast zap algorithm relies on exclusivity (via slots_lock) to identify obsolete vs. valid shadow pages, because it uses a single bit for its generation number. Holding slots_lock also obviates the need to acquire a read lock on the VM's srcu. Failing to take slots_lock when toggling nx_huge_pages allows multiple instances of kvm_mmu_zap_all_fast() to run concurrently, as the other user, KVM_SET_USER_MEMORY_REGION, does not take the global kvm_lock. (kvm_mmu_zap_all_fast() does take kvm->mmu_lock, but it can be temporarily dropped by kvm_zap_obsolete_pages(), so it is not enough to enforce exclusivity). Concurrent fast zap instances causes obsolete shadow pages to be incorrectly identified as valid due to the single bit generation number wrapping, which results in stale shadow pages being left in KVM's MMU and leads to all sorts of undesirable behavior. The bug is easily confirmed by running with CONFIG_PROVE_LOCKING and toggling nx_huge_pages via its module param. Note, until commit 4ae5acbc4936 ("KVM: x86/mmu: Take slots_lock when using kvm_mmu_zap_all_fast()", 2019-11-13) the fast zap algorithm used an ulong-sized generation instead of relying on exclusivity for correctness, but all callers except the recently added set_nx_huge_pages() needed to hold slots_lock anyways. Therefore, this patch does not have to be backported to stable kernels. Given that toggling nx_huge_pages is by no means a fast path, force it to conform to the current approach instead of reintroducing the previous generation count. Fixes: b8e8c8303ff28 ("kvm: mmu: ITLB_MULTIHIT mitigation", but NOT FOR STABLE) Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cf718fa23dff..2ce9da58611e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -6285,14 +6285,13 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) if (new_val != old_val) { struct kvm *kvm; - int idx; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - idx = srcu_read_lock(&kvm->srcu); + mutex_lock(&kvm->slots_lock); kvm_mmu_zap_all_fast(kvm); - srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->slots_lock); wake_up_process(kvm->arch.nx_lpage_recovery_thread); } From 976a68f06b2ea49e2ab67a5f84919a8b105db8be Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Nov 2019 17:56:12 +0100 Subject: [PATCH 088/146] ALSA: usb-audio: Fix incorrect size check for processing/extension units The recently introduced unit descriptor validation had some bug for processing and extension units, it counts a bControlSize byte twice so it expected a bigger size than it should have been. This seems resulting in a probe error on a few devices. Fix the calculation for proper checks of PU and EU. Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Cc: Link: https://lore.kernel.org/r/20191114165613.7422-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index a5e584b60dcd..389e8657434a 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -81,9 +81,9 @@ static bool validate_processing_unit(const void *p, switch (v->protocol) { case UAC_VERSION_1: default: - /* bNrChannels, wChannelConfig, iChannelNames, bControlSize */ - len += 1 + 2 + 1 + 1; - if (d->bLength < len) /* bControlSize */ + /* bNrChannels, wChannelConfig, iChannelNames */ + len += 1 + 2 + 1; + if (d->bLength < len + 1) /* bControlSize */ return false; m = hdr[len]; len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */ From a81bc3102b4ffb885f34855d0133f862f915ab13 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 13 Nov 2019 09:10:27 -0500 Subject: [PATCH 089/146] ceph: take the inode lock before acquiring cap refs Most of the time, we (or the vfs layer) takes the inode_lock and then acquires caps, but ceph_read_iter does the opposite, and that can lead to a deadlock. When there are multiple clients treading over the same data, we can end up in a situation where a reader takes caps and then tries to acquire the inode_lock. Another task holds the inode_lock and issues a request to the MDS which needs to revoke the caps, but that can't happen until the inode_lock is unwedged. Fix this by having ceph_read_iter take the inode_lock earlier, before attempting to acquire caps. Fixes: 321fe13c9398 ("ceph: add buffered/direct exclusionary locking for reads and writes") Link: https://tracker.ceph.com/issues/36348 Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bd77adb64bfd..06efeaff3b57 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1264,14 +1264,24 @@ again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); + if (iocb->ki_flags & IOCB_DIRECT) + ceph_start_io_direct(inode); + else + ceph_start_io_read(inode); + if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else want = CEPH_CAP_FILE_CACHE; ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); - if (ret < 0) + if (ret < 0) { + if (iocb->ki_flags & IOCB_DIRECT) + ceph_end_io_direct(inode); + else + ceph_end_io_read(inode); return ret; + } if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || @@ -1283,16 +1293,12 @@ again: if (ci->i_inline_version == CEPH_INLINE_NONE) { if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { - ceph_start_io_direct(inode); ret = ceph_direct_read_write(iocb, to, NULL, NULL); - ceph_end_io_direct(inode); if (ret >= 0 && ret < len) retry_op = CHECK_EOF; } else { - ceph_start_io_read(inode); ret = ceph_sync_read(iocb, to, &retry_op); - ceph_end_io_read(inode); } } else { retry_op = READ_INLINE; @@ -1303,11 +1309,10 @@ again: inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); ceph_add_rw_context(fi, &rw_ctx); - ceph_start_io_read(inode); ret = generic_file_read_iter(iocb, to); - ceph_end_io_read(inode); ceph_del_rw_context(fi, &rw_ctx); } + dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); if (pinned_page) { @@ -1315,6 +1320,12 @@ again: pinned_page = NULL; } ceph_put_cap_refs(ci, got); + + if (iocb->ki_flags & IOCB_DIRECT) + ceph_end_io_direct(inode); + else + ceph_end_io_read(inode); + if (retry_op > HAVE_RETRIED && ret >= 0) { int statret; struct page *page = NULL; From 6a81749ebe5f1b52d7eeb8a1031deb8d520f23e6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 13 Nov 2019 09:56:06 -0500 Subject: [PATCH 090/146] ceph: increment/decrement dio counter on async requests Ceph can in some cases issue an async DIO request, in which case we can end up calling ceph_end_io_direct before the I/O is actually complete. That may allow buffered operations to proceed while DIO requests are still in flight. Fix this by incrementing the i_dio_count when issuing an async DIO request, and decrement it when tearing down the aio_req. Fixes: 321fe13c9398 ("ceph: add buffered/direct exclusionary locking for reads and writes") Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 06efeaff3b57..8de633964dc3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -753,6 +753,9 @@ static void ceph_aio_complete(struct inode *inode, if (!atomic_dec_and_test(&aio_req->pending_reqs)) return; + if (aio_req->iocb->ki_flags & IOCB_DIRECT) + inode_dio_end(inode); + ret = aio_req->error; if (!ret) ret = aio_req->total_len; @@ -1091,6 +1094,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, CEPH_CAP_FILE_RD); list_splice(&aio_req->osd_reqs, &osd_reqs); + inode_dio_begin(inode); while (!list_empty(&osd_reqs)) { req = list_first_entry(&osd_reqs, struct ceph_osd_request, From 633739b2fedb6617d782ca252797b7a8ad754347 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 13 Nov 2019 12:07:15 +0100 Subject: [PATCH 091/146] rbd: silence bogus uninitialized warning in rbd_object_map_update_finish() Some versions of gcc (so far 6.3 and 7.4) throw a warning: drivers/block/rbd.c: In function 'rbd_object_map_callback': drivers/block/rbd.c:2124:21: warning: 'current_state' may be used uninitialized in this function [-Wmaybe-uninitialized] (current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN)) drivers/block/rbd.c:2092:23: note: 'current_state' was declared here u8 state, new_state, current_state; ^~~~~~~~~~~~~ It's bogus because all current_state accesses are guarded by has_current_state. Reported-by: kbuild test robot Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 39136675dae5..13527a0b4e44 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2087,7 +2087,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req, struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; struct ceph_osd_data *osd_data; u64 objno; - u8 state, new_state, current_state; + u8 state, new_state, uninitialized_var(current_state); bool has_current_state; void *p; From 8b37bc277fb459fa100808880a9d4e0641fff444 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 13 Nov 2019 15:21:31 +0800 Subject: [PATCH 092/146] iocost: check active_list of all the ancestors in iocg_activate() There is a bug that checking the same active_list over and over again in iocg_activate(). The intention of the code was checking whether all the ancestors and self have already been activated. So fix it. Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Acked-by: Tejun Heo Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- block/blk-iocost.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a7ed434eae03..e01267f99183 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) atomic64_set(&iocg->active_period, cur_period); /* already activated or breaking leaf-only constraint? */ - for (i = iocg->level; i > 0; i--) - if (!list_empty(&iocg->active_list)) + if (!list_empty(&iocg->active_list)) + goto succeed_unlock; + for (i = iocg->level - 1; i > 0; i--) + if (!list_empty(&iocg->ancestors[i]->active_list)) goto fail_unlock; + if (iocg->child_active_sum) goto fail_unlock; @@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) ioc_start_period(ioc, now); } +succeed_unlock: spin_unlock_irq(&ioc->lock); return true; From dcb77e4b274b8f13ac6482dfb09160cd2fae9a40 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 13 Nov 2019 14:38:47 +0800 Subject: [PATCH 093/146] rsxx: add missed destroy_workqueue calls in remove The driver misses calling destroy_workqueue in remove like what is done when probe fails. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 76b73ddf8fd7..10f6368117d8 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) cancel_work_sync(&card->event_work); + destroy_workqueue(card->event_wq); rsxx_destroy_dev(card); rsxx_dma_destroy(card); + destroy_workqueue(card->creg_ctrl.creg_wq); spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); From a84fddb16d9bdcb2e40b26aa53a4e4703839298a Mon Sep 17 00:00:00 2001 From: Xiaojie Yuan Date: Thu, 5 Sep 2019 16:50:22 +0800 Subject: [PATCH 094/146] drm/amdgpu: fix null pointer deref in firmware header printing v2: declare as (struct common_firmware_header *) type because struct xxx_firmware_header inherits from it When CE's ucode_id(8) is used to get sdma_hdr, we will be accessing an unallocated amdgpu_firmware_info instance. This issue appears on rhel7.7 with gcc 4.8.5. Newer compilers might have optimized out such 'defined but not referenced' variable. [ 1120.798564] BUG: unable to handle kernel NULL pointer dereference at 000000000000000a [ 1120.806703] IP: [] psp_np_fw_load+0x1e3/0x390 [amdgpu] [ 1120.813693] PGD 80000002603ff067 PUD 271b8d067 PMD 0 [ 1120.818931] Oops: 0000 [#1] SMP [ 1120.822245] Modules linked in: amdgpu(OE+) amdkcl(OE) amd_iommu_v2 amdttm(OE) amd_sched(OE) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun bridge stp llc devlink ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat iptable_mangle iptable_security iptable_raw nf_conntrack libcrc32c ip_set nfnetlink ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc dm_mirror dm_region_hash dm_log dm_mod intel_pmc_core intel_powerclamp coretemp intel_rapl joydev kvm_intel eeepc_wmi asus_wmi kvm sparse_keymap iTCO_wdt irqbypass rfkill crc32_pclmul snd_hda_codec_realtek mxm_wmi ghash_clmulni_intel intel_wmi_thunderbolt iTCO_vendor_support snd_hda_codec_generic snd_hda_codec_hdmi aesni_intel lrw gf128mul glue_helper ablk_helper sg cryptd pcspkr snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd pinctrl_sunrisepoint pinctrl_intel soundcore acpi_pad mei_me wmi mei i2c_i801 pcc_cpufreq ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic i915 i2c_algo_bit iosf_mbi drm_kms_helper e1000e syscopyarea sysfillrect sysimgblt fb_sys_fops ahci libahci drm ptp libata crct10dif_pclmul crct10dif_common crc32c_intel serio_raw pps_core drm_panel_orientation_quirks video i2c_hid [ 1120.954136] CPU: 4 PID: 2426 Comm: modprobe Tainted: G OE ------------ 3.10.0-1062.el7.x86_64 #1 [ 1120.964390] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 1302 11/09/2015 [ 1120.973321] task: ffff991ef1e3c1c0 ti: ffff991ee625c000 task.ti: ffff991ee625c000 [ 1120.981020] RIP: 0010:[] [] psp_np_fw_load+0x1e3/0x390 [amdgpu] [ 1120.990483] RSP: 0018:ffff991ee625f950 EFLAGS: 00010202 [ 1120.995935] RAX: 0000000000000002 RBX: ffff991edf6b2d38 RCX: ffff991edf6a0000 [ 1121.003391] RDX: 0000000000000000 RSI: ffff991f01d13898 RDI: ffffffffc110afb3 [ 1121.010706] RBP: ffff991ee625f9b0 R08: 0000000000000000 R09: 0000000000000000 [ 1121.018029] R10: 00000000000004c4 R11: ffff991ee625f64e R12: ffff991edf6b3220 [ 1121.025353] R13: ffff991edf6a0000 R14: 0000000000000008 R15: ffff991edf6b2d30 [ 1121.032666] FS: 00007f97b0c0b740(0000) GS:ffff991f01d00000(0000) knlGS:0000000000000000 [ 1121.041000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1121.046880] CR2: 000000000000000a CR3: 000000025e604000 CR4: 00000000003607e0 [ 1121.054239] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1121.061631] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1121.068938] Call Trace: [ 1121.071494] [] psp_hw_init+0x218/0x270 [amdgpu] [ 1121.077886] [] amdgpu_device_fw_loading+0xe8/0x160 [amdgpu] [ 1121.085296] [] ? vega10_ih_irq_init+0x4bc/0x730 [amdgpu] [ 1121.092534] [] amdgpu_device_init+0x1495/0x1c90 [amdgpu] [ 1121.099675] [] amdgpu_driver_load_kms+0x8b/0x2f0 [amdgpu] [ 1121.106888] [] drm_dev_register+0x12f/0x1d0 [drm] [ 1121.113419] [] ? pci_enable_device_flags+0xe8/0x140 [ 1121.120183] [] amdgpu_pci_probe+0xca/0x170 [amdgpu] [ 1121.126919] [] local_pci_probe+0x4a/0xb0 [ 1121.132622] [] pci_device_probe+0x109/0x160 [ 1121.138607] [] driver_probe_device+0xc5/0x3e0 [ 1121.144766] [] __driver_attach+0x93/0xa0 [ 1121.150507] [] ? __device_attach+0x50/0x50 [ 1121.156422] [] bus_for_each_dev+0x75/0xc0 [ 1121.162213] [] driver_attach+0x1e/0x20 [ 1121.167771] [] bus_add_driver+0x200/0x2d0 [ 1121.173590] [] driver_register+0x64/0xf0 [ 1121.179345] [] __pci_register_driver+0xa5/0xc0 [ 1121.185593] [] ? 0xffffffffc099efff [ 1121.190914] [] amdgpu_init+0xa4/0xb0 [amdgpu] [ 1121.197101] [] do_one_initcall+0xba/0x240 [ 1121.202901] [] load_module+0x271a/0x2bb0 [ 1121.208598] [] ? ddebug_proc_write+0x100/0x100 [ 1121.214894] [] SyS_init_module+0xef/0x140 [ 1121.220698] [] system_call_fastpath+0x25/0x2a [ 1121.226870] Code: b4 01 60 a2 00 00 31 c0 e8 83 60 33 e4 41 8b 47 08 48 8b 4d d0 48 c7 c7 b3 af 10 c1 48 69 c0 68 07 00 00 48 8b 84 01 60 a2 00 00 <48> 8b 70 08 31 c0 48 89 75 c8 e8 56 60 33 e4 48 8b 4d d0 48 c7 [ 1121.247422] RIP [] psp_np_fw_load+0x1e3/0x390 [amdgpu] [ 1121.254432] RSP [ 1121.258017] CR2: 000000000000000a [ 1121.261427] ---[ end trace e98b35387ede75bd ]--- Signed-off-by: Xiaojie Yuan Fixes: c5fb912653dae3f878 ("drm/amdgpu: add firmware header printing for psp fw loading (v2)") Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 38 +++++++++++-------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4d71537a960d..a46090071034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -950,21 +950,7 @@ static void psp_print_fw_hdr(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { struct amdgpu_device *adev = psp->adev; - const struct sdma_firmware_header_v1_0 *sdma_hdr = - (const struct sdma_firmware_header_v1_0 *) - adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; - const struct gfx_firmware_header_v1_0 *ce_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - const struct gfx_firmware_header_v1_0 *pfp_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - const struct gfx_firmware_header_v1_0 *me_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - const struct gfx_firmware_header_v1_0 *mec_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - const struct rlc_firmware_header_v2_0 *rlc_hdr = - (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - const struct smc_firmware_header_v1_0 *smc_hdr = - (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; + struct common_firmware_header *hdr; switch (ucode->ucode_id) { case AMDGPU_UCODE_ID_SDMA0: @@ -975,25 +961,33 @@ static void psp_print_fw_hdr(struct psp_context *psp, case AMDGPU_UCODE_ID_SDMA5: case AMDGPU_UCODE_ID_SDMA6: case AMDGPU_UCODE_ID_SDMA7: - amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); + hdr = (struct common_firmware_header *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + amdgpu_ucode_print_sdma_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_CE: - amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_PFP: - amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_ME: - amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.me_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_MEC1: - amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_RLC_G: - amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(hdr); break; case AMDGPU_UCODE_ID_SMC: - amdgpu_ucode_print_smc_hdr(&smc_hdr->header); + hdr = (struct common_firmware_header *)adev->pm.fw->data; + amdgpu_ucode_print_smc_hdr(hdr); break; default: break; From d7f9f47d4d1243cf960725a755123cfae13a3e51 Mon Sep 17 00:00:00 2001 From: Matt Bennett Date: Thu, 14 Nov 2019 12:20:03 +1300 Subject: [PATCH 095/146] tipc: add back tipc prefix to log messages The tipc prefix for log messages generated by tipc was removed in commit 07f6c4bc048a ("tipc: convert tipc reference table to use generic rhashtable"). This is still a useful prefix so add it back. Signed-off-by: Matt Bennett Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 2 -- net/tipc/core.h | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 23cb379a93d6..8f35060a24e1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,8 +34,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include "core.h" #include "name_table.h" #include "subscr.h" diff --git a/net/tipc/core.h b/net/tipc/core.h index 60d829581068..3042f654e0af 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,6 +60,12 @@ #include #include +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + struct tipc_node; struct tipc_bearer; struct tipc_bc_base; From 15fb35fa9ff456b81159033eba6397fcee85e671 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Thu, 14 Nov 2019 02:49:49 +0100 Subject: [PATCH 096/146] ravb: implement MTU change while device is up Pre-allocates buffers sufficient for the maximum supported MTU (2026) in order to eliminate the possibility of resource exhaustion when changing the MTU while the device is up. Signed-off-by: Ulrich Hecht Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 3 ++- drivers/net/ethernet/renesas/ravb_main.c | 26 +++++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index a9c89d5d8898..9f88b5db4f89 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -955,6 +955,8 @@ enum RAVB_QUEUE { #define NUM_RX_QUEUE 2 #define NUM_TX_QUEUE 2 +#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16)) + /* TX descriptors per packet */ #define NUM_TX_DESC_GEN2 2 #define NUM_TX_DESC_GEN3 1 @@ -1018,7 +1020,6 @@ struct ravb_private { u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */ u32 cur_tx[NUM_TX_QUEUE]; u32 dirty_tx[NUM_TX_QUEUE]; - u32 rx_buf_sz; /* Based on MTU+slack. */ struct napi_struct napi[NUM_RX_QUEUE]; struct work_struct work; /* MII transceiver section. */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index de9aa8c47f1c..3f165c137236 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -230,7 +230,7 @@ static void ravb_ring_free(struct net_device *ndev, int q) le32_to_cpu(desc->dptr))) dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - priv->rx_buf_sz, + RX_BUF_SZ, DMA_FROM_DEVICE); } ring_size = sizeof(struct ravb_ex_rx_desc) * @@ -293,9 +293,9 @@ static void ravb_ring_format(struct net_device *ndev, int q) for (i = 0; i < priv->num_rx_ring[q]; i++) { /* RX descriptor */ rx_desc = &priv->rx_ring[q][i]; - rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); + rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ); dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, - priv->rx_buf_sz, + RX_BUF_SZ, DMA_FROM_DEVICE); /* We just set the data size to 0 for a failed mapping which * should prevent DMA from happening... @@ -342,9 +342,6 @@ static int ravb_ring_init(struct net_device *ndev, int q) int ring_size; int i; - priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) + - ETH_HLEN + VLAN_HLEN + sizeof(__sum16); - /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], sizeof(*priv->rx_skb[q]), GFP_KERNEL); @@ -354,7 +351,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1); + skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1); if (!skb) goto error; ravb_set_buffer_align(skb); @@ -584,7 +581,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) skb = priv->rx_skb[q][entry]; priv->rx_skb[q][entry] = NULL; dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - priv->rx_buf_sz, + RX_BUF_SZ, DMA_FROM_DEVICE); get_ts &= (q == RAVB_NC) ? RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : @@ -617,11 +614,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; desc = &priv->rx_ring[q][entry]; - desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); + desc->ds_cc = cpu_to_le16(RX_BUF_SZ); if (!priv->rx_skb[q][entry]) { skb = netdev_alloc_skb(ndev, - priv->rx_buf_sz + + RX_BUF_SZ + RAVB_ALIGN - 1); if (!skb) break; /* Better luck next round. */ @@ -1801,10 +1798,15 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) static int ravb_change_mtu(struct net_device *ndev, int new_mtu) { - if (netif_running(ndev)) - return -EBUSY; + struct ravb_private *priv = netdev_priv(ndev); ndev->mtu = new_mtu; + + if (netif_running(ndev)) { + synchronize_irq(priv->emac_irq); + ravb_emac_init(ndev); + } + netdev_update_features(ndev); return 0; From 71c5e83bcf1e795e28a9dae6b1e74095452bf26e Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 14 Nov 2019 10:32:39 +0800 Subject: [PATCH 097/146] net: hns3: add compatible handling for MAC VLAN switch parameter configuration Previously, hns3 driver just directly send specific setting bit and mask bits of MAC VLAN switch parameter to the firmware, it can not be compatible with the old firmware, because the old one ignores mask bits and covers all bits with new setting bits. So when running with old firmware, the communication between PF and VF will fail after resetting or configuring spoof check, since they will do the MAC VLAN switch parameter configuration. This patch fixes this problem by reading switch parameter firstly, then just modifies the corresponding bit and sends it to firmware. Fixes: dd2956eab104 ("net: hns3: not allow SSU loopback while execute ethtool -t dev") Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 16f7d0e15b4f..c052bb33b3d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6263,11 +6263,23 @@ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid, func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0); req = (struct hclge_mac_vlan_switch_cmd *)desc.data; + + /* read current config parameter */ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM, - false); + true); req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL; req->func_id = cpu_to_le32(func_id); - req->switch_param = switch_param; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "read mac vlan switch parameter fail, ret = %d\n", ret); + return ret; + } + + /* modify and write new config parameter */ + hclge_cmd_reuse_desc(&desc, false); + req->switch_param = (req->switch_param & param_mask) | switch_param; req->param_mask = param_mask; ret = hclge_cmd_send(&hdev->hw, &desc, 1); From aea8cfb35a82d6c2f3517c86694933ba766635e5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 14 Nov 2019 10:32:40 +0800 Subject: [PATCH 098/146] net: hns3: reallocate SSU' buffer size when pfc_en changes When a TC's PFC is disabled or enabled, the RX private buffer for this TC need to be changed too, otherwise this may cause packet dropped problem. This patch fixes it by calling hclge_buffer_alloc to reallocate buffer when pfc_en changes. Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index c063301d6060..8b42caed14fd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -318,6 +318,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) struct net_device *netdev = h->kinfo.netdev; struct hclge_dev *hdev = vport->back; u8 i, j, pfc_map, *prio_tc; + int ret; if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) @@ -347,7 +348,21 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) hclge_tm_pfc_info_update(hdev); - return hclge_pause_setup_hw(hdev, false); + ret = hclge_pause_setup_hw(hdev, false); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + if (ret) + return ret; + + ret = hclge_buffer_alloc(hdev); + if (ret) { + hclge_notify_client(hdev, HNAE3_UP_CLIENT); + return ret; + } + + return hclge_notify_client(hdev, HNAE3_UP_CLIENT); } /* DCBX configuration */ From c2d56897819338eb0ba8b93184f7d10329b36653 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 14 Nov 2019 10:32:41 +0800 Subject: [PATCH 099/146] net: hns3: fix ETS bandwidth validation bug Some device only support 4 TCs, but the driver check the total bandwidth of 8 TCs, so may cause wrong configurations write to the hw. This patch uses hdev->tc_max to instead HNAE3_MAX_TC to fix it. Fixes: e432abfb99e5 ("net: hns3: add common validation in hclge_dcb") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 8b42caed14fd..a1790af73096 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -124,7 +124,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (ret) return ret; - for (i = 0; i < HNAE3_MAX_TC; i++) { + for (i = 0; i < hdev->tc_max; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: if (hdev->tm_info.tc_info[i].tc_sch_mode != From 9cb09e7c1c9af2968d5186ef9085f05641ab65d9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 14 Nov 2019 13:17:39 +0000 Subject: [PATCH 100/146] KVM: Add a comment describing the /dev/kvm no_compat handling Add a comment explaining the rational behind having both no_compat open and ioctl callbacks to fend off compat tasks. Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a65ed915c7a..13efc291b1c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -122,6 +122,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #define KVM_COMPAT(c) .compat_ioctl = (c) #else +/* + * For architectures that don't implement a compat infrastructure, + * adopt a double line of defense: + * - Prevent a compat task from opening /dev/kvm + * - If the open has been done by a 64bit task, and the KVM fd + * passed to a compat task, let the ioctls fail. + */ static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } From 6e1ff0773f49c7d38e8b4a9df598def6afb9f415 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 14 Nov 2019 21:10:52 +0000 Subject: [PATCH 101/146] sched/uclamp: Fix incorrect condition uclamp_update_active() should perform the update when p->uclamp[clamp_id].active is true. But when the logic was inverted in [1], the if condition wasn't inverted correctly too. [1] https://lore.kernel.org/lkml/20190902073836.GO2369@hirez.programming.kicks-ass.net/ Reported-by: Suren Baghdasaryan Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Cc: Ben Segall Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: babbe170e053 ("sched/uclamp: Update CPU's refcount on TG's clamp changes") Link: https://lkml.kernel.org/r/20191114211052.15116-1-qais.yousef@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 33cd25051f3a..44123b4d14e8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1065,7 +1065,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) * affecting a valid clamp bucket, the next time it's enqueued, * it will already see the updated clamp bucket value. */ - if (!p->uclamp[clamp_id].active) { + if (p->uclamp[clamp_id].active) { uclamp_rq_dec_id(rq, p, clamp_id); uclamp_rq_inc_id(rq, p, clamp_id); } From a28f239e296767ebf4ec4ae8a9ecb57d0d444b3f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Nov 2019 18:41:03 +0000 Subject: [PATCH 102/146] afs: Fix race in commit bulk status fetch When a lookup is done, the afs filesystem will perform a bulk status-fetch operation on the requested vnode (file) plus the next 49 other vnodes from the directory list (in AFS, directory contents are downloaded as blobs and parsed locally). When the results are received, it will speculatively populate the inode cache from the extra data. However, if the lookup races with another lookup on the same directory, but for a different file - one that's in the 49 extra fetches, then if the bulk status-fetch operation finishes first, it will try and update the inode from the other lookup. If this other inode is still in the throes of being created, however, this will cause an assertion failure in afs_apply_status(): BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); on or about fs/afs/inode.c:175 because it expects data to be there already that it can compare to. Fix this by skipping the update if the inode is being created as the creator will presumably set up the inode with the same information. Fixes: 39db9815da48 ("afs: Fix application of the results of a inline bulk status fetch") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cc12772d0a4d..497f979018c2 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -803,7 +803,12 @@ success: continue; if (cookie->inodes[i]) { - afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]), + struct afs_vnode *iv = AFS_FS_I(cookie->inodes[i]); + + if (test_bit(AFS_VNODE_UNSET, &iv->flags)) + continue; + + afs_vnode_commit_status(&fc, iv, scb->cb_break, NULL, scb); continue; } From fa3a5a1880c91bb92594ad42dfe9eedad7996b86 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 15 Nov 2019 11:35:05 -0800 Subject: [PATCH 103/146] Input: ff-memless - kill timer in destroy() No timer must be left running when the device goes away. Signed-off-by: Oliver Neukum Reported-and-tested-by: syzbot+b6c55daa701fc389e286@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1573726121.17351.3.camel@suse.com Signed-off-by: Dmitry Torokhov --- drivers/input/ff-memless.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c index 1cb40c7475af..8229a9006917 100644 --- a/drivers/input/ff-memless.c +++ b/drivers/input/ff-memless.c @@ -489,6 +489,15 @@ static void ml_ff_destroy(struct ff_device *ff) { struct ml_device *ml = ff->private; + /* + * Even though we stop all playing effects when tearing down + * an input device (via input_device_flush() that calls into + * input_ff_flush() that stops and erases all effects), we + * do not actually stop the timer, and therefore we should + * do it here. + */ + del_timer_sync(&ml->timer); + kfree(ml->private); } From ba60cf9f78f0d7c8e73c7390608f7f818ee68aa0 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 15 Nov 2019 11:32:36 -0800 Subject: [PATCH 104/146] Input: synaptics-rmi4 - destroy F54 poller workqueue when removing The driver forgets to destroy workqueue in remove() similarly to what is done when probe() fails. Add a call to destroy_workqueue() to fix it. Since unregistration will wait for the work to finish, we do not need to cancel/flush the work instance in remove(). Signed-off-by: Chuhong Yuan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191114023405.31477-1-hslester96@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f54.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 484ae1f97330..897105b9a98b 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -730,6 +730,7 @@ static void rmi_f54_remove(struct rmi_function *fn) video_unregister_device(&f54->vdev); v4l2_device_unregister(&f54->v4l2); + destroy_workqueue(f54->workqueue); } struct rmi_function_handler rmi_f54_handler = { From 4d189c1026fac6af922e16538fb7c653bbeed778 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Nov 2019 11:50:57 +0200 Subject: [PATCH 105/146] selftests: mlxsw: Adjust test to recent changes mlxsw does not support VXLAN devices with a physical device attached and vetoes such configurations upon enslavement to an offloaded bridge. Commit 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level") changed the VXLAN device to be an upper of the physical device which causes mlxsw to veto the creation of the VXLAN device with "Unknown upper device type". This is OK as this configuration is not supported, but it prevents us from testing bad flows involving the enslavement of VXLAN devices with a physical device to a bridge, regardless if the physical device is an mlxsw netdev or not. Adjust the test to use a dummy device as a physical device instead of a mlxsw netdev. Fixes: 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index ae6146ec5afd..4632f51af7ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test() RET=0 ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ - dev $swp2 group 239.0.0.1 + dev dummy1 group 239.0.0.1 sanitization_single_dev_test_fail ip link del dev vxlan0 + ip link del dev dummy1 ip link del dev br0 log_test "vxlan device with a multicast group" @@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test() RET=0 ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1 sanitization_single_dev_test_fail ip link del dev vxlan0 + ip link del dev dummy1 ip link del dev br0 log_test "vxlan device with local interface" From a9a51bd727d141a67b589f375fe69d0e54c4fe22 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 11:16:01 +0100 Subject: [PATCH 106/146] ax88172a: fix information leak on short answers If a malicious device gives a short MAC it can elicit up to 5 bytes of leaked memory out of the driver. We need to check for ETH_ALEN instead. Reported-by: syzbot+a8d4acdad35e6bbca308@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/ax88172a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 011bd4cb546e..af3994e0853b 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -196,7 +196,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); - if (ret < 0) { + if (ret < ETH_ALEN) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); goto free; } From 3df70afe8d33f4977d0e0891bdcfb639320b5257 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Thu, 14 Nov 2019 23:43:24 +0800 Subject: [PATCH 107/146] net: ep93xx_eth: fix mismatch of request_mem_region in remove The driver calls release_resource in remove to match request_mem_region in probe, which is incorrect. Fix it by using the right one, release_mem_region. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index f1a0c4dceda0..f37c9a08c4cf 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -763,6 +763,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev) { struct net_device *dev; struct ep93xx_priv *ep; + struct resource *mem; dev = platform_get_drvdata(pdev); if (dev == NULL) @@ -778,8 +779,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev) iounmap(ep->base_addr); if (ep->res != NULL) { - release_resource(ep->res); - kfree(ep->res); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); } free_netdev(dev); From cd734d54e67990eebfc3106dc39047c1141d4197 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:44:55 -0800 Subject: [PATCH 108/146] ptp: Validate requests to enable time stamping of external signals. Commit 415606588c61 ("PTP: introduce new versions of IOCTLs") introduced a new external time stamp ioctl that validates the flags. This patch extends the validation to ensure that at least one rising or falling edge flag is set when enabling external time stamps. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 18 +++++++++++++----- include/uapi/linux/ptp_clock.h | 1 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 67d0199840fd..cbbe1237ff8d 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -149,11 +149,19 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EFAULT; break; } - if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) || - req.extts.rsv[0] || req.extts.rsv[1]) && - cmd == PTP_EXTTS_REQUEST2) { - err = -EINVAL; - break; + if (cmd == PTP_EXTTS_REQUEST2) { + /* Make sure no reserved bit is set. */ + if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) || + req.extts.rsv[0] || req.extts.rsv[1]) { + err = -EINVAL; + break; + } + /* Ensure one of the rising/falling edge bits is set. */ + if ((req.extts.flags & PTP_ENABLE_FEATURE) && + (req.extts.flags & PTP_EXTTS_EDGES) == 0) { + err = -EINVAL; + break; + } } else if (cmd == PTP_EXTTS_REQUEST) { req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS; req.extts.rsv[0] = 0; diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 59e89a1bc3bb..304059b1609d 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -31,6 +31,7 @@ #define PTP_ENABLE_FEATURE (1<<0) #define PTP_RISING_EDGE (1<<1) #define PTP_FALLING_EDGE (1<<2) +#define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE) /* * flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl. From 7f9048f1df6f0c1c7a74a15c8b4ce033a753f274 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:44:56 -0800 Subject: [PATCH 109/146] net: reject PTP periodic output requests with unsupported flags Commit 823eb2a3c4c7 ("PTP: add support for one-shot output") introduced a new flag for the PTP periodic output request ioctl. This flag is not currently supported by any driver. Fix all drivers which implement the periodic output request ioctl to explicitly reject any request with flags they do not understand. This ensures that the driver does not accidentally misinterpret the PTP_PEROUT_ONE_SHOT flag, or any new flag introduced in the future. This is important for forward compatibility: if a new flag is introduced, the driver should reject requests to enable the flag until the driver has actually been modified to support the flag in question. Cc: Felipe Balbi Cc: David S. Miller Cc: Christopher Hall Signed-off-by: Jacob Keller Signed-off-by: Richard Cochran Tested-by: Aaron Brown Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 4 ++++ drivers/net/ethernet/intel/igb/igb_ptp.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 4 ++++ drivers/net/ethernet/microchip/lan743x_ptp.c | 4 ++++ drivers/net/ethernet/renesas/ravb_ptp.c | 4 ++++ drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 4 ++++ drivers/net/phy/dp83640.c | 3 +++ 7 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 77f3511b97de..ca3aa1250dd1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6280,6 +6280,10 @@ static int tg3_ptp_enable(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + if (rq->perout.index != 0) return -EINVAL; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index fd3071f55bd3..4997963149f6 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -551,6 +551,10 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, return 0; case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + if (on) { pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT, rq->perout.index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0059b290e095..cff6b60de304 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -290,6 +290,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 57b26c2acf87..e8fe9a90fe4f 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -429,6 +429,10 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, int pulse_width = 0; int perout_bit = 0; + /* Reject requests with unsupported flags */ + if (perout->flags) + return -EOPNOTSUPP; + if (!on) { lan743x_ptp_perout_off(adapter); return 0; diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 9a42580693cb..638f1fc2166f 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -211,6 +211,10 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, unsigned long flags; int error = 0; + /* Reject requests with unsupported flags */ + if (req->flags) + return -EOPNOTSUPP; + if (req->index) return -EINVAL; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index df638b18b72c..0989e2bb6ee3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -140,6 +140,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + cfg = &priv->pps[rq->perout.index]; cfg->start.tv_sec = rq->perout.start.sec; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 6580094161a9..04ad77758920 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -491,6 +491,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, return 0; case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; if (rq->perout.index >= N_PER_OUT) return -EINVAL; return periodic_output(clock, rq, on, rq->perout.index); From 7d9465ebcc5d0e5ab0d7dd36d7147d31cc76fcaf Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:44:57 -0800 Subject: [PATCH 110/146] mv88e6xxx: reject unsupported external timestamp flags Fix the mv88e6xxx PTP support to explicitly reject any future flags that get added to the external timestamp request ioctl. In order to maintain currently functioning code, this patch accepts all three current flags. This is because the PTP_RISING_EDGE and PTP_FALLING_EDGE flags have unclear semantics and each driver seems to have interpreted them slightly differently. For the record, the semantics of this driver are: flags Meaning ---------------------------------------------------- -------------------------- PTP_ENABLE_FEATURE Time stamp falling edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE Time stamp rising edge PTP_ENABLE_FEATURE|PTP_FALLING_EDGE Time stamp falling edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE|PTP_FALLING_EDGE Time stamp rising edge Cc: Brandon Streiff Signed-off-by: Jacob Keller Reviewed-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/ptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index 073cbd0bb91b..076e622a64d6 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -273,6 +273,12 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip, int pin; int err; + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE)) + return -EOPNOTSUPP; + pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index); if (pin < 0) From e8e9c98dc3b52d77ae041baf76ff1c1fec2ad0da Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:44:58 -0800 Subject: [PATCH 111/146] dp83640: reject unsupported external timestamp flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the dp83640 PTP support to explicitly reject any future flags that get added to the external timestamp request ioctl. In order to maintain currently functioning code, this patch accepts all three current flags. This is because the PTP_RISING_EDGE and PTP_FALLING_EDGE flags have unclear semantics and each driver seems to have interpreted them slightly differently. For the record, the semantics of this driver are: flags Meaning ---------------------------------------------------- -------------------------- PTP_ENABLE_FEATURE Time stamp rising edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE Time stamp rising edge PTP_ENABLE_FEATURE|PTP_FALLING_EDGE Time stamp falling edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE|PTP_FALLING_EDGE Time stamp falling edge Cc: Stefan Sørensen Cc: Richard Cochran Signed-off-by: Jacob Keller Reviewed-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 04ad77758920..2781b0e2d947 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -469,6 +469,11 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE)) + return -EOPNOTSUPP; index = rq->extts.index; if (index >= N_EXT_TS) return -EINVAL; From 6edd110b41d5aa63d29492eea637e4e344b5d71d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:44:59 -0800 Subject: [PATCH 112/146] igb: reject unsupported external timestamp flags Fix the igb PTP support to explicitly reject any future flags that get added to the external timestamp request ioctl. In order to maintain currently functioning code, this patch accepts all three current flags. This is because the PTP_RISING_EDGE and PTP_FALLING_EDGE flags have unclear semantics and each driver seems to have interpreted them slightly differently. This HW always time stamps both edges: flags Meaning ---------------------------------------------------- -------------------------- PTP_ENABLE_FEATURE Time stamp both edges PTP_ENABLE_FEATURE|PTP_RISING_EDGE Time stamp both edges PTP_ENABLE_FEATURE|PTP_FALLING_EDGE Time stamp both edges PTP_ENABLE_FEATURE|PTP_RISING_EDGE|PTP_FALLING_EDGE Time stamp both edges Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 4997963149f6..0bce3e0f1af0 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -521,6 +521,12 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE)) + return -EOPNOTSUPP; + if (on) { pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS, rq->extts.index); From 2e0645a00e25f7122cad6da57ce3cc855df49ddd Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:45:00 -0800 Subject: [PATCH 113/146] mlx5: reject unsupported external timestamp flags Fix the mlx5 core PTP support to explicitly reject any future flags that get added to the external timestamp request ioctl. In order to maintain currently functioning code, this patch accepts all three current flags. This is because the PTP_RISING_EDGE and PTP_FALLING_EDGE flags have unclear semantics and each driver seems to have interpreted them slightly differently. [ RC: I'm not 100% sure what this driver does, but if I'm not wrong it follows the dp83640: flags Meaning ---------------------------------------------------- -------------------------- PTP_ENABLE_FEATURE Time stamp rising edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE Time stamp rising edge PTP_ENABLE_FEATURE|PTP_FALLING_EDGE Time stamp falling edge PTP_ENABLE_FEATURE|PTP_RISING_EDGE|PTP_FALLING_EDGE Time stamp falling edge ] Cc: Feras Daoud Cc: Eugenia Emantayev Signed-off-by: Jacob Keller Reviewed-by: Richard Cochran Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index cff6b60de304..9a40f24e3193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -236,6 +236,12 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE)) + return -EOPNOTSUPP; + if (rq->extts.index >= clock->ptp_info.n_pins) return -EINVAL; From 592025a03b34927f35852058c933ffcd7c500321 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 14 Nov 2019 10:45:01 -0800 Subject: [PATCH 114/146] renesas: reject unsupported external timestamp flags Fix the renesas PTP support to explicitly reject any future flags that get added to the external timestamp request ioctl. In order to maintain currently functioning code, this patch accepts all three current flags. This is because the PTP_RISING_EDGE and PTP_FALLING_EDGE flags have unclear semantics and each driver seems to have interpreted them slightly differently. Cc: Sergei Shtylyov Signed-off-by: Jacob Keller Reviewed-by: Richard Cochran Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_ptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 638f1fc2166f..666dbee48097 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -182,6 +182,12 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, struct net_device *ndev = priv->ndev; unsigned long flags; + /* Reject requests with unsupported flags */ + if (req->flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE)) + return -EOPNOTSUPP; + if (req->index) return -EINVAL; From 6138e687c7b679da08c0feb55a88f448f7890c07 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:02 -0800 Subject: [PATCH 115/146] ptp: Introduce strict checking of external time stamp options. User space may request time stamps on rising edges, falling edges, or both. However, the particular mode may or may not be supported in the hardware or in the driver. This patch adds a "strict" flag that tells drivers to ensure that the requested mode will be honored. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/ptp.c | 3 ++- drivers/net/ethernet/intel/igb/igb_ptp.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 3 ++- drivers/net/ethernet/renesas/ravb_ptp.c | 3 ++- drivers/net/phy/dp83640.c | 3 ++- drivers/ptp/ptp_chardev.c | 2 ++ include/uapi/linux/ptp_clock.h | 4 +++- 7 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index 076e622a64d6..3b1985902f95 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -276,7 +276,8 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip, /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | - PTP_FALLING_EDGE)) + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) return -EOPNOTSUPP; pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0bce3e0f1af0..3fd60715bca7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -524,7 +524,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | - PTP_FALLING_EDGE)) + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) return -EOPNOTSUPP; if (on) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 9a40f24e3193..819097d9b583 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -239,7 +239,8 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | - PTP_FALLING_EDGE)) + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) return -EOPNOTSUPP; if (rq->extts.index >= clock->ptp_info.n_pins) diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 666dbee48097..6984bd5b7da9 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -185,7 +185,8 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, /* Reject requests with unsupported flags */ if (req->flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | - PTP_FALLING_EDGE)) + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) return -EOPNOTSUPP; if (req->index) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 2781b0e2d947..3bba2bea3a88 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -472,7 +472,8 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | - PTP_FALLING_EDGE)) + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) return -EOPNOTSUPP; index = rq->extts.index; if (index >= N_EXT_TS) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index cbbe1237ff8d..9d72ab593f13 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -150,6 +150,8 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) break; } if (cmd == PTP_EXTTS_REQUEST2) { + /* Tell the drivers to check the flags carefully. */ + req.extts.flags |= PTP_STRICT_FLAGS; /* Make sure no reserved bit is set. */ if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) || req.extts.rsv[0] || req.extts.rsv[1]) { diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 304059b1609d..9dc9d0079e98 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -31,6 +31,7 @@ #define PTP_ENABLE_FEATURE (1<<0) #define PTP_RISING_EDGE (1<<1) #define PTP_FALLING_EDGE (1<<2) +#define PTP_STRICT_FLAGS (1<<3) #define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE) /* @@ -38,7 +39,8 @@ */ #define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \ PTP_RISING_EDGE | \ - PTP_FALLING_EDGE) + PTP_FALLING_EDGE | \ + PTP_STRICT_FLAGS) /* * flag fields valid for the original PTP_EXTTS_REQUEST ioctl. From c019b4be5de0d53fff4aa309322397605f634e27 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:03 -0800 Subject: [PATCH 116/146] mv88e6xxx: Reject requests to enable time stamping on both edges. This driver enables rising edge or falling edge, but not both, and so this patch validates that the request contains only one of the two edges. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/ptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index 3b1985902f95..d838c174dc0d 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -280,6 +280,12 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip, PTP_STRICT_FLAGS)) return -EOPNOTSUPP; + /* Reject requests to enable time stamping on both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index); if (pin < 0) From 9289252bd1e6a37f8182f7c48e69927292fe9340 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:04 -0800 Subject: [PATCH 117/146] dp83640: Reject requests to enable time stamping on both edges. This driver enables rising edge or falling edge, but not both, and so this patch validates that the request contains only one of the two edges. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 3bba2bea3a88..8f241b57fcf6 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -475,6 +475,13 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, PTP_FALLING_EDGE | PTP_STRICT_FLAGS)) return -EOPNOTSUPP; + + /* Reject requests to enable time stamping on both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + index = rq->extts.index; if (index >= N_EXT_TS) return -EINVAL; From 5a450eb388d5c262a1c938a0b5cebb24800a03b2 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:05 -0800 Subject: [PATCH 118/146] igb: Reject requests that fail to enable time stamping on both edges. This hardware always time stamps rising and falling edges, and so this patch validates that the request does contains both edges. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 3fd60715bca7..c39e921757ba 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -528,6 +528,12 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, PTP_STRICT_FLAGS)) return -EOPNOTSUPP; + /* Reject requests failing to enable both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + if (on) { pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS, rq->extts.index); From ca12cf5ac9c8fcbf64bc18e0dc2974dc3217f97d Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:06 -0800 Subject: [PATCH 119/146] mlx5: Reject requests to enable time stamping on both edges. This driver enables rising edge or falling edge, but not both, and so this patch validates that the request contains only one of the two edges. Signed-off-by: Richard Cochran Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 819097d9b583..43f97601b500 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -243,6 +243,12 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, PTP_STRICT_FLAGS)) return -EOPNOTSUPP; + /* Reject requests to enable time stamping on both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + if (rq->extts.index >= clock->ptp_info.n_pins) return -EINVAL; From 6eb54cbb4a86b9a9dac5ddf3b2fcd130249a2008 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 14 Nov 2019 10:45:07 -0800 Subject: [PATCH 120/146] ptp: Extend the test program to check the external time stamp flags. Because each driver and hardware has different capabilities, the test cannot provide a simple pass/fail result, but it can at least show what combinations of flags are supported. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- tools/testing/selftests/ptp/testptp.c | 53 ++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index bd4a7247b44f..c0dd10257df5 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx) } #endif +static void show_flag_test(int rq_index, unsigned int flags, int err) +{ + printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n", + rq_index ? '1' + rq_index : ' ', + flags, err, strerror(errno)); + /* sigh, uClibc ... */ + errno = 0; +} + +static void do_flag_test(int fd, unsigned int index) +{ + struct ptp_extts_request extts_request; + unsigned long request[2] = { + PTP_EXTTS_REQUEST, + PTP_EXTTS_REQUEST2, + }; + unsigned int enable_flags[5] = { + PTP_ENABLE_FEATURE, + PTP_ENABLE_FEATURE | PTP_RISING_EDGE, + PTP_ENABLE_FEATURE | PTP_FALLING_EDGE, + PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE, + PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1), + }; + int err, i, j; + + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = index; + + for (i = 0; i < 2; i++) { + for (j = 0; j < 5; j++) { + extts_request.flags = enable_flags[j]; + err = ioctl(fd, request[i], &extts_request); + show_flag_test(i, extts_request.flags, err); + + extts_request.flags = 0; + err = ioctl(fd, request[i], &extts_request); + } + } +} + static clockid_t get_clockid(int fd) { #define CLOCKFD 3 @@ -96,7 +136,8 @@ static void usage(char *progname) " -s set the ptp clock time from the system time\n" " -S set the system time from the ptp clock time\n" " -t val shift the ptp clock time by 'val' seconds\n" - " -T val set the ptp clock time to 'val' seconds\n", + " -T val set the ptp clock time to 'val' seconds\n" + " -z test combinations of rising/falling external time stamp flags\n", progname); } @@ -122,6 +163,7 @@ int main(int argc, char *argv[]) int adjtime = 0; int capabilities = 0; int extts = 0; + int flagtest = 0; int gettime = 0; int index = 0; int list_pins = 0; @@ -138,7 +180,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) { switch (c) { case 'c': capabilities = 1; @@ -191,6 +233,9 @@ int main(int argc, char *argv[]) settime = 3; seconds = atoi(optarg); break; + case 'z': + flagtest = 1; + break; case 'h': usage(progname); return 0; @@ -322,6 +367,10 @@ int main(int argc, char *argv[]) } } + if (flagtest) { + do_flag_test(fd, index); + } + if (list_pins) { int n_pins = 0; if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { From 7574c0db2e68c4d0bae9d415a683bdd8b2a761e9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 13 Nov 2019 19:29:38 +0100 Subject: [PATCH 121/146] i2c: acpi: Force bus speed to 400KHz if a Silead touchscreen is present Many cheap devices use Silead touchscreen controllers. Testing has shown repeatedly that these touchscreen controllers work fine at 400KHz, but for unknown reasons do not work properly at 100KHz. This has been seen on both ARM and x86 devices using totally different i2c controllers. On some devices the ACPI tables list another device at the same I2C-bus as only being capable of 100KHz, testing has shown that these other devices work fine at 400KHz (as can be expected of any recent I2C hw). This commit makes i2c_acpi_find_bus_speed() always return 400KHz if a Silead touchscreen controller is present, fixing the touchscreen not working on devices which ACPI tables' wrongly list another device on the same bus as only being capable of 100KHz. Specifically this fixes the touchscreen on the Jumper EZpad 6 m4 not working. Reported-by: youling 257 Tested-by: youling 257 Signed-off-by: Hans de Goede Reviewed-by: Jarkko Nikula Acked-by: Mika Westerberg [wsa: rewording warning a little] Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-acpi.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 9cb2aa1e20ef..62a1c92ab803 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -39,6 +39,7 @@ struct i2c_acpi_lookup { int index; u32 speed; u32 min_speed; + u32 force_speed; }; /** @@ -285,6 +286,19 @@ i2c_acpi_match_device(const struct acpi_device_id *matches, return acpi_match_device(matches, &client->dev); } +static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = { + /* + * These Silead touchscreen controllers only work at 400KHz, for + * some reason they do not work at 100KHz. On some devices the ACPI + * tables list another device at their bus as only being capable + * of 100KHz, testing has shown that these other devices work fine + * at 400KHz (as can be expected of any recent i2c hw) so we force + * the speed of the bus to 400 KHz if a Silead device is present. + */ + { "MSSL1680", 0 }, + {} +}; + static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, void *data, void **return_value) { @@ -303,6 +317,9 @@ static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, if (lookup->speed <= lookup->min_speed) lookup->min_speed = lookup->speed; + if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0) + lookup->force_speed = 400000; + return AE_OK; } @@ -340,7 +357,16 @@ u32 i2c_acpi_find_bus_speed(struct device *dev) return 0; } - return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0; + if (lookup.force_speed) { + if (lookup.force_speed != lookup.min_speed) + dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n", + lookup.min_speed, lookup.force_speed); + return lookup.force_speed; + } else if (lookup.min_speed != UINT_MAX) { + return lookup.min_speed; + } else { + return 0; + } } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); From a4c2fec16f5e6a5fee4865e6e0e91e2bc2d10f37 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 8 Nov 2019 16:36:48 +0800 Subject: [PATCH 122/146] i2c: core: fix use after free in of_i2c_notify We can't use "adap->dev" after it has been freed. Fixes: 5bf4fa7daea6 ("i2c: break out OF support into separate file") Signed-off-by: Wen Yang Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index 6f632d543fcc..7eb41990bd6d 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -245,14 +245,14 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } client = of_i2c_register_device(adap, rd->dn); - put_device(&adap->dev); - if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", rd->dn); + put_device(&adap->dev); of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(client)); } + put_device(&adap->dev); break; case OF_RECONFIG_CHANGE_REMOVE: /* already depopulated? */ From 768ea88bcb235ac3a92754bf82afcd3f12200bcc Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 15 Nov 2019 14:57:13 -0800 Subject: [PATCH 123/146] Input: synaptics - enable RMI mode for X1 Extreme 2nd Generation Just got one of these for debugging some unrelated issues, and noticed that Lenovo seems to have gone back to using RMI4 over smbus with Synaptics touchpads on some of their new systems, particularly this one. So, let's enable RMI mode for the X1 Extreme 2nd Generation. Signed-off-by: Lyude Paul Link: https://lore.kernel.org/r/20191115221814.31903-1-lyude@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 56fae3472114..704558d449a2 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -177,6 +177,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ + "LEN0402", /* X1 Extreme 2nd Generation */ "LEN200f", /* T450s */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ From a85dfc305a21acfc48fa28a0fa0a0cb6ad496120 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 15 Nov 2019 17:34:33 -0800 Subject: [PATCH 124/146] mm: mempolicy: fix the wrong return value and potential pages leak of mbind Commit d883544515aa ("mm: mempolicy: make the behavior consistent when MPOL_MF_MOVE* and MPOL_MF_STRICT were specified") fixed the return value of mbind() for a couple of corner cases. But, it altered the errno for some other cases, for example, mbind() should return -EFAULT when part or all of the memory range specified by nodemask and maxnode points outside your accessible address space, or there was an unmapped hole in the specified memory range specified by addr and len. Fix this by preserving the errno returned by queue_pages_range(). And, the pagelist may be not empty even though queue_pages_range() returns error, put the pages back to LRU since mbind_range() is not called to really apply the policy so those pages should not be migrated, this is also the old behavior before the problematic commit. Link: http://lkml.kernel.org/r/1572454731-3925-1-git-send-email-yang.shi@linux.alibaba.com Fixes: d883544515aa ("mm: mempolicy: make the behavior consistent when MPOL_MF_MOVE* and MPOL_MF_STRICT were specified") Signed-off-by: Yang Shi Reported-by: Li Xinhai Reviewed-by: Li Xinhai Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: [4.19 and 5.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4ae967bcf954..e08c94170ae4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -672,7 +672,9 @@ static const struct mm_walk_ops queue_pages_walk_ops = { * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were * specified. * 0 - queue pages successfully or no misplaced page. - * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified. + * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or + * memory range specified by nodemask and maxnode points outside + * your accessible address space (-EFAULT) */ static int queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, @@ -1286,7 +1288,7 @@ static long do_mbind(unsigned long start, unsigned long len, flags | MPOL_MF_INVERT, &pagelist); if (ret < 0) { - err = -EIO; + err = ret; goto up_out; } @@ -1305,10 +1307,12 @@ static long do_mbind(unsigned long start, unsigned long len, if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) err = -EIO; - } else - putback_movable_pages(&pagelist); - + } else { up_out: + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); + } + up_write(&mm->mmap_sem); mpol_out: mpol_put(new); From 82072962973008201b817fae1896512977dd5083 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 15 Nov 2019 17:34:36 -0800 Subject: [PATCH 125/146] mm: fix trying to reclaim unevictable lru page when calling madvise_pageout Recently, I hit the following issue when running upstream. kernel BUG at mm/vmscan.c:1521! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 23385 Comm: syz-executor.6 Not tainted 5.4.0-rc4+ #1 RIP: 0010:shrink_page_list+0x12b6/0x3530 mm/vmscan.c:1521 Call Trace: reclaim_pages+0x499/0x800 mm/vmscan.c:2188 madvise_cold_or_pageout_pte_range+0x58a/0x710 mm/madvise.c:453 walk_pmd_range mm/pagewalk.c:53 [inline] walk_pud_range mm/pagewalk.c:112 [inline] walk_p4d_range mm/pagewalk.c:139 [inline] walk_pgd_range mm/pagewalk.c:166 [inline] __walk_page_range+0x45a/0xc20 mm/pagewalk.c:261 walk_page_range+0x179/0x310 mm/pagewalk.c:349 madvise_pageout_page_range mm/madvise.c:506 [inline] madvise_pageout+0x1f0/0x330 mm/madvise.c:542 madvise_vma mm/madvise.c:931 [inline] __do_sys_madvise+0x7d2/0x1600 mm/madvise.c:1113 do_syscall_64+0x9f/0x4c0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe madvise_pageout() accesses the specified range of the vma and isolates them, then runs shrink_page_list() to reclaim its memory. But it also isolates the unevictable pages to reclaim. Hence, we can catch the cases in shrink_page_list(). The root cause is that we scan the page tables instead of specific LRU list. and so we need to filter out the unevictable lru pages from our end. Link: http://lkml.kernel.org/r/1572616245-18946-1-git-send-email-zhongjiang@huawei.com Fixes: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT") Signed-off-by: zhong jiang Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 2be9f3fdb05e..94c343b4c968 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -363,8 +363,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, ClearPageReferenced(page); test_and_clear_page_young(page); if (pageout) { - if (!isolate_lru_page(page)) - list_add(&page->lru, &page_list); + if (!isolate_lru_page(page)) { + if (PageUnevictable(page)) + putback_lru_page(page); + else + list_add(&page->lru, &page_list); + } } else deactivate_page(page); huge_unlock: @@ -441,8 +445,12 @@ regular_page: ClearPageReferenced(page); test_and_clear_page_young(page); if (pageout) { - if (!isolate_lru_page(page)) - list_add(&page->lru, &page_list); + if (!isolate_lru_page(page)) { + if (PageUnevictable(page)) + putback_lru_page(page); + else + list_add(&page->lru, &page_list); + } } else deactivate_page(page); } From 8e20ba2e53fc6198cbfbcc700e9f884157052a8d Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Fri, 15 Nov 2019 17:34:39 -0800 Subject: [PATCH 126/146] lib/xz: fix XZ_DYNALLOC to avoid useless memory reallocations s->dict.allocated was initialized to 0 but never set after a successful allocation, thus the code always thought that the dictionary buffer has to be reallocated. Link: http://lkml.kernel.org/r/20191104185107.3b6330df@tukaani.org Signed-off-by: Lasse Collin Reported-by: Yu Sun Acked-by: Daniel Walker Cc: "Yixia Si (yisi)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/xz/xz_dec_lzma2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 08c3c8049998..156f26fdc4c9 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -1146,6 +1146,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) if (DEC_IS_DYNALLOC(s->dict.mode)) { if (s->dict.allocated < s->dict.size) { + s->dict.allocated = s->dict.size; vfree(s->dict.buf); s->dict.buf = vmalloc(s->dict.size); if (s->dict.buf == NULL) { From 00d484f354d85845991b40141d40ba9e5eb60faf Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 15 Nov 2019 17:34:43 -0800 Subject: [PATCH 127/146] mm: memcg: switch to css_tryget() in get_mem_cgroup_from_mm() We've encountered a rcu stall in get_mem_cgroup_from_mm(): rcu: INFO: rcu_sched self-detected stall on CPU rcu: 33-....: (21000 ticks this GP) idle=6c6/1/0x4000000000000002 softirq=35441/35441 fqs=5017 (t=21031 jiffies g=324821 q=95837) NMI backtrace for cpu 33 <...> RIP: 0010:get_mem_cgroup_from_mm+0x2f/0x90 <...> __memcg_kmem_charge+0x55/0x140 __alloc_pages_nodemask+0x267/0x320 pipe_write+0x1ad/0x400 new_sync_write+0x127/0x1c0 __kernel_write+0x4f/0xf0 dump_emit+0x91/0xc0 writenote+0xa0/0xc0 elf_core_dump+0x11af/0x1430 do_coredump+0xc65/0xee0 get_signal+0x132/0x7c0 do_signal+0x36/0x640 exit_to_usermode_loop+0x61/0xd0 do_syscall_64+0xd4/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The problem is caused by an exiting task which is associated with an offline memcg. We're iterating over and over in the do {} while (!css_tryget_online()) loop, but obviously the memcg won't become online and the exiting task won't be migrated to a live memcg. Let's fix it by switching from css_tryget_online() to css_tryget(). As css_tryget_online() cannot guarantee that the memcg won't go offline, the check is usually useless, except some rare cases when for example it determines if something should be presented to a user. A similar problem is described by commit 18fa84a2db0e ("cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css()"). Johannes: : The bug aside, it doesn't matter whether the cgroup is online for the : callers. It used to matter when offlining needed to evacuate all charges : from the memcg, and so needed to prevent new ones from showing up, but we : don't care now. Link: http://lkml.kernel.org/r/20191106225131.3543616-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Michal Koutn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 37592dd7ae32..46ad252e6d6a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -960,7 +960,7 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) if (unlikely(!memcg)) memcg = root_mem_cgroup; } - } while (!css_tryget_online(&memcg->css)); + } while (!css_tryget(&memcg->css)); rcu_read_unlock(); return memcg; } From 0362f326d86c645b5e96b7dbc3ee515986ed019d Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 15 Nov 2019 17:34:46 -0800 Subject: [PATCH 128/146] mm: hugetlb: switch to css_tryget() in hugetlb_cgroup_charge_cgroup() An exiting task might belong to an offline cgroup. In this case an attempt to grab a cgroup reference from the task can end up with an infinite loop in hugetlb_cgroup_charge_cgroup(), because neither the cgroup will become online, neither the task will be migrated to a live cgroup. Fix this by switching over to css_tryget(). As css_tryget_online() can't guarantee that the cgroup won't go offline, in most cases the check doesn't make sense. In this particular case users of hugetlb_cgroup_charge_cgroup() are not affected by this change. A similar problem is described by commit 18fa84a2db0e ("cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css()"). Link: http://lkml.kernel.org/r/20191106225131.3543616-2-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f1930fa0b445..2ac38bdc18a1 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); - if (!css_tryget_online(&h_cg->css)) { + if (!css_tryget(&h_cg->css)) { rcu_read_unlock(); goto again; } From aea4df4c53f754cc229edde6c5465e481311cc49 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 15 Nov 2019 17:34:50 -0800 Subject: [PATCH 129/146] mm: slub: really fix slab walking for init_on_free Commit 1b7e816fc80e ("mm: slub: Fix slab walking for init_on_free") fixed one problem with the slab walking but missed a key detail: When walking the list, the head and tail pointers need to be updated since we end up reversing the list as a result. Without doing this, bulk free is broken. One way this is exposed is a NULL pointer with slub_debug=F: ============================================================================= BUG skbuff_head_cache (Tainted: G T): Object already free ----------------------------------------------------------------------------- INFO: Slab 0x000000000d2d2f8f objects=16 used=3 fp=0x0000000064309071 flags=0x3fff00000000201 BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: 0000 [#1] PREEMPT SMP PTI RIP: 0010:print_trailer+0x70/0x1d5 Call Trace: free_debug_processing.cold.37+0xc9/0x149 __slab_free+0x22a/0x3d0 kmem_cache_free_bulk+0x415/0x420 __kfree_skb_flush+0x30/0x40 net_rx_action+0x2dd/0x480 __do_softirq+0xf0/0x246 irq_exit+0x93/0xb0 do_IRQ+0xa0/0x110 common_interrupt+0xf/0xf Given we're now almost identical to the existing debugging code which correctly walks the list, combine with that. Link: https://lkml.kernel.org/r/20191104170303.GA50361@gandi.net Link: http://lkml.kernel.org/r/20191106222208.26815-1-labbott@redhat.com Fixes: 1b7e816fc80e ("mm: slub: Fix slab walking for init_on_free") Signed-off-by: Laura Abbott Reported-by: Thibaut Sautereau Acked-by: David Rientjes Tested-by: Alexander Potapenko Acked-by: Alexander Potapenko Cc: Kees Cook Cc: "David S. Miller" Cc: Vlastimil Babka Cc: Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b25c807a111f..e72e802fc569 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1433,12 +1433,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *old_tail = *tail ? *tail : *head; int rsize; - if (slab_want_init_on_free(s)) { - void *p = NULL; + /* Head and tail of the reconstructed freelist */ + *head = NULL; + *tail = NULL; - do { - object = next; - next = get_freepointer(s, object); + do { + object = next; + next = get_freepointer(s, object); + + if (slab_want_init_on_free(s)) { /* * Clear the object and the metadata, but don't touch * the redzone. @@ -1448,29 +1451,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - set_freepointer(s, object, p); - p = object; - } while (object != old_tail); - } -/* - * Compiler cannot detect this function can be removed if slab_free_hook() - * evaluates to nothing. Thus, catch all relevant config debug options here. - */ -#if defined(CONFIG_LOCKDEP) || \ - defined(CONFIG_DEBUG_KMEMLEAK) || \ - defined(CONFIG_DEBUG_OBJECTS_FREE) || \ - defined(CONFIG_KASAN) - - next = *head; - - /* Head and tail of the reconstructed freelist */ - *head = NULL; - *tail = NULL; - - do { - object = next; - next = get_freepointer(s, object); + } /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { /* Move object to the new freelist */ @@ -1485,9 +1467,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *tail = NULL; return *head != NULL; -#else - return true; -#endif } static void *setup_object(struct kmem_cache *s, struct page *page, From 4655e5e5f387264fd22a835bcfbe4af6691ff774 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 15 Nov 2019 17:34:53 -0800 Subject: [PATCH 130/146] mm,thp: recheck each page before collapsing file THP In collapse_file(), for !is_shmem case, current check cannot guarantee the locked page is up-to-date. Specifically, xas_unlock_irq() should not be called before lock_page() and get_page(); and it is necessary to recheck PageUptodate() after locking the page. With this bug and CONFIG_READ_ONLY_THP_FOR_FS=y, madvise(HUGE)'ed .text may contain corrupted data. This is because khugepaged mistakenly collapses some not up-to-date sub pages into a huge page, and assumes the huge page is up-to-date. This will NOT corrupt data in the disk, because the page is read-only and never written back. Fix this by properly checking PageUptodate() after locking the page. This check replaces "VM_BUG_ON_PAGE(!PageUptodate(page), page);". Also, move PageDirty() check after locking the page. Current khugepaged should not try to collapse dirty file THP, because it is limited to read-only .text. The only case we hit a dirty page here is when the page hasn't been written since write. Bail out and retry when this happens. syzbot reported bug on previous version of this patch. Link: http://lkml.kernel.org/r/20191106060930.2571389-2-songliubraving@fb.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Song Liu Reported-by: syzbot+efb9e48b9fbdc49bb34a@syzkaller.appspotmail.com Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Hugh Dickins Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f05d27b7183d..a8a57bebb5fa 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1602,17 +1602,6 @@ static void collapse_file(struct mm_struct *mm, result = SCAN_FAIL; goto xa_unlocked; } - } else if (!PageUptodate(page)) { - xas_unlock_irq(&xas); - wait_on_page_locked(page); - if (!trylock_page(page)) { - result = SCAN_PAGE_LOCK; - goto xa_unlocked; - } - get_page(page); - } else if (PageDirty(page)) { - result = SCAN_FAIL; - goto xa_locked; } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); @@ -1627,7 +1616,12 @@ static void collapse_file(struct mm_struct *mm, * without racing with truncate. */ VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(!PageUptodate(page), page); + + /* make sure the page is up to date */ + if (unlikely(!PageUptodate(page))) { + result = SCAN_FAIL; + goto out_unlock; + } /* * If file was truncated then extended, or hole-punched, before @@ -1643,6 +1637,16 @@ static void collapse_file(struct mm_struct *mm, goto out_unlock; } + if (!is_shmem && PageDirty(page)) { + /* + * khugepaged only works on read-only fd, so this + * page is dirty because it hasn't been flushed + * since first write. + */ + result = SCAN_FAIL; + goto out_unlock; + } + if (isolate_lru_page(page)) { result = SCAN_DEL_PAGE_LRU; goto out_unlock; From 2c91f8fc6c999fe10185d8ad99fda1759f662f70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Nov 2019 17:34:57 -0800 Subject: [PATCH 131/146] mm/memory_hotplug: fix try_offline_node() try_offline_node() is pretty much broken right now: - The node span is updated when onlining memory, not when adding it. We ignore memory that was mever onlined. Bad. - We touch possible garbage memmaps. The pfn_to_nid(pfn) can easily trigger a kernel panic. Bad for memory that is offline but also bad for subsection hotadd with ZONE_DEVICE, whereby the memmap of the first PFN of a section might contain garbage. - Sections belonging to mixed nodes are not properly considered. As memory blocks might belong to multiple nodes, we would have to walk all pageblocks (or at least subsections) within present sections. However, we don't have a way to identify whether a memmap that is not online was initialized (relevant for ZONE_DEVICE). This makes things more complicated. Luckily, we can piggy pack on the node span and the nid stored in memory blocks. Currently, the node span is grown when calling move_pfn_range_to_zone() - e.g., when onlining memory, and shrunk when removing memory, before calling try_offline_node(). Sysfs links are created via link_mem_sections(), e.g., during boot or when adding memory. If the node still spans memory or if any memory block belongs to the nid, we don't set the node offline. As memory blocks that span multiple nodes cannot get offlined, the nid stored in memory blocks is reliable enough (for such online memory blocks, the node still spans the memory). Introduce for_each_memory_block() to efficiently walk all memory blocks. Note: We will soon stop shrinking the ZONE_DEVICE zone and the node span when removing ZONE_DEVICE memory to fix similar issues (access of garbage memmaps) - until we have a reliable way to identify whether these memmaps were properly initialized. This implies later, that once a node had ZONE_DEVICE memory, we won't be able to set a node offline - which should be acceptable. Since commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") memory that is added is not assoziated with a zone/node (memmap not initialized). The introducing commit 60a5a19e7419 ("memory-hotplug: remove sysfs file of node") already missed that we could have multiple nodes for a section and that the zone/node span is updated when onlining pages, not when adding them. I tested this by hotplugging two DIMMs to a memory-less and cpu-less NUMA node. The node is properly onlined when adding the DIMMs. When removing the DIMMs, the node is properly offlined. Masayoshi Mizuma reported: : Without this patch, memory hotplug fails as panic: : : BUG: kernel NULL pointer dereference, address: 0000000000000000 : ... : Call Trace: : remove_memory_block_devices+0x81/0xc0 : try_remove_memory+0xb4/0x130 : __remove_memory+0xa/0x20 : acpi_memory_device_remove+0x84/0x100 : acpi_bus_trim+0x57/0x90 : acpi_bus_trim+0x2e/0x90 : acpi_device_hotplug+0x2b2/0x4d0 : acpi_hotplug_work_fn+0x1a/0x30 : process_one_work+0x171/0x380 : worker_thread+0x49/0x3f0 : kthread+0xf8/0x130 : ret_from_fork+0x35/0x40 [david@redhat.com: v3] Link: http://lkml.kernel.org/r/20191102120221.7553-1-david@redhat.com Link: http://lkml.kernel.org/r/20191028105458.28320-1-david@redhat.com Fixes: 60a5a19e7419 ("memory-hotplug: remove sysfs file of node") Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visiable after d0dc12e86b319 Signed-off-by: David Hildenbrand Tested-by: Masayoshi Mizuma Cc: Tang Chen Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Keith Busch Cc: Jiri Olsa Cc: "Peter Zijlstra (Intel)" Cc: Jani Nikula Cc: Nayna Jain Cc: Michal Hocko Cc: Oscar Salvador Cc: Stephen Rothwell Cc: Dan Williams Cc: Pavel Tatashin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 36 +++++++++++++++++++++++++++++++++ include/linux/memory.h | 1 + mm/memory_hotplug.c | 45 ++++++++++++++++++++++++++---------------- 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 55907c27075b..84c4e1f72cbd 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -872,3 +872,39 @@ int walk_memory_blocks(unsigned long start, unsigned long size, } return ret; } + +struct for_each_memory_block_cb_data { + walk_memory_blocks_func_t func; + void *arg; +}; + +static int for_each_memory_block_cb(struct device *dev, void *data) +{ + struct memory_block *mem = to_memory_block(dev); + struct for_each_memory_block_cb_data *cb_data = data; + + return cb_data->func(mem, cb_data->arg); +} + +/** + * for_each_memory_block - walk through all present memory blocks + * + * @arg: argument passed to func + * @func: callback for each memory block walked + * + * This function walks through all present memory blocks, calling func on + * each memory block. + * + * In case func() returns an error, walking is aborted and the error is + * returned. + */ +int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) +{ + struct for_each_memory_block_cb_data cb_data = { + .func = func, + .arg = arg, + }; + + return bus_for_each_dev(&memory_subsys, NULL, &cb_data, + for_each_memory_block_cb); +} diff --git a/include/linux/memory.h b/include/linux/memory.h index 0ebb105eb261..4c75dae8dd29 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -119,6 +119,7 @@ extern struct memory_block *find_memory_block(struct mem_section *); typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); +extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<nid == nid ? -EEXIST : 0; +} + /** * try_offline_node * @nid: the node ID @@ -1658,25 +1670,24 @@ static int check_cpu_on_node(pg_data_t *pgdat) void try_offline_node(int nid) { pg_data_t *pgdat = NODE_DATA(nid); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; - unsigned long pfn; + int rc; - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - unsigned long section_nr = pfn_to_section_nr(pfn); - - if (!present_section_nr(section_nr)) - continue; - - if (pfn_to_nid(pfn) != nid) - continue; - - /* - * some memory sections of this node are not removed, and we - * can't offline node now. - */ + /* + * If the node still spans pages (especially ZONE_DEVICE), don't + * offline it. A node spans memory after move_pfn_range_to_zone(), + * e.g., after the memory block was onlined. + */ + if (pgdat->node_spanned_pages) + return; + + /* + * Especially offline memory blocks might not be spanned by the + * node. They will get spanned by the node once they get onlined. + * However, they link to the node in sysfs and can get onlined later. + */ + rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb); + if (rc) return; - } if (check_cpu_on_node(pgdat)) return; From 5df373e95689b9519b8557da7c5bd0db0856d776 Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Fri, 15 Nov 2019 17:35:00 -0800 Subject: [PATCH 132/146] mm/page_io.c: do not free shared swap slots The following race is observed due to which a processes faulting on a swap entry, finds the page neither in swapcache nor swap. This causes zram to give a zero filled page that gets mapped to the process, resulting in a user space crash later. Consider parent and child processes Pa and Pb sharing the same swap slot with swap_count 2. Swap is on zram with SWP_SYNCHRONOUS_IO set. Virtual address 'VA' of Pa and Pb points to the shared swap entry. Pa Pb fault on VA fault on VA do_swap_page do_swap_page lookup_swap_cache fails lookup_swap_cache fails Pb scheduled out swapin_readahead (deletes zram entry) swap_free (makes swap_count 1) Pb scheduled in swap_readpage (swap_count == 1) Takes SWP_SYNCHRONOUS_IO path zram enrty absent zram gives a zero filled page Fix this by making sure that swap slot is freed only when swap count drops down to one. Link: http://lkml.kernel.org/r/1571743294-14285-1-git-send-email-vinmenon@codeaurora.org Fixes: aa8d22a11da9 ("mm: swap: SWP_SYNCHRONOUS_IO: skip swapcache only if swapped page has no other reference") Signed-off-by: Vinayak Menon Suggested-by: Minchan Kim Acked-by: Minchan Kim Cc: Michal Hocko Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 24ee600f9131..60a66a58b9bf 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -73,6 +73,7 @@ static void swap_slot_free_notify(struct page *page) { struct swap_info_struct *sis; struct gendisk *disk; + swp_entry_t entry; /* * There is no guarantee that the page is in swap cache - the software @@ -104,11 +105,10 @@ static void swap_slot_free_notify(struct page *page) * we again wish to reclaim it. */ disk = sis->bdev->bd_disk; - if (disk->fops->swap_slot_free_notify) { - swp_entry_t entry; + entry.val = page_private(page); + if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { unsigned long offset; - entry.val = page_private(page); offset = swp_offset(entry); SetPageDirty(page); From 76a1850e45724e8aca44fc0a245de6782ce42e65 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 15 Nov 2019 17:35:04 -0800 Subject: [PATCH 133/146] mm/debug.c: __dump_page() prints an extra line When dumping struct page information, __dump_page() prints the page type with a trailing blank followed by the page flags on a separate line: anon flags: 0x100000000090034(uptodate|lru|active|head|swapbacked) It looks like the intent was to use pr_cont() for printing "flags:" but pr_cont() usage is discouraged so fix this by extending the format to include the flags into a single line: anon flags: 0x100000000090034(uptodate|lru|active|head|swapbacked) If the page is file backed, the name might be long so use two lines: shmem_aops name:"dev/zero" flags: 0x10000000008000c(uptodate|dirty|swapbacked) Eliminate pr_conf() usage as well for appending compound_mapcount. Link: http://lkml.kernel.org/r/20191112012608.16926-1-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Reviewed-by: Andrew Morton Cc: Jerome Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 8345bb6e4769..772d4cf0691f 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -67,28 +67,31 @@ void __dump_page(struct page *page, const char *reason) */ mapcount = PageSlab(page) ? 0 : page_mapcount(page); - pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx", - page, page_ref_count(page), mapcount, - page->mapping, page_to_pgoff(page)); if (PageCompound(page)) - pr_cont(" compound_mapcount: %d", compound_mapcount(page)); - pr_cont("\n"); + pr_warn("page:%px refcount:%d mapcount:%d mapping:%px " + "index:%#lx compound_mapcount: %d\n", + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page), + compound_mapcount(page)); + else + pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx\n", + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page)); if (PageAnon(page)) - pr_warn("anon "); + pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags); else if (PageKsm(page)) - pr_warn("ksm "); + pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags); else if (mapping) { - pr_warn("%ps ", mapping->a_ops); if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); - pr_warn("name:\"%pd\" ", dentry); - } + pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry); + } else + pr_warn("%ps\n", mapping->a_ops); + pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags); } BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); - pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags); - hex_only: print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), page, From 6855ac4acd3bad4a5caf813b0e401a0bc79a54a9 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 15 Nov 2019 17:35:07 -0800 Subject: [PATCH 134/146] mm/debug.c: PageAnon() is true for PageKsm() pages PageAnon() and PageKsm() use the low two bits of the page->mapping pointer to indicate the page type. PageAnon() only checks the LSB while PageKsm() checks the least significant 2 bits are equal to 3. Therefore, PageAnon() is true for KSM pages. __dump_page() incorrectly will never print "ksm" because it checks PageAnon() first. Fix this by checking PageKsm() first. Link: http://lkml.kernel.org/r/20191113000651.20677-1-rcampbell@nvidia.com Fixes: 1c6fb1d89e73 ("mm: print more information about mapping in __dump_page") Signed-off-by: Ralph Campbell Acked-by: Michal Hocko Cc: Jerome Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 772d4cf0691f..0461df1207cb 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -77,10 +77,10 @@ void __dump_page(struct page *page, const char *reason) pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx\n", page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); - if (PageAnon(page)) - pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags); - else if (PageKsm(page)) + if (PageKsm(page)) pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags); + else if (PageAnon(page)) + pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags); else if (mapping) { if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; From 26b3f3cc0a5b30d5b6af4d614f1c324c29ff2a9c Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 16 Nov 2019 14:50:45 +0530 Subject: [PATCH 135/146] octeontx2-af: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Marvell OcteonTX2 network devices. It uses an expilict block comment for the SPDX License Identifier. Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/common.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/npc.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h | 4 ++-- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 206dc5dc1df8..5c1f389e3320 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 CGX driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 CGX driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h index fb3ba4968a9b..473d9751601f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 CGX driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 CGX driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index e332e82fc066..413c3f254cf8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 76a4575d18ff..75439fce0505 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 8d6d90fdfb73..5d4df315a0e1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index b2ce957605bb..da649f6a5573 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index c9d60b0554c0..5222e4228905 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 09a8d61f3144..1ea92a2e7cfe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index f920dac74e6c..84a39063a8bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Marvell OcteonTx2 RVU Admin Function driver +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Admin Function driver * * Copyright (C) 2018 Marvell International Ltd. * From acb9bdc1482280b0164d638ecd416eff6f694e9c Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 16 Nov 2019 15:10:59 +0530 Subject: [PATCH 136/146] net: stmmac: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to STMicroelectronics based Multi-Gigabit Ethernet driver. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used). Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac5.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 +- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 775db776b3cc..23fecf68f781 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ // Copyright (c) 2017 Synopsys, Inc. and/or its affiliates. // stmmac Support for 5.xx Ethernet QoS cores diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 99037386080a..9d08a934fe4f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. * stmmac XGMAC definitions. diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index ddb851d99618..9010d881b12e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. // stmmac HW Interface Callbacks From 7f91ed8c4f4b6368b0603543bccb6b5fd10804e1 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 16 Nov 2019 16:05:52 +0100 Subject: [PATCH 137/146] seg6: fix srh pointer in get_srh() pskb_may_pull may change pointers in header. For this reason, it is mandatory to reload any pointer that points into skb header. Signed-off-by: Andrea Mayer Signed-off-by: David S. Miller --- net/ipv6/seg6_local.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 9d4f75e0d33a..5e3d7004d431 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -81,6 +81,11 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb) if (!pskb_may_pull(skb, srhoff + len)) return NULL; + /* note that pskb_may_pull may change pointers in header; + * for this reason it is necessary to reload them when needed. + */ + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + if (!seg6_validate_srh(srh, len)) return NULL; From c71644d00f9fdd87e5d54fdd388ba899ae3852fa Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 16 Nov 2019 16:05:53 +0100 Subject: [PATCH 138/146] seg6: fix skb transport_header after decap_and_validate() in the receive path (more precisely in ip6_rcv_core()) the skb->transport_header is set to skb->network_header + sizeof(*hdr). As a consequence, after routing operations, destination input expects to find skb->transport_header correctly set to the next protocol (or extension header) that follows the network protocol. However, decap behaviors (DX*, DT*) remove the outer IPv6 and SRH extension and do not set again the skb->transport_header pointer correctly. For this reason, the patch sets the skb->transport_header to the skb->network_header + sizeof(hdr) in each DX* and DT* behavior. Signed-off-by: Andrea Mayer Signed-off-by: David S. Miller --- net/ipv6/seg6_local.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 5e3d7004d431..e70567446f28 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -341,6 +341,8 @@ static int input_action_end_dx6(struct sk_buff *skb, if (!ipv6_addr_any(&slwt->nh6)) nhaddr = &slwt->nh6; + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + seg6_lookup_nexthop(skb, nhaddr, 0); return dst_input(skb); @@ -370,6 +372,8 @@ static int input_action_end_dx4(struct sk_buff *skb, skb_dst_drop(skb); + skb_set_transport_header(skb, sizeof(struct iphdr)); + err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); if (err) goto drop; @@ -390,6 +394,8 @@ static int input_action_end_dt6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + seg6_lookup_nexthop(skb, NULL, slwt->table); return dst_input(skb); From c80ed84e76886487703bf04b38ce10e92e2d6e26 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 16 Nov 2019 18:08:25 +0200 Subject: [PATCH 139/146] net: dsa: tag_8021q: Fix dsa_8021q_restore_pvid for an absent pvid This sequence of operations: ip link set dev br0 type bridge vlan_filtering 1 bridge vlan del dev swp2 vid 1 ip link set dev br0 type bridge vlan_filtering 1 ip link set dev br0 type bridge vlan_filtering 0 apparently fails with the message: [ 31.305716] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering [ 31.322161] sja1105 spi0.1: Couldn't determine PVID attributes (pvid 0) [ 31.328939] sja1105 spi0.1: Failed to setup VLAN tagging for port 1: -2 [ 31.335599] ------------[ cut here ]------------ [ 31.340215] WARNING: CPU: 1 PID: 194 at net/switchdev/switchdev.c:157 switchdev_port_attr_set_now+0x9c/0xa4 [ 31.349981] br0: Commit of attribute (id=6) failed. [ 31.354890] Modules linked in: [ 31.357942] CPU: 1 PID: 194 Comm: ip Not tainted 5.4.0-rc6-01792-gf4f632e07665-dirty #2062 [ 31.366167] Hardware name: Freescale LS1021A [ 31.370437] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 31.378153] [] (show_stack) from [] (dump_stack+0xe0/0x10c) [ 31.385437] [] (dump_stack) from [] (__warn+0xf4/0x10c) [ 31.392373] [] (__warn) from [] (warn_slowpath_fmt+0x74/0xb8) [ 31.399827] [] (warn_slowpath_fmt) from [] (switchdev_port_attr_set_now+0x9c/0xa4) [ 31.409097] [] (switchdev_port_attr_set_now) from [] (__br_vlan_filter_toggle+0x6c/0x118) [ 31.418971] [] (__br_vlan_filter_toggle) from [] (br_changelink+0xf8/0x518) [ 31.427637] [] (br_changelink) from [] (__rtnl_newlink+0x3f4/0x76c) [ 31.435613] [] (__rtnl_newlink) from [] (rtnl_newlink+0x44/0x60) [ 31.443329] [] (rtnl_newlink) from [] (rtnetlink_rcv_msg+0x2cc/0x51c) [ 31.451477] [] (rtnetlink_rcv_msg) from [] (netlink_rcv_skb+0xb8/0x110) [ 31.459796] [] (netlink_rcv_skb) from [] (netlink_unicast+0x17c/0x1f8) [ 31.468026] [] (netlink_unicast) from [] (netlink_sendmsg+0x2bc/0x3b4) [ 31.476261] [] (netlink_sendmsg) from [] (___sys_sendmsg+0x230/0x250) [ 31.484408] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x50/0x8c) [ 31.492209] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x28) [ 31.500090] Exception stack(0xedf47fa8 to 0xedf47ff0) [ 31.505122] 7fa0: 00000002 b6f2e060 00000003 beabd6a4 00000000 00000000 [ 31.513265] 7fc0: 00000002 b6f2e060 5d6e3213 00000128 00000000 00000001 00000006 000619c4 [ 31.521405] 7fe0: 00086078 beabd658 0005edbc b6e7ce68 The reason is the implementation of br_get_pvid: static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { if (!vg) return 0; smp_rmb(); return vg->pvid; } Since VID 0 is an invalid pvid from the bridge's point of view, let's add this check in dsa_8021q_restore_pvid to avoid restoring a pvid that doesn't really exist. Fixes: 5f33183b7fdf ("net: dsa: tag_8021q: Restore bridge VLANs when enabling vlan_filtering") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_8021q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 9c1cc2482b68..9e5a883a9f0c 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -106,7 +106,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) slave = ds->ports[port].slave; err = br_vlan_get_pvid(slave, &pvid); - if (err < 0) + if (!pvid || err < 0) /* There is no pvid on the bridge for this port, which is * perfectly valid. Nothing to restore, bye-bye! */ From 18d647ae74116bfee38953978501cea2960a0c25 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 15 Nov 2019 14:24:54 +0800 Subject: [PATCH 140/146] net: gemini: add missed free_netdev This driver forgets to free allocated netdev in remove like what is done in probe failure. Add the free to fix it. Signed-off-by: Chuhong Yuan Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index e736ce2c58ca..a8f4c69252ff 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2524,6 +2524,7 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev) struct gemini_ethernet_port *port = platform_get_drvdata(pdev); gemini_port_remove(port); + free_netdev(port->netdev); return 0; } From a36e629ee77a9df78310c972655092e41ce07ea4 Mon Sep 17 00:00:00 2001 From: Dag Moxnes Date: Fri, 15 Nov 2019 09:56:01 +0100 Subject: [PATCH 141/146] rds: ib: update WR sizes when bringing up connection Currently WR sizes are updated from rds_ib_sysctl_max_send_wr and rds_ib_sysctl_max_recv_wr when a connection is shut down. As a result, a connection being down while rds_ib_sysctl_max_send_wr or rds_ib_sysctl_max_recv_wr are updated, will not update the sizes when it comes back up. Move resizing of WRs to rds_ib_setup_qp so that connections will be setup with the most current WR sizes. Signed-off-by: Dag Moxnes Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 233f1368162b..18c6fac6ead9 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -450,6 +450,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct ib_qp_init_attr attr; struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; + unsigned long max_wrs; int ret, fr_queue_space; /* @@ -469,10 +470,15 @@ static int rds_ib_setup_qp(struct rds_connection *conn) /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); - if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) - rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); - if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) - rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); + max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ? + rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr; + if (ic->i_send_ring.w_nr != max_wrs) + rds_ib_ring_resize(&ic->i_send_ring, max_wrs); + + max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ? + rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr; + if (ic->i_recv_ring.w_nr != max_wrs) + rds_ib_ring_resize(&ic->i_recv_ring, max_wrs); /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; @@ -1099,8 +1105,9 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_flowctl = 0; atomic_set(&ic->i_credits, 0); - rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); - rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); + /* Re-init rings, but retain sizes. */ + rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr); + rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr); if (ic->i_ibinc) { rds_inc_put(&ic->i_ibinc->ii_inc); @@ -1147,8 +1154,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) * rds_ib_conn_shutdown() waits for these to be emptied so they * must be initialized before it can be called. */ - rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); - rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); + rds_ib_ring_init(&ic->i_send_ring, 0); + rds_ib_ring_init(&ic->i_recv_ring, 0); ic->conn = conn; conn->c_transport_data = ic; From 8204df72bea1a7d83d0777add6da98a41dfbdc34 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 15 Nov 2019 12:39:30 +0100 Subject: [PATCH 142/146] net/smc: fix fastopen for non-blocking connect() FASTOPEN does not work with SMC-sockets. Since SMC allows fallback to TCP native during connection start, the FASTOPEN setsockopts trigger this fallback, if the SMC-socket is still in state SMC_INIT. But if a FASTOPEN setsockopt is called after a non-blocking connect(), this is broken, and fallback does not make sense. This change complements commit cd2063604ea6 ("net/smc: avoid fallback in case of non-blocking connect") and fixes the syzbot reported problem "WARNING in smc_unhash_sk". Reported-by: syzbot+8488cc4cf1c9e09b8b86@syzkaller.appspotmail.com Fixes: e1bbdd570474 ("net/smc: reduce sock_put() for fallback sockets") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8edf1619f0e4..737b49909a7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1732,7 +1732,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ - if (sk->sk_state == SMC_INIT) { + if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { From b696083d5e9bd65a964211bb121513298efec187 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Fri, 15 Nov 2019 11:52:32 +0000 Subject: [PATCH 143/146] net: hns3: cleanup of stray struct hns3_link_mode_mapping This patch cleans-up the stray left over code. It has no functionality impact. Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 680c3508876d..52c9d204fe3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -70,11 +70,6 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 -struct hns3_link_mode_mapping { - u32 hns3_link_mode; - u32 ethtool_link_mode; -}; - static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { struct hnae3_handle *h = hns3_get_handle(ndev); From 7901cd97963d6cbde88fa25a4a446db3554c16c6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 15 Nov 2019 18:29:52 +0100 Subject: [PATCH 144/146] ipmr: Fix skb headroom in ipmr_get_route(). In route.c, inet_rtm_getroute_build_skb() creates an skb with no headroom. This skb is then used by inet_rtm_getroute() which may pass it to rt_fill_info() and, from there, to ipmr_get_route(). The later might try to reuse this skb by cloning it and prepending an IPv4 header. But since the original skb has no headroom, skb_push() triggers skb_under_panic(): skbuff: skb_under_panic: text:00000000ca46ad8a len:80 put:20 head:00000000cd28494e data:000000009366fd6b tail:0x3c end:0xec0 dev:veth0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:108! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 6 PID: 587 Comm: ip Not tainted 5.4.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 RIP: 0010:skb_panic+0xbf/0xd0 Code: 41 a2 ff 8b 4b 70 4c 8b 4d d0 48 c7 c7 20 76 f5 8b 44 8b 45 bc 48 8b 55 c0 48 8b 75 c8 41 54 41 57 41 56 41 55 e8 75 dc 7a ff <0f> 0b 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 RSP: 0018:ffff888059ddf0b0 EFLAGS: 00010286 RAX: 0000000000000086 RBX: ffff888060a315c0 RCX: ffffffff8abe4822 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88806c9a79cc RBP: ffff888059ddf118 R08: ffffed100d9361b1 R09: ffffed100d9361b0 R10: ffff88805c68aee3 R11: ffffed100d9361b1 R12: ffff88805d218000 R13: ffff88805c689fec R14: 000000000000003c R15: 0000000000000ec0 FS: 00007f6af184b700(0000) GS:ffff88806c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc8204a000 CR3: 0000000057b40006 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_push+0x7e/0x80 ipmr_get_route+0x459/0x6fa rt_fill_info+0x692/0x9f0 inet_rtm_getroute+0xd26/0xf20 rtnetlink_rcv_msg+0x45d/0x630 netlink_rcv_skb+0x1a5/0x220 rtnetlink_rcv+0x15/0x20 netlink_unicast+0x305/0x3a0 netlink_sendmsg+0x575/0x730 sock_sendmsg+0xb5/0xc0 ___sys_sendmsg+0x497/0x4f0 __sys_sendmsg+0xcb/0x150 __x64_sys_sendmsg+0x48/0x50 do_syscall_64+0xd2/0xac0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Actually the original skb used to have enough headroom, but the reserve_skb() call was lost with the introduction of inet_rtm_getroute_build_skb() by commit 404eb77ea766 ("ipv4: support sport, dport and ip_proto in RTM_GETROUTE"). We could reserve some headroom again in inet_rtm_getroute_build_skb(), but this function shouldn't be responsible for handling the special case of ipmr_get_route(). Let's handle that directly in ipmr_get_route() by calling skb_realloc_headroom() instead of skb_clone(). Fixes: 404eb77ea766 ("ipv4: support sport, dport and ip_proto in RTM_GETROUTE") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 716d5472c022..58007439cffd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2289,7 +2289,8 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_unlock(); return -ENODEV; } - skb2 = skb_clone(skb, GFP_ATOMIC); + + skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { read_unlock(&mrt_lock); rcu_read_unlock(); From 08e97aec700aeff54c4847f170e566cbd7e14e81 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 17 Nov 2019 08:48:17 +0800 Subject: [PATCH 145/146] Revert "hwrng: core - Freeze khwrng thread during suspend" This reverts commit 03a3bb7ae631 ("hwrng: core - Freeze khwrng thread during suspend"), ff296293b353 ("random: Support freezable kthreads in add_hwgenerator_randomness()") and 59b569480dc8 ("random: Use wait_event_freezable() in add_hwgenerator_randomness()"). These patches introduced regressions and we need more time to get them ready for mainline. Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 5 +---- drivers/char/random.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 80b850ef1bf6..8d53b8ef545c 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -422,9 +421,7 @@ static int hwrng_fillfn(void *unused) { long rc; - set_freezable(); - - while (!kthread_freezable_should_stop(NULL)) { + while (!kthread_should_stop()) { struct hwrng *rng; rng = get_current_rng(); diff --git a/drivers/char/random.c b/drivers/char/random.c index 5b799aa973a3..5d5ea4ce1442 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -327,7 +327,6 @@ #include #include #include -#include #include #include #include @@ -2440,8 +2439,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_freezable(random_write_wait, - kthread_should_stop() || + wait_event_interruptible(random_write_wait, kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); From af42d3466bdc8f39806b26f593604fdc54140bcb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Nov 2019 14:47:30 -0800 Subject: [PATCH 146/146] Linux 5.4-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 42bfda209cb8..9cd289196267 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Kleptomaniac Octopus # *DOCUMENTATION*