0847230e9b
Changes in 5.10.173
HID: asus: Remove check for same LED brightness on set
HID: asus: use spinlock to protect concurrent accesses
HID: asus: use spinlock to safely schedule workers
powerpc/mm: Rearrange if-else block to avoid clang warning
ARM: OMAP2+: Fix memory leak in realtime_counter_init()
arm64: dts: qcom: qcs404: use symbol names for PCIe resets
ARM: zynq: Fix refcount leak in zynq_early_slcr_init
arm64: dts: mediatek: mt8183: Fix systimer 13 MHz clock description
arm64: dts: qcom: sdm845-db845c: fix audio codec interrupt pin name
arm64: dts: qcom: sc7180: correct SPMI bus address cells
arm64: dts: meson-gx: Fix Ethernet MAC address unit name
arm64: dts: meson-g12a: Fix internal Ethernet PHY unit name
arm64: dts: meson-gx: Fix the SCPI DVFS node name and unit address
arm64: dts: qcom: ipq8074: correct USB3 QMP PHY-s clock output names
arm64: dts: qcom: Fix IPQ8074 PCIe PHY nodes
arm64: dts: qcom: ipq8074: fix PCIe PHY serdes size
arm64: dts: qcom: ipq8074: fix Gen3 PCIe QMP PHY
arm64: dts: qcom: ipq8074: correct Gen2 PCIe ranges
arm64: dts: qcom: ipq8074: fix Gen3 PCIe node
arm64: dts: qcom: ipq8074: correct PCIe QMP PHY output clock names
arm64: dts: meson: remove CPU opps below 1GHz for G12A boards
ARM: OMAP1: call platform_device_put() in error case in omap1_dm_timer_init()
ARM: s3c: fix s3c64xx_set_timer_source prototype
arm64: dts: ti: k3-j7200: Fix wakeup pinmux range
ARM: dts: exynos: correct wr-active property in Exynos3250 Rinato
ARM: imx: Call ida_simple_remove() for ida_simple_get
arm64: dts: amlogic: meson-gx: fix SCPI clock dvfs node name
arm64: dts: amlogic: meson-axg: fix SCPI clock dvfs node name
arm64: dts: amlogic: meson-gx: add missing SCPI sensors compatible
arm64: dts: amlogic: meson-gxl-s905d-sml5442tw: drop invalid clock-names property
arm64: dts: amlogic: meson-gx: add missing unit address to rng node name
arm64: dts: amlogic: meson-gxl: add missing unit address to eth-phy-mux node name
arm64: dts: amlogic: meson-gx-libretech-pc: fix update button name
arm64: dts: amlogic: meson-gxl-s905d-phicomm-n1: fix led node name
arm64: dts: amlogic: meson-gxbb-kii-pro: fix led node name
arm64: dts: renesas: beacon-renesom: Fix gpio expander reference
ARM: dts: sun8i: nanopi-duo2: Fix regulator GPIO reference
ARM: dts: imx7s: correct iomuxc gpr mux controller cells
arm64: dts: mediatek: mt7622: Add missing pwm-cells to pwm node
blk-mq: avoid sleep in blk_mq_alloc_request_hctx
blk-mq: remove stale comment for blk_mq_sched_mark_restart_hctx
blk-mq: correct stale comment of .get_budget
s390/dasd: Prepare for additional path event handling
s390/dasd: Fix potential memleak in dasd_eckd_init()
sched/deadline,rt: Remove unused parameter from pick_next_[rt|dl]_entity()
sched/rt: pick_next_rt_entity(): check list_entry
x86/perf/zhaoxin: Add stepping check for ZXC
block: bio-integrity: Copy flags when bio_integrity_payload is cloned
wifi: rsi: Fix memory leak in rsi_coex_attach()
wifi: rtlwifi: rtl8821ae: don't call kfree_skb() under spin_lock_irqsave()
wifi: rtlwifi: rtl8188ee: don't call kfree_skb() under spin_lock_irqsave()
wifi: rtlwifi: rtl8723be: don't call kfree_skb() under spin_lock_irqsave()
wifi: iwlegacy: common: don't call dev_kfree_skb() under spin_lock_irqsave()
wifi: libertas: fix memory leak in lbs_init_adapter()
wifi: rtl8xxxu: don't call dev_kfree_skb() under spin_lock_irqsave()
rtlwifi: fix -Wpointer-sign warning
wifi: rtlwifi: Fix global-out-of-bounds bug in _rtl8812ae_phy_set_txpower_limit()
libbpf: Fix btf__align_of() by taking into account field offsets
wifi: ipw2x00: don't call dev_kfree_skb() under spin_lock_irqsave()
wifi: ipw2200: fix memory leak in ipw_wdev_init()
wifi: wilc1000: fix potential memory leak in wilc_mac_xmit()
wifi: brcmfmac: fix potential memory leak in brcmf_netdev_start_xmit()
wifi: brcmfmac: unmap dma buffer in brcmf_msgbuf_alloc_pktid()
wifi: libertas_tf: don't call kfree_skb() under spin_lock_irqsave()
wifi: libertas: if_usb: don't call kfree_skb() under spin_lock_irqsave()
wifi: libertas: main: don't call kfree_skb() under spin_lock_irqsave()
wifi: libertas: cmdresp: don't call kfree_skb() under spin_lock_irqsave()
wifi: wl3501_cs: don't call kfree_skb() under spin_lock_irqsave()
crypto: x86/ghash - fix unaligned access in ghash_setkey()
ACPICA: Drop port I/O validation for some regions
genirq: Fix the return type of kstat_cpu_irqs_sum()
rcu-tasks: Improve comments explaining tasks_rcu_exit_srcu purpose
rcu-tasks: Remove preemption disablement around srcu_read_[un]lock() calls
rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()
lib/mpi: Fix buffer overrun when SG is too long
crypto: ccp: Use the stack for small SEV command buffers
crypto: ccp: Use the stack and common buffer for status commands
crypto: ccp - Use kzalloc for sev ioctl interfaces to prevent kernel memory leak
crypto: ccp - Avoid page allocation failure warning for SEV_GET_ID2
ACPICA: nsrepair: handle cases without a return value correctly
thermal/drivers/tsens: Drop msm8976-specific defines
thermal/drivers/qcom/tsens_v1: Enable sensor 3 on MSM8976
thermal/drivers/tsens: Add compat string for the qcom,msm8960
thermal/drivers/tsens: Sort out msm8976 vs msm8956 data
wifi: rtl8xxxu: Fix memory leaks with RTL8723BU, RTL8192EU
wifi: orinoco: check return value of hermes_write_wordrec()
wifi: ath9k: htc_hst: free skb in ath9k_htc_rx_msg() if there is no callback function
ath9k: hif_usb: simplify if-if to if-else
ath9k: htc: clean up statistics macros
wifi: ath9k: hif_usb: clean up skbs if ath9k_hif_usb_rx_stream() fails
wifi: ath9k: Fix potential stack-out-of-bounds write in ath9k_wmi_rsp_callback()
wifi: ath11k: Fix memory leak in ath11k_peer_rx_frag_setup
wifi: cfg80211: Fix extended KCK key length check in nl80211_set_rekey_data()
ACPI: battery: Fix missing NUL-termination with large strings
crypto: ccp - Failure on re-initialization due to duplicate sysfs filename
crypto: essiv - Handle EBUSY correctly
crypto: seqiv - Handle EBUSY correctly
powercap: fix possible name leak in powercap_register_zone()
x86/cpu: Init AP exception handling from cpu_init_secondary()
x86/microcode: Replace deprecated CPU-hotplug functions.
x86: Mark stop_this_cpu() __noreturn
x86/microcode: Rip out the OLD_INTERFACE
x86/microcode: Default-disable late loading
x86/microcode: Print previous version of microcode after reload
x86/microcode: Add a parameter to microcode_check() to store CPU capabilities
x86/microcode: Check CPU capabilities after late microcode update correctly
x86/microcode: Adjust late loading result reporting message
net: ethernet: ti: am65-cpsw: fix tx csum offload for multi mac mode
net: ethernet: ti: am65-cpsw: handle deferred probe with dev_err_probe()
net: ethernet: ti: add missing of_node_put before return
crypto: xts - Handle EBUSY correctly
leds: led-class: Add missing put_device() to led_put()
crypto: ccp - Refactor out sev_fw_alloc()
crypto: ccp - Flush the SEV-ES TMR memory before giving it to firmware
bpftool: profile online CPUs instead of possible
net/mlx5: Enhance debug print in page allocation failure
irqchip: Fix refcount leak in platform_irqchip_probe
irqchip/alpine-msi: Fix refcount leak in alpine_msix_init_domains
irqchip/irq-mvebu-gicp: Fix refcount leak in mvebu_gicp_probe
irqchip/ti-sci: Fix refcount leak in ti_sci_intr_irq_domain_probe
s390/vmem: fix empty page tables cleanup under KASAN
net: add sock_init_data_uid()
tun: tun_chr_open(): correctly initialize socket uid
tap: tap_open(): correctly initialize socket uid
OPP: fix error checking in opp_migrate_dentry()
Bluetooth: L2CAP: Fix potential user-after-free
libbpf: Fix alen calculation in libbpf_nla_dump_errormsg()
rds: rds_rm_zerocopy_callback() correct order for list_add_tail()
crypto: rsa-pkcs1pad - Use akcipher_request_complete
m68k: /proc/hardware should depend on PROC_FS
RISC-V: time: initialize hrtimer based broadcast clock event device
wifi: iwl3945: Add missing check for create_singlethread_workqueue
wifi: iwl4965: Add missing check for create_singlethread_workqueue()
wifi: mwifiex: fix loop iterator in mwifiex_update_ampdu_txwinsize()
selftests/bpf: Fix out-of-srctree build
crypto: crypto4xx - Call dma_unmap_page when done
wifi: mac80211: make rate u32 in sta_set_rate_info_rx()
thermal/drivers/hisi: Drop second sensor hi3660
can: esd_usb: Move mislocated storage of SJA1000_ECC_SEG bits in case of a bus error
bpf: Fix global subprog context argument resolution logic
irqchip/irq-brcmstb-l2: Set IRQ_LEVEL for level triggered interrupts
irqchip/irq-bcm7120-l2: Set IRQ_LEVEL for level triggered interrupts
selftests/net: Interpret UDP_GRO cmsg data as an int value
l2tp: Avoid possible recursive deadlock in l2tp_tunnel_register()
net: bcmgenet: fix MoCA LED control
selftest: fib_tests: Always cleanup before exit
sefltests: netdevsim: wait for devlink instance after netns removal
drm: Fix potential null-ptr-deref due to drmm_mode_config_init()
drm/fourcc: Add missing big-endian XRGB1555 and RGB565 formats
drm: mxsfb: DRM_MXSFB should depend on ARCH_MXS || ARCH_MXC
drm/bridge: megachips: Fix error handling in i2c_register_driver()
drm/vkms: Fix null-ptr-deref in vkms_release()
drm/vc4: dpi: Add option for inverting pixel clock and output enable
drm/vc4: dpi: Fix format mapping for RGB565
drm: tidss: Fix pixel format definition
gpu: ipu-v3: common: Add of_node_put() for reference returned by of_graph_get_port_by_id()
drm/msm/hdmi: Add missing check for alloc_ordered_workqueue
pinctrl: qcom: pinctrl-msm8976: Correct function names for wcss pins
pinctrl: stm32: Fix refcount leak in stm32_pctrl_get_irq_domain
pinctrl: rockchip: add support for rk3568
pinctrl: rockchip: do coding style for mux route struct
pinctrl: rockchip: Fix refcount leak in rockchip_pinctrl_parse_groups
drm/vc4: hvs: Set AXI panic modes
drm/vc4: hvs: Fix colour order for xRGB1555 on HVS5
drm/vc4: hdmi: Correct interlaced timings again
ASoC: fsl_sai: initialize is_dsp_mode flag
drm/msm/adreno: Fix null ptr access in adreno_gpu_cleanup()
ALSA: hda/ca0132: minor fix for allocation size
drm/msm/dpu: Disallow unallocated resources to be returned
drm/bridge: lt9611: fix sleep mode setup
drm/bridge: lt9611: fix HPD reenablement
drm/bridge: lt9611: fix polarity programming
drm/bridge: lt9611: fix programming of video modes
drm/bridge: lt9611: fix clock calculation
drm/bridge: lt9611: pass a pointer to the of node
drm/mipi-dsi: Fix byte order of 16-bit DCS set/get brightness
drm/msm: use strscpy instead of strncpy
drm/msm/dpu: Add check for cstate
drm/msm/dpu: Add check for pstates
drm/msm/mdp5: Add check for kzalloc
pinctrl: bcm2835: Remove of_node_put() in bcm2835_of_gpio_ranges_fallback()
pinctrl: mediatek: Initialize variable pullen and pullup to zero
pinctrl: mediatek: Initialize variable *buf to zero
gpu: host1x: Don't skip assigning syncpoints to channels
drm/mediatek: dsi: Reduce the time of dsi from LP11 to sending cmd
drm/mediatek: Use NULL instead of 0 for NULL pointer
drm/mediatek: Drop unbalanced obj unref
drm/mediatek: mtk_drm_crtc: Add checks for devm_kcalloc
drm/mediatek: Clean dangling pointer on bind error path
ASoC: soc-compress.c: fixup private_data on snd_soc_new_compress()
gpio: vf610: connect GPIO label to dev name
spi: dw_bt1: fix MUX_MMIO dependencies
ASoC: mchp-spdifrx: fix controls which rely on rsr register
ASoC: atmel: fix spelling mistakes
ASoC: mchp-spdifrx: fix return value in case completion times out
ASoC: mchp-spdifrx: fix controls that works with completion mechanism
ASoC: mchp-spdifrx: disable all interrupts in mchp_spdifrx_dai_remove()
ASoC: mchp-spdifrx: Fix uninitialized use of mr in mchp_spdifrx_hw_params()
ASoC: dt-bindings: meson: fix gx-card codec node regex
hwmon: (ltc2945) Handle error case in ltc2945_value_store
drm/amdgpu: fix enum odm_combine_mode mismatch
scsi: mpt3sas: Fix a memory leak
scsi: aic94xx: Add missing check for dma_map_single()
spi: bcm63xx-hsspi: fix pm_runtime
spi: bcm63xx-hsspi: Fix multi-bit mode setting
hwmon: (mlxreg-fan) Return zero speed for broken fan
ASoC: tlv320adcx140: fix 'ti,gpio-config' DT property init
dm: remove flush_scheduled_work() during local_exit()
NFS: Fix up handling of outstanding layoutcommit in nfs_update_inode()
NFSv4: keep state manager thread active if swap is enabled
nfs4trace: fix state manager flag printing
NFS: fix disabling of swap
spi: synquacer: Fix timeout handling in synquacer_spi_transfer_one()
ASoC: soc-dapm.h: fixup warning struct snd_pcm_substream not declared
HID: bigben: use spinlock to protect concurrent accesses
HID: bigben_worker() remove unneeded check on report_field
HID: bigben: use spinlock to safely schedule workers
hid: bigben_probe(): validate report count
nfsd: fix race to check ls_layouts
cifs: Fix lost destroy smbd connection when MR allocate failed
cifs: Fix warning and UAF when destroy the MR list
gfs2: jdata writepage fix
perf llvm: Fix inadvertent file creation
leds: led-core: Fix refcount leak in of_led_get()
perf tools: Fix auto-complete on aarch64
sparc: allow PM configs for sparc32 COMPILE_TEST
selftests/ftrace: Fix bash specific "==" operator
printf: fix errname.c list
objtool: add UACCESS exceptions for __tsan_volatile_read/write
mfd: pcf50633-adc: Fix potential memleak in pcf50633_adc_async_read()
clk: qcom: gcc-qcs404: disable gpll[04]_out_aux parents
clk: qcom: gcc-qcs404: fix names of the DSI clocks used as parents
RISC-V: fix funct4 definition for c.jalr in parse_asm.h
mtd: rawnand: sunxi: Fix the size of the last OOB region
Input: iqs269a - drop unused device node references
Input: iqs269a - increase interrupt handler return delay
Input: iqs269a - configure device with a single block write
linux/kconfig.h: replace IF_ENABLED() with PTR_IF() in <linux/kernel.h>
clk: renesas: cpg-mssr: Fix use after free if cpg_mssr_common_init() failed
clk: renesas: cpg-mssr: Remove superfluous check in resume code
clk: imx: avoid memory leak
Input: ads7846 - don't report pressure for ads7845
Input: ads7846 - convert to full duplex
Input: ads7846 - convert to one message
Input: ads7846 - always set last command to PWRDOWN
Input: ads7846 - don't check penirq immediately for 7845
clk: qcom: gpucc-sc7180: fix clk_dis_wait being programmed for CX GDSC
clk: qcom: gpucc-sdm845: fix clk_dis_wait being programmed for CX GDSC
powerpc/powernv/ioda: Skip unallocated resources when mapping to PE
clk: Honor CLK_OPS_PARENT_ENABLE in clk_core_is_enabled()
powerpc/perf/hv-24x7: add missing RTAS retry status handling
powerpc/pseries/lpar: add missing RTAS retry status handling
powerpc/pseries/lparcfg: add missing RTAS retry status handling
powerpc/rtas: make all exports GPL
powerpc/rtas: ensure 4KB alignment for rtas_data_buf
powerpc/eeh: Small refactor of eeh_handle_normal_event()
powerpc/eeh: Set channel state after notifying the drivers
MIPS: SMP-CPS: fix build error when HOTPLUG_CPU not set
MIPS: vpe-mt: drop physical_memsize
vdpa/mlx5: Don't clear mr struct on destroy MR
alpha/boot/tools/objstrip: fix the check for ELF header
Input: iqs269a - do not poll during suspend or resume
Input: iqs269a - do not poll during ATI
remoteproc: qcom_q6v5_mss: Use a carveout to authenticate modem headers
media: ti: cal: fix possible memory leak in cal_ctx_create()
media: platform: ti: Add missing check for devm_regulator_get
powerpc: Remove linker flag from KBUILD_AFLAGS
builddeb: clean generated package content
media: max9286: Fix memleak in max9286_v4l2_register()
media: ov2740: Fix memleak in ov2740_init_controls()
media: ov5675: Fix memleak in ov5675_init_controls()
media: i2c: ov772x: Fix memleak in ov772x_probe()
media: i2c: imx219: remove redundant writes
media: i2c: imx219: Split common registers from mode tables
media: i2c: imx219: Fix binning for RAW8 capture
media: rc: Fix use-after-free bugs caused by ene_tx_irqsim()
media: i2c: ov7670: 0 instead of -EINVAL was returned
media: usb: siano: Fix use after free bugs caused by do_submit_urb
media: saa7134: Use video_unregister_device for radio_dev
rpmsg: glink: Avoid infinite loop on intent for missing channel
udf: Define EFSCORRUPTED error code
ARM: dts: exynos: Use Exynos5420 compatible for the MIPI video phy
blk-iocost: fix divide by 0 error in calc_lcoefs()
wifi: ath9k: Fix use-after-free in ath9k_hif_usb_disconnect()
wifi: brcmfmac: Fix potential stack-out-of-bounds in brcmf_c_preinit_dcmds()
rcu: Make RCU_LOCKDEP_WARN() avoid early lockdep checks
rcu: Suppress smp_processor_id() complaint in synchronize_rcu_expedited_wait()
rcu-tasks: Make rude RCU-Tasks work well with CPU hotplug
wifi: ath11k: debugfs: fix to work with multiple PCI devices
thermal: intel: Fix unsigned comparison with less than zero
timers: Prevent union confusion from unexpected restart_syscall()
x86/bugs: Reset speculation control settings on init
wifi: brcmfmac: ensure CLM version is null-terminated to prevent stack-out-of-bounds
wifi: mt7601u: fix an integer underflow
inet: fix fast path in __inet_hash_connect()
ice: add missing checks for PF vsi type
ACPI: Don't build ACPICA with '-Os'
clocksource: Suspend the watchdog temporarily when high read latency detected
crypto: hisilicon: Wipe entire pool on error
net: bcmgenet: Add a check for oversized packets
m68k: Check syscall_trace_enter() return code
wifi: mt76: dma: free rx_head in mt76_dma_rx_cleanup
ACPI: video: Fix Lenovo Ideapad Z570 DMI match
net/mlx5: fw_tracer: Fix debug print
coda: Avoid partial allocation of sig_inputArgs
uaccess: Add minimum bounds check on kernel buffer size
PM: EM: fix memory leak with using debugfs_lookup()
Bluetooth: btusb: Add VID:PID 13d3:3529 for Realtek RTL8821CE
drm/amd/display: Fix potential null-deref in dm_resume
drm/omap: dsi: Fix excessive stack usage
HID: Add Mapping for System Microphone Mute
drm/tiny: ili9486: Do not assume 8-bit only SPI controllers
drm/radeon: free iio for atombios when driver shutdown
drm: amd: display: Fix memory leakage
drm/msm/dsi: Add missing check for alloc_ordered_workqueue
docs/scripts/gdb: add necessary make scripts_gdb step
ASoC: kirkwood: Iterate over array indexes instead of using pointer math
regulator: max77802: Bounds check regulator id against opmode
regulator: s5m8767: Bounds check id indexing into arrays
gfs2: Improve gfs2_make_fs_rw error handling
hwmon: (coretemp) Simplify platform device handling
pinctrl: at91: use devm_kasprintf() to avoid potential leaks
HID: logitech-hidpp: Don't restart communication if not necessary
drm: panel-orientation-quirks: Add quirk for Lenovo IdeaPad Duet 3 10IGL5
dm thin: add cond_resched() to various workqueue loops
dm cache: add cond_resched() to various workqueue loops
nfsd: zero out pointers after putting nfsd_files on COPY setup error
wifi: rtl8xxxu: fixing transmisison failure for rtl8192eu
firmware: coreboot: framebuffer: Ignore reserved pixel color bits
rtc: pm8xxx: fix set-alarm race
ipmi_ssif: Rename idle state and check
s390/extmem: return correct segment type in __segment_load()
s390: discard .interp section
s390/kprobes: fix irq mask clobbering on kprobe reenter from post_handler
s390/kprobes: fix current_kprobe never cleared after kprobes reenter
cifs: Fix uninitialized memory read in smb3_qfs_tcon()
ARM: dts: exynos: correct HDMI phy compatible in Exynos4
hfs: fix missing hfs_bnode_get() in __hfs_bnode_create
fs: hfsplus: fix UAF issue in hfsplus_put_super
exfat: fix reporting fs error when reading dir beyond EOF
exfat: fix unexpected EOF while reading dir
exfat: redefine DIR_DELETED as the bad cluster number
exfat: fix inode->i_blocks for non-512 byte sector size device
f2fs: fix information leak in f2fs_move_inline_dirents()
f2fs: fix cgroup writeback accounting with fs-layer encryption
ocfs2: fix defrag path triggering jbd2 ASSERT
ocfs2: fix non-auto defrag path not working issue
udf: Truncate added extents on failed expansion
udf: Do not bother merging very long extents
udf: Do not update file length for failed writes to inline files
udf: Preserve link count of system files
udf: Detect system inodes linked into directory hierarchy
udf: Fix file corruption when appending just after end of preallocated extent
KVM: Destroy target device if coalesced MMIO unregistration fails
KVM: x86: Inject #GP if WRMSR sets reserved bits in APIC Self-IPI
KVM: s390: disable migration mode when dirty tracking is disabled
x86/virt: Force GIF=1 prior to disabling SVM (for reboot flows)
x86/crash: Disable virt in core NMI crash handler to avoid double shootdown
x86/reboot: Disable virtualization in an emergency if SVM is supported
x86/reboot: Disable SVM, not just VMX, when stopping CPUs
x86/kprobes: Fix __recover_optprobed_insn check optimizing logic
x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range
x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter
x86/microcode/AMD: Add a @cpu parameter to the reloading functions
x86/microcode/AMD: Fix mixed steppings support
x86/speculation: Allow enabling STIBP with legacy IBRS
Documentation/hw-vuln: Document the interaction between IBRS and STIBP
brd: return 0/-error from brd_insert_page()
ima: Align ima_file_mmap() parameters with mmap_file LSM hook
irqdomain: Fix association race
irqdomain: Fix disassociation race
irqdomain: Drop bogus fwspec-mapping error handling
io_uring: handle TIF_NOTIFY_RESUME when checking for task_work
io_uring: mark task TASK_RUNNING before handling resume/task work
io_uring: add a conditional reschedule to the IOPOLL cancelation loop
io_uring/rsrc: disallow multi-source reg buffers
io_uring: remove MSG_NOSIGNAL from recvmsg
io_uring/poll: allow some retries for poll triggering spuriously
ALSA: ice1712: Do not left ice->gpio_mutex locked in aureon_add_controls()
ALSA: hda/realtek: Add quirk for HP EliteDesk 800 G6 Tower PC
jbd2: fix data missing when reusing bh which is ready to be checkpointed
ext4: optimize ea_inode block expansion
ext4: refuse to create ea block when umounted
mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type
dm: add cond_resched() to dm_wq_work()
wifi: rtl8xxxu: Use a longer retry limit of 48
wifi: cfg80211: Fix use after free for wext
thermal: intel: powerclamp: Fix cur_state for multi package system
dm flakey: fix logic when corrupting a bio
dm flakey: don't corrupt the zero page
ARM: dts: exynos: correct TMU phandle in Exynos4210
ARM: dts: exynos: correct TMU phandle in Exynos4
ARM: dts: exynos: correct TMU phandle in Odroid XU3 family
ARM: dts: exynos: correct TMU phandle in Exynos5250
ARM: dts: exynos: correct TMU phandle in Odroid XU
ARM: dts: exynos: correct TMU phandle in Odroid HC1
rbd: avoid use-after-free in do_rbd_add() when rbd_dev_create() fails
alpha: fix FEN fault handling
dax/kmem: Fix leak of memory-hotplug resources
mips: fix syscall_get_nr
media: ipu3-cio2: Fix PM runtime usage_count in driver unbind
remoteproc/mtk_scp: Move clk ops outside send_lock
docs: gdbmacros: print newest record
mm: memcontrol: deprecate charge moving
mm/thp: check and bail out if page in deferred queue already
ktest.pl: Give back console on Ctrt^C on monitor
ktest.pl: Fix missing "end_monitor" when machine check fails
ktest.pl: Add RUN_TIMEOUT option with default unlimited
ring-buffer: Handle race between rb_move_tail and rb_check_pages
scsi: qla2xxx: Fix link failure in NPIV environment
scsi: qla2xxx: Fix DMA-API call trace on NVMe LS requests
scsi: qla2xxx: Fix erroneous link down
scsi: ses: Don't attach if enclosure has no components
scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
scsi: ses: Fix possible desc_ptr out-of-bounds accesses
scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
riscv: jump_label: Fixup unaligned arch_static_branch function
PCI/PM: Observe reset delay irrespective of bridge_d3
PCI: hotplug: Allow marking devices as disconnected during bind/unbind
PCI: Avoid FLR for AMD FCH AHCI adapters
vfio/type1: prevent underflow of locked_vm via exec()
drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv
drm/radeon: Fix eDP for single-display iMac11,2
drm/edid: fix AVI infoframe aspect ratio handling
arm64: dts: qcom: ipq8074: fix Gen2 PCIe QMP PHY
wifi: ath9k: use proper statements in conditionals
pinctrl: rockchip: fix mux route data for rk3568
pinctrl: rockchip: fix reading pull type on rk3568
kbuild: Port silent mode detection to future gnu make.
net/sched: Retire tcindex classifier
fs/jfs: fix shift exponent db_agl2size negative
objtool: Fix memory leak in create_static_call_sections()
pwm: sifive: Reduce time the controller lock is held
pwm: sifive: Always let the first pwm_apply_state succeed
pwm: stm32-lp: fix the check on arr and cmp registers update
f2fs: use memcpy_{to,from}_page() where possible
fs: f2fs: initialize fsdata in pagecache_write()
um: vector: Fix memory leak in vector_config
ubi: ensure that VID header offset + VID header size <= alloc, size
ubifs: Fix build errors as symbol undefined
ubifs: Rectify space budget for ubifs_symlink() if symlink is encrypted
ubifs: Rectify space budget for ubifs_xrename()
ubifs: Fix wrong dirty space budget for dirty inode
ubifs: do_rename: Fix wrong space budget when target inode's nlink > 1
ubifs: Reserve one leb for each journal head while doing budget
ubi: Fix use-after-free when volume resizing failed
ubi: Fix unreferenced object reported by kmemleak in ubi_resize_volume()
ubifs: Fix memory leak in alloc_wbufs()
ubi: Fix possible null-ptr-deref in ubi_free_volume()
ubifs: Re-statistic cleaned znode count if commit failed
ubifs: dirty_cow_znode: Fix memleak in error handling path
ubifs: ubifs_writepage: Mark page dirty after writing inode failed
ubi: fastmap: Fix missed fm_anchor PEB in wear-leveling after disabling fastmap
ubi: Fix UAF wear-leveling entry in eraseblk_count_seq_show()
ubi: ubi_wl_put_peb: Fix infinite loop when wear-leveling work failed
x86: um: vdso: Add '%rcx' and '%r11' to the syscall clobber list
watchdog: at91sam9_wdt: use devm_request_irq to avoid missing free_irq() in error path
watchdog: Fix kmemleak in watchdog_cdev_register
watchdog: pcwd_usb: Fix attempting to access uninitialized memory
netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack()
netfilter: ebtables: fix table blob use-after-free
ipv6: Add lwtunnel encap size of all siblings in nexthop calculation
sctp: add a refcnt in sctp_stream_priorities to avoid a nested loop
net: fix __dev_kfree_skb_any() vs drop monitor
9p/xen: fix version parsing
9p/xen: fix connection sequence
9p/rdma: unmap receive dma buffer in rdma_request()/post_recv()
net/mlx5: Geneve, Fix handling of Geneve object id as error code
nfc: fix memory leak of se_io context in nfc_genl_se_io
net/sched: act_sample: fix action bind logic
ARM: dts: spear320-hmi: correct STMPE GPIO compatible
tcp: tcp_check_req() can be called from process context
vc_screen: modify vcs_size() handling in vcs_read()
rtc: sun6i: Always export the internal oscillator
scsi: ipr: Work around fortify-string warning
loop: loop_set_status_from_info() check before assignment
ASoC: adau7118: don't disable regulators on device unbind
ASoC: zl38060: Remove spurious gpiolib select
ASoC: zl38060 add gpiolib dependency
thermal: intel: quark_dts: fix error pointer dereference
thermal: intel: BXT_PMIC: select REGMAP instead of depending on it
tracing: Add NULL checks for buffer in ring_buffer_free_read_page()
firmware/efi sysfb_efi: Add quirk for Lenovo IdeaPad Duet 3
bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support
mfd: arizona: Use pm_runtime_resume_and_get() to prevent refcnt leak
IB/hfi1: Update RMT size calculation
media: uvcvideo: Handle cameras with invalid descriptors
media: uvcvideo: Handle errors from calls to usb_string
media: uvcvideo: Quirk for autosuspend in Logitech B910 and C910
media: uvcvideo: Silence memcpy() run-time false positive warnings
staging: emxx_udc: Add checks for dma_alloc_coherent()
tty: fix out-of-bounds access in tty_driver_lookup_tty()
tty: serial: fsl_lpuart: disable the CTS when send break signal
serial: sc16is7xx: setup GPIO controller later in probe
mei: bus-fixup:upon error print return values of send and receive
tools/iio/iio_utils:fix memory leak
iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_status_word()
iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_config_word()
PCI: loongson: Prevent LS7A MRRS increases
usb: host: xhci: mvebu: Iterate over array indexes instead of using pointer math
USB: ene_usb6250: Allocate enough memory for full object
usb: uvc: Enumerate valid values for color matching
usb: gadget: uvc: Make bSourceID read/write
PCI: Align extra resources for hotplug bridges properly
PCI: Take other bus devices into account when distributing resources
kernel/fail_function: fix memory leak with using debugfs_lookup()
PCI: loongson: Add more devices that need MRRS quirk
PCI: Add ACS quirk for Wangxun NICs
phy: rockchip-typec: Fix unsigned comparison with less than zero
soundwire: cadence: Remove wasted space in response_buf
soundwire: cadence: Drain the RX FIFO after an IO timeout
net: tls: avoid hanging tasks on the tx_lock
x86/resctrl: Apply READ_ONCE/WRITE_ONCE to task_struct.{rmid,closid}
x86/resctl: fix scheduler confusion with 'current'
drm/display/dp_mst: Fix down/up message handling after sink disconnect
drm/display/dp_mst: Fix down message handling after a packet reception error
Bluetooth: hci_sock: purge socket queues in the destruct() callback
tcp: Fix listen() regression in 5.10.163
drm/virtio: Fix error code in virtio_gpu_object_shmem_init()
media: uvcvideo: Provide sync and async uvc_ctrl_status_event
media: uvcvideo: Fix race condition with usb_kill_urb
Revert "scsi: mpt3sas: Fix return value check of dma_get_required_mask()"
scsi: mpt3sas: Don't change DMA mask while reallocating pools
scsi: mpt3sas: re-do lost mpt3sas DMA mask fix
scsi: mpt3sas: Remove usage of dma_get_required_mask() API
malidp: Fix NULL vs IS_ERR() checking
usb: gadget: uvc: fix missing mutex_unlock() if kstrtou8() fails
Linux 5.10.173
Change-Id: Iedcbc093feb171d48c70976d0aa99e972fac3ad1
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
654 lines
17 KiB
C
654 lines
17 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* blk-mq scheduling framework
|
|
*
|
|
* Copyright (C) 2016 Jens Axboe
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/list_sort.h>
|
|
|
|
#include <trace/events/block.h>
|
|
|
|
#include "blk.h"
|
|
#include "blk-mq.h"
|
|
#include "blk-mq-debugfs.h"
|
|
#include "blk-mq-sched.h"
|
|
#include "blk-mq-tag.h"
|
|
#include "blk-wbt.h"
|
|
|
|
void blk_mq_sched_assign_ioc(struct request *rq)
|
|
{
|
|
struct request_queue *q = rq->q;
|
|
struct io_context *ioc;
|
|
struct io_cq *icq;
|
|
|
|
/*
|
|
* May not have an IO context if it's a passthrough request
|
|
*/
|
|
ioc = current->io_context;
|
|
if (!ioc)
|
|
return;
|
|
|
|
spin_lock_irq(&q->queue_lock);
|
|
icq = ioc_lookup_icq(ioc, q);
|
|
spin_unlock_irq(&q->queue_lock);
|
|
|
|
if (!icq) {
|
|
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
|
|
if (!icq)
|
|
return;
|
|
}
|
|
get_io_context(icq->ioc);
|
|
rq->elv.icq = icq;
|
|
}
|
|
|
|
/*
|
|
* Mark a hardware queue as needing a restart.
|
|
*/
|
|
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
|
return;
|
|
|
|
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
|
|
|
|
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
|
return;
|
|
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
|
|
|
/*
|
|
* Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
|
|
* in blk_mq_run_hw_queue(). Its pair is the barrier in
|
|
* blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
|
|
* meantime new request added to hctx->dispatch is missed to check in
|
|
* blk_mq_run_hw_queue().
|
|
*/
|
|
smp_mb();
|
|
|
|
blk_mq_run_hw_queue(hctx, true);
|
|
}
|
|
|
|
static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
|
|
{
|
|
struct request *rqa = container_of(a, struct request, queuelist);
|
|
struct request *rqb = container_of(b, struct request, queuelist);
|
|
|
|
return rqa->mq_hctx > rqb->mq_hctx;
|
|
}
|
|
|
|
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx =
|
|
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
|
|
struct request *rq;
|
|
LIST_HEAD(hctx_list);
|
|
unsigned int count = 0;
|
|
|
|
list_for_each_entry(rq, rq_list, queuelist) {
|
|
if (rq->mq_hctx != hctx) {
|
|
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
|
|
goto dispatch;
|
|
}
|
|
count++;
|
|
}
|
|
list_splice_tail_init(rq_list, &hctx_list);
|
|
|
|
dispatch:
|
|
return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
|
|
}
|
|
|
|
/*
 * Delay handed to blk_mq_delay_run_hw_queues() by the dispatch helpers in
 * this file when a dispatch budget was released without dispatching.
 */
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
|
|
|
|
/*
|
|
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
|
* its queue by itself in its completion handler, so we don't need to
|
|
* restart queue if .get_budget() fails to get the budget.
|
|
*
|
|
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
|
* be run again. This is necessary to avoid starving flushes.
|
|
*/
|
|
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
struct elevator_queue *e = q->elevator;
|
|
bool multi_hctxs = false, run_queue = false;
|
|
bool dispatched = false, busy = false;
|
|
unsigned int max_dispatch;
|
|
LIST_HEAD(rq_list);
|
|
int count = 0;
|
|
|
|
if (hctx->dispatch_busy)
|
|
max_dispatch = 1;
|
|
else
|
|
max_dispatch = hctx->queue->nr_requests;
|
|
|
|
do {
|
|
struct request *rq;
|
|
|
|
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
|
|
break;
|
|
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
busy = true;
|
|
break;
|
|
}
|
|
|
|
if (!blk_mq_get_dispatch_budget(q))
|
|
break;
|
|
|
|
rq = e->type->ops.dispatch_request(hctx);
|
|
if (!rq) {
|
|
blk_mq_put_dispatch_budget(q);
|
|
/*
|
|
* We're releasing without dispatching. Holding the
|
|
* budget could have blocked any "hctx"s with the
|
|
* same queue and if we didn't dispatch then there's
|
|
* no guarantee anyone will kick the queue. Kick it
|
|
* ourselves.
|
|
*/
|
|
run_queue = true;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Now this rq owns the budget which has to be released
|
|
* if this rq won't be queued to driver via .queue_rq()
|
|
* in blk_mq_dispatch_rq_list().
|
|
*/
|
|
list_add_tail(&rq->queuelist, &rq_list);
|
|
if (rq->mq_hctx != hctx)
|
|
multi_hctxs = true;
|
|
} while (++count < max_dispatch);
|
|
|
|
if (!count) {
|
|
if (run_queue)
|
|
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
|
} else if (multi_hctxs) {
|
|
/*
|
|
* Requests from different hctx may be dequeued from some
|
|
* schedulers, such as bfq and deadline.
|
|
*
|
|
* Sort the requests in the list according to their hctx,
|
|
* dispatch batching requests from same hctx at a time.
|
|
*/
|
|
list_sort(NULL, &rq_list, sched_rq_cmp);
|
|
do {
|
|
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
|
|
} while (!list_empty(&rq_list));
|
|
} else {
|
|
dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
|
|
}
|
|
|
|
if (busy)
|
|
return -EAGAIN;
|
|
return !!dispatched;
|
|
}
|
|
|
|
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
unsigned long end = jiffies + HZ;
|
|
int ret;
|
|
|
|
do {
|
|
ret = __blk_mq_do_dispatch_sched(hctx);
|
|
if (ret != 1)
|
|
break;
|
|
if (need_resched() || time_is_before_jiffies(end)) {
|
|
blk_mq_delay_run_hw_queue(hctx, 0);
|
|
break;
|
|
}
|
|
} while (1);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
|
struct blk_mq_ctx *ctx)
|
|
{
|
|
unsigned short idx = ctx->index_hw[hctx->type];
|
|
|
|
if (++idx == hctx->nr_ctx)
|
|
idx = 0;
|
|
|
|
return hctx->ctxs[idx];
|
|
}
|
|
|
|
/*
|
|
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
|
* its queue by itself in its completion handler, so we don't need to
|
|
* restart queue if .get_budget() fails to get the budget.
|
|
*
|
|
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
|
* be run again. This is necessary to avoid starving flushes.
|
|
*/
|
|
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
LIST_HEAD(rq_list);
|
|
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
|
|
int ret = 0;
|
|
struct request *rq;
|
|
|
|
do {
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
ret = -EAGAIN;
|
|
break;
|
|
}
|
|
|
|
if (!sbitmap_any_bit_set(&hctx->ctx_map))
|
|
break;
|
|
|
|
if (!blk_mq_get_dispatch_budget(q))
|
|
break;
|
|
|
|
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
|
|
if (!rq) {
|
|
blk_mq_put_dispatch_budget(q);
|
|
/*
|
|
* We're releasing without dispatching. Holding the
|
|
* budget could have blocked any "hctx"s with the
|
|
* same queue and if we didn't dispatch then there's
|
|
* no guarantee anyone will kick the queue. Kick it
|
|
* ourselves.
|
|
*/
|
|
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Now this rq owns the budget which has to be released
|
|
* if this rq won't be queued to driver via .queue_rq()
|
|
* in blk_mq_dispatch_rq_list().
|
|
*/
|
|
list_add(&rq->queuelist, &rq_list);
|
|
|
|
/* round robin for fair dispatch */
|
|
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
|
|
|
|
} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
|
|
|
|
WRITE_ONCE(hctx->dispatch_from, ctx);
|
|
return ret;
|
|
}
|
|
|
|
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
struct elevator_queue *e = q->elevator;
|
|
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
|
|
int ret = 0;
|
|
LIST_HEAD(rq_list);
|
|
|
|
/*
|
|
* If we have previous entries on our dispatch list, grab them first for
|
|
* more fair dispatch.
|
|
*/
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
spin_lock(&hctx->lock);
|
|
if (!list_empty(&hctx->dispatch))
|
|
list_splice_init(&hctx->dispatch, &rq_list);
|
|
spin_unlock(&hctx->lock);
|
|
}
|
|
|
|
/*
|
|
* Only ask the scheduler for requests, if we didn't have residual
|
|
* requests from the dispatch list. This is to avoid the case where
|
|
* we only ever dispatch a fraction of the requests available because
|
|
* of low device queue depth. Once we pull requests out of the IO
|
|
* scheduler, we can no longer merge or sort them. So it's best to
|
|
* leave them there for as long as we can. Mark the hw queue as
|
|
* needing a restart in that case.
|
|
*
|
|
* We want to dispatch from the scheduler if there was nothing
|
|
* on the dispatch list or we were able to dispatch from the
|
|
* dispatch list.
|
|
*/
|
|
if (!list_empty(&rq_list)) {
|
|
blk_mq_sched_mark_restart_hctx(hctx);
|
|
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
|
if (has_sched_dispatch)
|
|
ret = blk_mq_do_dispatch_sched(hctx);
|
|
else
|
|
ret = blk_mq_do_dispatch_ctx(hctx);
|
|
}
|
|
} else if (has_sched_dispatch) {
|
|
ret = blk_mq_do_dispatch_sched(hctx);
|
|
} else if (hctx->dispatch_busy) {
|
|
/* dequeue request one by one from sw queue if queue is busy */
|
|
ret = blk_mq_do_dispatch_ctx(hctx);
|
|
} else {
|
|
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
|
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
|
|
/* RCU or SRCU read lock is needed before checking quiesced flag */
|
|
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
|
|
return;
|
|
|
|
hctx->run++;
|
|
|
|
/*
|
|
* A return of -EAGAIN is an indication that hctx->dispatch is not
|
|
* empty and we must run again in order to avoid starving flushes.
|
|
*/
|
|
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
|
|
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
|
|
blk_mq_run_hw_queue(hctx, true);
|
|
}
|
|
}
|
|
|
|
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
|
unsigned int nr_segs)
|
|
{
|
|
struct elevator_queue *e = q->elevator;
|
|
struct blk_mq_ctx *ctx;
|
|
struct blk_mq_hw_ctx *hctx;
|
|
bool ret = false;
|
|
enum hctx_type type;
|
|
|
|
if (e && e->type->ops.bio_merge)
|
|
return e->type->ops.bio_merge(q, bio, nr_segs);
|
|
|
|
ctx = blk_mq_get_ctx(q);
|
|
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
|
|
type = hctx->type;
|
|
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
|
|
list_empty_careful(&ctx->rq_lists[type]))
|
|
return false;
|
|
|
|
/* default per sw-queue merge */
|
|
spin_lock(&ctx->lock);
|
|
/*
|
|
* Reverse check our software queue for entries that we could
|
|
* potentially merge with. Currently includes a hand-wavy stop
|
|
* count of 8, to not spend too much time checking for merges.
|
|
*/
|
|
if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
|
|
ctx->rq_merged++;
|
|
ret = true;
|
|
}
|
|
|
|
spin_unlock(&ctx->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
|
|
{
|
|
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
|
|
|
|
void blk_mq_sched_request_inserted(struct request *rq)
|
|
{
|
|
trace_block_rq_insert(rq->q, rq);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
|
|
|
|
/*
 * Decide whether @rq must bypass the scheduler / software queues and go
 * straight to hctx->dispatch.
 *
 * Returns true for requests that are part of a flush sequence and for
 * passthrough requests.  For every other request it returns false and,
 * when @has_sched is set, tags the request with RQF_SORTED.  @hctx is not
 * used.
 */
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       bool has_sched,
				       struct request *rq)
{
	/*
	 * Flush and passthrough requests are dispatched directly.
	 *
	 * A passthrough request has to be added to hctx->dispatch directly.
	 * For some reason the device may be in a situation in which it
	 * cannot handle FS requests, so STS_RESOURCE is always returned and
	 * the FS request is added to hctx->dispatch.  A passthrough request
	 * may however be required at that time for fixing the problem.  If
	 * it were added to the scheduler queue there would be no chance to
	 * dispatch it, given that requests in hctx->dispatch are
	 * prioritized.
	 */
	if (rq->rq_flags & RQF_FLUSH_SEQ)
		return true;
	if (blk_rq_is_passthrough(rq))
		return true;

	if (has_sched)
		rq->rq_flags |= RQF_SORTED;

	return false;
}
|
|
|
|
/*
 * Insert a single request.
 *
 * @rq:        request to insert
 * @at_head:   insert at the head rather than the tail
 * @run_queue: run the hardware queue after inserting
 * @async:     passed to blk_mq_run_hw_queue() when @run_queue is set
 *
 * Bypass requests (see blk_mq_sched_bypass_insert()) go directly to the
 * dispatch list.  Everything else goes through the elevator's
 * .insert_requests hook if there is one, or onto the software queue under
 * ctx->lock otherwise.
 */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	/* With an elevator attached the request must not hold a tag yet. */
	WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

	if (blk_mq_sched_bypass_insert(hctx, e != NULL, rq)) {
		/*
		 * Firstly, normal IO requests are inserted into the scheduler
		 * queue or sw queue, while flush requests are added to the
		 * dispatch queue (hctx->dispatch) directly, and there is at
		 * most one in-flight flush request per hw queue, so it does
		 * not matter whether the flush request is added to the tail
		 * or the front of the dispatch queue.
		 *
		 * Secondly, in case of NCQ a flush request is a non-NCQ
		 * command, and queueing it fails while any normal IO request
		 * (NCQ command) is in flight.  Adding the flush rq to the
		 * front of hctx->dispatch more easily adds extra time to the
		 * flush rq's latency because of S_SCHED_RESTART than adding
		 * it to the tail does; the chance of flush merging then
		 * increases and fewer flush requests are issued to the
		 * controller.  About 10% of the time is observed to be saved
		 * in blktests block/004 on a disk attached to an AHCI/NCQ
		 * drive when the flush rq is added to the front of
		 * hctx->dispatch.
		 *
		 * Simply queue the flush rq to the front of hctx->dispatch
		 * so that intensive flush workloads benefit on NCQ HW.
		 */
		if (rq->rq_flags & RQF_FLUSH_SEQ)
			at_head = true;
		blk_mq_request_bypass_insert(rq, at_head, false);
	} else if (e && e->type->ops.insert_requests) {
		/* The elevator hook takes a list, so wrap the request in one. */
		LIST_HEAD(single);

		list_add(&rq->queuelist, &single);
		e->type->ops.insert_requests(hctx, &single, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}
|
|
|
|
/*
 * Insert a list of requests for @hctx / @ctx.
 *
 * With an elevator providing .insert_requests the list is handed to it.
 * Otherwise, when there is no elevator, the hctx is not busy and the
 * caller did not ask for an async run, direct issue is attempted first;
 * anything left on @list is inserted into the software queue.  The hw
 * queue is then run, unless direct issue emptied the list.
 */
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e;
	bool work_left = true;

	/*
	 * blk_mq_sched_insert_requests() is called from flush plug
	 * context only, and holds one usage counter to prevent the queue
	 * from being released.
	 */
	percpu_ref_get(&q->q_usage_counter);

	e = hctx->queue->elevator;
	if (e && e->type->ops.insert_requests) {
		e->type->ops.insert_requests(hctx, list, false);
	} else {
		/*
		 * Try to issue requests directly if the hw queue isn't
		 * busy in case of the 'none' scheduler; this may save one
		 * extra enqueue & dequeue to the sw queue.
		 */
		if (!hctx->dispatch_busy && !e && !run_queue_async) {
			blk_mq_try_issue_list_directly(hctx, list);
			work_left = !list_empty(list);
		}
		if (work_left)
			blk_mq_insert_requests(hctx, ctx, list);
	}

	if (work_left)
		blk_mq_run_hw_queue(hctx, run_queue_async);

	percpu_ref_put(&q->q_usage_counter);
}
|
|
|
|
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
|
|
struct blk_mq_hw_ctx *hctx,
|
|
unsigned int hctx_idx)
|
|
{
|
|
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
|
|
if (hctx->sched_tags) {
|
|
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
|
|
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
|
hctx->sched_tags = NULL;
|
|
}
|
|
}
|
|
|
|
static int blk_mq_sched_alloc_tags(struct request_queue *q,
|
|
struct blk_mq_hw_ctx *hctx,
|
|
unsigned int hctx_idx)
|
|
{
|
|
struct blk_mq_tag_set *set = q->tag_set;
|
|
/* Clear HCTX_SHARED so tags are init'ed */
|
|
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
int ret;
|
|
|
|
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
|
|
set->reserved_tags, flags);
|
|
if (!hctx->sched_tags)
|
|
return -ENOMEM;
|
|
|
|
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
|
|
if (ret)
|
|
blk_mq_sched_free_tags(set, hctx, hctx_idx);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* called in queue's release handler, tagset has gone away */
|
|
static void blk_mq_sched_tags_teardown(struct request_queue *q)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
/* Clear HCTX_SHARED so tags are freed */
|
|
unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
|
|
if (hctx->sched_tags) {
|
|
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
|
hctx->sched_tags = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
struct elevator_queue *eq;
|
|
unsigned int i;
|
|
int ret;
|
|
|
|
if (!e) {
|
|
q->elevator = NULL;
|
|
q->nr_requests = q->tag_set->queue_depth;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Default to double of smaller one between hw queue_depth and 128,
|
|
* since we don't split into sync/async like the old code did.
|
|
* Additionally, this is a per-hw queue depth.
|
|
*/
|
|
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
|
|
BLKDEV_MAX_RQ);
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
ret = blk_mq_sched_alloc_tags(q, hctx, i);
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
|
|
ret = e->ops.init_sched(q, e);
|
|
if (ret)
|
|
goto err;
|
|
|
|
blk_mq_debugfs_register_sched(q);
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
if (e->ops.init_hctx) {
|
|
ret = e->ops.init_hctx(hctx, i);
|
|
if (ret) {
|
|
eq = q->elevator;
|
|
blk_mq_sched_free_requests(q);
|
|
blk_mq_exit_sched(q, eq);
|
|
kobject_put(&eq->kobj);
|
|
return ret;
|
|
}
|
|
}
|
|
blk_mq_debugfs_register_sched_hctx(q, hctx);
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
blk_mq_sched_free_requests(q);
|
|
blk_mq_sched_tags_teardown(q);
|
|
q->elevator = NULL;
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* called in either blk_queue_cleanup or elevator_switch, tagset
|
|
* is required for freeing requests
|
|
*/
|
|
void blk_mq_sched_free_requests(struct request_queue *q)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
if (hctx->sched_tags)
|
|
blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
|
|
}
|
|
}
|
|
|
|
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
unsigned int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
blk_mq_debugfs_unregister_sched_hctx(hctx);
|
|
if (e->type->ops.exit_hctx && hctx->sched_data) {
|
|
e->type->ops.exit_hctx(hctx, i);
|
|
hctx->sched_data = NULL;
|
|
}
|
|
}
|
|
blk_mq_debugfs_unregister_sched(q);
|
|
if (e->type->ops.exit_sched)
|
|
e->type->ops.exit_sched(e);
|
|
blk_mq_sched_tags_teardown(q);
|
|
q->elevator = NULL;
|
|
}
|