477f5e6b9e
Changes in 5.10.188
media: atomisp: fix "variable dereferenced before check 'asd'"
x86/smp: Use dedicated cache-line for mwait_play_dead()
can: isotp: isotp_sendmsg(): fix return error fix on TX path
video: imsttfb: check for ioremap() failures
fbdev: imsttfb: Fix use after free bug in imsttfb_probe
HID: wacom: Use ktime_t rather than int when dealing with timestamps
HID: logitech-hidpp: add HIDPP_QUIRK_DELAYED_INIT for the T651.
Revert "thermal/drivers/mediatek: Use devm_of_iomap to avoid resource leak in mtk_thermal_probe"
scripts/tags.sh: Resolve gtags empty index generation
drm/amdgpu: Validate VM ioctl flags.
nubus: Partially revert proc_create_single_data() conversion
fs: pipe: reveal missing function protoypes
x86/resctrl: Only show tasks' pid in current pid namespace
blk-iocost: use spin_lock_irqsave in adjust_inuse_and_calc_cost
md/raid10: check slab-out-of-bounds in md_bitmap_get_counter
md/raid10: fix overflow of md/safe_mode_delay
md/raid10: fix wrong setting of max_corr_read_errors
md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request
md/raid10: fix io loss while replacement replace rdev
irqchip/jcore-aic: Kill use of irq_create_strict_mappings()
irqchip/jcore-aic: Fix missing allocation of IRQ descriptors
posix-timers: Prevent RT livelock in itimer_delete()
tracing/timer: Add missing hrtimer modes to decode_hrtimer_mode().
clocksource/drivers/cadence-ttc: Fix memory leak in ttc_timer_probe
PM: domains: fix integer overflow issues in genpd_parse_state()
perf/arm-cmn: Fix DTC reset
powercap: RAPL: Fix CONFIG_IOSF_MBI dependency
ARM: 9303/1: kprobes: avoid missing-declaration warnings
cpufreq: intel_pstate: Fix energy_performance_preference for passive
thermal/drivers/sun8i: Fix some error handling paths in sun8i_ths_probe()
rcuscale: Console output claims too few grace periods
rcuscale: Always log error message
rcuscale: Move shutdown from wait_event() to wait_event_idle()
rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup()
rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale
perf/ibs: Fix interface via core pmu events
x86/mm: Fix __swp_entry_to_pte() for Xen PV guests
evm: Complete description of evm_inode_setattr()
ima: Fix build warnings
pstore/ram: Add check for kstrdup
igc: Enable and fix RX hash usage by netstack
wifi: ath9k: fix AR9003 mac hardware hang check register offset calculation
wifi: ath9k: avoid referencing uninit memory in ath9k_wmi_ctrl_rx
samples/bpf: Fix buffer overflow in tcp_basertt
spi: spi-geni-qcom: Correct CS_TOGGLE bit in SPI_TRANS_CFG
wifi: wilc1000: fix for absent RSN capabilities WFA testcase
wifi: mwifiex: Fix the size of a memory allocation in mwifiex_ret_802_11_scan()
bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE
sctp: add bpf_bypass_getsockopt proto callback
libbpf: fix offsetof() and container_of() to work with CO-RE
nfc: constify several pointers to u8, char and sk_buff
nfc: llcp: fix possible use of uninitialized variable in nfc_llcp_send_connect()
bpftool: JIT limited misreported as negative value on aarch64
regulator: core: Fix more error checking for debugfs_create_dir()
regulator: core: Streamline debugfs operations
wifi: orinoco: Fix an error handling path in spectrum_cs_probe()
wifi: orinoco: Fix an error handling path in orinoco_cs_probe()
wifi: atmel: Fix an error handling path in atmel_probe()
wl3501_cs: Fix misspelling and provide missing documentation
net: create netdev->dev_addr assignment helpers
wl3501_cs: use eth_hw_addr_set()
wifi: wl3501_cs: Fix an error handling path in wl3501_probe()
wifi: ray_cs: Utilize strnlen() in parse_addr()
wifi: ray_cs: Drop useless status variable in parse_addr()
wifi: ray_cs: Fix an error handling path in ray_probe()
wifi: ath9k: don't allow to overwrite ENDPOINT0 attributes
wifi: rsi: Do not configure WoWlan in shutdown hook if not enabled
wifi: rsi: Do not set MMC_PM_KEEP_POWER in shutdown
watchdog/perf: define dummy watchdog_update_hrtimer_threshold() on correct config
watchdog/perf: more properly prevent false positives with turbo modes
kexec: fix a memory leak in crash_shrink_memory()
memstick r592: make memstick_debug_get_tpc_name() static
wifi: ath9k: Fix possible stall on ath9k_txq_list_has_key()
rtnetlink: extend RTEXT_FILTER_SKIP_STATS to IFLA_VF_INFO
wifi: iwlwifi: pull from TXQs with softirqs disabled
wifi: cfg80211: rewrite merging of inherited elements
wifi: ath9k: convert msecs to jiffies where needed
igc: Fix race condition in PTP tx code
net: stmmac: fix double serdes powerdown
netlink: fix potential deadlock in netlink_set_err()
netlink: do not hard code device address lenth in fdb dumps
selftests: rtnetlink: remove netdevsim device after ipsec offload test
gtp: Fix use-after-free in __gtp_encap_destroy().
net: axienet: Move reset before 64-bit DMA detection
sfc: fix crash when reading stats while NIC is resetting
nfc: llcp: simplify llcp_sock_connect() error paths
net: nfc: Fix use-after-free caused by nfc_llcp_find_local
lib/ts_bm: reset initial match offset for every block of text
netfilter: conntrack: dccp: copy entire header to stack buffer, not just basic one
netfilter: nf_conntrack_sip: fix the ct_sip_parse_numerical_param() return value.
ipvlan: Fix return value of ipvlan_queue_xmit()
netlink: Add __sock_i_ino() for __netlink_diag_dump().
radeon: avoid double free in ci_dpm_init()
drm/amd/display: Explicitly specify update type per plane info change
Input: drv260x - sleep between polling GO bit
drm/bridge: tc358768: always enable HS video mode
drm/bridge: tc358768: fix PLL parameters computation
drm/bridge: tc358768: fix PLL target frequency
drm/bridge: tc358768: fix TCLK_ZEROCNT computation
drm/bridge: tc358768: Add atomic_get_input_bus_fmts() implementation
drm/bridge: tc358768: fix TCLK_TRAILCNT computation
drm/bridge: tc358768: fix THS_ZEROCNT computation
drm/bridge: tc358768: fix TXTAGOCNT computation
drm/bridge: tc358768: fix THS_TRAILCNT computation
drm/vram-helper: fix function names in vram helper doc
ARM: dts: BCM5301X: Drop "clock-names" from the SPI node
ARM: dts: meson8b: correct uart_B and uart_C clock references
Input: adxl34x - do not hardcode interrupt trigger type
drm: sun4i_tcon: use devm_clk_get_enabled in `sun4i_tcon_init_clocks`
drm/panel: sharp-ls043t1le01: adjust mode settings
ARM: dts: stm32: Move ethernet MAC EEPROM from SoM to carrier boards
bus: ti-sysc: Fix dispc quirk masking bool variables
arm64: dts: microchip: sparx5: do not use PSCI on reference boards
RDMA/bnxt_re: Disable/kill tasklet only if it is enabled
RDMA/bnxt_re: Fix to remove unnecessary return labels
RDMA/bnxt_re: Use unique names while registering interrupts
RDMA/bnxt_re: Remove a redundant check inside bnxt_re_update_gid
RDMA/bnxt_re: Fix to remove an unnecessary log
ARM: dts: gta04: Move model property out of pinctrl node
arm64: dts: qcom: msm8916: correct camss unit address
arm64: dts: qcom: msm8994: correct SPMI unit address
arm64: dts: qcom: msm8996: correct camss unit address
drm/panel: simple: fix active size for Ampire AM-480272H3TMQW-T01H
ARM: ep93xx: fix missing-prototype warnings
ARM: omap2: fix missing tick_broadcast() prototype
arm64: dts: qcom: apq8096: fix fixed regulator name property
ARM: dts: stm32: Shorten the AV96 HDMI sound card name
memory: brcmstb_dpfe: fix testing array offset after use
ASoC: es8316: Increment max value for ALC Capture Target Volume control
ASoC: es8316: Do not set rate constraints for unsupported MCLKs
ARM: dts: meson8: correct uart_B and uart_C clock references
soc/fsl/qe: fix usb.c build errors
IB/hfi1: Use bitmap_zalloc() when applicable
IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors
IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate
RDMA: Remove uverbs_ex_cmd_mask values that are linked to functions
RDMA/hns: Fix coding style issues
RDMA/hns: Use refcount_t APIs for HEM
RDMA/hns: Clean the hardware related code for HEM
RDMA/hns: Fix hns_roce_table_get return value
ARM: dts: iwg20d-q7-common: Fix backlight pwm specifier
arm64: dts: renesas: ulcb-kf: Remove flow control for SCIF1
fbdev: omapfb: lcd_mipid: Fix an error handling path in mipid_spi_probe()
arm64: dts: ti: k3-j7200: Fix physical address of pin
ARM: dts: stm32: Fix audio routing on STM32MP15xx DHCOM PDK2
ARM: dts: stm32: fix i2s endpoint format property for stm32mp15xx-dkx
hwmon: (gsc-hwmon) fix fan pwm temperature scaling
hwmon: (adm1275) enable adm1272 temperature reporting
hwmon: (adm1275) Allow setting sample averaging
hwmon: (pmbus/adm1275) Fix problems with temperature monitoring on ADM1272
ARM: dts: BCM5301X: fix duplex-full => full-duplex
drm/amdkfd: Fix potential deallocation of previously deallocated memory.
drm/radeon: fix possible division-by-zero errors
amdgpu: validate offset_in_bo of drm_amdgpu_gem_va
RDMA/bnxt_re: wraparound mbox producer index
RDMA/bnxt_re: Avoid calling wake_up threads from spin_lock context
clk: imx: clk-imx8mn: fix memory leak in imx8mn_clocks_probe
clk: imx: clk-imx8mp: improve error handling in imx8mp_clocks_probe()
clk: tegra: tegra124-emc: Fix potential memory leak
ALSA: ac97: Fix possible NULL dereference in snd_ac97_mixer
drm/msm/dpu: do not enable color-management if DSPPs are not available
drm/msm/dp: Free resources after unregistering them
clk: vc5: check memory returned by kasprintf()
clk: cdce925: check return value of kasprintf()
clk: si5341: Allow different output VDD_SEL values
clk: si5341: Add sysfs properties to allow checking/resetting device faults
clk: si5341: return error if one synth clock registration fails
clk: si5341: check return value of {devm_}kasprintf()
clk: si5341: free unused memory on probe failure
clk: keystone: sci-clk: check return value of kasprintf()
clk: ti: clkctrl: check return value of kasprintf()
drivers: meson: secure-pwrc: always enable DMA domain
ovl: update of dentry revalidate flags after copy up
ASoC: imx-audmix: check return value of devm_kasprintf()
PCI: cadence: Fix Gen2 Link Retraining process
scsi: qedf: Fix NULL dereference in error handling
pinctrl: bcm2835: Handle gpiochip_add_pin_range() errors
PCI/ASPM: Disable ASPM on MFD function removal to avoid use-after-free
scsi: 3w-xxxx: Add error handling for initialization failure in tw_probe()
PCI: pciehp: Cancel bringup sequence if card is not present
PCI: ftpci100: Release the clock resources
PCI: Add pci_clear_master() stub for non-CONFIG_PCI
perf bench: Use unbuffered output when pipe/tee'ing to a file
perf bench: Add missing setlocale() call to allow usage of %'d style formatting
pinctrl: cherryview: Return correct value if pin in push-pull mode
kcsan: Don't expect 64 bits atomic builtins from 32 bits architectures
perf script: Fixup 'struct evsel_script' method prefix
perf script: Fix allocation of evsel->priv related to per-event dump files
perf dwarf-aux: Fix off-by-one in die_get_varname()
pinctrl: at91-pio4: check return value of devm_kasprintf()
powerpc/powernv/sriov: perform null check on iov before dereferencing iov
mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t *
mm: rename p4d_page_vaddr to p4d_pgtable and make it return pud_t *
powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo
powerpc/mm/dax: Fix the condition when checking if altmap vmemap can cross-boundary
hwrng: virtio - add an internal buffer
hwrng: virtio - don't wait on cleanup
hwrng: virtio - don't waste entropy
hwrng: virtio - always add a pending request
hwrng: virtio - Fix race on data_avail and actual data
crypto: nx - fix build warnings when DEBUG_FS is not enabled
modpost: fix section mismatch message for R_ARM_ABS32
modpost: fix section mismatch message for R_ARM_{PC24,CALL,JUMP24}
crypto: marvell/cesa - Fix type mismatch warning
modpost: fix off by one in is_executable_section()
ARC: define ASM_NL and __ALIGN(_STR) outside #ifdef __ASSEMBLY__ guard
NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION
dax: Fix dax_mapping_release() use after free
dax: Introduce alloc_dev_dax_id()
hwrng: st - keep clock enabled while hwrng is registered
io_uring: ensure IOPOLL locks around deferred work
USB: serial: option: add LARA-R6 01B PIDs
usb: dwc3: gadget: Propagate core init errors to UDC during pullup
phy: tegra: xusb: Clear the driver reference in usb-phy dev
block: fix signed int overflow in Amiga partition support
block: change all __u32 annotations to __be32 in affs_hardblocks.h
SUNRPC: Fix UAF in svc_tcp_listen_data_ready()
w1: w1_therm: fix locking behavior in convert_t
w1: fix loop in w1_fini()
sh: j2: Use ioremap() to translate device tree address into kernel memory
serial: 8250: omap: Fix freeing of resources on failed register
clk: qcom: gcc-ipq6018: Use floor ops for sdcc clocks
media: usb: Check az6007_read() return value
media: videodev2.h: Fix struct v4l2_input tuner index comment
media: usb: siano: Fix warning due to null work_func_t function pointer
clk: qcom: reset: Allow specifying custom reset delay
clk: qcom: reset: support resetting multiple bits
clk: qcom: ipq6018: fix networking resets
usb: dwc3: qcom: Fix potential memory leak
usb: gadget: u_serial: Add null pointer check in gserial_suspend
extcon: Fix kernel doc of property fields to avoid warnings
extcon: Fix kernel doc of property capability fields to avoid warnings
usb: phy: phy-tahvo: fix memory leak in tahvo_usb_probe()
usb: hide unused usbfs_notify_suspend/resume functions
serial: 8250: lock port for stop_rx() in omap8250_irq()
serial: 8250: lock port for UART_IER access in omap8250_irq()
kernfs: fix missing kernfs_idr_lock to remove an ID from the IDR
coresight: Fix loss of connection info when a module is unloaded
mfd: rt5033: Drop rt5033-battery sub-device
media: venus: helpers: Fix ALIGN() of non power of two
media: atomisp: gmin_platform: fix out_len in gmin_get_config_dsm_var()
KVM: s390: fix KVM_S390_GET_CMMA_BITS for GFNs in memslot holes
usb: dwc3: qcom: Release the correct resources in dwc3_qcom_remove()
usb: dwc3: qcom: Fix an error handling path in dwc3_qcom_probe()
usb: common: usb-conn-gpio: Set last role to unknown before initial detection
usb: dwc3-meson-g12a: Fix an error handling path in dwc3_meson_g12a_probe()
mfd: intel-lpss: Add missing check for platform_get_resource
Revert "usb: common: usb-conn-gpio: Set last role to unknown before initial detection"
serial: 8250_omap: Use force_suspend and resume for system suspend
test_firmware: return ENOMEM instead of ENOSPC on failed memory allocation
mfd: stmfx: Fix error path in stmfx_chip_init
mfd: stmfx: Nullify stmfx->vdd in case of error
KVM: s390: vsie: fix the length of APCB bitmap
mfd: stmpe: Only disable the regulators if they are enabled
phy: tegra: xusb: check return value of devm_kzalloc()
pwm: imx-tpm: force 'real_period' to be zero in suspend
pwm: sysfs: Do not apply state to already disabled PWMs
rtc: st-lpc: Release some resources in st_rtc_probe() in case of error
media: cec: i2c: ch7322: also select REGMAP
sctp: fix potential deadlock on &net->sctp.addr_wq_lock
Add MODULE_FIRMWARE() for FIRMWARE_TG357766.
net: dsa: vsc73xx: fix MTU configuration
spi: bcm-qspi: return error if neither hif_mspi nor mspi is available
mailbox: ti-msgmgr: Fill non-message tx data fields with 0x0
f2fs: fix error path handling in truncate_dnode()
octeontx2-af: Fix mapping for NIX block from CGX connection
powerpc: allow PPC_EARLY_DEBUG_CPM only when SERIAL_CPM=y
net: bridge: keep ports without IFF_UNICAST_FLT in BR_PROMISC mode
tcp: annotate data races in __tcp_oow_rate_limited()
xsk: Honor SO_BINDTODEVICE on bind
net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX
pptp: Fix fib lookup calls.
net: dsa: tag_sja1105: fix MAC DA patching from meta frames
s390/qeth: Fix vipa deletion
sh: dma: Fix DMA channel offset calculation
apparmor: fix missing error check for rhashtable_insert_fast
i2c: xiic: Defer xiic_wakeup() and __xiic_start_xfer() in xiic_process()
i2c: xiic: Don't try to handle more interrupt events after error
ALSA: jack: Fix mutex call in snd_jack_report()
i2c: qup: Add missing unwind goto in qup_i2c_probe()
NFSD: add encoding of op_recall flag for write delegation
io_uring: wait interruptibly for request completions on exit
mmc: core: disable TRIM on Kingston EMMC04G-M627
mmc: core: disable TRIM on Micron MTFC4GACAJCN-1M
mmc: mmci: Set PROBE_PREFER_ASYNCHRONOUS
mmc: sdhci: fix DMA configure compatibility issue when 64bit DMA mode is used.
bcache: fixup btree_cache_wait list damage
bcache: Remove unnecessary NULL point check in node allocations
bcache: Fix __bch_btree_node_alloc to make the failure behavior consistent
um: Use HOST_DIR for mrproper
integrity: Fix possible multiple allocation in integrity_inode_get()
autofs: use flexible array in ioctl structure
shmem: use ramfs_kill_sb() for kill_sb method of ramfs-based tmpfs
jffs2: reduce stack usage in jffs2_build_xattr_subsystem()
fs: avoid empty option when generating legacy mount string
ext4: Remove ext4 locking of moved directory
Revert "f2fs: fix potential corruption when moving a directory"
fs: Establish locking order for unrelated directories
fs: Lock moved directories
btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
btrfs: fix race when deleting quota root from the dirty cow roots list
ASoC: mediatek: mt8173: Fix irq error path
ASoC: mediatek: mt8173: Fix snd_soc_component_initialize error path
ARM: orion5x: fix d2net gpio initialization
leds: trigger: netdev: Recheck NETDEV_LED_MODE_LINKUP on dev rename
fs: no need to check source
fanotify: disallow mount/sb marks on kernel internal pseudo fs
tpm, tpm_tis: Claim locality in interrupt handler
selftests/bpf: Add verifier test for PTR_TO_MEM spill
block: add overflow checks for Amiga partition support
sh: pgtable-3level: Fix cast to pointer from integer of different size
netfilter: nf_tables: use net_generic infra for transaction data
netfilter: nf_tables: add rescheduling points during loop detection walks
netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE
netfilter: nf_tables: fix chain binding transaction logic
netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain
netfilter: nf_tables: reject unbound anonymous set before commit phase
netfilter: nf_tables: reject unbound chain set before commit phase
netfilter: nftables: rename set element data activation/deactivation functions
netfilter: nf_tables: drop map element references from preparation phase
netfilter: nf_tables: unbind non-anonymous set if rule construction fails
netfilter: nf_tables: fix scheduling-while-atomic splat
netfilter: conntrack: Avoid nf_ct_helper_hash uses after free
netfilter: nf_tables: do not ignore genmask when looking up chain by id
netfilter: nf_tables: prevent OOB access in nft_byteorder_eval
wireguard: queueing: use saner cpu selection wrapping
wireguard: netlink: send staged packets when setting initial private key
tty: serial: fsl_lpuart: add earlycon for imx8ulp platform
rcu-tasks: Mark ->trc_reader_nesting data races
rcu-tasks: Mark ->trc_reader_special.b.need_qs data races
rcu-tasks: Simplify trc_read_check_handler() atomic operations
block/partition: fix signedness issue for Amiga partitions
io_uring: Use io_schedule* in cqring wait
io_uring: add reschedule point to handle_tw_list()
net: lan743x: Don't sleep in atomic context
workqueue: clean up WORK_* constant types, clarify masking
drm/panel: simple: Add connector_type for innolux_at043tn24
drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags
igc: Remove delay during TX ring configuration
net/mlx5e: fix double free in mlx5e_destroy_flow_table
net/mlx5e: Check for NOT_READY flag state after locking
igc: set TP bit in 'supported' and 'advertising' fields of ethtool_link_ksettings
scsi: qla2xxx: Fix error code in qla2x00_start_sp()
net: mvneta: fix txq_map in case of txq_number==1
net/sched: cls_fw: Fix improper refcount update leads to use-after-free
gve: Set default duplex configuration to full
ionic: remove WARN_ON to prevent panic_on_warn
net: bgmac: postpone turning IRQs off to avoid SoC hangs
net: prevent skb corruption on frag list segmentation
icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in icmp6_dev().
udp6: fix udp6_ehashfn() typo
ntb: idt: Fix error handling in idt_pci_driver_init()
NTB: amd: Fix error handling in amd_ntb_pci_driver_init()
ntb: intel: Fix error handling in intel_ntb_pci_driver_init()
NTB: ntb_transport: fix possible memory leak while device_register() fails
NTB: ntb_tool: Add check for devm_kcalloc
ipv6/addrconf: fix a potential refcount underflow for idev
platform/x86: wmi: remove unnecessary argument
platform/x86: wmi: use guid_t and guid_equal()
platform/x86: wmi: move variables
platform/x86: wmi: Break possible infinite loop when parsing GUID
igc: Fix launchtime before start of cycle
igc: Fix inserting of empty frame for launchtime
riscv: bpf: Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to core
riscv: bpf: Avoid breaking W^X
bpf, riscv: Support riscv jit to provide bpf_line_info
riscv, bpf: Fix inconsistent JIT image generation
erofs: avoid infinite loop in z_erofs_do_read_page() when reading beyond EOF
wifi: airo: avoid uninitialized warning in airo_get_rate()
net/sched: flower: Ensure both minimum and maximum ports are specified
netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write()
net/sched: make psched_mtu() RTNL-less safe
net/sched: sch_qfq: refactor parsing of netlink parameters
net/sched: sch_qfq: account for stab overhead in qfq_enqueue
nvme-pci: fix DMA direction of unmapping integrity data
f2fs: fix to avoid NULL pointer dereference f2fs_write_end_io()
pinctrl: amd: Fix mistake in handling clearing pins at startup
pinctrl: amd: Detect internal GPIO0 debounce handling
pinctrl: amd: Only use special debounce behavior for GPIO 0
tpm: tpm_vtpm_proxy: fix a race condition in /dev/vtpmx creation
mtd: rawnand: meson: fix unaligned DMA buffers handling
net: bcmgenet: Ensure MDIO unregistration has clocks enabled
powerpc: Fail build if using recordmcount with binutils v2.37
misc: fastrpc: Create fastrpc scalar with correct buffer count
erofs: fix compact 4B support for 16k block size
MIPS: Loongson: Fix cpu_probe_loongson() again
ext4: Fix reusing stale buffer heads from last failed mounting
ext4: fix wrong unit use in ext4_mb_clear_bb
ext4: get block from bh in ext4_free_blocks for fast commit replay
ext4: fix wrong unit use in ext4_mb_new_blocks
ext4: only update i_reserved_data_blocks on successful block allocation
jfs: jfs_dmap: Validate db_l2nbperpage while mounting
hwrng: imx-rngc - fix the timeout for init and self check
PCI/PM: Avoid putting EloPOS E2/S2/H2 PCIe Ports in D3cold
PCI: Add function 1 DMA alias quirk for Marvell 88SE9235
PCI: qcom: Disable write access to read only registers for IP v2.3.3
PCI: rockchip: Assert PCI Configuration Enable bit after probe
PCI: rockchip: Write PCI Device ID to correct register
PCI: rockchip: Add poll and timeout to wait for PHY PLLs to be locked
PCI: rockchip: Fix legacy IRQ generation for RK3399 PCIe endpoint core
PCI: rockchip: Use u32 variable to access 32-bit registers
PCI: rockchip: Set address alignment for endpoint mode
misc: pci_endpoint_test: Free IRQs before removing the device
misc: pci_endpoint_test: Re-init completion for every test
md/raid0: add discard support for the 'original' layout
fs: dlm: return positive pid value for F_GETLK
drm/atomic: Allow vblank-enabled + self-refresh "disable"
drm/rockchip: vop: Leave vblank enabled in self-refresh
drm/amd/display: Correct `DMUB_FW_VERSION` macro
serial: atmel: don't enable IRQs prematurely
tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() in case of error
tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() when iterating clk
firmware: stratix10-svc: Fix a potential resource leak in svc_create_memory_pool()
ceph: don't let check_caps skip sending responses for revoke msgs
xhci: Fix resume issue of some ZHAOXIN hosts
xhci: Fix TRB prefetch issue of ZHAOXIN hosts
xhci: Show ZHAOXIN xHCI root hub speed correctly
meson saradc: fix clock divider mask length
Revert "8250: add support for ASIX devices with a FIFO bug"
s390/decompressor: fix misaligned symbol build error
tracing/histograms: Add histograms to hist_vars if they have referenced variables
samples: ftrace: Save required argument registers in sample trampolines
net: ena: fix shift-out-of-bounds in exponential backoff
ring-buffer: Fix deadloop issue on reading trace_pipe
xtensa: ISS: fix call to split_if_spec
tracing: Fix null pointer dereference in tracing_err_log_open()
tracing/probes: Fix not to count error code to total length
scsi: qla2xxx: Wait for io return on terminate rport
scsi: qla2xxx: Array index may go out of bound
scsi: qla2xxx: Fix buffer overrun
scsi: qla2xxx: Fix potential NULL pointer dereference
scsi: qla2xxx: Check valid rport returned by fc_bsg_to_rport()
scsi: qla2xxx: Correct the index of array
scsi: qla2xxx: Pointer may be dereferenced
scsi: qla2xxx: Remove unused nvme_ls_waitq wait queue
net/sched: sch_qfq: reintroduce lmax bound check for MTU
RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests
drm/atomic: Fix potential use-after-free in nonblocking commits
ALSA: hda/realtek - remove 3k pull low procedure
ALSA: hda/realtek: Enable Mute LED on HP Laptop 15s-eq2xxx
keys: Fix linking a duplicate key to a keyring's assoc_array
perf probe: Add test for regression introduced by switch to die_get_decl_file()
btrfs: fix warning when putting transaction with qgroups enabled after abort
fuse: revalidate: don't invalidate if interrupted
selftests: tc: set timeout to 15 minutes
selftests: tc: add 'ct' action kconfig dep
regmap: Drop initial version of maximum transfer length fixes
regmap: Account for register length in SMBus I/O limits
can: bcm: Fix UAF in bcm_proc_show()
drm/client: Fix memory leak in drm_client_target_cloned
drm/client: Fix memory leak in drm_client_modeset_probe
ASoC: fsl_sai: Disable bit clock with transmitter
ext4: correct inline offset when handling xattrs in inode body
debugobjects: Recheck debug_objects_enabled before reporting
nbd: Add the maximum limit of allocated index in nbd_dev_add
md: fix data corruption for raid456 when reshape restart while grow up
md/raid10: prevent soft lockup while flush writes
posix-timers: Ensure timer ID search-loop limit is valid
btrfs: add xxhash to fast checksum implementations
ACPI: button: Add lid disable DMI quirk for Nextbook Ares 8A
ACPI: video: Add backlight=native DMI quirk for Apple iMac11,3
ACPI: video: Add backlight=native DMI quirk for Lenovo ThinkPad X131e (3371 AMD version)
arm64: set __exception_irq_entry with __irq_entry as a default
arm64: mm: fix VA-range sanity check
sched/fair: Don't balance task to its current running CPU
wifi: ath11k: fix registration of 6Ghz-only phy without the full channel range
bpf: Address KCSAN report on bpf_lru_list
devlink: report devlink_port_type_warn source device
wifi: wext-core: Fix -Wstringop-overflow warning in ioctl_standard_iw_point()
wifi: iwlwifi: mvm: avoid baid size integer overflow
igb: Fix igb_down hung on surprise removal
spi: bcm63xx: fix max prepend length
fbdev: imxfb: warn about invalid left/right margin
pinctrl: amd: Use amd_pinconf_set() for all config options
net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field()
bridge: Add extack warning when enabling STP in netns.
iavf: Fix use-after-free in free_netdev
iavf: Fix out-of-bounds when setting channels on remove
security: keys: Modify mismatched function name
octeontx2-pf: Dont allocate BPIDs for LBK interfaces
tcp: annotate data-races around tcp_rsk(req)->ts_recent
net: ipv4: Use kfree_sensitive instead of kfree
net:ipv6: check return value of pskb_trim()
Revert "tcp: avoid the lookup process failing to get sk in ehash table"
fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe
llc: Don't drop packet from non-root netns.
netfilter: nf_tables: fix spurious set element insertion failure
netfilter: nf_tables: can't schedule in nft_chain_validate
netfilter: nft_set_pipapo: fix improper element removal
netfilter: nf_tables: skip bound chain in netns release path
netfilter: nf_tables: skip bound chain on rule flush
tcp: annotate data-races around tp->tcp_tx_delay
tcp: annotate data-races around tp->keepalive_time
tcp: annotate data-races around tp->keepalive_intvl
tcp: annotate data-races around tp->keepalive_probes
net: Introduce net.ipv4.tcp_migrate_req.
tcp: Fix data-races around sysctl_tcp_syn(ack)?_retries.
tcp: annotate data-races around icsk->icsk_syn_retries
tcp: annotate data-races around tp->linger2
tcp: annotate data-races around rskq_defer_accept
tcp: annotate data-races around tp->notsent_lowat
tcp: annotate data-races around icsk->icsk_user_timeout
tcp: annotate data-races around fastopenq.max_qlen
net: phy: prevent stale pointer dereference in phy_init()
tracing/histograms: Return an error if we fail to add histogram to hist_vars list
tracing: Fix memory leak of iter->temp when reading trace_pipe
ftrace: Store the order of pages allocated in ftrace_page
ftrace: Fix possible warning on checking all pages used in ftrace_process_locs()
Linux 5.10.188
Change-Id: Ibcc1adc43df5b8f649b12078eedd5d4f57de4578
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
950 lines
25 KiB
C
950 lines
25 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Generic INET transport hashtables
|
|
*
|
|
* Authors: Lotsa people, from code originally in tcp
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/random.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/memblock.h>
|
|
|
|
#include <net/addrconf.h>
|
|
#include <net/inet_connection_sock.h>
|
|
#include <net/inet_hashtables.h>
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
#include <net/inet6_hashtables.h>
|
|
#endif
|
|
#include <net/secure_seq.h>
|
|
#include <net/ip.h>
|
|
#include <net/tcp.h>
|
|
#include <net/sock_reuseport.h>
|
|
|
|
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
|
|
const __u16 lport, const __be32 faddr,
|
|
const __be16 fport)
|
|
{
|
|
static u32 inet_ehash_secret __read_mostly;
|
|
|
|
net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
|
|
|
|
return __inet_ehashfn(laddr, lport, faddr, fport,
|
|
inet_ehash_secret + net_hash_mix(net));
|
|
}
|
|
|
|
/* This function handles inet_sock, but also timewait and request sockets
|
|
* for IPv4/IPv6.
|
|
*/
|
|
static u32 sk_ehashfn(const struct sock *sk)
|
|
{
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
if (sk->sk_family == AF_INET6 &&
|
|
!ipv6_addr_v4mapped(&sk->sk_v6_daddr))
|
|
return inet6_ehashfn(sock_net(sk),
|
|
&sk->sk_v6_rcv_saddr, sk->sk_num,
|
|
&sk->sk_v6_daddr, sk->sk_dport);
|
|
#endif
|
|
return inet_ehashfn(sock_net(sk),
|
|
sk->sk_rcv_saddr, sk->sk_num,
|
|
sk->sk_daddr, sk->sk_dport);
|
|
}
|
|
|
|
/*
|
|
* Allocate and initialize a new local port bind bucket.
|
|
* The bindhash mutex for snum's hash chain must be held here.
|
|
*/
|
|
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
|
|
struct net *net,
|
|
struct inet_bind_hashbucket *head,
|
|
const unsigned short snum,
|
|
int l3mdev)
|
|
{
|
|
struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
|
|
|
|
if (tb) {
|
|
write_pnet(&tb->ib_net, net);
|
|
tb->l3mdev = l3mdev;
|
|
tb->port = snum;
|
|
tb->fastreuse = 0;
|
|
tb->fastreuseport = 0;
|
|
INIT_HLIST_HEAD(&tb->owners);
|
|
hlist_add_head(&tb->node, &head->chain);
|
|
}
|
|
return tb;
|
|
}
|
|
|
|
/*
|
|
* Caller must hold hashbucket lock for this tb with local BH disabled
|
|
*/
|
|
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
|
|
{
|
|
if (hlist_empty(&tb->owners)) {
|
|
__hlist_del(&tb->node);
|
|
kmem_cache_free(cachep, tb);
|
|
}
|
|
}
|
|
|
|
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
|
|
const unsigned short snum)
|
|
{
|
|
inet_sk(sk)->inet_num = snum;
|
|
sk_add_bind_node(sk, &tb->owners);
|
|
inet_csk(sk)->icsk_bind_hash = tb;
|
|
}
|
|
|
|
/*
|
|
* Get rid of any references to a local port held by the given sock.
|
|
*/
|
|
static void __inet_put_port(struct sock *sk)
|
|
{
|
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
|
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
|
|
hashinfo->bhash_size);
|
|
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
|
|
struct inet_bind_bucket *tb;
|
|
|
|
spin_lock(&head->lock);
|
|
tb = inet_csk(sk)->icsk_bind_hash;
|
|
__sk_del_bind_node(sk);
|
|
inet_csk(sk)->icsk_bind_hash = NULL;
|
|
inet_sk(sk)->inet_num = 0;
|
|
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
|
|
spin_unlock(&head->lock);
|
|
}
|
|
|
|
/*
 * Exported wrapper around __inet_put_port(): drops @sk's local port
 * reference with bottom halves disabled for the duration of the call.
 */
void inet_put_port(struct sock *sk)
|
|
{
|
|
local_bh_disable();
|
|
__inet_put_port(sk);
|
|
local_bh_enable();
|
|
}
|
|
EXPORT_SYMBOL(inet_put_port);
|
|
|
|
int __inet_inherit_port(const struct sock *sk, struct sock *child)
|
|
{
|
|
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
|
|
unsigned short port = inet_sk(child)->inet_num;
|
|
const int bhash = inet_bhashfn(sock_net(sk), port,
|
|
table->bhash_size);
|
|
struct inet_bind_hashbucket *head = &table->bhash[bhash];
|
|
struct inet_bind_bucket *tb;
|
|
int l3mdev;
|
|
|
|
spin_lock(&head->lock);
|
|
tb = inet_csk(sk)->icsk_bind_hash;
|
|
if (unlikely(!tb)) {
|
|
spin_unlock(&head->lock);
|
|
return -ENOENT;
|
|
}
|
|
if (tb->port != port) {
|
|
l3mdev = inet_sk_bound_l3mdev(sk);
|
|
|
|
/* NOTE: using tproxy and redirecting skbs to a proxy
|
|
* on a different listener port breaks the assumption
|
|
* that the listener socket's icsk_bind_hash is the same
|
|
* as that of the child socket. We have to look up or
|
|
* create a new bind bucket for the child here. */
|
|
inet_bind_bucket_for_each(tb, &head->chain) {
|
|
if (net_eq(ib_net(tb), sock_net(sk)) &&
|
|
tb->l3mdev == l3mdev && tb->port == port)
|
|
break;
|
|
}
|
|
if (!tb) {
|
|
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
|
|
sock_net(sk), head, port,
|
|
l3mdev);
|
|
if (!tb) {
|
|
spin_unlock(&head->lock);
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
inet_csk_update_fastreuse(tb, child);
|
|
}
|
|
inet_bind_hash(child, tb, port);
|
|
spin_unlock(&head->lock);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__inet_inherit_port);
|
|
|
|
static struct inet_listen_hashbucket *
|
|
inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
|
|
{
|
|
u32 hash;
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
if (sk->sk_family == AF_INET6)
|
|
hash = ipv6_portaddr_hash(sock_net(sk),
|
|
&sk->sk_v6_rcv_saddr,
|
|
inet_sk(sk)->inet_num);
|
|
else
|
|
#endif
|
|
hash = ipv4_portaddr_hash(sock_net(sk),
|
|
inet_sk(sk)->inet_rcv_saddr,
|
|
inet_sk(sk)->inet_num);
|
|
return inet_lhash2_bucket(h, hash);
|
|
}
|
|
|
|
static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
|
|
{
|
|
struct inet_listen_hashbucket *ilb2;
|
|
|
|
if (!h->lhash2)
|
|
return;
|
|
|
|
ilb2 = inet_lhash2_bucket_sk(h, sk);
|
|
|
|
spin_lock(&ilb2->lock);
|
|
if (sk->sk_reuseport && sk->sk_family == AF_INET6)
|
|
hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
|
|
&ilb2->head);
|
|
else
|
|
hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
|
|
&ilb2->head);
|
|
ilb2->count++;
|
|
spin_unlock(&ilb2->lock);
|
|
}
|
|
|
|
static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
|
|
{
|
|
struct inet_listen_hashbucket *ilb2;
|
|
|
|
if (!h->lhash2 ||
|
|
WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
|
|
return;
|
|
|
|
ilb2 = inet_lhash2_bucket_sk(h, sk);
|
|
|
|
spin_lock(&ilb2->lock);
|
|
hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
|
|
ilb2->count--;
|
|
spin_unlock(&ilb2->lock);
|
|
}
|
|
|
|
static inline int compute_score(struct sock *sk, struct net *net,
|
|
const unsigned short hnum, const __be32 daddr,
|
|
const int dif, const int sdif)
|
|
{
|
|
int score = -1;
|
|
|
|
if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
|
|
!ipv6_only_sock(sk)) {
|
|
if (sk->sk_rcv_saddr != daddr)
|
|
return -1;
|
|
|
|
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
|
|
return -1;
|
|
score = sk->sk_bound_dev_if ? 2 : 1;
|
|
|
|
if (sk->sk_family == PF_INET)
|
|
score++;
|
|
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
|
|
score++;
|
|
}
|
|
return score;
|
|
}
|
|
|
|
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
|
|
struct sk_buff *skb, int doff,
|
|
__be32 saddr, __be16 sport,
|
|
__be32 daddr, unsigned short hnum)
|
|
{
|
|
struct sock *reuse_sk = NULL;
|
|
u32 phash;
|
|
|
|
if (sk->sk_reuseport) {
|
|
phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
|
|
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
|
|
}
|
|
return reuse_sk;
|
|
}
|
|
|
|
/*
|
|
* Here are some nice properties to exploit here. The BSD API
|
|
* does not allow a listening sock to specify the remote port nor the
|
|
* remote address for the connection. So always assume those are both
|
|
* wildcarded during the search since they can never be otherwise.
|
|
*/
|
|
|
|
/* called with rcu_read_lock() : No refcount taken on the socket */
|
|
static struct sock *inet_lhash2_lookup(struct net *net,
|
|
struct inet_listen_hashbucket *ilb2,
|
|
struct sk_buff *skb, int doff,
|
|
const __be32 saddr, __be16 sport,
|
|
const __be32 daddr, const unsigned short hnum,
|
|
const int dif, const int sdif)
|
|
{
|
|
struct inet_connection_sock *icsk;
|
|
struct sock *sk, *result = NULL;
|
|
int score, hiscore = 0;
|
|
|
|
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
|
|
sk = (struct sock *)icsk;
|
|
score = compute_score(sk, net, hnum, daddr, dif, sdif);
|
|
if (score > hiscore) {
|
|
result = lookup_reuseport(net, sk, skb, doff,
|
|
saddr, sport, daddr, hnum);
|
|
if (result)
|
|
return result;
|
|
|
|
result = sk;
|
|
hiscore = score;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static inline struct sock *inet_lookup_run_bpf(struct net *net,
|
|
struct inet_hashinfo *hashinfo,
|
|
struct sk_buff *skb, int doff,
|
|
__be32 saddr, __be16 sport,
|
|
__be32 daddr, u16 hnum)
|
|
{
|
|
struct sock *sk, *reuse_sk;
|
|
bool no_reuseport;
|
|
|
|
if (hashinfo != &tcp_hashinfo)
|
|
return NULL; /* only TCP is supported */
|
|
|
|
no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
|
|
saddr, sport, daddr, hnum, &sk);
|
|
if (no_reuseport || IS_ERR_OR_NULL(sk))
|
|
return sk;
|
|
|
|
reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
|
|
if (reuse_sk)
|
|
sk = reuse_sk;
|
|
return sk;
|
|
}
|
|
|
|
struct sock *__inet_lookup_listener(struct net *net,
|
|
struct inet_hashinfo *hashinfo,
|
|
struct sk_buff *skb, int doff,
|
|
const __be32 saddr, __be16 sport,
|
|
const __be32 daddr, const unsigned short hnum,
|
|
const int dif, const int sdif)
|
|
{
|
|
struct inet_listen_hashbucket *ilb2;
|
|
struct sock *result = NULL;
|
|
unsigned int hash2;
|
|
|
|
/* Lookup redirect from BPF */
|
|
if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
|
|
result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
|
|
saddr, sport, daddr, hnum);
|
|
if (result)
|
|
goto done;
|
|
}
|
|
|
|
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
|
|
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
|
|
|
|
result = inet_lhash2_lookup(net, ilb2, skb, doff,
|
|
saddr, sport, daddr, hnum,
|
|
dif, sdif);
|
|
if (result)
|
|
goto done;
|
|
|
|
/* Lookup lhash2 with INADDR_ANY */
|
|
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
|
|
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
|
|
|
|
result = inet_lhash2_lookup(net, ilb2, skb, doff,
|
|
saddr, sport, htonl(INADDR_ANY), hnum,
|
|
dif, sdif);
|
|
done:
|
|
if (IS_ERR(result))
|
|
return NULL;
|
|
return result;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
|
|
|
|
/* All sockets share common refcount, but have different destructors */
|
|
void sock_gen_put(struct sock *sk)
|
|
{
|
|
if (!refcount_dec_and_test(&sk->sk_refcnt))
|
|
return;
|
|
|
|
if (sk->sk_state == TCP_TIME_WAIT)
|
|
inet_twsk_free(inet_twsk(sk));
|
|
else if (sk->sk_state == TCP_NEW_SYN_RECV)
|
|
reqsk_free(inet_reqsk(sk));
|
|
else
|
|
sk_free(sk);
|
|
}
|
|
EXPORT_SYMBOL_GPL(sock_gen_put);
|
|
|
|
/*
 * Drop the socket reference attached to @skb by handing skb->sk to
 * sock_gen_put().
 * NOTE(review): presumably installed as an skb destructor by early-demux
 * code - confirm against callers.
 */
void sock_edemux(struct sk_buff *skb)
|
|
{
|
|
sock_gen_put(skb->sk);
|
|
}
|
|
EXPORT_SYMBOL(sock_edemux);
|
|
|
|
/*
 * Look up a socket in the established hash (ehash) by its full 4-tuple.
 *
 * @saddr/@sport are the remote end and @daddr/@hnum the local end; the hash
 * is computed as inet_ehashfn(net, daddr, hnum, saddr, sport).
 *
 * The chain is walked with sk_nulls_for_each_rcu() and no bucket lock is
 * taken here.
 * NOTE(review): presumably the caller must be inside rcu_read_lock() -
 * confirm against callers.
 *
 * Returns the matching socket with one reference taken on sk_refcnt, or
 * NULL when nothing matches or the match was already at refcount zero.
 */
struct sock *__inet_lookup_established(struct net *net,
|
|
struct inet_hashinfo *hashinfo,
|
|
const __be32 saddr, const __be16 sport,
|
|
const __be32 daddr, const u16 hnum,
|
|
const int dif, const int sdif)
|
|
{
|
|
INET_ADDR_COOKIE(acookie, saddr, daddr);
|
|
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
|
|
struct sock *sk;
|
|
const struct hlist_nulls_node *node;
|
|
/* Optimize here for direct hit, only listening connections can
 * have wildcards anyways.
 */
|
|
unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
|
|
unsigned int slot = hash & hashinfo->ehash_mask;
|
|
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
|
|
|
|
begin:
|
|
sk_nulls_for_each_rcu(sk, node, &head->chain) {
|
|
/* Cheap filter first: skip entries whose stored hash differs. */
if (sk->sk_hash != hash)
|
|
continue;
|
|
if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
|
|
/* The socket is already being freed: report "not found". */
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
|
|
goto out;
|
|
/* Re-check the identity now that a reference is held; if it no
 * longer matches, drop the reference and restart the walk.
 */
if (unlikely(!INET_MATCH(net, sk, acookie,
|
|
ports, dif, sdif))) {
|
|
sock_gen_put(sk);
|
|
goto begin;
|
|
}
|
|
goto found;
|
|
}
|
|
}
|
|
/*
 * If the nulls value we got at the end of this lookup is
 * not the expected one, we must restart the lookup.
 * We probably met an item that was moved to another chain.
 */
|
|
if (get_nulls_value(node) != slot)
|
|
goto begin;
|
|
out:
|
|
sk = NULL;
|
|
found:
|
|
return sk;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__inet_lookup_established);
|
|
|
|
/* called with local bh disabled */
|
|
static int __inet_check_established(struct inet_timewait_death_row *death_row,
|
|
struct sock *sk, __u16 lport,
|
|
struct inet_timewait_sock **twp)
|
|
{
|
|
struct inet_hashinfo *hinfo = death_row->hashinfo;
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
__be32 daddr = inet->inet_rcv_saddr;
|
|
__be32 saddr = inet->inet_daddr;
|
|
int dif = sk->sk_bound_dev_if;
|
|
struct net *net = sock_net(sk);
|
|
int sdif = l3mdev_master_ifindex_by_index(net, dif);
|
|
INET_ADDR_COOKIE(acookie, saddr, daddr);
|
|
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
|
|
unsigned int hash = inet_ehashfn(net, daddr, lport,
|
|
saddr, inet->inet_dport);
|
|
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
|
|
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
|
|
struct sock *sk2;
|
|
const struct hlist_nulls_node *node;
|
|
struct inet_timewait_sock *tw = NULL;
|
|
|
|
spin_lock(lock);
|
|
|
|
sk_nulls_for_each(sk2, node, &head->chain) {
|
|
if (sk2->sk_hash != hash)
|
|
continue;
|
|
|
|
if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
|
|
if (sk2->sk_state == TCP_TIME_WAIT) {
|
|
tw = inet_twsk(sk2);
|
|
if (twsk_unique(sk, sk2, twp))
|
|
break;
|
|
}
|
|
goto not_unique;
|
|
}
|
|
}
|
|
|
|
/* Must record num and sport now. Otherwise we will see
|
|
* in hash table socket with a funny identity.
|
|
*/
|
|
inet->inet_num = lport;
|
|
inet->inet_sport = htons(lport);
|
|
sk->sk_hash = hash;
|
|
WARN_ON(!sk_unhashed(sk));
|
|
__sk_nulls_add_node_rcu(sk, &head->chain);
|
|
if (tw) {
|
|
sk_nulls_del_node_init_rcu((struct sock *)tw);
|
|
__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
|
|
}
|
|
spin_unlock(lock);
|
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
|
|
|
if (twp) {
|
|
*twp = tw;
|
|
} else if (tw) {
|
|
/* Silly. Should hash-dance instead... */
|
|
inet_twsk_deschedule_put(tw);
|
|
}
|
|
return 0;
|
|
|
|
not_unique:
|
|
spin_unlock(lock);
|
|
return -EADDRNOTAVAIL;
|
|
}
|
|
|
|
static u64 inet_sk_port_offset(const struct sock *sk)
|
|
{
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
|
|
return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
|
|
inet->inet_daddr,
|
|
inet->inet_dport);
|
|
}
|
|
|
|
/* Searches for an exsiting socket in the ehash bucket list.
|
|
* Returns true if found, false otherwise.
|
|
*/
|
|
static bool inet_ehash_lookup_by_sk(struct sock *sk,
|
|
struct hlist_nulls_head *list)
|
|
{
|
|
const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
|
|
const int sdif = sk->sk_bound_dev_if;
|
|
const int dif = sk->sk_bound_dev_if;
|
|
const struct hlist_nulls_node *node;
|
|
struct net *net = sock_net(sk);
|
|
struct sock *esk;
|
|
|
|
INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
|
|
|
|
sk_nulls_for_each_rcu(esk, node, list) {
|
|
if (esk->sk_hash != sk->sk_hash)
|
|
continue;
|
|
if (sk->sk_family == AF_INET) {
|
|
if (unlikely(INET_MATCH(net, esk, acookie,
|
|
ports, dif, sdif))) {
|
|
return true;
|
|
}
|
|
}
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
else if (sk->sk_family == AF_INET6) {
|
|
if (unlikely(inet6_match(net, esk,
|
|
&sk->sk_v6_daddr,
|
|
&sk->sk_v6_rcv_saddr,
|
|
ports, dif, sdif))) {
|
|
return true;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Insert a socket into ehash, and eventually remove another one
|
|
* (The another one can be a SYN_RECV or TIMEWAIT)
|
|
* If an existing socket already exists, socket sk is not inserted,
|
|
* and sets found_dup_sk parameter to true.
|
|
*/
|
|
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
|
|
{
|
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
|
struct hlist_nulls_head *list;
|
|
struct inet_ehash_bucket *head;
|
|
spinlock_t *lock;
|
|
bool ret = true;
|
|
|
|
WARN_ON_ONCE(!sk_unhashed(sk));
|
|
|
|
sk->sk_hash = sk_ehashfn(sk);
|
|
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
|
|
list = &head->chain;
|
|
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
|
|
|
|
spin_lock(lock);
|
|
if (osk) {
|
|
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
|
|
ret = sk_nulls_del_node_init_rcu(osk);
|
|
} else if (found_dup_sk) {
|
|
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
|
|
if (*found_dup_sk)
|
|
ret = false;
|
|
}
|
|
|
|
if (ret)
|
|
__sk_nulls_add_node_rcu(sk, list);
|
|
|
|
spin_unlock(lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Insert non-listening socket @sk into ehash through inet_ehash_insert().
 *
 * On success the protocol's in-use counter is incremented.  On failure the
 * socket is accounted as an orphan, moved to TCP_CLOSE, flagged SOCK_DEAD
 * and destroyed.  Returns the result of inet_ehash_insert().
 */
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
	if (inet_ehash_insert(sk, osk, found_dup_sk)) {
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		return true;
	}

	percpu_counter_inc(sk->sk_prot->orphan_count);
	inet_sk_set_state(sk, TCP_CLOSE);
	sock_set_flag(sk, SOCK_DEAD);
	inet_csk_destroy_sock(sk);
	return false;
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
|
|
|
|
static int inet_reuseport_add_sock(struct sock *sk,
|
|
struct inet_listen_hashbucket *ilb)
|
|
{
|
|
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
|
|
const struct hlist_nulls_node *node;
|
|
struct sock *sk2;
|
|
kuid_t uid = sock_i_uid(sk);
|
|
|
|
sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
|
|
if (sk2 != sk &&
|
|
sk2->sk_family == sk->sk_family &&
|
|
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
|
|
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
|
|
inet_csk(sk2)->icsk_bind_hash == tb &&
|
|
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
|
|
inet_rcv_saddr_equal(sk, sk2, false))
|
|
return reuseport_add_sock(sk, sk2,
|
|
inet_rcv_saddr_any(sk));
|
|
}
|
|
|
|
return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
|
|
}
|
|
|
|
int __inet_hash(struct sock *sk, struct sock *osk)
|
|
{
|
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
|
struct inet_listen_hashbucket *ilb;
|
|
int err = 0;
|
|
|
|
if (sk->sk_state != TCP_LISTEN) {
|
|
local_bh_disable();
|
|
inet_ehash_nolisten(sk, osk, NULL);
|
|
local_bh_enable();
|
|
return 0;
|
|
}
|
|
WARN_ON(!sk_unhashed(sk));
|
|
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
|
|
|
|
spin_lock(&ilb->lock);
|
|
if (sk->sk_reuseport) {
|
|
err = inet_reuseport_add_sock(sk, ilb);
|
|
if (err)
|
|
goto unlock;
|
|
}
|
|
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
|
|
sk->sk_family == AF_INET6)
|
|
__sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
|
|
else
|
|
__sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
|
|
inet_hash2(hashinfo, sk);
|
|
ilb->count++;
|
|
sock_set_flag(sk, SOCK_RCU_FREE);
|
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
|
unlock:
|
|
spin_unlock(&ilb->lock);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(__inet_hash);
|
|
|
|
int inet_hash(struct sock *sk)
|
|
{
|
|
int err = 0;
|
|
|
|
if (sk->sk_state != TCP_CLOSE)
|
|
err = __inet_hash(sk, NULL);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_hash);
|
|
|
|
static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
|
|
{
|
|
if (sk_unhashed(sk))
|
|
return;
|
|
|
|
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
|
reuseport_detach_sock(sk);
|
|
if (ilb) {
|
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
|
|
|
inet_unhash2(hashinfo, sk);
|
|
ilb->count--;
|
|
}
|
|
__sk_nulls_del_node_init_rcu(sk);
|
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
|
}
|
|
|
|
void inet_unhash(struct sock *sk)
|
|
{
|
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
|
|
|
if (sk_unhashed(sk))
|
|
return;
|
|
|
|
if (sk->sk_state == TCP_LISTEN) {
|
|
struct inet_listen_hashbucket *ilb;
|
|
|
|
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
|
|
/* Don't disable bottom halves while acquiring the lock to
|
|
* avoid circular locking dependency on PREEMPT_RT.
|
|
*/
|
|
spin_lock(&ilb->lock);
|
|
__inet_unhash(sk, ilb);
|
|
spin_unlock(&ilb->lock);
|
|
} else {
|
|
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
|
|
|
|
spin_lock_bh(lock);
|
|
__inet_unhash(sk, NULL);
|
|
spin_unlock_bh(lock);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_unhash);
|
|
|
|
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
 * Note that we use 32bit integers (vs RFC 'short integers')
 * because 2^16 is not a multiple of num_ephemeral and this
 * property might be used by a clever attacker.
 *
 * The RFC claims that using TABLE_LENGTH=10 buckets gives an improvement,
 * though attacks have since been demonstrated, so we use 65536 by default
 * instead to really give more isolation and privacy, at the expense of
 * 256kB of kernel memory.
 *
 * INET_TABLE_PERTURB_SIZE is the number of u32 entries in table_perturb.
 * The table is allocated in inet_hashinfo2_init(), filled through
 * get_random_slow_once() in __inet_hash_connect(), and indexed there by
 * the low bits of the port offset.
 */
|
|
#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
|
|
static u32 *table_perturb;
|
|
|
|
/*
 * Choose (if needed) a local port for an outgoing connection and hash @sk.
 *
 * @death_row:         supplies the hashinfo to operate on
 * @sk:                connecting socket
 * @port_offset:       64-bit value; its low bits select the table_perturb
 *                     slot and its upper 32 bits are added to that slot's
 *                     value to form the starting offset of the port scan
 * @check_established: callback deciding whether @sk may use a given port;
 *                     a return of 0 is treated as "port accepted"
 *
 * If @sk already has a local port (inet_num != 0) only @check_established
 * is run, with BH disabled, and its result is returned.
 *
 * Otherwise the local port range is scanned in steps of two, first over
 * the ports sharing the parity of @low, then over the other parity.
 *
 * Returns 0 on success, -ENOMEM if a bind bucket could not be allocated,
 * -EADDRNOTAVAIL if no usable port was found.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
|
|
struct sock *sk, u64 port_offset,
|
|
int (*check_established)(struct inet_timewait_death_row *,
|
|
struct sock *, __u16, struct inet_timewait_sock **))
|
|
{
|
|
struct inet_hashinfo *hinfo = death_row->hashinfo;
|
|
struct inet_timewait_sock *tw = NULL;
|
|
struct inet_bind_hashbucket *head;
|
|
int port = inet_sk(sk)->inet_num;
|
|
struct net *net = sock_net(sk);
|
|
struct inet_bind_bucket *tb;
|
|
u32 remaining, offset;
|
|
int ret, i, low, high;
|
|
int l3mdev;
|
|
u32 index;
|
|
|
|
/* Port already chosen: only the established-table check is needed. */
if (port) {
|
|
local_bh_disable();
|
|
ret = check_established(death_row, sk, port, NULL);
|
|
local_bh_enable();
|
|
return ret;
|
|
}
|
|
|
|
l3mdev = inet_sk_bound_l3mdev(sk);
|
|
|
|
inet_get_local_port_range(net, &low, &high);
|
|
high++; /* [32768, 60999] -> [32768, 61000[ */
|
|
remaining = high - low;
|
|
/* Round the range size down to an even number when it is above 1. */
if (likely(remaining > 1))
|
|
remaining &= ~1U;
|
|
|
|
get_random_slow_once(table_perturb,
|
|
INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
|
|
index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
|
|
|
|
offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
|
|
offset %= remaining;
|
|
|
|
/* In first pass we try ports of @low parity.
 * inet_csk_get_port() does the opposite choice.
 */
|
|
offset &= ~1U;
|
|
other_parity_scan:
|
|
port = low + offset;
|
|
for (i = 0; i < remaining; i += 2, port += 2) {
|
|
/* Wrap around to the start of the range. */
if (unlikely(port >= high))
|
|
port -= remaining;
|
|
if (inet_is_local_reserved_port(net, port))
|
|
continue;
|
|
head = &hinfo->bhash[inet_bhashfn(net, port,
|
|
hinfo->bhash_size)];
|
|
spin_lock_bh(&head->lock);
|
|
|
|
/* Does not bother with rcv_saddr checks, because
 * the established check is already unique enough.
 */
|
|
inet_bind_bucket_for_each(tb, &head->chain) {
|
|
if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
|
|
tb->port == port) {
|
|
/* Buckets created by this function carry -1 in both fields
 * (set below); any bucket with a non-negative value is skipped.
 */
if (tb->fastreuse >= 0 ||
|
|
tb->fastreuseport >= 0)
|
|
goto next_port;
|
|
WARN_ON(hlist_empty(&tb->owners));
|
|
if (!check_established(death_row, sk,
|
|
port, &tw))
|
|
goto ok;
|
|
goto next_port;
|
|
}
|
|
}
|
|
|
|
/* No bucket exists for this port yet: create one for this socket. */
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
|
|
net, head, port, l3mdev);
|
|
if (!tb) {
|
|
spin_unlock_bh(&head->lock);
|
|
return -ENOMEM;
|
|
}
|
|
tb->fastreuse = -1;
|
|
tb->fastreuseport = -1;
|
|
goto ok;
|
|
next_port:
|
|
spin_unlock_bh(&head->lock);
|
|
cond_resched();
|
|
}
|
|
|
|
/* After the first (even-offset) pass, run once more on the other parity. */
offset++;
|
|
if ((offset & 1) && remaining > 1)
|
|
goto other_parity_scan;
|
|
|
|
return -EADDRNOTAVAIL;
|
|
|
|
ok:
|
|
/* Here we want to add a little bit of randomness to the next source
 * port that will be chosen. We use a max() with a random here so that
 * on low contention the randomness is maximal and on high contention
 * it may be inexistent.
 */
|
|
i = max_t(int, i, (prandom_u32() & 7) * 2);
|
|
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
|
|
|
|
/* Head lock still held and bh's disabled */
|
|
inet_bind_hash(sk, tb, port);
|
|
if (sk_unhashed(sk)) {
|
|
inet_sk(sk)->inet_sport = htons(port);
|
|
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
|
|
}
|
|
if (tw)
|
|
inet_twsk_bind_unhash(tw, hinfo);
|
|
/* The lock was taken with spin_lock_bh(); it is released here in two
 * steps, with the timewait socket put in between.
 */
spin_unlock(&head->lock);
|
|
if (tw)
|
|
inet_twsk_deschedule_put(tw);
|
|
local_bh_enable();
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Bind a port for a connect operation and hash it.
|
|
*/
|
|
int inet_hash_connect(struct inet_timewait_death_row *death_row,
|
|
struct sock *sk)
|
|
{
|
|
u64 port_offset = 0;
|
|
|
|
if (!inet_sk(sk)->inet_num)
|
|
port_offset = inet_sk_port_offset(sk);
|
|
return __inet_hash_connect(death_row, sk, port_offset,
|
|
__inet_check_established);
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_hash_connect);
|
|
|
|
void inet_hashinfo_init(struct inet_hashinfo *h)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
|
|
spin_lock_init(&h->listening_hash[i].lock);
|
|
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
|
|
i + LISTENING_NULLS_BASE);
|
|
h->listening_hash[i].count = 0;
|
|
}
|
|
|
|
h->lhash2 = NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
|
|
|
|
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i <= h->lhash2_mask; i++) {
|
|
spin_lock_init(&h->lhash2[i].lock);
|
|
INIT_HLIST_HEAD(&h->lhash2[i].head);
|
|
h->lhash2[i].count = 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Boot-time setup of the lhash2 table of @h and of the global
 * table_perturb array.
 *
 * lhash2 comes from alloc_large_system_hash(), which also fills in
 * lhash2_mask; the buckets are then initialised.  Failure to allocate
 * table_perturb is fatal (panic).
 */
void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
				unsigned long numentries, int scale,
				unsigned long low_limit,
				unsigned long high_limit)
{
	h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2),
					    numentries, scale, 0, NULL,
					    &h->lhash2_mask,
					    low_limit, high_limit);
	init_hashinfo_lhash2(h);

	/* this one is used for source ports of outgoing connections */
	table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
				      sizeof(*table_perturb), GFP_KERNEL);
	if (!table_perturb)
		panic("TCP: failed to alloc table_perturb");
}
|
|
|
|
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
|
|
{
|
|
h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL);
|
|
if (!h->lhash2)
|
|
return -ENOMEM;
|
|
|
|
h->lhash2_mask = INET_LHTABLE_SIZE - 1;
|
|
/* INET_LHTABLE_SIZE must be a power of 2 */
|
|
BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);
|
|
|
|
init_hashinfo_lhash2(h);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
|
|
|
|
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
|
|
{
|
|
unsigned int locksz = sizeof(spinlock_t);
|
|
unsigned int i, nblocks = 1;
|
|
|
|
if (locksz != 0) {
|
|
/* allocate 2 cache lines or at least one spinlock per cpu */
|
|
nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
|
|
nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
|
|
|
|
/* no more locks than number of hash buckets */
|
|
nblocks = min(nblocks, hashinfo->ehash_mask + 1);
|
|
|
|
hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
|
|
if (!hashinfo->ehash_locks)
|
|
return -ENOMEM;
|
|
|
|
for (i = 0; i < nblocks; i++)
|
|
spin_lock_init(&hashinfo->ehash_locks[i]);
|
|
}
|
|
hashinfo->ehash_locks_mask = nblocks - 1;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
|