From a255b78d14324f8a4a49f88e983b9f00818d1194 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:06 +0200 Subject: [PATCH 1/8] selftests/futex: Adapt the private hash test to RCU related changes The auto scaling on create creation used to automatically assign the new hash because there was the private hash was unused and could be replaced right away. This is already racy because if the private hash is in use by a thread then the visibile resize will be delayed. With the upcoming change to wait for a RCU grace period before the hash can be assigned, the test will always fail. If the reported number of hash buckets is not updated after an auto scaling event, block on an acquired lock with a timeout. The timeout is the delay to wait towards a grace period and locking and a locked pthread_mutex_t ensure that glibc calls into kernel using futex operation which will assign new private hash if available. This will retry every 100ms up to 2 seconds in total. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-2-bigeasy@linutronix.de --- .../futex/functional/futex_priv_hash.c | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 24a92dc94eb8..625e3be4129c 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -111,6 +111,30 @@ static void join_max_threads(void) } } +#define SEC_IN_NSEC 1000000000 +#define MSEC_IN_NSEC 1000000 + +static void futex_dummy_op(void) +{ + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + struct timespec timeout; + int ret; + + pthread_mutex_lock(&lock); + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += 100 * MSEC_IN_NSEC; + if (timeout.tv_nsec >= SEC_IN_NSEC) { + timeout.tv_nsec -= SEC_IN_NSEC; + timeout.tv_sec++; + } + ret = pthread_mutex_timedlock(&lock, &timeout); + if (ret == 0) + ksft_exit_fail_msg("Succeffuly locked an already locked mutex.\n"); + + if (ret != ETIMEDOUT) + ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); +} + static void usage(char *prog) { printf("Usage: %s\n", prog); @@ -129,7 +153,7 @@ int main(int argc, char *argv[]) int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; - int ret; + int ret, retry = 20; int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { @@ -208,8 +232,24 @@ int main(int argc, char *argv[]) */ ksft_print_msg("Online CPUs: %d\n", online_cpus); if (online_cpus > 16) { +retry_getslots: futex_slotsn = futex_hash_slots_get(); if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + retry--; + /* + * Auto scaling on thread creation can be slightly delayed + * because it waits for a RCU grace period twice. The new + * private hash is assigned upon the first futex operation + * after grace period. + * To cover all this for testing purposes the function + * below will acquire a lock and acquire it again with a + * 100ms timeout which must timeout. This ensures we + * sleep for 100ms and issue a futex operation. + */ + if (retry > 0) { + futex_dummy_op(); + goto retry_getslots; + } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); ksft_exit_fail_msg(test_msg_auto_inc); From 56180dd20c19e5b0fa34822997a9ac66b517e7b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Jul 2025 13:00:07 +0200 Subject: [PATCH 2/8] futex: Use RCU-based per-CPU reference counting instead of rcuref_t The use of rcuref_t for reference counting introduces a performance bottleneck when accessed concurrently by multiple threads during futex operations. Replace rcuref_t with special crafted per-CPU reference counters. The lifetime logic remains the same. The newly allocate private hash starts in FR_PERCPU state. In this state, each futex operation that requires the private hash uses a per-CPU counter (an unsigned int) for incrementing or decrementing the reference count. When the private hash is about to be replaced, the per-CPU counters are migrated to a atomic_t counter mm_struct::futex_atomic. The migration process: - Waiting for one RCU grace period to ensure all users observe the current private hash. This can be skipped if a grace period elapsed since the private hash was assigned. - futex_private_hash::state is set to FR_ATOMIC, forcing all users to use mm_struct::futex_atomic for reference counting. - After a RCU grace period, all users are guaranteed to be using the atomic counter. The per-CPU counters can now be summed up and added to the atomic_t counter. If the resulting count is zero, the hash can be safely replaced. Otherwise, active users still hold a valid reference. - Once the atomic reference count drops to zero, the next futex operation will switch to the new private hash. call_rcu_hurry() is used to speed up transition which otherwise might be delay with RCU_LAZY. There is nothing wrong with using call_rcu(). The side effects would be that on auto scaling the new hash is used later and the SET_SLOTS prctl() will block longer. [bigeasy: commit description + mm get/ put_async] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-3-bigeasy@linutronix.de --- include/linux/futex.h | 16 +-- include/linux/mm_types.h | 5 + include/linux/sched/mm.h | 2 +- init/Kconfig | 4 - kernel/fork.c | 8 +- kernel/futex/core.c | 243 ++++++++++++++++++++++++++++++++++++--- 6 files changed, 243 insertions(+), 35 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index b37193653e6b..9e9750f04980 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -85,18 +85,12 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) #ifdef CONFIG_FUTEX_PRIVATE_HASH int futex_hash_allocate_default(void); void futex_hash_free(struct mm_struct *mm); - -static inline void futex_mm_init(struct mm_struct *mm) -{ - RCU_INIT_POINTER(mm->futex_phash, NULL); - mm->futex_phash_new = NULL; - mutex_init(&mm->futex_hash_lock); -} +int futex_mm_init(struct mm_struct *mm); #else /* !CONFIG_FUTEX_PRIVATE_HASH */ static inline int futex_hash_allocate_default(void) { return 0; } -static inline void futex_hash_free(struct mm_struct *mm) { } -static inline void futex_mm_init(struct mm_struct *mm) { } +static inline int futex_hash_free(struct mm_struct *mm) { return 0; } +static inline int futex_mm_init(struct mm_struct *mm) { return 0; } #endif /* CONFIG_FUTEX_PRIVATE_HASH */ #else /* !CONFIG_FUTEX */ @@ -118,8 +112,8 @@ static inline int futex_hash_allocate_default(void) { return 0; } -static inline void futex_hash_free(struct mm_struct *mm) { } -static inline void futex_mm_init(struct mm_struct *mm) { } +static inline int futex_hash_free(struct mm_struct *mm) { return 0; } +static inline int futex_mm_init(struct mm_struct *mm) { return 0; } #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d6b91e8a66d6..0f0662157066 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1070,6 +1070,11 @@ struct mm_struct { struct mutex futex_hash_lock; struct futex_private_hash __rcu *futex_phash; struct futex_private_hash *futex_phash_new; + /* futex-ref */ + unsigned long futex_batches; + struct rcu_head futex_rcu; + atomic_long_t futex_atomic; + unsigned int __percpu *futex_ref; #endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index b13474825130..2201da0afecc 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,7 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) || defined(CONFIG_FUTEX_PRIVATE_HASH) /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ diff --git a/init/Kconfig b/init/Kconfig index 666783eb50ab..af4c2f085455 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1716,13 +1716,9 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y -# -# marked broken for performance reasons; gives us one more cycle to sort things out. -# config FUTEX_PRIVATE_HASH bool depends on FUTEX && !BASE_SMALL && MMU - depends on BROKEN default y config FUTEX_MPOL diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..0b885dcbde9a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1046,7 +1046,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); - futex_mm_init(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) mm->pmd_huge_pte = NULL; #endif @@ -1061,6 +1060,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->def_flags = 0; } + if (futex_mm_init(mm)) + goto fail_mm_init; + if (mm_alloc_pgd(mm)) goto fail_nopgd; @@ -1090,6 +1092,8 @@ fail_nocontext: fail_noid: mm_free_pgd(mm); fail_nopgd: + futex_hash_free(mm); +fail_mm_init: free_mm(mm); return NULL; } @@ -1145,7 +1149,7 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) || defined(CONFIG_FUTEX_PRIVATE_HASH) static void mmput_async_fn(struct work_struct *work) { struct mm_struct *mm = container_of(work, struct mm_struct, diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 90d53fb0ee9e..1dcb4c8a2585 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -65,7 +64,7 @@ static struct { #define futex_queues (__futex_data.queues) struct futex_private_hash { - rcuref_t users; + int state; unsigned int hash_mask; struct rcu_head rcu; void *mm; @@ -129,6 +128,12 @@ static struct futex_hash_bucket * __futex_hash(union futex_key *key, struct futex_private_hash *fph); #ifdef CONFIG_FUTEX_PRIVATE_HASH +static bool futex_ref_get(struct futex_private_hash *fph); +static bool futex_ref_put(struct futex_private_hash *fph); +static bool futex_ref_is_dead(struct futex_private_hash *fph); + +enum { FR_PERCPU = 0, FR_ATOMIC }; + static inline bool futex_key_is_private(union futex_key *key) { /* @@ -142,15 +147,14 @@ bool futex_private_hash_get(struct futex_private_hash *fph) { if (fph->immutable) return true; - return rcuref_get(&fph->users); + return futex_ref_get(fph); } void futex_private_hash_put(struct futex_private_hash *fph) { - /* Ignore return value, last put is verified via rcuref_is_dead() */ if (fph->immutable) return; - if (rcuref_put(&fph->users)) + if (futex_ref_put(fph)) wake_up_var(fph->mm); } @@ -243,14 +247,18 @@ static bool __futex_pivot_hash(struct mm_struct *mm, fph = rcu_dereference_protected(mm->futex_phash, lockdep_is_held(&mm->futex_hash_lock)); if (fph) { - if (!rcuref_is_dead(&fph->users)) { + if (!futex_ref_is_dead(fph)) { mm->futex_phash_new = new; return false; } futex_rehash_private(fph, new); } - rcu_assign_pointer(mm->futex_phash, new); + new->state = FR_PERCPU; + scoped_guard(rcu) { + mm->futex_batches = get_state_synchronize_rcu(); + rcu_assign_pointer(mm->futex_phash, new); + } kvfree_rcu(fph, rcu); return true; } @@ -289,9 +297,7 @@ again: if (!fph) return NULL; - if (fph->immutable) - return fph; - if (rcuref_get(&fph->users)) + if (futex_private_hash_get(fph)) return fph; } futex_pivot_hash(mm); @@ -1527,16 +1533,219 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, #define FH_IMMUTABLE 0x02 #ifdef CONFIG_FUTEX_PRIVATE_HASH + +/* + * futex-ref + * + * Heavily inspired by percpu-rwsem/percpu-refcount; not reusing any of that + * code because it just doesn't fit right. + * + * Dual counter, per-cpu / atomic approach like percpu-refcount, except it + * re-initializes the state automatically, such that the fph swizzle is also a + * transition back to per-cpu. + */ + +static void futex_ref_rcu(struct rcu_head *head); + +static void __futex_ref_atomic_begin(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + + /* + * The counter we're about to switch to must have fully switched; + * otherwise it would be impossible for it to have reported success + * from futex_ref_is_dead(). + */ + WARN_ON_ONCE(atomic_long_read(&mm->futex_atomic) != 0); + + /* + * Set the atomic to the bias value such that futex_ref_{get,put}() + * will never observe 0. Will be fixed up in __futex_ref_atomic_end() + * when folding in the percpu count. + */ + atomic_long_set(&mm->futex_atomic, LONG_MAX); + smp_store_release(&fph->state, FR_ATOMIC); + + call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu); +} + +static void __futex_ref_atomic_end(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + unsigned int count = 0; + long ret; + int cpu; + + /* + * Per __futex_ref_atomic_begin() the state of the fph must be ATOMIC + * and per this RCU callback, everybody must now observe this state and + * use the atomic variable. + */ + WARN_ON_ONCE(fph->state != FR_ATOMIC); + + /* + * Therefore the per-cpu counter is now stable, sum and reset. + */ + for_each_possible_cpu(cpu) { + unsigned int *ptr = per_cpu_ptr(mm->futex_ref, cpu); + count += *ptr; + *ptr = 0; + } + + /* + * Re-init for the next cycle. + */ + this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ + + /* + * Add actual count, subtract bias and initial refcount. + * + * The moment this atomic operation happens, futex_ref_is_dead() can + * become true. + */ + ret = atomic_long_add_return(count - LONG_MAX - 1, &mm->futex_atomic); + if (!ret) + wake_up_var(mm); + + WARN_ON_ONCE(ret < 0); + mmput_async(mm); +} + +static void futex_ref_rcu(struct rcu_head *head) +{ + struct mm_struct *mm = container_of(head, struct mm_struct, futex_rcu); + struct futex_private_hash *fph = rcu_dereference_raw(mm->futex_phash); + + if (fph->state == FR_PERCPU) { + /* + * Per this extra grace-period, everybody must now observe + * fph as the current fph and no previously observed fph's + * are in-flight. + * + * Notably, nobody will now rely on the atomic + * futex_ref_is_dead() state anymore so we can begin the + * migration of the per-cpu counter into the atomic. + */ + __futex_ref_atomic_begin(fph); + return; + } + + __futex_ref_atomic_end(fph); +} + +/* + * Drop the initial refcount and transition to atomics. + */ +static void futex_ref_drop(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + + /* + * Can only transition the current fph; + */ + WARN_ON_ONCE(rcu_dereference_raw(mm->futex_phash) != fph); + /* + * We enqueue at least one RCU callback. Ensure mm stays if the task + * exits before the transition is completed. + */ + mmget(mm); + + /* + * In order to avoid the following scenario: + * + * futex_hash() __futex_pivot_hash() + * guard(rcu); guard(mm->futex_hash_lock); + * fph = mm->futex_phash; + * rcu_assign_pointer(&mm->futex_phash, new); + * futex_hash_allocate() + * futex_ref_drop() + * fph->state = FR_ATOMIC; + * atomic_set(, BIAS); + * + * futex_private_hash_get(fph); // OOPS + * + * Where an old fph (which is FR_ATOMIC) and should fail on + * inc_not_zero, will succeed because a new transition is started and + * the atomic is bias'ed away from 0. + * + * There must be at least one full grace-period between publishing a + * new fph and trying to replace it. + */ + if (poll_state_synchronize_rcu(mm->futex_batches)) { + /* + * There was a grace-period, we can begin now. + */ + __futex_ref_atomic_begin(fph); + return; + } + + call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu); +} + +static bool futex_ref_get(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + + guard(rcu)(); + + if (smp_load_acquire(&fph->state) == FR_PERCPU) { + this_cpu_inc(*mm->futex_ref); + return true; + } + + return atomic_long_inc_not_zero(&mm->futex_atomic); +} + +static bool futex_ref_put(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + + guard(rcu)(); + + if (smp_load_acquire(&fph->state) == FR_PERCPU) { + this_cpu_dec(*mm->futex_ref); + return false; + } + + return atomic_long_dec_and_test(&mm->futex_atomic); +} + +static bool futex_ref_is_dead(struct futex_private_hash *fph) +{ + struct mm_struct *mm = fph->mm; + + guard(rcu)(); + + if (smp_load_acquire(&fph->state) == FR_PERCPU) + return false; + + return atomic_long_read(&mm->futex_atomic) == 0; +} + +int futex_mm_init(struct mm_struct *mm) +{ + mutex_init(&mm->futex_hash_lock); + RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; + /* futex-ref */ + atomic_long_set(&mm->futex_atomic, 0); + mm->futex_batches = get_state_synchronize_rcu(); + mm->futex_ref = alloc_percpu(unsigned int); + if (!mm->futex_ref) + return -ENOMEM; + this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ + return 0; +} + void futex_hash_free(struct mm_struct *mm) { struct futex_private_hash *fph; + free_percpu(mm->futex_ref); kvfree(mm->futex_phash_new); fph = rcu_dereference_raw(mm->futex_phash); - if (fph) { - WARN_ON_ONCE(rcuref_read(&fph->users) > 1); + if (fph) kvfree(fph); - } } static bool futex_pivot_pending(struct mm_struct *mm) @@ -1549,7 +1758,7 @@ static bool futex_pivot_pending(struct mm_struct *mm) return true; fph = rcu_dereference(mm->futex_phash); - return rcuref_is_dead(&fph->users); + return futex_ref_is_dead(fph); } static bool futex_hash_less(struct futex_private_hash *a, @@ -1598,11 +1807,11 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) } } - fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + fph = kvzalloc(struct_size(fph, queues, hash_slots), + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!fph) return -ENOMEM; - rcuref_init(&fph->users, 1); fph->hash_mask = hash_slots ? hash_slots - 1 : 0; fph->custom = custom; fph->immutable = !!(flags & FH_IMMUTABLE); @@ -1645,7 +1854,7 @@ again: * allocated a replacement hash, drop the initial * reference on the existing hash. */ - futex_private_hash_put(cur); + futex_ref_drop(cur); } if (new) { From fb3c553da7fa9991f9b1436d91dbb78c7477c86a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:08 +0200 Subject: [PATCH 3/8] futex: Make futex_private_hash_get() static futex_private_hash_get() is not used outside if its compilation unit. Make it static. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-4-bigeasy@linutronix.de --- kernel/futex/core.c | 2 +- kernel/futex/futex.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 1dcb4c8a2585..1981574a459d 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -143,7 +143,7 @@ static inline bool futex_key_is_private(union futex_key *key) return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); } -bool futex_private_hash_get(struct futex_private_hash *fph) +static bool futex_private_hash_get(struct futex_private_hash *fph) { if (fph->immutable) return true; diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index fcd1617212ee..c74eac572acd 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -228,14 +228,12 @@ extern void futex_hash_get(struct futex_hash_bucket *hb); extern void futex_hash_put(struct futex_hash_bucket *hb); extern struct futex_private_hash *futex_private_hash(void); -extern bool futex_private_hash_get(struct futex_private_hash *fph); extern void futex_private_hash_put(struct futex_private_hash *fph); #else /* !CONFIG_FUTEX_PRIVATE_HASH */ static inline void futex_hash_get(struct futex_hash_bucket *hb) { } static inline void futex_hash_put(struct futex_hash_bucket *hb) { } static inline struct futex_private_hash *futex_private_hash(void) { return NULL; } -static inline bool futex_private_hash_get(void) { return false; } static inline void futex_private_hash_put(struct futex_private_hash *fph) { } #endif From 760e6f7befbab9a84c54457a8ee45313b7b91ee5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:09 +0200 Subject: [PATCH 4/8] futex: Remove support for IMMUTABLE The FH_FLAG_IMMUTABLE flag was meant to avoid the reference counting on the private hash and so to avoid the performance regression on big machines. With the switch to per-CPU counter this is no longer needed. That flag was never useable on any released kernel. Remove any support for IMMUTABLE while preserve the flags argument and enforce it to be zero. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-5-bigeasy@linutronix.de --- include/uapi/linux/prctl.h | 2 -- kernel/futex/core.c | 36 +++--------------------------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43dec6eed559..3b93fb906e3c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -367,8 +367,6 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 1981574a459d..d9bb5567af0c 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -69,7 +69,6 @@ struct futex_private_hash { struct rcu_head rcu; void *mm; bool custom; - bool immutable; struct futex_hash_bucket queues[]; }; @@ -145,15 +144,11 @@ static inline bool futex_key_is_private(union futex_key *key) static bool futex_private_hash_get(struct futex_private_hash *fph) { - if (fph->immutable) - return true; return futex_ref_get(fph); } void futex_private_hash_put(struct futex_private_hash *fph) { - if (fph->immutable) - return; if (futex_ref_put(fph)) wake_up_var(fph->mm); } @@ -1530,7 +1525,6 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, } #define FH_CUSTOM 0x01 -#define FH_IMMUTABLE 0x02 #ifdef CONFIG_FUTEX_PRIVATE_HASH @@ -1800,7 +1794,7 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) */ scoped_guard(rcu) { fph = rcu_dereference(mm->futex_phash); - if (fph && (!fph->hash_mask || fph->immutable)) { + if (fph && !fph->hash_mask) { if (custom) return -EBUSY; return 0; @@ -1814,7 +1808,6 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) fph->hash_mask = hash_slots ? hash_slots - 1 : 0; fph->custom = custom; - fph->immutable = !!(flags & FH_IMMUTABLE); fph->mm = mm; for (i = 0; i < hash_slots; i++) @@ -1838,7 +1831,7 @@ again: mm->futex_phash_new = NULL; if (fph) { - if (cur && (!cur->hash_mask || cur->immutable)) { + if (cur && !cur->hash_mask) { /* * If two threads simultaneously request the global * hash then the first one performs the switch, @@ -1931,19 +1924,6 @@ static int futex_hash_get_slots(void) return 0; } -static int futex_hash_get_immutable(void) -{ - struct futex_private_hash *fph; - - guard(rcu)(); - fph = rcu_dereference(current->mm->futex_phash); - if (fph && fph->immutable) - return 1; - if (fph && !fph->hash_mask) - return 1; - return 0; -} - #else static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) @@ -1956,10 +1936,6 @@ static int futex_hash_get_slots(void) return 0; } -static int futex_hash_get_immutable(void) -{ - return 0; -} #endif int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) @@ -1969,10 +1945,8 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) switch (arg2) { case PR_FUTEX_HASH_SET_SLOTS: - if (arg4 & ~FH_FLAG_IMMUTABLE) + if (arg4) return -EINVAL; - if (arg4 & FH_FLAG_IMMUTABLE) - flags |= FH_IMMUTABLE; ret = futex_hash_allocate(arg3, flags); break; @@ -1980,10 +1954,6 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) ret = futex_hash_get_slots(); break; - case PR_FUTEX_HASH_GET_IMMUTABLE: - ret = futex_hash_get_immutable(); - break; - default: ret = -EINVAL; break; From 16adc7f136dc143fdaa0d465172d7a1e6d5ae3c5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:10 +0200 Subject: [PATCH 5/8] selftests/futex: Remove support for IMMUTABLE Testing for the IMMUTABLE part of the futex interface is not needed after the removal of the interface. Remove support for IMMUTABLE from the sefltest. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-6-bigeasy@linutronix.de --- .../futex/functional/futex_priv_hash.c | 71 ++++++------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 625e3be4129c..a9cedc365102 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -26,14 +26,12 @@ static int counter; #ifndef PR_FUTEX_HASH #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif -static int futex_hash_slots_set(unsigned int slots, int flags) +static int futex_hash_slots_set(unsigned int slots) { - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0); } static int futex_hash_slots_get(void) @@ -41,16 +39,11 @@ static int futex_hash_slots_get(void) return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); } -static int futex_hash_immutable_get(void) -{ - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); -} - static void futex_hash_slots_set_verify(int slots) { int ret; - ret = futex_hash_slots_set(slots, 0); + ret = futex_hash_slots_set(slots); if (ret != 0) { ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); ksft_finished(); @@ -64,13 +57,13 @@ static void futex_hash_slots_set_verify(int slots) ksft_test_result_pass("SET and GET slots %d passed\n", slots); } -static void futex_hash_slots_set_must_fail(int slots, int flags) +static void futex_hash_slots_set_must_fail(int slots) { int ret; - ret = futex_hash_slots_set(slots, flags); - ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", - slots, flags); + ret = futex_hash_slots_set(slots); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n", + slots); } static void *thread_return_fn(void *arg) @@ -152,18 +145,14 @@ int main(int argc, char *argv[]) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; - int use_global_hash = 0; int ret, retry = 20; int c; - while ((c = getopt(argc, argv, "cghv:")) != -1) { + while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { case 'c': log_color(1); break; - case 'g': - use_global_hash = 1; - break; case 'h': usage(basename(argv[0])); exit(0); @@ -178,7 +167,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(22); + ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -191,10 +180,6 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); - ret = futex_hash_immutable_get(); - if (ret != 0) - ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); - ksft_test_result_pass("Basic get slots and immutable status.\n"); ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); if (ret != 0) @@ -267,7 +252,7 @@ retry_getslots: futex_hash_slots_set_verify(32); futex_hash_slots_set_verify(16); - ret = futex_hash_slots_set(15, 0); + ret = futex_hash_slots_set(15); ksft_test_result(ret < 0, "Use 15 slots\n"); futex_hash_slots_set_verify(2); @@ -285,28 +270,23 @@ retry_getslots: ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", ret); - futex_hash_slots_set_must_fail(1 << 29, 0); + futex_hash_slots_set_must_fail(1 << 29); + futex_hash_slots_set_verify(4); /* - * Once the private hash has been made immutable or global hash has been requested, - * then this requested can not be undone. + * Once the global hash has been requested, then this requested can not + * be undone. */ - if (use_global_hash) { - ret = futex_hash_slots_set(0, 0); - ksft_test_result(ret == 0, "Global hash request\n"); - } else { - ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); - ksft_test_result(ret == 0, "Immutable resize to 4\n"); - } + ret = futex_hash_slots_set(0); + ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) goto out; - futex_hash_slots_set_must_fail(4, 0); - futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(8, 0); - futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(4); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(0); + futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); if (ret != 0) { @@ -317,14 +297,7 @@ retry_getslots: join_max_threads(); ret = futex_hash_slots_get(); - if (use_global_hash) { - ksft_test_result(ret == 0, "Continue to use global hash\n"); - } else { - ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); - } - - ret = futex_hash_immutable_get(); - ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + ksft_test_result(ret == 0, "Continue to use global hash\n"); out: ksft_finished(); From 7497e947bc1d3f761b46c2105c8ae37af98add54 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 10 Jul 2025 13:00:11 +0200 Subject: [PATCH 6/8] perf bench futex: Remove support for IMMUTABLE It has been decided to remove the support IMMUTABLE futex. perf bench was one of the eary users for testing purposes. Now that the API is removed before it could be used in an official release, remove the bits from perf, too. Remove Remove support for IMMUTABLE futex. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250710110011.384614-7-bigeasy@linutronix.de --- tools/include/uapi/linux/prctl.h | 2 -- tools/perf/bench/futex-hash.c | 1 - tools/perf/bench/futex-lock-pi.c | 1 - tools/perf/bench/futex-requeue.c | 1 - tools/perf/bench/futex-wake-parallel.c | 1 - tools/perf/bench/futex-wake.c | 1 - tools/perf/bench/futex.c | 21 +++++-------------- tools/perf/bench/futex.h | 1 - .../trace/beauty/include/uapi/linux/prctl.h | 2 -- 9 files changed, 5 insertions(+), 26 deletions(-) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 43dec6eed559..3b93fb906e3c 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -367,8 +367,6 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index d2d6d7f3ea33..7e29f04da744 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -56,7 +56,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 5144a158512c..40640b674427 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -47,7 +47,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"), diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a2f91ee1950b..0748b0fd689e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -52,7 +52,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index ee66482c29fd..6aede7c46b33 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -63,7 +63,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 8d6107f7cd94..a31fc1563862 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -52,7 +52,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 4c4fee107e59..1481196a22f0 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -9,21 +9,17 @@ #ifndef PR_FUTEX_HASH #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif // PR_FUTEX_HASH void futex_set_nbuckets_param(struct bench_futex_parameters *params) { - unsigned long flags; int ret; if (params->nbuckets < 0) return; - flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0; - ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags); + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0); if (ret) { printf("Requesting %d hash buckets failed: %d/%m\n", params->nbuckets, ret); @@ -47,18 +43,11 @@ void futex_print_nbuckets(struct bench_futex_parameters *params) printf("Requested: %d in usage: %d\n", params->nbuckets, ret); err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); } - if (params->nbuckets == 0) { + if (params->nbuckets == 0) ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); - } else { - ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); - if (ret < 0) { - printf("Can't check if the hash is immutable: %m\n"); - err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); - } - ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s", - params->nbuckets, - ret == 1 ? "(immutable)" : ""); - } + else + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets", + params->nbuckets); } else { if (ret <= 0) { ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 9c9a73f9d865..dd295d27044a 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -26,7 +26,6 @@ struct bench_futex_parameters { unsigned int nwakes; unsigned int nrequeue; int nbuckets; - bool buckets_immutable; }; /** diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 43dec6eed559..3b93fb906e3c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -367,8 +367,6 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ From 04850819c65c8242072818655d4341e70ae998b5 Mon Sep 17 00:00:00 2001 From: Cynthia Huang Date: Thu, 10 Jul 2025 18:36:30 +0800 Subject: [PATCH 7/8] selftests/futex: Define SYS_futex on 32-bit architectures with 64-bit time_t The kernel does not provide sys_futex() on 32-bit architectures that do not support 32-bit time representations, such as riscv32. As a result, glibc cannot define SYS_futex, causing compilation failures in tests that rely on this syscall. Define SYS_futex as SYS_futex_time64 in such cases to ensure successful compilation and compatibility. Signed-off-by: Cynthia Huang Signed-off-by: Ben Zong-You Xie Signed-off-by: Thomas Gleixner Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/all/20250710103630.3156130-1-ben717@andestech.com --- tools/testing/selftests/futex/include/futextest.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..7a5fd1d5355e 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t; FUTEX_PRIVATE_FLAG) #endif +/* + * SYS_futex is expected from system C library, in glibc some 32-bit + * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have + * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as + * SYS_futex_time64 in this situation to ensure the compilation and the + * compatibility. + */ +#if !defined(SYS_futex) && defined(SYS_futex_time64) +#define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex From e40892214b454c8734350d82374f46c2e495a4d2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 15 Jul 2025 14:06:26 +0100 Subject: [PATCH 8/8] selftests/futex: Fix spelling mistake "Succeffuly" -> "Successfully" There is a spelling mistake in a ksft_exit_fail_msg() message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250715130627.1907017-1-colin.i.king@gmail.com --- tools/testing/selftests/futex/functional/futex_priv_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index a9cedc365102..aea001ac4946 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -122,7 +122,7 @@ static void futex_dummy_op(void) } ret = pthread_mutex_timedlock(&lock, &timeout); if (ret == 0) - ksft_exit_fail_msg("Succeffuly locked an already locked mutex.\n"); + ksft_exit_fail_msg("Successfully locked an already locked mutex.\n"); if (ret != ETIMEDOUT) ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);