softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT

softirqs are preemptible on PREEMPT_RT, but sections which disable
bottom halves are synchronised against each other. This in turn means
that a forced threaded interrupt cannot preempt another forced threaded
interrupt. Instead it will PI-boost the other handler and wait for its
completion.

This is required because code within a softirq section is assumed to be
non-preemptible and may expect exclusive access to per-CPU resources
such as variables or pinned timers.

Code with such expectations has been identified and updated to use
local_lock_nested_bh() to lock the per-CPU resource, as sketched below.
This means the softirq lock can be removed.
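For illustration, a conversion following this pattern looks roughly like
the sketch below. The per-CPU structure and the function are made up for
the example; local_lock_nested_bh(), local_unlock_nested_bh() and
INIT_LOCAL_LOCK() are the kernel APIs referred to above:

    /* Hypothetical per-CPU resource converted to the nested-BH lock. */
    struct demo_pcpu {
            local_lock_t    bh_lock;        /* protects @counter */
            unsigned int    counter;
    };

    static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu) = {
            .bh_lock = INIT_LOCAL_LOCK(bh_lock),
    };

    /* Caller must be in a bottom-half disabled section. */
    static void demo_update(void)
    {
            local_lock_nested_bh(&demo_pcpu.bh_lock);
            __this_cpu_inc(demo_pcpu.counter);
            local_unlock_nested_bh(&demo_pcpu.bh_lock);
    }

On non-PREEMPT_RT builds the nested BH lock is essentially a lockdep
annotation, since disabling bottom halves already provides the exclusion.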

Disable the softirq synchronisation, but add a new config switch,
CONFIG_PREEMPT_RT_NEEDS_BH_LOCK, which allows re-enabling the synchronised
behaviour in case there are issues which haven't been detected yet.

The softirq_ctrl.cnt accounting remains in place so that the NOHZ code
can tell whether softirqs are currently being handled.
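That consumer is the existing PREEMPT_RT helper in kernel/softirq.c,
shown here in sketch form; the idle/NOHZ path uses it to avoid
false-positive warnings about pending softirqs while a task is inside a
BH disabled section:

    /* Roughly the existing helper; kernel-doc comment trimmed. */
    bool local_bh_blocked(void)
    {
            return __this_cpu_read(softirq_ctrl.cnt) != 0;
    }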

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Author:    Sebastian Andrzej Siewior, 2025-09-04 16:25:25 +02:00
Committer: Thomas Gleixner
Commit:    3253cb49cb, parent fd4e876f59
2 files changed, 76 insertions(+), 20 deletions(-)

--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -103,6 +103,19 @@ config PREEMPT_RT
 	  Select this if you are building a kernel for systems which
 	  require real-time guarantees.
 
+config PREEMPT_RT_NEEDS_BH_LOCK
+	bool "Enforce softirq synchronisation on PREEMPT_RT"
+	depends on PREEMPT_RT
+	help
+	  Enforce synchronisation across the softirq context. On PREEMPT_RT
+	  softirqs are preemptible. This enforces the same per-CPU BKL
+	  semantics that non-PREEMPT_RT builds have. This should not be
+	  needed because per-CPU locks were added to avoid the per-CPU BKL.
+
+	  This switch provides the old behaviour for testing reasons. Select
+	  this if you suspect an error with preemptible softirqs and want to
+	  test the old synchronised behaviour.
+
 config PREEMPT_COUNT
 	bool

--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	/* First entry of a task into a BH disabled section? */
 	if (!current->softirq_disable_cnt) {
 		if (preemptible()) {
-			local_lock(&softirq_ctrl.lock);
+			if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+				local_lock(&softirq_ctrl.lock);
+			else
+				migrate_disable();
 			/* Required to meet the RCU bottomhalf requirements. */
 			rcu_read_lock();
 		} else {
@@ -177,7 +181,8 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	 * Track the per CPU softirq disabled state. On RT this is per CPU
 	 * state to allow preemption of bottom half disabled sections.
 	 */
-	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+		newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
 		/*
 		 * Reflect the result in the task state to prevent recursion on the
 		 * local lock and to make softirq_count() & al work.
@@ -189,30 +194,65 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 			lockdep_softirqs_off(ip);
 			raw_local_irq_restore(flags);
 		}
+	} else {
+		bool sirq_dis = false;
+
+		if (!current->softirq_disable_cnt)
+			sirq_dis = true;
+
+		this_cpu_add(softirq_ctrl.cnt, cnt);
+		current->softirq_disable_cnt += cnt;
+		WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+
+		if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) {
+			raw_local_irq_save(flags);
+			lockdep_softirqs_off(ip);
+			raw_local_irq_restore(flags);
+		}
+	}
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
 
 static void __local_bh_enable(unsigned int cnt, bool unlock)
 {
 	unsigned long flags;
+	bool sirq_en = false;
 	int newcnt;
 
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
 		DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
 				    this_cpu_read(softirq_ctrl.cnt));
+		if (softirq_count() == cnt)
+			sirq_en = true;
+	} else {
+		if (current->softirq_disable_cnt == cnt)
+			sirq_en = true;
+	}
 
-	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
+	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) {
 		raw_local_irq_save(flags);
 		lockdep_softirqs_on(_RET_IP_);
 		raw_local_irq_restore(flags);
 	}
 
-	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+		newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);
 		current->softirq_disable_cnt = newcnt;
 
 		if (!newcnt && unlock) {
 			rcu_read_unlock();
 			local_unlock(&softirq_ctrl.lock);
 		}
+	} else {
+		current->softirq_disable_cnt -= cnt;
+		this_cpu_sub(softirq_ctrl.cnt, cnt);
+
+		if (unlock && !current->softirq_disable_cnt) {
+			migrate_enable();
+			rcu_read_unlock();
+		} else {
+			WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+		}
+	}
 }
 
 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
@@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	lock_map_release(&bh_lock_map);
 
 	local_irq_save(flags);
-	curcnt = __this_cpu_read(softirq_ctrl.cnt);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+		curcnt = this_cpu_read(softirq_ctrl.cnt);
+	else
+		curcnt = current->softirq_disable_cnt;
 
 	/*
 	 * If this is not reenabling soft interrupts, no point in trying to
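
Condensed, the new default (lockless) path boils down to the following
pairing for the outermost disable/enable. A sketch with hypothetical
helper names, not literal kernel code; SOFTIRQ_DISABLE_OFFSET is the
count a plain local_bh_disable() adds:

    static void rt_bh_disable_outermost(void)
    {
            migrate_disable();      /* pin the task; no per-CPU BKL taken */
            rcu_read_lock();        /* preserve the RCU-bh guarantees */
            this_cpu_add(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET);
            current->softirq_disable_cnt += SOFTIRQ_DISABLE_OFFSET;
    }

    static void rt_bh_enable_outermost(void)
    {
            current->softirq_disable_cnt -= SOFTIRQ_DISABLE_OFFSET;
            this_cpu_sub(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET);
            migrate_enable();       /* the task may migrate again */
            rcu_read_unlock();
    }

Nested sections only touch the two counters; a task preempted in the
middle of its own BH section no longer blocks other BH sections on the
same CPU.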