Real-time systems fall into two categories, hard real-time and soft real-time. Hard real-time requires an absolute guarantee that the response time never exceeds the deadline; missing a deadline has catastrophic consequences, for example a car must deploy its airbags within a strict time limit when a collision occurs. Soft real-time only requires a best effort to keep the response time within the deadline; occasionally missing a deadline is not catastrophic, for example a digital TV set-top box must decode the video stream in real time, and occasionally dropping a few video frames matters little.
Operating systems such as RTLinux, QNX and VxWorks provide hard real-time capability; a general-purpose operating system such as Linux can only provide soft real-time capability.
At present the mainline Linux kernel does not include this soft real-time (PREEMPT_RT) support. Instead, two separate repositories hold the source code of the real-time kernels that correspond to mainline kernel versions.
The first repository holds the source code of the real-time kernel under development; once a stable version is released, that development version is moved to the second repository.
The kernel community originally planned to merge the real-time patches into the mainline in version 5.3, but problems were found during testing and the plan was dropped. As of version 5.11 the real-time patches had still not been merged into the mainline.
This article analyzes the soft real-time Linux kernel, version 5.10.8. To obtain the source code: download the 5.10.8 kernel tarball from "https://mirrors.edge.kernel.org/pub/linux/kernel/v5.x/", download the matching 5.10.8 real-time patch tarball from "https://mirrors.edge.kernel.org/pub/linux/kernel/projects/rt/5.10/", and apply the real-time patch to the kernel source tree.
1. Factors that affect real-time behavior
2. Kernel preemption model
Under the voluntary preemption model (CONFIG_PREEMPT_VOLUNTARY), functions that may sleep call might_sleep(), which serves as a voluntary preemption point. For example, mutex_lock() calls it on entry:
kernel/locking/mutex.c
void __sched mutex_lock(struct mutex *lock)
{
    might_sleep();

    if (!__mutex_trylock_fast(lock))
        __mutex_lock_slowpath(lock);
}
The macro might_sleep() is defined as follows.
include/linux/kernel.h
# define might_sleep() do { might_resched(); } while (0)
#ifdef CONFIG_PREEMPT_VOLUNTARY
extern int _cond_resched(void);
# define might_resched() _cond_resched()
#else
# define might_resched() do { } while (0)
#endif
kernel/sched/core.c
#ifndef CONFIG_PREEMPTION
int __sched _cond_resched(void)
{
    if (should_resched(0)) {
        preempt_schedule_common();
        return 1;
    }
    ...
    return 0;
}
#endif
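As an illustration only (this sketch is not part of the original excerpt): a long-running kernel thread typically inserts explicit voluntary preemption points by calling cond_resched() between units of work. The worker function below and its work items are hypothetical.

/* Hypothetical kernel-thread body: process many items and voluntarily
 * yield the CPU between items. With CONFIG_PREEMPT_VOLUNTARY,
 * cond_resched() reschedules if TIF_NEED_RESCHED is set for the task.
 */
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_worker(void *data)
{
    while (!kthread_should_stop()) {
        /* ... process one unit of work ... */
        cond_resched();    /* voluntary preemption point */
    }
    return 0;
}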
3. Scheduling policy
Figure 3.1 Deadline scheduling policy
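A minimal user-space sketch (an assumption for illustration, not taken from the original text): selecting the deadline scheduling policy (SCHED_DEADLINE) with the sched_setattr() system call. glibc does not provide a wrapper, so the raw syscall is used; the runtime, deadline and period values below are arbitrary.

/* Hypothetical example: put the calling thread under SCHED_DEADLINE,
 * asking for 2 ms of CPU time within a 10 ms deadline every 10 ms period.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

struct sched_attr {        /* layout from include/uapi/linux/sched/types.h */
    uint32_t size;
    uint32_t sched_policy;
    uint64_t sched_flags;
    int32_t  sched_nice;
    uint32_t sched_priority;
    uint64_t sched_runtime;
    uint64_t sched_deadline;
    uint64_t sched_period;
};

int set_deadline_policy(void)
{
    struct sched_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size           = sizeof(attr);
    attr.sched_policy   = SCHED_DEADLINE;
    attr.sched_runtime  = 2 * 1000 * 1000ULL;    /* 2 ms  */
    attr.sched_deadline = 10 * 1000 * 1000ULL;   /* 10 ms */
    attr.sched_period   = 10 * 1000 * 1000ULL;   /* 10 ms */

    return syscall(__NR_sched_setattr, 0, &attr, 0);
}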
4. Interrupt threading
The function request_threaded_irq() registers an interrupt with both a hard interrupt handler and a threaded handler:
include/linux/interrupt.h
extern int __must_check
request_threaded_irq(unsigned int irq, irq_handler_t handler,
                     irq_handler_t thread_fn,
                     unsigned long flags, const char *name, void *dev);
kernel/irq/manage.c
static irqreturn_t irq_default_primary_handler(int irq, void *dev_id)
{
    return IRQ_WAKE_THREAD;
}
include/linux/interrupt.h
static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
            const char *name, void *dev)
{
    return request_threaded_irq(irq, handler, NULL, flags, name, dev);
}
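For illustration (a hypothetical driver fragment, not taken from the original text): a driver can request a threaded interrupt explicitly. The device name "mydev" and both handlers below are assumptions.

/* The hard-IRQ handler only quiesces the device and returns IRQ_WAKE_THREAD;
 * the heavy work runs in the handler thread, which is allowed to sleep.
 */
#include <linux/interrupt.h>

static irqreturn_t mydev_hardirq(int irq, void *dev_id)
{
    /* acknowledge/mask the interrupt in the device registers here */
    return IRQ_WAKE_THREAD;
}

static irqreturn_t mydev_thread_fn(int irq, void *dev_id)
{
    /* lengthy processing, may sleep (for example a bus transfer) */
    return IRQ_HANDLED;
}

/* In the probe function:
 *    err = request_threaded_irq(irq, mydev_hardirq, mydev_thread_fn,
 *                               IRQF_ONESHOT, "mydev", dev);
 */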
5. High-resolution timers
The hrtimer_mode enumeration distinguishes, among other things, timers whose expiry function runs in soft interrupt context (HRTIMER_MODE_SOFT) from timers whose expiry function runs in hard interrupt context (HRTIMER_MODE_HARD):
include/linux/hrtimer.h
enum hrtimer_mode {
    HRTIMER_MODE_ABS    = 0x00,
    HRTIMER_MODE_REL    = 0x01,
    HRTIMER_MODE_PINNED = 0x02,
    HRTIMER_MODE_SOFT   = 0x04,
    HRTIMER_MODE_HARD   = 0x08,

    HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
    HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,

    HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT,
    HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT,

    HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
    HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,

    HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
    HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD,

    HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
    HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
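A minimal usage sketch (an assumption, not from the original text): initializing and starting an hrtimer whose callback must run in hard interrupt context even on PREEMPT_RT. The callback my_timer_fn and the 10 ms expiry are hypothetical.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer my_timer;

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
    /* runs in hard interrupt context because HRTIMER_MODE_REL_HARD was used */
    return HRTIMER_NORESTART;
}

static void my_timer_setup(void)
{
    hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    my_timer.function = my_timer_fn;
    hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL_HARD);
}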
6. Softirq threading
On a PREEMPT_RT kernel, pending soft interrupts are not executed on the interrupt-exit path; instead the per-CPU ksoftirqd thread is woken up and runs them in process context:
kernel/softirq.c
void irq_exit(void)
{
    __irq_exit_rcu();
    ...
}

static inline void __irq_exit_rcu(void)
{
    ...
    if (!in_interrupt() && local_softirq_pending())
        invoke_softirq();
    ...
}

#ifdef CONFIG_PREEMPT_RT
static inline void invoke_softirq(void)
{
    if (should_wake_ksoftirqd())
        wakeup_softirqd();
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */

static void wakeup_softirqd(void)
{
    /* Interrupts are disabled: no need to stop preemption */
    struct task_struct *tsk = __this_cpu_read(ksoftirqd);

    if (tsk && tsk->state != TASK_RUNNING)
        wake_up_process(tsk);
}
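For illustration (a hypothetical driver fragment, assumed rather than taken from the original): work deferred through a tasklet is executed as a softirq, so on PREEMPT_RT it runs in thread context rather than on the interrupt-exit path.

#include <linux/interrupt.h>

static void my_tasklet_fn(struct tasklet_struct *t)
{
    /* deferred processing; on PREEMPT_RT this runs in a softirq thread */
}

static DECLARE_TASKLET(my_tasklet, my_tasklet_fn);

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
    tasklet_schedule(&my_tasklet);
    return IRQ_HANDLED;
}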
7. Making RCU read-side critical sections preemptible
rcu_read_lock_bh() marks an RCU read-side critical section and disables bottom halves. On PREEMPT_RT, disabling bottom halves is implemented with a per-CPU local lock plus rcu_read_lock(), so the critical section remains preemptible:
include/linux/rcupdate.h
static inline void rcu_read_lock_bh(void)
{
    local_bh_disable();
    ...
}

include/linux/bottom_half.h
static inline void local_bh_disable(void)
{
    __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

kernel/softirq.c
#ifdef CONFIG_PREEMPT_RT
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
    unsigned long flags;
    int newcnt;

    ...
    /* First entry of a task into a BH disabled section? */
    if (!current->softirq_disable_cnt) {
        if (preemptible()) {
            local_lock(&softirq_ctrl.lock);
            /* Required to meet the RCU bottomhalf requirements. */
            rcu_read_lock();
        } else {
            DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
        }
    }
    ...
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */
include/linux/rcupdate.h
static inline void rcu_read_unlock_bh(void)
{
    ...
    local_bh_enable();
}

include/linux/bottom_half.h
static inline void local_bh_enable(void)
{
    __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

kernel/softirq.c
#ifdef CONFIG_PREEMPT_RT
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
    ...
out:
    __local_bh_enable(cnt, preempt_on);
    ...
}

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
    ...
    if (!newcnt && unlock) {
        rcu_read_unlock();
        local_unlock(&softirq_ctrl.lock);
    }
}

#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */
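For illustration (a hypothetical reader, not part of the kernel excerpt above): a function that walks an RCU-protected list under rcu_read_lock_bh(). On a PREEMPT_RT kernel this read-side critical section can be preempted.

#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct my_entry {              /* hypothetical element type */
    int key;
    struct list_head node;
};

static LIST_HEAD(my_list);     /* writers use list_add_rcu()/list_del_rcu() */

static bool my_list_contains(int key)
{
    struct my_entry *e;
    bool found = false;

    rcu_read_lock_bh();
    list_for_each_entry_rcu(e, &my_list, node) {
        if (e->key == key) {
            found = true;
            break;
        }
    }
    rcu_read_unlock_bh();

    return found;
}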
8. Solving the priority inversion problem
The rt_mutex implements priority inheritance: when a high-priority task blocks on a lock held by a low-priority task, the owner temporarily inherits the waiter's priority. The call chain for acquiring an rt_mutex is:
rt_mutex_lock() -> __rt_mutex_lock() -> rt_mutex_lock_state()
-> __rt_mutex_lock_state() -> rt_mutex_fastlock() -> rt_mutex_slowlock()
-> rt_mutex_slowlock_locked() -> task_blocks_on_rt_mutex()
kernel/locking/rtmutex.c
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
                                   struct task_struct *task,
                                   enum rtmutex_chainwalk chwalk)
{
    ...
    if (waiter == rt_mutex_top_waiter(lock)) {
        rt_mutex_dequeue_pi(owner, top_waiter);
        rt_mutex_enqueue_pi(owner, waiter);

        rt_mutex_adjust_prio(owner);
        if (rt_mutex_real_waiter(owner->pi_blocked_on))
            chain_walk = 1;
    } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
        chain_walk = 1;
    }

    ...
}

static void rt_mutex_adjust_prio(struct task_struct *p)
{
    ...
    if (task_has_pi_waiters(p))
        pi_task = task_top_pi_waiter(p)->task;

    rt_mutex_setprio(p, pi_task);
}

kernel/sched/core.c
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
    ...
    prio = __rt_effective_prio(pi_task, p->normal_prio);
    ...

    prev_class = p->sched_class;
    queued = task_on_rq_queued(p);
    running = task_current(rq, p);
    if (queued)
        dequeue_task(rq, p, queue_flag);
    if (running)
        put_prev_task(rq, p);

    if (dl_prio(prio)) {
        if (!dl_prio(p->normal_prio) ||
            (pi_task && dl_prio(pi_task->prio) &&
             dl_entity_preempt(&pi_task->dl, &p->dl))) {
            p->dl.pi_se = pi_task->dl.pi_se;
            queue_flag |= ENQUEUE_REPLENISH;
        } else {
            p->dl.pi_se = &p->dl;
        }
        p->sched_class = &dl_sched_class;
    } else if (rt_prio(prio)) {
        if (dl_prio(oldprio))
            p->dl.pi_se = &p->dl;
        if (oldprio < prio)
            queue_flag |= ENQUEUE_HEAD;
        p->sched_class = &rt_sched_class;
    } else {
        if (dl_prio(oldprio))
            p->dl.pi_se = &p->dl;
        if (rt_prio(oldprio))
            p->rt.timeout = 0;
        p->sched_class = &fair_sched_class;
    }

    p->prio = prio;

    if (queued)
        enqueue_task(rq, p, queue_flag);
    if (running)
        set_next_task(rq, p);

    check_class_changed(rq, p, prev_class, oldprio);
    ...
}

static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                       const struct sched_class *prev_class,
                                       int oldprio)
{
    if (prev_class != p->sched_class) {
        if (prev_class->switched_from)
            prev_class->switched_from(rq, p);

        p->sched_class->switched_to(rq, p);
    } else if (oldprio != p->prio || dl_task(p))
        p->sched_class->prio_changed(rq, p, oldprio);
}
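On the user-space side, priority inheritance is exposed through PI futexes, which are backed by rt_mutex in the kernel. A minimal sketch (an illustrative assumption, not from the original text) that creates a POSIX mutex with the priority-inheritance protocol:

#include <pthread.h>

static pthread_mutex_t pi_lock;

static int init_pi_mutex(void)
{
    pthread_mutexattr_t attr;
    int ret;

    pthread_mutexattr_init(&attr);
    /* PTHREAD_PRIO_INHERIT maps to a PI futex, i.e. an rt_mutex in the kernel */
    ret = pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
    if (ret == 0)
        ret = pthread_mutex_init(&pi_lock, &attr);
    pthread_mutexattr_destroy(&attr);

    return ret;
}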
On a PREEMPT_RT kernel the mutex itself is reimplemented on top of rt_mutex:
include/linux/mutex.h
#ifdef CONFIG_PREEMPT_RT
# include <linux/mutex_rt.h>
#else
...
#endif /* !PREEMPT_RT */
include/linux/mutex_rt.h
struct mutex {
    struct rt_mutex lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
    struct lockdep_map dep_map;
#endif
};
The read-write semaphore is likewise rebuilt on top of rt_mutex:
include/linux/rwsem.h
#ifdef CONFIG_PREEMPT_RT
#include <linux/rwsem-rt.h>
#else /* PREEMPT_RT */
...
#endif /* !PREEMPT_RT */
include/linux/rwsem-rt.h
struct rw_semaphore {
    atomic_t readers;
    struct rt_mutex rtmutex;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
    struct lockdep_map dep_map;
#endif
};
9. Changes to spinlocks
On a PREEMPT_RT kernel, spinlock_t is also built on top of rt_mutex, so a task holding a spinlock remains preemptible and acquiring the lock may sleep:
include/linux/spinlock.h
#include <linux/spinlock_types.h>

include/linux/spinlock_types.h
#ifndef CONFIG_PREEMPT_RT
# include <linux/spinlock_types_nort.h>
...
#else
...
# include <linux/spinlock_types_rt.h>
...
#endif
include/linux/spinlock_types_rt.h
typedef struct spinlock {
    struct rt_mutex lock;
    unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
    struct lockdep_map dep_map;
#endif
} spinlock_t;
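A usage sketch (illustrative assumption, not from the original text): code that takes a spinlock_t is unchanged, but on PREEMPT_RT the critical section is preemptible and the lock acquisition may sleep; data that is really accessed from hard interrupt context must use raw_spinlock_t, which remains a true spinning lock.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);    /* rt_mutex-based on PREEMPT_RT */

static int shared_counter;

static void update_counter(void)
{
    spin_lock(&my_lock);    /* on PREEMPT_RT the holder stays preemptible */
    shared_counter++;
    spin_unlock(&my_lock);
}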
10. Changes to read-write locks
The read-write lock rwlock_t is likewise reimplemented on top of rt_mutex:
include/linux/spinlock.h
#include <linux/spinlock_types.h>

include/linux/spinlock_types.h
#ifndef CONFIG_PREEMPT_RT
...
# include <linux/rwlock_types.h>
#else
...
# include <linux/rwlock_types_rt.h>
#endif
include/linux/rwlock_types_rt.h
typedef struct rt_rw_lock rwlock_t;

struct rt_rw_lock {
    struct rt_mutex rtmutex;
    atomic_t readers;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
    struct lockdep_map dep_map;
#endif
};
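As with spinlocks, users of rwlock_t keep the usual read_lock()/write_lock() API; only the underlying implementation changes. A minimal sketch (a hypothetical example, not from the original):

#include <linux/spinlock.h>

static DEFINE_RWLOCK(my_rwlock);
static int config_value;

static int read_config(void)
{
    int v;

    read_lock(&my_rwlock);    /* rt_mutex-based reader side on PREEMPT_RT */
    v = config_value;
    read_unlock(&my_rwlock);

    return v;
}

static void write_config(int v)
{
    write_lock(&my_rwlock);
    config_value = v;
    write_unlock(&my_rwlock);
}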
11. Changes to critical sections protected by disabling kernel preemption or hard interrupts
Such critical sections are converted to local locks. On PREEMPT_RT a local lock (local_lock_t) contains a per-CPU spinlock, so taking it only disables migration rather than preemption or interrupts:
include/linux/local_lock_internal.h
typedef struct {
#ifdef CONFIG_PREEMPT_RT
    spinlock_t lock;
    struct task_struct *owner;    /* task that currently holds the local lock */
    int nestcnt;                  /* nesting depth */
#elif defined(CONFIG_DEBUG_LOCK_ALLOC)
    struct lockdep_map dep_map;
    struct task_struct *owner;
#endif
} local_lock_t;
include/linux/local_lock.h
#define local_lock(lock)    __local_lock(lock)

include/linux/local_lock_internal.h
#ifdef CONFIG_PREEMPT_RT

#define __local_lock(lock)                          \
    do {                                            \
        migrate_disable();                          \
        local_lock_acquire(this_cpu_ptr(lock));     \
    } while (0)

...
#else
...
#endif

#ifdef CONFIG_PREEMPT_RT

static inline void local_lock_acquire(local_lock_t *l)
{
    if (l->owner != current) {
        spin_lock(&l->lock);
        ...
        l->owner = current;
    }
    l->nestcnt++;
}

...
#elif defined(CONFIG_DEBUG_LOCK_ALLOC)
...
#else /* CONFIG_DEBUG_LOCK_ALLOC */
...
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
include/linux/local_lock.h
#define local_unlock(lock)    __local_unlock(lock)

include/linux/local_lock_internal.h
#ifdef CONFIG_PREEMPT_RT

#define __local_unlock(lock)                        \
    do {                                            \
        local_lock_release(this_cpu_ptr(lock));     \
        migrate_enable();                           \
    } while (0)

...
#else
...
#endif

#ifdef CONFIG_PREEMPT_RT

static inline void local_lock_release(local_lock_t *l)
{
    ...
    if (--l->nestcnt)
        return;

    l->owner = NULL;
    spin_unlock(&l->lock);
}
...
#elif defined(CONFIG_DEBUG_LOCK_ALLOC)
...
#else /* CONFIG_DEBUG_LOCK_ALLOC */
...
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
For example, in the page allocator the critical section that used to be protected by local_irq_save() now takes a per-CPU local lock:
mm/page_alloc.c
struct pa_lock {
    local_lock_t l;
};
static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
    .l = INIT_LOCAL_LOCK(l),
};

static void __free_pages_ok(struct page *page, unsigned int order,
                            fpi_t fpi_flags)
{
    unsigned long flags;
    int migratetype;
    unsigned long pfn = page_to_pfn(page);

    if (!free_pages_prepare(page, order, true))
        return;

    migratetype = get_pfnblock_migratetype(page, pfn);
    local_lock_irqsave(&pa_lock.l, flags);
    __count_vm_events(PGFREE, 1 << order);
    free_one_page(page_zone(page), page, pfn, order, migratetype,
                  fpi_flags);
    local_unlock_irqrestore(&pa_lock.l, flags);
}
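A minimal sketch of the plain variant (a hypothetical example, not from the kernel): per-CPU statistics protected with local_lock()/local_unlock() instead of preempt_disable()/preempt_enable().

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct my_pcpu_stats {
    local_lock_t lock;
    unsigned long events;
};

static DEFINE_PER_CPU(struct my_pcpu_stats, my_stats) = {
    .lock = INIT_LOCAL_LOCK(lock),
};

static void my_count_event(void)
{
    /* On PREEMPT_RT this takes the per-CPU spinlock and only disables
     * migration; on !PREEMPT_RT it disables preemption.
     */
    local_lock(&my_stats.lock);
    __this_cpu_inc(my_stats.events);
    local_unlock(&my_stats.lock);
}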
12. Changes to critical sections protected by disabling soft interrupts
On PREEMPT_RT, disabling soft interrupts is also implemented with a per-CPU local lock, tracked by the per-CPU structure softirq_ctrl:
kernel/softirq.c
struct softirq_ctrl {
    local_lock_t lock;
    int cnt;    /* nesting count of softirq-disabled sections */
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
    .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock),
};
12.1. The local_bh_disable() function
include/linux/bottom_half.h
static inline void local_bh_disable(void)
{
    __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

kernel/softirq.c
#ifdef CONFIG_PREEMPT_RT
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
    unsigned long flags;
    int newcnt;

    ...
    /* First entry of a task into a BH disabled section? */
    if (!current->softirq_disable_cnt) {
        if (preemptible()) {
            local_lock(&softirq_ctrl.lock);
            /* Required to meet the RCU bottomhalf requirements. */
            rcu_read_lock();
        } else {
            DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
        }
    }

    /*
     * Track the per CPU softirq disabled state. On RT this is per CPU
     * state to allow preemption of bottom half disabled sections.
     */
    newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
    /*
     * Reflect the result in the task state to prevent recursion on the
     * local lock and to make softirq_count() & al work.
     */
    current->softirq_disable_cnt = newcnt;
    ...
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */
12.2. The local_bh_enable() function
include/linux/bottom_half.h
static inline void local_bh_enable(void)
{
    __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

kernel/softirq.c
#ifdef CONFIG_PREEMPT_RT
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
    bool preempt_on = preemptible();
    unsigned long flags;
    u32 pending;
    int curcnt;

    ...
    local_irq_save(flags);
    curcnt = this_cpu_read(softirq_ctrl.cnt);

    /*
     * If this is not reenabling soft interrupts, no point in trying to
     * run pending ones.
     */
    if (curcnt != cnt)
        goto out;

    pending = local_softirq_pending();
    if (!pending || ksoftirqd_running(pending))
        goto out;

    /*
     * If this was called from non preemptible context, wake up the
     * softirq daemon.
     */
    if (!preempt_on) {
        wakeup_softirqd();
        goto out;
    }

    /*
     * Adjust softirq count to SOFTIRQ_OFFSET which makes
     * in_serving_softirq() become true.
     */
    cnt = SOFTIRQ_OFFSET;
    __local_bh_enable(cnt, false);
    __do_softirq();

out:
    __local_bh_enable(cnt, preempt_on);
    local_irq_restore(flags);
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */
kernel/softirq.c
#ifdef CONFIG_PREEMPT_RT
static void __local_bh_enable(unsigned int cnt, bool unlock)
{
    unsigned long flags;
    int newcnt;

    ...
    newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
    current->softirq_disable_cnt = newcnt;

    if (!newcnt && unlock) {
        rcu_read_unlock();
        local_unlock(&softirq_ctrl.lock);
    }
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */
12.3. The softirq threads
kernel/softirq.c
static void run_ksoftirqd(unsigned int cpu)
{
    ksoftirqd_run_begin();
    if (local_softirq_pending()) {
        __do_softirq();
        ksoftirqd_run_end();
        cond_resched();
        return;
    }
    ksoftirqd_run_end();
}

#ifdef CONFIG_PREEMPT_RT
static inline void ksoftirqd_run_begin(void)
{
    __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
    local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
    __local_bh_enable(SOFTIRQ_OFFSET, true);
    WARN_ON_ONCE(in_interrupt());
    local_irq_enable();
}
#else /* CONFIG_PREEMPT_RT */
...
#endif /* !CONFIG_PREEMPT_RT */