- 低分辨率定时器的代码和jiffies的关系太过紧密,并且默认按32位进行设计,并且它的代码已经经过长时间的优化,目前的使用也是没有任何错误,如果硬要基于它来实现高分辨率定时器,势必会打破原有的时间轮概念,并且会引入一大堆#if--#else判断;
- 虽然大部分时间里,时间轮可以实现O(1)时间复杂度,但是当有进位发生时,会产生不可预测的O(N)定时器级联迁移时间,这对于低分辨率定时器来说问题不大,可是它大大地影响了定时器的精度;
- 低分辨率定时器几乎是为“超时”而设计的,并为此对它进行了大量的优化,对于这些以“超时”为目的而使用的定时器,它们大多数期望在超时到来之前获得正确的结果,然后删除定时器,精确时间并不是它们主要的目的,例如网络通信、设备IO等等。
为此,内核为高精度定时器重新设计了一套软件架构,它可以为我们提供纳秒级的定时精度,以满足对精确时间有迫切需求的应用程序或内核驱动,例如多媒体应用,音频设备的驱动程序等等。以下的讨论用hrtimer(high resolution timer)表示高精度定时器。
1. 如何组织hrtimer?
- 稳定而且快速的查找能力;
- 快速地插入和删除定时器的能力;
- 排序功能;
- struct hrtimer { /* One high-resolution timer instance. */
- struct timerqueue_node node; /* red-black tree node; its expires field is the key the tree is sorted by */
- ktime_t _softexpires; /* soft expiry time; may differ slightly from node.expires */
- enum hrtimer_restart (*function)(struct hrtimer *); /* expiry callback; the return value tells whether the timer is restarted */
- struct hrtimer_clock_base *base; /* clock base this timer is bound to (set in __hrtimer_init) */
- unsigned long state; /* HRTIMER_STATE_* bit flags */
- ......
- };
- enum hrtimer_restart { /* Return value of an hrtimer callback function. */
- HRTIMER_NORESTART, /* Timer is not restarted */
- HRTIMER_RESTART, /* Timer must be restarted */
- };
- #define HRTIMER_STATE_INACTIVE 0x00 // timer is not active
- #define HRTIMER_STATE_ENQUEUED 0x01 // timer has been queued in the red-black tree
- #define HRTIMER_STATE_CALLBACK 0x02 // timer's callback function is currently being invoked
- #define HRTIMER_STATE_MIGRATE 0x04 // timer is being migrated between CPUs
- enum hrtimer_base_type { /* The time reference systems an hrtimer can be based on. */
- HRTIMER_BASE_MONOTONIC, // monotonically increasing time, excluding time spent suspended
- HRTIMER_BASE_REALTIME, // ordinary wall-clock (real) time
- HRTIMER_BASE_BOOTTIME, // monotonically increasing boot time, including time spent suspended
- };
- struct hrtimer_cpu_base { /* Per-CPU container for the hrtimer clock bases (other fields elided). */
- ......
- struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; /* one clock base per time reference system */
- };
- struct hrtimer_clock_base { /* State of one time reference system on one CPU. */
- struct hrtimer_cpu_base *cpu_base; // points to the hrtimer_cpu_base of the owning CPU
- ......
- struct timerqueue_head active; // red-black tree containing every hrtimer that uses this time base
- ktime_t resolution; // resolution of this time base
- ktime_t (*get_time)(void); // function that returns the current time of this time base
- ktime_t softirq_time;// time that hrtimer_run_queues compares timer expiries against (low-resolution path)
- ktime_t offset; // subtracted from a timer's expiry in hrtimer_interrupt — NOTE(review): exact meaning is not described here, confirm
- };
- struct timerqueue_node { /* Tree node embedded in each hrtimer. */
- struct rb_node node; // red-black tree node
- ktime_t expires; // expiry time of the hrtimer this node represents; differs slightly from _softexpires in struct hrtimer
- };
- struct timerqueue_head { /* Head of one expiry-ordered timer tree. */
- struct rb_root head; // root node of the red-black tree
- struct timerqueue_node *next; // earliest-expiring node in the tree, i.e. the leftmost node
- };
图 1.1 每个cpu的hrtimer组织结构
- 每个cpu有一个hrtimer_cpu_base结构;
- hrtimer_cpu_base结构管理着3种不同的时间基准系统的hrtimer,分别是:实时时间,启动时间和单调时间;
- 每种时间基准系统通过它的active字段(timerqueue_head结构),管理它们各自的红黑树;
- 红黑树上,按到期时间进行排序,最先到期的hrtimer位于最左下的节点,并被记录在active.next字段中;
- 3种时间基准的最先到期时间可能不同,所以,它们之中最先到期的时间被记录在hrtimer_cpu_base的expires_next字段中。
2. hrtimer如何运转
2.1 添加一个hrtimer
- /* Initialise a timer for the given clock and mode. */
- void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
- enum hrtimer_mode mode);
- /* After initialisation, install the expiry callback. */
- timer.function = hr_callback;
- /* Activate the timer; tim is interpreted as absolute or relative according to mode. */
- int hrtimer_start(struct hrtimer *timer, ktime_t tim,
- const enum hrtimer_mode mode);
- /* Like hrtimer_start(), but additionally takes an expiry range of range_ns. */
- int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long range_ns, const enum hrtimer_mode mode);
- /* Cancel a timer. */
- int hrtimer_cancel(struct hrtimer *timer);
- /* Push the timer's expiry forward in steps of interval, relative to now. */
- extern u64
- hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
- /* Forward the timer using the current time of its own clock base as "now". */
- static inline u64 hrtimer_forward_now(struct hrtimer *timer,
- ktime_t interval)
- {
- ktime_t now = timer->base->get_time();
- return hrtimer_forward(timer, now, interval);
- }
- /* Return 1 when any state bit is set on the timer, 0 when it is inactive. */
- static inline int hrtimer_active(const struct hrtimer *timer)
- {
- if (timer->state == HRTIMER_STATE_INACTIVE)
- return 0;
- return 1;
- }
- /* Return non-zero when the timer is currently queued in the red-black tree. */
- static inline int hrtimer_is_queued(struct hrtimer *timer)
- {
- unsigned long queued_bit = timer->state & HRTIMER_STATE_ENQUEUED;
- return queued_bit;
- }
- /* Return non-zero while the timer's callback function is being invoked. */
- static inline int hrtimer_callback_running(struct hrtimer *timer)
- {
- unsigned long callback_bit = timer->state & HRTIMER_STATE_CALLBACK;
- return callback_bit;
- }
- static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode) /* Zero the timer and bind it to the matching clock base of this CPU's hrtimer_bases. */
- {
- struct hrtimer_cpu_base *cpu_base;
- int base;
- memset(timer, 0, sizeof(struct hrtimer));
- cpu_base = &__raw_get_cpu_var(hrtimer_bases);
- if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) /* non-absolute timers on CLOCK_REALTIME are redirected to CLOCK_MONOTONIC */
- clock_id = CLOCK_MONOTONIC;
- base = hrtimer_clockid_to_base(clock_id); /* map the clock id to an index into clock_base[] */
- timer->base = &cpu_base->clock_base[base];
- timerqueue_init(&timer->node);
- ......
- }
- int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns, const enum hrtimer_mode mode,
- int wakeup) /* (Re)queue the timer to expire at tim with an expiry range of delta_ns; returns the result of remove_hrtimer(). */
- {
- ......
- /* Obtain the hrtimer_clock_base pointer */
- base = lock_hrtimer_base(timer, &flags);
- /* If the timer is already in the red-black tree, remove it first: */
- ret = remove_hrtimer(timer, base); ......
- /* A relative time needs the current time added, because absolute time is used internally */
- if (mode & HRTIMER_MODE_REL) {
- tim = ktime_add_safe(tim, new_base->get_time());
- ......
- }
- /* Set the expiry time range */
- hrtimer_set_expires_range_ns(timer, tim, delta_ns);
- ......
- /* Insert the hrtimer, sorted by expiry time, into the red-black tree of its time base */
- /* Returns true if this timer is the earliest one to expire */
- leftmost = enqueue_hrtimer(timer, new_base);
- /*
- * Only allow reprogramming if the new base is on this CPU.
- * (it might still be on another CPU if the timer was pending)
- *
- * XXX send_remote_softirq() ?
- * The timer expires earlier than the previous earliest one, so the tick_device must be reprogrammed with the new expiry time
- */
- if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
- hrtimer_enqueue_reprogram(timer, new_base, wakeup);
- unlock_hrtimer_base(timer, &flags);
- return ret;
- }
- <p>
- </p>
2.2 hrtimer的到期处理
- 没有切换到高精度模式时,在每个jiffie的tick事件中断中进行查询和处理;
- 在HRTIMER_SOFTIRQ软中断中进行查询和处理;
- 切换到高精度模式后,在每个clock_event_device的到期事件中断中进行查询和处理;
低精度模式 因为系统并不是一开始就会支持高精度模式,而是在系统启动后的某个阶段,等待所有的条件都满足后,才会切换到高精度模式,当系统还没有切换到高精度模式时,所有的高精度定时器运行在低精度模式下,在每个jiffie的tick事件中断中进行到期定时器的查询和处理,显然这时候的精度和低分辨率定时器是一样的(HZ级别)。低精度模式下,每个tick事件中断中,hrtimer_run_queues函数会被调用,由它完成定时器的到期处理。hrtimer_run_queues首先判断目前高精度模式是否已经启用,如果已经切换到了高精度模式,什么也不做,直接返回:
- /* Low-resolution expiry path: called on every tick to run the expired hrtimers of each clock base. */
- void hrtimer_run_queues(void)
- {
- struct timerqueue_node *node;
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
- struct hrtimer_clock_base *base;
- int index, gettime = 1;
- /* Once high-resolution mode is active this function does nothing. */
- if (hrtimer_hres_active())
- return;
- for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
- base = &cpu_base->clock_base[index];
- /* Nothing queued on this clock base. */
- if (!timerqueue_getnext(&base->active))
- continue;
- /* Sample the softirq time once, and only if some base actually has timers. */
- if (gettime) {
- hrtimer_get_softirq_time(cpu_base);
- gettime = 0;
- }
- raw_spin_lock(&cpu_base->lock);
- while ((node = timerqueue_getnext(&base->active))) {
- struct hrtimer *timer;
- timer = container_of(node, struct hrtimer, node);
- /* The earliest timer has not expired yet, so stop scanning this base. */
- if (base->softirq_time.tv64 <=
- hrtimer_get_expires_tv64(timer))
- break;
- __run_hrtimer(timer, &base->softirq_time);
- }
- raw_spin_unlock(&cpu_base->lock);
- }
- }
高精度模式 切换到高精度模式后,原来给cpu提供tick事件的tick_device(clock_event_device)会被高精度定时器系统接管,它的中断事件回调函数被设置为hrtimer_interrupt,红黑树中最左下的节点的定时器的到期时间被编程到该clock_event_device中,这样每次clock_event_device的中断意味着至少有一个高精度定时器到期。另外,当timekeeper系统中的时间需要修正,或者clock_event_device的到期事件时间被重新编程时,系统会发出HRTIMER_SOFTIRQ软中断,软中断的处理函数run_hrtimer_softirq最终也会调用hrtimer_interrupt函数对到期定时器进行处理,所以在这里我们只要讨论hrtimer_interrupt函数的实现即可。
- /* High-resolution expiry path: event handler of the clock_event_device; runs expired hrtimers and programs the next event. */
- void hrtimer_interrupt(struct clock_event_device *dev)
- {
- ......
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- ......
- while ((node = timerqueue_getnext(&base->active))) {
- ......
- /* Earliest timer of this base is still pending: record its expiry as a candidate for the next event and stop scanning this base. */
- if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
- ktime_t expires;
- expires = ktime_sub(hrtimer_get_expires(timer),
- base->offset);
- if (expires.tv64 < expires_next.tv64)
- expires_next = expires;
- break;
- }
- __run_hrtimer(timer, &basenow);
- }
- }
- /*
- * Store the new expiry value so the migration code can verify
- * against it.
- */
- cpu_base->expires_next = expires_next;
- raw_spin_unlock(&cpu_base->lock);
- /* Reprogramming necessary ? */
- if (expires_next.tv64 == KTIME_MAX ||
- !tick_program_event(expires_next, 0)) {
- cpu_base->hang_detected = 0;
- return;
- }
- 系统正在被调试跟踪,导致时间在走,程序不走;
- 定时器的回调函数花了太长的时间;
- 系统运行在虚拟机中,而虚拟机被调度导致停止运行;
- /* Programming the next event failed: refresh the time and retry the whole scan a limited number of times. */
- raw_spin_lock(&cpu_base->lock);
- now = hrtimer_update_base(cpu_base);
- cpu_base->nr_retries++;
- if (++retries < 3)
- goto retry;
- /* Still failing: record how long this interrupt has taken since entry. */
- delta = ktime_sub(now, entry_time);
- if (delta.tv64 > cpu_base->max_hang_time.tv64)
- cpu_base->max_hang_time = delta;
- /*
- * Limit it to a sensible value as we enforce a longer
- * delay. Give the CPU at least 100ms to catch up.
- */
- if (delta.tv64 > 100 * NSEC_PER_MSEC)
- expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
- else
- expires_next = ktime_add(now, delta);
- /* Second argument 1 here versus 0 above — NOTE(review): presumably forces the event to be programmed, confirm. */
- tick_program_event(expires_next, 1);
- printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
- ktime_to_ns(delta));
- }
3. 切换到高精度模式
- void hrtimer_run_pending(void) /* Attempts the switch to high-resolution mode while it is not yet active. */
- {
- if (hrtimer_hres_active()) /* already in high-resolution mode: nothing to do */
- return;
- ......
- if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) /* argument is allow_nohz; a return of 1 means the switch to high resolution may proceed */
- hrtimer_switch_to_hres();
- }
- /* Per-CPU hrtimer_cpu_base instance; the initialiser is elided here. */
- DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = {
- ......
- };
- /* Read this CPU's hres_active flag from hrtimer_bases. */
- static inline int hrtimer_hres_active(void)
- {
- int active = __this_cpu_read(hrtimer_bases.hres_active);
- return active;
- }
- /*
- * Decide whether the caller may switch to high-resolution mode.
- * Returns 1 only when every precondition holds and allow_nohz is zero;
- * when allow_nohz is non-zero it switches to nohz itself and returns 0.
- */
- int tick_check_oneshot_change(int allow_nohz)
- {
- struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
- /* Checked strictly in this order; the first failing condition ends the attempt. */
- if (!test_and_clear_bit(0, &ts->check_clocks) ||
- ts->nohz_mode != NOHZ_MODE_INACTIVE ||
- !timekeeping_valid_for_hres() ||
- !tick_is_oneshot_available())
- return 0;
- if (allow_nohz) {
- tick_nohz_switch_to_nohz();
- return 0;
- }
- return 1;
- }
- NOHZ_MODE_INACTIVE // 未启用NO_HZ模式
- NOHZ_MODE_LOWRES // 启用NO_HZ模式,hrtimer工作于低精度模式下
- NOHZ_MODE_HIGHRES // 启用NO_HZ模式,hrtimer工作于高精度模式下
所以当系统不允许高精度模式时,将会在tick_check_oneshot_change函数内,通过tick_nohz_switch_to_nohz切换至NOHZ_MODE_LOWRES 模式,如果系统允许高精度模式,传入的allow_nohz参数为false,tick_check_oneshot_change函数返回1,回到上面的hrtimer_run_pending函数,hrtimer_switch_to_hres函数将会被调用,以完成切换到NOHZ_MODE_HIGHRES高精度模式。好啦,真正的切换函数找到了,我们看一看它如何切换:
- /* Switch this CPU's hrtimer base to high-resolution mode; returns 1 on success (or if already active), 0 on failure. */
- static int hrtimer_switch_to_hres(void)
- {
- int i, cpu = smp_processor_id();
- struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
- unsigned long flags;
- /* Already switched: report success. */
- if (base->hres_active)
- return 1;
- local_irq_save(flags);
- /* A non-zero result from tick_init_highres() means the switch failed. */
- if (tick_init_highres()) {
- local_irq_restore(flags);
- printk(KERN_WARNING "Could not switch to high resolution "
- "mode on CPU %d\n", cpu);
- return 0;
- }
- base->hres_active = 1;
- /* Every clock base now advertises the high-resolution granularity. */
- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
- base->clock_base[i].resolution = KTIME_HIGH_RES;
- /* Set up the hrtimer that emulates the periodic tick. */
- tick_setup_sched_timer();
- /* "Retrigger" the interrupt to get things going */
- retrigger_next_event(NULL);
- local_irq_restore(flags);
- return 1;
- }
图3.1 低精度模式切换至高精度模式
4. 模拟tick事件
在kernel/time/tick-sched.c中,内核定义了一个per_cpu全局变量:tick_cpu_sched,从而为每个cpu提供了一个tick_sched结构, 该结构主要用于管理NO_HZ配置下的tickless处理,因为模拟tick事件与tickless有很强的相关性,所以高精度定时器系统也利用了该结构的以下字段,用于完成模拟tick事件的操作:
- struct tick_sched { /* Per-CPU tick state; only the fields used for tick emulation are shown. */
- struct hrtimer sched_timer; /* hrtimer that emulates the periodic tick (set up in tick_setup_sched_timer) */
- unsigned long check_clocks; /* bit 0 is tested and cleared by tick_check_oneshot_change */
- enum tick_nohz_mode nohz_mode; /* current NOHZ_MODE_* value */
- ......
- };
- /* Set up and start this CPU's sched_timer, the hrtimer that emulates the periodic tick. */
- void tick_setup_sched_timer(void)
- {
- struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
- ktime_t now = ktime_get();
- /*
- * Emulate tick processing via per-CPU hrtimers:
- */
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- ts->sched_timer.function = tick_sched_timer;
- /* Get the next period (per cpu) */
- hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
- /* Keep forwarding by tick_period and starting the timer until it is actually active. */
- for (;;) {
- hrtimer_forward(&ts->sched_timer, now, tick_period);
- hrtimer_start_expires(&ts->sched_timer,
- HRTIMER_MODE_ABS_PINNED);
- /* Check, if the timer was already in the past */
- if (hrtimer_active(&ts->sched_timer))
- break;
- now = ktime_get();
- }
- #ifdef CONFIG_NO_HZ
- if (tick_nohz_enabled)
- ts->nohz_mode = NOHZ_MODE_HIGHRES;
- #endif
- }
- /* Callback of the tick-emulating sched_timer: does the per-tick work and re-arms itself one tick_period later. */
- static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
- {
- ......
- #ifdef CONFIG_NO_HZ
- /* NOTE(review): presumably no CPU currently owns the jiffies update, so this CPU takes it over — confirm. */
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
- tick_do_timer_cpu = cpu;
- #endif
- /* Check, if the jiffies need an update */
- if (tick_do_timer_cpu == cpu)
- tick_do_update_jiffies64(now);
- if (regs) {
- ......
- update_process_times(user_mode(regs));
- ......
- }
- /* Move the expiry forward by tick_period and ask for the timer to be restarted. */
- hrtimer_forward(timer, now, tick_period);
- return HRTIMER_RESTART;
- }
发表评论 取消回复