linux的定时器一般分为两种,一种是timeout类型,也就是在指定时间之前完成相应的任务即可,这种定时器对精度要求较低,晚几毫秒执行不会有很大的影响,而且一般这种类型的定时器要处理的任务在超时之前就已经完成,并且从定时器的队列中删除了,用不着真正的等到timeout然后由定时器模块来处理,这种较低精度要求的定时器一般使用timer wheel定时器。另一种类型的就是timer类型定时器,这就要求必须在指定的时间执行相应的任务,因此精度要求较高,这种场合一般适用高精度的定时器hrtimer。
timer wheel和hrtimer使用两种不同的机制实现定时器,timer wheel使用jiffies为基准来判断任务是否过期,由于jiffies计数系统的节拍,系统每次时钟中断都会将这个值加1,系统每秒的时钟中断的次数为HZ(宏定义的一个常量,一般为100),因此jiffies为timer wheel定时器提供了毫秒级的精度。而hrtimer需要高精度的时钟设备,为系统提供纳秒级的定时器。这两者都通过软中断来驱动,timer wheel定时器通过软中断TIMER_SOFTIRQ, 而hrtimer通过HRTIMER_SOFTIRQ来响应定时器。
1. timer wheel定时器
timer wheel定时器的请求通过struct timer_list来抽象,然后按照定时器的过期时间和基准时间的差值将其组织在双链表中,且相同过期时间的定时器放在同一个链表中,当响应软中断时,则将过期时间在当前时间之前的定时器全部删除,并且执行相应的回调函数。
struct timer_list {
* All fields that change during normal runtime grouped to the
* same cacheline
struct list_head entry; //双链表的节点
unsigned long expires; //过期时间
struct tvec_base *base; /*由于定时器的基准时间不会随着jiffies的值实时更新,这个为定时器提供了基准时间,并且组织所有的在这个base上的timer_list对象,从下面tvec_base定义可以看到是缓冲区对齐,因此base变量的最后一位肯定是0,可以用这一位来表示其他信息,当base最后一位为1表示此定时器是deferrable的,可以延迟一定时间执行*/
void (*function)(unsigned long); //回调函数和回调函数的参数
unsigned long data;
int slack;
#ifdef CONFIG_TIMER_STATS //统计相关
int start_pid;
void *start_site;
char start_comm[16];
struct lockdep_map lockdep_map;
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) #define TVN_SIZE (1 << TVN_BITS) #define TVR_SIZE (1 << TVR_BITS) #define TVN_MASK (TVN_SIZE - 1) #define TVR_MASK (TVR_SIZE - 1) struct tvec { struct list_head vec[TVN_SIZE]; }; struct tvec_root { struct list_head vec[TVR_SIZE]; }; //定时器双链表的表头 struct tvec_base { spinlock_t lock; struct timer_list *running_timer; //正在执行的timer_list unsigned long timer_jiffies; //上文说的基准时间 unsigned long next_timer; //距离timer_jiffies最近的过期时间 struct tvec_root tv1; //tv1--tv5根据过期时间的大小将timer_list放入其中,tv1表示过期时间最短的任务 struct tvec tv2; struct tvec tv3; struct tvec tv4; struct tvec tv5; } ____cacheline_aligned;

struct timer_list对象放在tv1--tv5中的哪一个上,是通过timer_list.expires - tvec_base.timer_jiffies来确定的,也就是说,是通过过期时间和基准时间之间的差值来确定struct timer_list对象在哪个tvX上的。

若上面的差值可以在TVR_BITS位内表示出来,则将相应的timer_list放在tv1上,而expires的低TVR_BITS位的数值作为timer_list在tv1数组上的索引,将其串到双链表上;若可以在TVR_BITS + TVN_BITS位内表示差值,则将其放在tv2上,expires中紧接着的TVN_BITS位段内的值当做tv2数组内部的索引,然后依次类推;若差值大于等于1 << (TVR_BITS + 3 * TVN_BITS),则将其全部放在tv5内,表示过期时间还很长,一段时间内轮不到其执行,expires中最高的TVN_BITS位段作为tv5内的索引。注意:选择哪一级tvX用的是差值,而数组内的索引取自expires本身的相应位段。
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
struct list_head *vec;
if (idx < TVR_SIZE) { //tv1位段内
int i = expires & TVR_MASK;
vec = base->tv1.vec + i;
} else if (idx < 1 << (TVR_BITS + TVN_BITS)) { //tv2位段内
int i = (expires >> TVR_BITS) & TVN_MASK;
vec = base->tv2.vec + i;
} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
vec = base->tv3.vec + i;
} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
vec = base->tv4.vec + i;
} else if ((signed long) idx < 0) { //如果插入的定时器已经过期,则将其放在最先过期的tv1保证内迅速执行
* Can happen if you add a timer with expires == jiffies,
* or you set a timer to go off in the past
vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
} else {
int i;
/* If the timeout is larger than 0xffffffff on 64-bit
* architectures then we use the maximum timeout:
if (idx > 0xffffffffUL) {
idx = 0xffffffffUL;
expires = idx + base->timer_jiffies;
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
vec = base->tv5.vec + i;
* Timers are FIFO:
list_add_tail(&timer->entry, vec);
static int cascade(struct tvec_base *base, struct tvec *tv, int index)
/* cascade all the timers from tv up one level */
struct timer_list *timer, *tmp;
struct list_head tv_list;
list_replace_init(tv->vec + index, &tv_list); //将链表头放到tv_list中
* We are removing _all_ timers from the list, so we
* don't have to detach them individually.
list_for_each_entry_safe(timer, tmp, &tv_list, entry) { //将所有的链表元素重新加到base中
BUG_ON(tbase_get_base(timer->base) != base);
internal_add_timer(base, timer);
return index;
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) /*利用基准时间来计算tvN上的位置,由于timer_list加到base中也是用expire来计算位置的值,因此这个索引就是在tvN中刚好过期的链表的索引*/
static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
while (time_after_eq(jiffies, base->timer_jiffies)) {/*比较当前时间和基准,判断base上是否有过期的定时器*/
struct list_head work_list;
struct list_head *head = &work_list;
int index = base->timer_jiffies & TVR_MASK;/*只有当基准时间最后TVR_BITS位内的值重新归零之后才迁移链表,表示若基准时间继续增加,tv1内的过期链表就可能为空,需要重新填充*/
* Cascade timers:
if (!index &&
(!cascade(base, &base->tv2, INDEX(0))) &&
(!cascade(base, &base->tv3, INDEX(1))) &&
!cascade(base, &base->tv4, INDEX(2)))/*一直迁移链表,直到某个tvec中没有过期对象*/
cascade(base, &base->tv5, INDEX(3));
list_replace_init(base->tv1.vec + index, &work_list);
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
timer = list_first_entry(head, struct timer_list,entry);/*获取timer_list对象和回调函数*/
fn = timer->function;
data = timer->data;
base->running_timer = timer;/*设为当前运行定时器,并从tvec_base中删除定时器*/
detach_timer(timer, 1);
call_timer_fn(timer, fn, data);//执行回调函数
base->running_timer = NULL;
static void run_timer_softirq(struct softirq_action *h)
struct tvec_base *base = __this_cpu_read(tvec_bases);
hrtimer_run_pending(); //每次tick都执行,若没有激活高精度定时器则尝试切换至高精度定时器
if (time_after_eq(jiffies, base->timer_jiffies))
timer wheel定时器的过程大概如此,它为系统提供了低精度的定时或超时功能。
static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
while (time_after_eq(jiffies, base->timer_jiffies)) {/*比较当前时间和基准,判断base上是否有过期的定时器*/
struct list_head work_list;
struct list_head *head = &work_list;
int index = base->timer_jiffies & TVR_MASK;/*只有当基准时间最后TVR_BITS位内的值重新归零之后才迁移链表,表示若基准时间继续增加,tv1内的过期链表就可能为空,需要重新填充*/
* Cascade timers:
if (!index &&
(!cascade(base, &base->tv2, INDEX(0))) &&
(!cascade(base, &base->tv3, INDEX(1))) &&
!cascade(base, &base->tv4, INDEX(2)))/*一直迁移链表,直到某个tvec中没有过期对象*/
cascade(base, &base->tv5, INDEX(3));
list_replace_init(base->tv1.vec + index, &work_list);
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
timer = list_first_entry(head, struct timer_list,entry);/*获取timer_list对象和回调函数*/
fn = timer->function;
data = timer->data;
base->running_timer = timer;/*设为当前运行定时器,并从tvec_base中删除定时器*/
detach_timer(timer, 1);
call_timer_fn(timer, fn, data);//执行回调函数
base->running_timer = NULL;
static void run_timer_softirq(struct softirq_action *h)
struct tvec_base *base = __this_cpu_read(tvec_bases);
hrtimer_run_pending(); //每次tick都执行,若没有激活高精度定时器则尝试切换至高精度定时器
if (time_after_eq(jiffies, base->timer_jiffies))
timer wheel定时器的过程大概如此,它为系统提供了低精度的定时或超时功能。
2. hrtimer定时器
hrtimer定时器提供不同类型的定时器请求,包括实时时间基类型、相对于boot的时间基类型等。定时器请求使用struct hrtimer表示每一个请求,然后将请求对象放到请求队列中;与CFS类似,请求队列使用红黑树实现。
* struct hrtimer - the basic hrtimer structure
* @node: timerqueue node, which also manages node.expires,
* the absolute expiry time in the hrtimers internal
* representation. The time is related to the clock on
* which the timer is based. Is setup by adding
* slack to the _softexpires value. For non range timers
* identical to _softexpires.
* @_softexpires: the absolute earliest expiry time of the hrtimer.
* The time which was given as expiry time when the timer
* was armed.
* @function: timer expiry callback function
* @base: pointer to the timer base (per cpu and per clock)
* @state: state information (See bit values above)
* @start_site: timer statistics field to store the site where the timer
* was started
* @start_comm: timer statistics field to store the name of the process which
* started the timer
* @start_pid: timer statistics field to store the pid of the task which
* started the timer
* The hrtimer structure must be initialized by hrtimer_init()
struct hrtimer {
struct timerqueue_node node; //请求队列中的节点
ktime_t _softexpires; //过期时间
enum hrtimer_restart (*function)(struct hrtimer *); //回调函数
struct hrtimer_clock_base *base; //时间基
unsigned long state; //hrtimer的状态,包括是否可用、在队列上、正在执行等
int start_pid;
void *start_site;
char start_comm[16];
同timer wheel的定时器,通过一个基准时间来控制定时器对象的过期,hrtimer定时器的基准时间使用hrtimer_clock_base,这个基准时间实例也用来将hrtimer对象组织在active成员所表示的红黑树根节点上。
enum hrtimer_base_type { /* the different kinds of base clock; the enumerator list is not shown in this excerpt */
/*
 * One clock base: the reference clock for a given clock type, together
 * with the queue of hrtimers that are armed against that clock.
 */
struct hrtimer_clock_base {
	struct hrtimer_cpu_base	*cpu_base;	/* owning per-cpu control structure */
	int			index;		/* position in cpu_base->clock_base[] */
	clockid_t		clockid;	/* type of this clock base */
	struct timerqueue_head	active;		/* root of the red-black tree of hrtimers */
	ktime_t			resolution;	/* timer resolution */
	ktime_t			(*get_time)(void);	/* reads the current time of this clock */
	ktime_t			softirq_time;	/* base time */
	/*
	 * Added to the current time before a timer's expiry is compared
	 * against it, and subtracted from an expiry before the clock
	 * event device is programmed with it.
	 */
	ktime_t			offset;
};
/*
 * Per-cpu hrtimer state: the clock bases of this CPU plus the
 * bookkeeping used by the high resolution interrupt.
 */
struct hrtimer_cpu_base {
	raw_spinlock_t			lock;
	unsigned long			active_bases;	/* bit i set: clock_base[i] is in use */
	ktime_t				expires_next;	/* absolute time of the next timer event */
	int				hres_active;	/* high resolution mode is active */
	int				hang_detected;	/* the last timer interrupt detected a hang */
	unsigned long			nr_events;	/* number of timer events */
	unsigned long			nr_retries;	/* retries while handling the hrtimer interrupt */
	unsigned long			nr_hangs;	/* total number of hangs */
	ktime_t				max_hang_time;	/* longest time spent in the interrupt on a hang */
	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];	/* one per clock type */
};
static int hrtimer_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); //获得当前cpu的cpu_base对象
ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); /*获得一个精确的和单调基准时间匹配的过期时间*/
int res;
WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
* When the callback is running, we do not reprogram the clock event
* device. The timer callback is either running on a different CPU or
* the callback is executed in the hrtimer_interrupt context. The
* reprogramming is handled either by the softirq, which called the
* callback or at the end of the hrtimer_interrupt.
if (hrtimer_callback_running(timer)) //当前定时器正在运行,则直接返回,没什么东西可以检测的
return 0;
* CLOCK_REALTIME timer might be requested with an absolute
* expiry time which is less than base->offset. Nothing wrong
* about that, just avoid to call into the tick code, which
* has now objections against negative expiry values.
if (expires.tv64 < 0)
return -ETIME;
if (expires.tv64 >= cpu_base->expires_next.tv64) //如果过期时间在下一次过期时间之后,则肯定没过期
return 0;
* If a hang was detected in the last timer interrupt then we
* do not schedule a timer which is earlier than the expiry
* which we enforced in the hang detection. We want the system
* to make progress.
if (cpu_base->hang_detected)
return 0;
* Clockevents returns -ETIME, when the event was in the past.
res = tick_program_event(expires, 0); /*从时间设备验证定时器的过期时间是否是过去的时间,不是过去的时间则设置expires为下一次的timer event的时间,若是则返回出错*/
if (!IS_ERR_VALUE(res))
cpu_base->expires_next = expires; //不是过去的时间,且比下一次过期时间早,因此重设下一次的过期时间
return res;
若没有激活高精度定时器,在timer wheel中可以看到,每次tick都会尝试切换到高精度定时器模式,切换高精度定时就需要对时钟设备重编程:
static int hrtimer_switch_to_hres(void)
int i, cpu = smp_processor_id();
struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
unsigned long flags;
if (base->hres_active)
return 1;
if (tick_init_highres()) { //切换到高精度模式,就是将时钟设备的事件响应函数设为后面的hrtimer_interrupt
printk(KERN_WARNING "Could not switch to high resolution "
"mode on CPU %dn", cpu);
return 0;
base->hres_active = 1; //设置状态,已经激活
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
base->clock_base[i].resolution = KTIME_HIGH_RES;
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL); //调整每个基准时钟的offset,并且对时钟设备重编程,设置新的expire_next值
return 1;
/*
 * (Re)start @timer so that it expires at @tim, with @delta_ns of
 * slack. @tim is relative when @mode has HRTIMER_MODE_REL set.
 *
 * Returns the result of remove_hrtimer() for the timer.
 */
int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
		unsigned long delta_ns, const enum hrtimer_mode mode,
		int wakeup)
{
	struct hrtimer_clock_base *base, *new_base;
	unsigned long flags;
	int ret, leftmost;

	base = lock_hrtimer_base(timer, &flags);

	/* Remove an active timer from the queue: */
	ret = remove_hrtimer(timer, base);

	/* Switch the timer base, if necessary: */
	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

	/* a relative expiry is converted to an absolute one */
	if (mode & HRTIMER_MODE_REL) {
		tim = ktime_add_safe(tim, new_base->get_time());
		/*
		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
		 * to signal that they simply return xtime in
		 * do_gettimeoffset(). In this case we want to round up by
		 * resolution when starting a relative timer, to avoid short
		 * timeouts. This will go away with the GTOD framework.
		 */
#ifdef CONFIG_TIME_LOW_RES
		tim = ktime_add_safe(tim, base->resolution);
#endif
	}

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/*
	 * Insert the timer into the red-black tree; the result is 1 when
	 * the timer became the leftmost node, i.e. the earliest expiry,
	 * and 0 otherwise.
	 */
	leftmost = enqueue_hrtimer(timer, new_base);

	/*
	 * Only allow reprogramming if the new base is on this CPU.
	 * (it might still be on another CPU if the timer was pending)
	 *
	 * XXX send_remote_softirq() ?
	 */
	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
		hrtimer_enqueue_reprogram(timer, new_base, wakeup);

	unlock_hrtimer_base(timer, &flags);

	return ret;
}
void hrtimer_interrupt(struct clock_event_device *dev)
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
ktime_t expires_next, now, entry_time, delta;
int i, retries = 0;
dev->next_event.tv64 = KTIME_MAX;
entry_time = now = ktime_get(); //获得当前时间
expires_next.tv64 = KTIME_MAX;
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
* the migration code. This does not affect enqueueing of
* timers which run their callback and need to be requeued on
* this CPU.
cpu_base->expires_next.tv64 = KTIME_MAX;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
struct hrtimer_clock_base *base;
struct timerqueue_node *node;
ktime_t basenow;
if (!(cpu_base->active_bases & (1 << i)))
base = cpu_base->clock_base + i;
basenow = ktime_add(now, base->offset); //修正获得一个递增的时间
while ((node = timerqueue_getnext(&base->active))) { /*从红黑树上获得最左边的节点,这个指针缓存在timer_queue结构中,直接返回即可*/
struct hrtimer *timer;
timer = container_of(node, struct hrtimer, node); //contain_of机制获得timer对象
* The immediate goal for using the softexpires is
* minimizing wakeups, not running timers at the
* earliest interrupt after their soft expiration.
* This allows us to avoid using a Priority Search
* Tree, which can answer a stabbing querry for
* overlapping intervals and instead use the simple
* BST we already have.
* We don't add extra wakeups by delaying timers that
* are right-of a not yet expired timer, because that
* timer will have to trigger a wakeup anyway.
if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { /*过期时间比当前时间晚则在这个clock_base上不存在过期的对象,直接跳过这个clock_base,返回for循环*/
ktime_t expires;
expires = ktime_sub(hrtimer_get_expires(timer),
if (expires.tv64 < expires_next.tv64)
expires_next = expires;
__run_hrtimer(timer, &basenow); //执行回调函数并且删除timer对象
* Store the new expiry value so the migration code can verify
* against it.
cpu_base->expires_next = expires_next; //由上面得出新的下一次过期时间
/* Reprogramming necessary ? */
if (expires_next.tv64 == KTIME_MAX || /*对时间设备重编程,若由于过长的回调函数消耗时间等原因导致expires_next是过去的时间,则重编程失败*/
!tick_program_event(expires_next, 0)) {
cpu_base->hang_detected = 0;
* The next timer was already expired due to:
* - tracing
* - long lasting callbacks
* - being scheduled away when running in a VM
* We need to prevent that we loop forever in the hrtimer
* interrupt routine. We give it 3 attempts to avoid
* overreacting on some spurious event.
now = ktime_get(); //重编程失败,则重试3次
if (++retries < 3)
goto retry;
* Give the system a chance to do something else than looping
* here. We stored the entry time, so we know exactly how long
* we spent here. We schedule the next event this amount of
* time away.
cpu_base->nr_hangs++; //如果三次重试都失败,则当前cpu_base挂起
cpu_base->hang_detected = 1;
delta = ktime_sub(now, entry_time);
if (delta.tv64 > cpu_base->max_hang_time.tv64)
cpu_base->max_hang_time = delta;
* Limit it to a sensible value as we enforce a longer
* delay. Give the CPU at least 100ms to catch up.
if (delta.tv64 > 100 * NSEC_PER_MSEC)
expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
expires_next = ktime_add(now, delta);
tick_program_event(expires_next, 1);/*给CPU最大100ms的时间的来处理cpu_base挂起,重编程时钟设备,在最多100ms后重新产生事件*/
printk_once(KERN_WARNING "hrtimer: interrupt took %llu nsn",
发表评论 取消回复