概述
/* Issue the vfork system call when the kernel headers define __NR_vfork;
   otherwise fall back to the fork system call. */
#ifdef __NR_vfork
DO_CALL (vfork, 0)
#else
DO_CALL (fork, 0)
#endif
在汇编中是Special system call wrappers特殊的系统调用
/* Special system call wrapper for fork: puts sp + S_OFF into r0 and then
   branches directly to sys_fork.  The branch does not link, so lr is left
   as the caller set it.
   NOTE(review): r0 is presumably the struct pt_regs * argument that
   sys_fork takes -- confirm against the definition of S_OFF. */
sys_fork_wrapper:
add r0, sp, #S_OFF
b sys_fork
/* DO_CALL(syscall_name, args): emit the instruction sequence for one system
   call.  DOARGS_<args> and UNDOARGS_<args> bracket the trap; the number
   produced by SYS_ify(syscall_name) is loaded into r7 before `swi 0x0`.
   Every line except the last ends in a backslash so that the whole
   sequence belongs to the macro body. */
#define DO_CALL(syscall_name, args) \
DOARGS_##args; \
ldr r7, =SYS_ify (syscall_name); \
swi 0x0; \
UNDOARGS_##args
/* 在C库里的Unistd.h中 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
extern int open (const char *__file, int __oflag, ...) __nonnull ((1));
register int _a1 asm ("r0"), _nr asm ("r7");
LOAD_ARGS_##nr (args)
_nr = name;
asm volatile ("swi 0x0 @ syscall " #name
: "=r" (_a1)
: "r" (_nr) ASM_ARGS_##nr
: "memory");
_a1; })
调了一堆宏,最后调用到 swi 0x00000000。作为 ARM 汇编语言开发成果的一部分,SWI 指令已重命名为 SVC。在此版本的 RVCT 中,SWI 指令反汇编为 SVC,并提供注释以指明这是以前的 SWI。
其实就是 SVC 0x00000000,对于 x86 架构就是 int 0x80。执行到这里,C 库肯定已经帮我们找到了 open 函数的调用号 5,并且放在了寄存器 R7。为什么?(看 _nr = name; name 是上面传过来的:INTERNAL_SYSCALL_RAW(SYS_ify(name), err, nr, args) 的第一个参数就是 #define SYS_ify(syscall_name) (__NR_##syscall_name),也就是 __NR_open,即 5;#define __NR_open (__NR_SYSCALL_BASE+ 5) 定义在 include/asm-arm 中。所以 _nr 就是 5,并被传到了 r7;上层的 4 只不过是参数而已。asm 是嵌入汇编的意思。)
再看arch/arm/kernel/head.S
在__turn_mmu_on中,将寄存器r0的值写到了cp15协处理器的寄存器C1中。到这里便完成了将异常中断向量表的位置放到了0xffff0000.(ARM规定)
entry-armv.S (arch/arm/kernel)
/* Literal word holding the address of vector_swi; the SWI slot of the
   vector table below loads pc from it. */
.LCvswi:
.word vector_swi
.globl __stubs_end
__stubs_end:
/* stubs_offset = __vectors_start + 0x200 - __stubs_start.  Adding it to a
   label in the stubs area rebases that label for a layout in which the
   stubs sit 0x200 bytes after the start of the vector table. */
.equ stubs_offset, __vectors_start + 0x200 - __stubs_start
.globl __vectors_start
__vectors_start:
/* First slot.  NOTE(review): presumably the reset vector -- confirm. */
ARM( swi SYS_ERROR0 )
THUMB( svc #0 )
THUMB( nop )
/* The remaining slots branch to the handler named in each operand,
   rebased by stubs_offset. */
W(b) vector_und + stubs_offset
/* SWI slot: loads pc from the .LCvswi literal (address of vector_swi)
   instead of using a direct branch. */
W(ldr) pc, .LCvswi + stubs_offset
W(b) vector_pabt + stubs_offset
W(b) vector_dabt + stubs_offset
W(b) vector_addrexcptn + stubs_offset
W(b) vector_irq + stubs_offset
W(b) vector_fiq + stubs_offset
.globl __vectors_end
__vectors_end:
.globl __vectors_start
这个表示外部可以访问的符号,否则在执行C函数的时候,编译器怎么知道
__vectors_start
是什么?
C语言在这里调用
/*
 * early_trap_init() - populate the exception vector page.
 *
 * Copies the vector table, the exception stubs and the kuser helpers into
 * the page at CONFIG_VECTORS_BASE, copies the signal-return code to
 * KERN_SIGRETURN_CODE, flushes the instruction cache over that page and
 * finally calls modify_domain(DOMAIN_USER, DOMAIN_CLIENT).
 *
 * Takes no arguments and returns nothing.
 */
void __init early_trap_init(void)
{
	unsigned long vectors = CONFIG_VECTORS_BASE;	/* i.e. 0xFFFF0000 */
	extern char __stubs_start[], __stubs_end[];
	extern char __vectors_start[], __vectors_end[];
	extern char __kuser_helper_start[], __kuser_helper_end[];
	int kuser_sz = __kuser_helper_end - __kuser_helper_start;

	/*
	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
	 * into the vector page, mapped at 0xffff0000, and ensure these
	 * are visible to the instruction stream.
	 *
	 * Layout: vectors at offset 0, stubs at offset 0x200, kuser helpers
	 * ending exactly at offset 0x1000.
	 */
	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
	memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
	memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);

	/*
	 * Copy signal return handlers into the vector page, and
	 * set sigreturn to be a pointer to these.
	 */
	memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
	       sizeof(sigreturn_codes));

	flush_icache_range(vectors, vectors + PAGE_SIZE);
	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
}
所以一旦发生SWI,就会跳转到向量表偏移0x8处,也就是
W(ldr) pc, .LCvswi + stubs_offset
再看这个宏ENTRY(vector_swi)
/*
 * vector_swi: entry point reached from the SWI slot of the vector table.
 *
 * Reserves S_FRAME_SIZE bytes on the stack and stores the calling
 * registers, pc and status word there, works out the system call number
 * (scno) according to the configured ABI, and dispatches through a table
 * of handlers with lr preset to ret_fast_syscall.
 */
ENTRY(vector_swi)
sub sp, sp, #S_FRAME_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
add r8, sp, #S_PC
stmdb r8, {sp, lr}^ @ Calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
zero_fp
/*
* Get the system call number.
*/
#if defined(CONFIG_OABI_COMPAT)
/*
* If we have CONFIG_OABI_COMPAT then we need to look at the swi
* value to determine if it is an EABI or an old ABI call.
*/
#ifdef CONFIG_ARM_THUMB
/* r8 still holds the saved status word read from spsr above. */
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
ldreq r10, [lr, #-4] @ get SWI instruction
#else
ldr r10, [lr, #-4] @ get SWI instruction
A710( and ip, r10, #0x0f000000 @ check for SWI )
A710( teq ip, #0x0f000000 )
A710( bne .Larm710bug )
#endif
#elif defined(CONFIG_AEABI)
/*
* Pure EABI user space always put syscall number into scno (r7).
*/
/* Nothing is loaded into scno on this path; only the A710() check runs. */
A710( ldr ip, [lr, #-4] @ get SWI instruction )
A710( and ip, ip, #0x0f000000 @ check for SWI )
A710( teq ip, #0x0f000000 )
A710( bne .Larm710bug )
#elif defined(CONFIG_ARM_THUMB)
/* Legacy ABI only, possibly thumb mode. */
tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs
addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in
ldreq scno, [lr, #-4]
#else
/* Legacy ABI only. */
ldr scno, [lr, #-4] @ get SWI instruction
A710( and ip, scno, #0x0f000000 @ check for SWI )
A710( teq ip, #0x0f000000 )
A710( bne .Larm710bug )
#endif
#ifdef CONFIG_ALIGNMENT_TRAP
/* Reload the control register from the word that __cr_alignment points to. */
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
enable_irq
get_thread_info tsk
adr tbl, sys_call_table @ load syscall table pointer
ldr ip, [tsk, #TI_FLAGS] @ check for syscall tracing
#if defined(CONFIG_OABI_COMPAT)
/*
* If the swi argument is zero, this is an EABI call and we do nothing.
*
* If this is an old ABI call, get the syscall number into scno and
* get the old ABI syscall table address.
*/
/* bics clears the top byte and sets the flags that the two conditional
   instructions below test. */
bics r10, r10, #0xff000000
eorne scno, r10, #__NR_OABI_SYSCALL_BASE
ldrne tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #__NR_SYSCALL_BASE @ check OS number
#endif
stmdb sp!, {r4, r5} @ push fifth and sixth args
tst ip, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
bne __sys_trace
/* Dispatch: when scno is below NR_syscalls (carry clear after the cmp), pc
   is loaded from the word at tbl + scno * 4.  lr is set first so that the
   handler returns to ret_fast_syscall.
   NOTE(review): the excerpt stops here; the path taken when scno is out of
   range is not shown. */
cmp scno, #NR_syscalls @ check upper syscall limit
adr lr, ret_fast_syscall @ return address
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
get_thread_info tsk:其中,tsk是寄存器r9的别名,get_thread_info是一个宏定义(将sp进行8KB对齐后的值赋给寄存器r9)。这个就涉及到Linux的内核栈了:Linux为每个进程都分配了一个8KB的内核栈,在内核栈的尾端存放有关于这个进程的struct thread_info结构(这个就不说了)。
adr tbl, sys_call_table 找到它的宏
ENTRY(sys_call_table)
#include "calls.S"
calls.S 被包含进来,其内容如下:
/* 0 */ CALL(sys_restart_syscall)
CALL(sys_exit)
CALL(sys_fork_wrapper)
CALL(sys_read)
CALL(sys_write)
/* 5 */ CALL(sys_open)
5 号是 open 函数;这个 CALL 宏展开后就是 .long。
回答上面的问题:使用的是svc 0,后面跟的并不是系统调用号,而是0,这里把系统调用号存放在了寄存器r7中因为反汇编得到程序确实这么干了
这里的scno就是寄存器r7的别名,它的值是sys_open的系统调用号5。由于在calls.S中每个系统调用标号占用4个字节,所以这里将scno的值乘以4然后再加上tbl(tbl是系统调用表sys_call_table的基地址),然后就跳入sys_open开始执行了。
/*
 * sys_open - kernel-side handler for the open system call.
 * @filename: user-space pointer to the path name
 * @flags:    open flags; O_LARGEFILE is OR-ed in when force_o_largefile()
 *            returns true
 * @mode:     passed through unchanged to do_sys_open()
 *
 * Returns the value produced by do_sys_open(AT_FDCWD, filename, flags, mode).
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
long ret;
if (force_o_largefile())
flags |= O_LARGEFILE;
ret = do_sys_open(AT_FDCWD, filename, flags, mode);
/* avoid REGPARM breakage on x86: */
prevent_tail_call(ret);
return ret;
}
scno .req r7 @ syscall number
tbl .req r8 @ syscall table pointer
why .req r8 @ Linux syscall (!= 0)
tsk .req r9 @ current thread_info
以上这些寄存器别名在entry-header.S中定义
Linux通过fork进入内核
/*
 * sys_fork - kernel-side handler for the fork system call.
 * @regs: saved register frame of the calling task
 *
 * With CONFIG_MMU, forwards to do_fork() using SIGCHLD as the clone flags
 * and the caller's saved stack pointer (regs->ARM_sp) as the stack start,
 * and returns do_fork()'s result.  Without CONFIG_MMU it returns -EINVAL.
 */
asmlinkage int sys_fork(struct pt_regs *regs)
{
#ifdef CONFIG_MMU
return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
#else
/* can not support in nommu mode */
return(-EINVAL);
#endif
}
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
struct task_struct *p;
int trace = 0;
struct pid *pid = alloc_pid();
long nr;
if (!pid)
return -EAGAIN;
nr = pid->nr;
if (unlikely(current->ptrace)) {
trace = fork_traceflag (clone_flags);
if (trace)
clone_flags |= CLONE_PTRACE;
}
p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
/*
调用
alloc_pid();
分配一个PID,然后开始copy_process;最后copy_process返回一个指向子进程的指针。
p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
调用
/* Perform scheduler related setup. Assign this task to a CPU. */执行调度器相关设置。将此任务分配给CPU
sched_fork(p, clone_flags);
调用
static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
task_thread_info(p)->cpu = cpu;
}
将这个任务的CPU设置为当前CPU
之后设置
p->state = TASK_RUNNING;
我们把这个任务看作是在这里运行,但实际上没有。将它插入到运行队列中。这保证了没有人会真正运行它,信号或其他外部事件不能唤醒它并将其插入到运行队列中。
最后在copy_process返回之后调用wake_up_new_task(p, clone_flags); 里的list_add_tail(&p->run_list, &current->run_list);
将p加入到运行链表
如果虚拟空间没有被克隆
if (!(clone_flags & CLONE_VM)) {
/*
* The VM isn't cloned, so we're in a good position to
* do child-runs-first in anticipation of an exec. This
* usually avoids a lot of COW overhead.
*/
if (unlikely(!current->array))
__activate_task(p, rq);
else {
p->prio = current->prio;
p->normal_prio = current->normal_prio;
list_add_tail(&p->run_list, &current->run_list);
p->array = current->array;
p->array->nr_active++;
inc_nr_running(p, rq);
}
set_need_resched();
}
} else
/* Run child last */
__activate_task(p, rq);
设置调度标志位否则只把它加入到运行队列即可
__activate_task(p, rq);会调用enqueue_task()函数将进程放入红黑树中。从这里看,应该是唤醒之后将进程加入到运行队列,并调整它在红黑树中的位置。
如果标志位被置位,则以下情况会调用schedule()函数:在系统调用返回的时候会调度,从中断处理程序返回到用户空间的时候也会调度,这些是用户抢占;LINUX还支持内核抢占,此时也会调用调度。
什么时候会设置需要被调度标志位?1、当某个进程应该被抢占时,scheduler_tick()【在时钟周期中断中被调用】就会去设置。2、当一个高优先级进程进入可运行状态时,try_to_wake_up()【由wake_up调用】会比较当前进程与被唤醒的进程,如果应该抢占就设置标志位。
接下来就是调度。可能是子进程也可能不是。是的话更好。
nr = pid->nr; 分配的时候nr = pid,所以调度之后假如是父进程,那么继续运行(共享代码段)并 return nr;
再来看子进程 那么为什么会返回0? 不是说共享父进程吗?怎么返回不一样的值。猜想肯定在copy代码段的时候改变了什么东西,
/*
 * copy_thread - set up the register state of a newly created task.
 * @nr:           not used in the body shown
 * @clone_flags:  only CLONE_SETTLS is examined here
 * @stack_start:  becomes the child's saved ARM_sp
 * @stk_sz:       not used in the body shown
 * @p:            the new task
 * @regs:         the parent's saved register frame, copied for the child
 *
 * Always returns 0.
 */
int
copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
/* Start from an exact copy of the parent's saved registers ... */
*childregs = *regs;
/* ... then zero the child's saved r0; the surrounding text identifies
   this as the reason fork() returns 0 in the child. */
childregs->ARM_r0 = 0;
childregs->ARM_sp = stack_start;
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
/* Saved context: sp points at the child's register frame and pc at
   ret_from_fork.
   NOTE(review): presumably the context switch restores cpu_context, so
   the child's first run begins at ret_from_fork -- confirm. */
thread->cpu_context.sp = (unsigned long)childregs;
thread->cpu_context.pc = (unsigned long)ret_from_fork;
/* With CLONE_SETTLS the TLS value is taken from the parent's saved r3. */
if (clone_flags & CLONE_SETTLS)
thread->tp_value = regs->ARM_r3;
return 0;
}
注意
thread->cpu_context.pc = (unsigned long)ret_from_fork;
改变CPU上下文。进入汇编
/*
 * ret_from_fork: calls schedule_tail, optionally reports the syscall exit
 * through syscall_trace (r0 = 1, r1 = sp) when _TIF_SYSCALL_TRACE is set,
 * and then continues at ret_slow_syscall with why = 1.
 */
ENTRY(ret_from_fork)
bl schedule_tail
get_thread_info tsk
ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing
mov why, #1
tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
beq ret_slow_syscall
mov r1, sp
mov r0, #1 @ trace exit [IP = 1]
bl syscall_trace
b ret_slow_syscall
/* ret_slow_syscall: disables interrupts, re-reads the thread flags and
   tests them against _TIF_WORK_MASK.
   NOTE(review): as arranged in this excerpt both outcomes of the bne reach
   work_pending (it is the next label); confirm the real layout against
   entry-common.S. */
ret_slow_syscall:
disable_irq @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]
tst r1, #_TIF_WORK_MASK
bne work_pending
/* work_pending: a pending reschedule goes to work_resched; a pending
   notify-resume or signal calls do_notify_resume(sp, <r1 unchanged>, why)
   and then loops back to ret_slow_syscall; otherwise no_work_pending. */
work_pending:
tst r1, #_TIF_NEED_RESCHED
bne work_resched
tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
beq no_work_pending
mov r0, sp @ 'regs'
mov r2, why @ 'syscall'
bl do_notify_resume
b ret_slow_syscall @ Check work again
/* NOTE(review): in this excerpt execution falls through into
   no_work_pending once schedule returns -- confirm against the original
   source order. */
work_resched:
bl schedule
/* no_work_pending: reloads the saved status word and pc from the register
   frame, restores the calling registers and returns with `movs pc, lr`. */
no_work_pending:
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
@ slow_restore_user_regs
ldr r1, [sp, #S_PSR] @ get calling cpsr
ldr lr, [sp, #S_PC]! @ get pc
msr spsr_cxsf, r1 @ save in spsr_svc
/* The register list below is r0 - lr, although the trailing comment
   says r1 - lr. */
ldmdb sp, {r0 - lr}^ @ get calling r1 - lr
mov r0, r0
add sp, sp, #S_FRAME_SIZE - S_PC
movs pc, lr @ return & move spsr_svc into cpsr
就是说程序本来有意设置父进程的调度标志位,但是如果时钟滴答刚好没有执行的话,还是父进程先返回子进程的nr。
但是如果时钟滴答到来,那么会调度到子进程(反正肯定不是父进程),并且执行ret_from_fork,最后返回0(因为在
copy_thread
函数中将R0 = 0 传递给用户空间) ;
最后
以上就是瘦瘦睫毛为你收集整理的系统分析fork()函数以及系统调用的全部内容,希望文章能够帮你解决系统分析fork()函数以及系统调用所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复