zoukankan      html  css  js  c++  java
  • Mac操作系统XNU内核(八)系统调用过程代码简单分析

    (一)首先,系统调用有两种方式:

    •     0x80、0x81、0x82三个中断号;
    •     专门指令(至少分Intel架构和ARM架构),比如SYSENTER/SYSCALL

    (二)话分两头,先说中断向量方式

      这是中断向量定义的部分代码:

    INTERRUPT(0x7d)
    INTERRUPT(0x7e)
    USER_TRAP(0x7f, idt64_dtrace_ret) /* Required by dtrace "fasttrap" */
    
    USER_TRAP_SPC(0x80,idt64_unix_scall)
    USER_TRAP_SPC(0x81,idt64_mach_scall)
    USER_TRAP_SPC(0x82,idt64_mdep_scall)
    
    INTERRUPT(0x83)
    INTERRUPT(0x84)
    INTERRUPT(0x85)
    INTERRUPT(0x86)

      (BSD风格的系统调用,中断号就是0x80)

      触发中断以及后面的逻辑,都在汇编文件idt64.s中实现,下面简单看看:

    /*
     * System call handlers.
     * These are entered via a syscall interrupt. The system call number in %rax
     * is saved to the error code slot in the stack frame. We then branch to the
     * common state saving code.
     */
            
    #ifndef UNIX_INT
    #error NO UNIX INT!!!
    #endif
    Entry(idt64_unix_scall)
        swapgs                /* switch to kernel gs (cpu_data) */
        pushq    %rax            /* save system call number */
        PUSH_FUNCTION(HNDL_UNIX_SCALL)
        pushq    $(UNIX_INT)

      接下来执行PUSH_FUNCTION(HNDL_UNIX_SCALL),先展开PUSH_FUNCTION看看:

    #if 1
    #define PUSH_FUNCTION(func)              \
        sub    $8, %rsp            ;\
        push    %rax                ;\
        leaq    func(%rip), %rax        ;\
        movq    %rax, 8(%rsp)            ;\
        pop    %rax
    #else
    #define PUSH_FUNCTION(func) pushq func
    #endif

      系统调用号,在寄存器RAX,接下来看看HNDL_UNIX_SCALL:

    Entry(hndl_unix_scall)
    
            TIME_TRAP_UENTRY
    
        movq    %gs:CPU_ACTIVE_THREAD,%rcx    /* get current thread     */
        movq    TH_TASK(%rcx),%rbx        /* point to current task  */
        incl    TH_SYSCALLS_UNIX(%rcx)        /* increment call count   */
    
        /* Check for active vtimers in the current task */
        TASK_VTIMER_CHECK(%rbx,%rcx)
    
        sti
    
        CCALL1(unix_syscall, %r15)
        /*
         * always returns through thread_exception_return
         */

      主要有一行:unix_syscall,看看unix_syscall函数的definition:

    /*
     * Function:    unix_syscall
     *
     * Inputs:    regs    - pointer to i386 save area
     *
     * Outputs:    none
     */
    void
    unix_syscall(x86_saved_state_t *state)
    {
        thread_t        thread;
        void            *vt;
        unsigned int        code;
        struct sysent        *callp;
    
        int            error;
        vm_offset_t        params;
        struct proc        *p;
        struct uthread        *uthread;
        x86_saved_state32_t    *regs;
        boolean_t        is_vfork;
    
        assert(is_saved_state32(state));
        regs = saved_state32(state);
    #if DEBUG
        if (regs->eax == 0x800)
            thread_exception_return();
    #endif
        thread = current_thread();
        uthread = get_bsdthread_info(thread);
    
        /* Get the approriate proc; may be different from task's for vfork() */
        is_vfork = uthread->uu_flag & UT_VFORK;
        if (__improbable(is_vfork != 0))
            p = current_proc();
        else 
            p = (struct proc *)get_bsdtask_info(current_task());
    
        /* Verify that we are not being called from a task without a proc */
        if (__improbable(p == NULL)) {
            regs->eax = EPERM;
            regs->efl |= EFL_CF;
            task_terminate_internal(current_task());
            thread_exception_return();
            /* NOTREACHED */
        }
    
        code = regs->eax & I386_SYSCALL_NUMBER_MASK;
        DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
                                  code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
        params = (vm_offset_t) (regs->uesp + sizeof (int));
    
        regs->efl &= ~(EFL_CF);
    
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
    
        if (__improbable(callp == sysent)) {
            code = fuword(params);
            params += sizeof(int);
            callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
        }

    .........

      通过寄存器中的数据得到code,再通过code取得数组sysent中的系统调用函数,交给callp;后面的代码冗长,这里就不全部贴出来咯。

      (关于sysent数组,改天详述)

      (三)再说系统调用专用指令方式(以Intel架构为例)

      SYSENTER用于32位,SYSCALL用于64位,只说SYSCALL吧,先看汇编:

    Entry(hi64_syscall)
    Entry(idt64_syscall)
    L_syscall_continue:
        swapgs                /* Kapow! get per-cpu data area */
        mov    %rsp, %gs:CPU_UBER_TMP    /* save user stack */
        mov    %gs:CPU_UBER_ISF, %rsp    /* switch stack to pcb */
    
        /*
         * Save values in the ISF frame in the PCB
         * to cons up the saved machine state.
         */
        movl    $(USER_DS), ISF64_SS(%rsp)    
        movl    $(SYSCALL_CS), ISF64_CS(%rsp)    /* cs - a pseudo-segment */
        mov    %r11, ISF64_RFLAGS(%rsp)    /* rflags */
        mov    %rcx, ISF64_RIP(%rsp)        /* rip */
        mov    %gs:CPU_UBER_TMP, %rcx
        mov    %rcx, ISF64_RSP(%rsp)        /* user stack */
        mov    %rax, ISF64_ERR(%rsp)        /* err/rax - syscall code */
        movq    $(T_SYSCALL), ISF64_TRAPNO(%rsp)    /* trapno */
        leaq    HNDL_SYSCALL(%rip), %r11;
        movq    %r11, ISF64_TRAPFN(%rsp)
        mov    ISF64_RFLAGS(%rsp), %r11    /* Avoid leak, restore R11 */
        jmp    L_dispatch_U64            /* this can only be 64-bit */

      主要看看HNDL_SYSCALL:

    /*
     * 64bit Tasks
     * System call entries via syscall only:
     *
     *    r15     x86_saved_state64_t
     *    rsp     kernel stack
     *
     *    both rsp and r15 are 16-byte aligned
     *    interrupts disabled
     *    direction flag cleared
     */
    
    Entry(hndl_syscall)
        TIME_TRAP_UENTRY
    
        movq    %gs:CPU_ACTIVE_THREAD,%rcx    /* get current thread     */
        movq    TH_TASK(%rcx),%rbx        /* point to current task  */
    
        /* Check for active vtimers in the current task */
        TASK_VTIMER_CHECK(%rbx,%rcx)
    
        /*
         * We can be here either for a mach, unix machdep or diag syscall,
         * as indicated by the syscall class:
         */
        movl    R64_RAX(%r15), %eax        /* syscall number/class */
        movl    %eax, %edx
        andl    $(SYSCALL_CLASS_MASK), %edx    /* syscall class */
        cmpl    $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
        je    EXT(hndl_mach_scall64)
        cmpl    $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
        je    EXT(hndl_unix_scall64)
        cmpl    $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
        je    EXT(hndl_mdep_scall64)
        cmpl    $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
        je    EXT(hndl_diag_scall64)
    
        /* Syscall class unknown */
        sti
        CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
        /* no return */

      可以看到,这里根据RAX寄存器中的系统调用类别(syscall class)区分4种系统调用,BSD风格(unix)的系统调用只是其中1种,另外还有3种:mach syscall、machdep syscall、diag syscall;

      如果是BSD风格系统调用,那么就继续执行hndl_unix_scall64:

    Entry(hndl_unix_scall64)
        incl    TH_SYSCALLS_UNIX(%rcx)        /* increment call count   */
        sti
    
        CCALL1(unix_syscall64, %r15)
        /*
         * always returns through thread_exception_return
         */

      只有一个函数调用,unix_syscall64,接下来看看这个函数的definition:

    void
    unix_syscall64(x86_saved_state_t *state)
    {
        thread_t    thread;
        unsigned int    code;
        struct sysent    *callp;
        void        *uargp;
        int        args_in_regs;
        int        error;
        struct proc    *p;
        struct uthread    *uthread;
        x86_saved_state64_t *regs;
    
        assert(is_saved_state64(state));
        regs = saved_state64(state);
    #if    DEBUG
        if (regs->rax == 0x2000800)
            thread_exception_return();
    #endif
        thread = current_thread();
        uthread = get_bsdthread_info(thread);
    
        /* Get the approriate proc; may be different from task's for vfork() */
        if (__probable(!(uthread->uu_flag & UT_VFORK)))
            p = (struct proc *)get_bsdtask_info(current_task());
        else 
            p = current_proc();
    
        /* Verify that we are not being called from a task without a proc */
        if (__improbable(p == NULL)) {
            regs->rax = EPERM;
            regs->isf.rflags |= EFL_CF;
            task_terminate_internal(current_task());
            thread_exception_return();
            /* NOTREACHED */
        }
        args_in_regs = 6;
    
        code = regs->rax & SYSCALL_NUMBER_MASK;
        DEBUG_KPRINT_SYSCALL_UNIX(
            "unix_syscall64: code=%d(%s) rip=%llx\n",
            code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
        uargp = (void *)(&regs->rdi);
    
        if (__improbable(callp == sysent)) {
                /*
             * indirect system call... system call number
             * passed as 'arg0'
             */
                code = regs->rdi;
            callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
            uargp = (void *)(&regs->rsi);
            args_in_regs = 5;
        }

    ..........

      可以看到这里首先从x86_saved_state_t中取得系统调用号code,然后从数组sysent中得到系统调用函数,给callp;再后面是一些参数处理,和callp的执行。

      接下去就到了具体的系统调用函数。

      (大概介绍如上,有人拍砖吗?一起了解啊~)

  • 相关阅读:
    Photoshop 基础七 位图 矢量图 栅格化
    Photoshop 基础六 图层
    Warfare And Logistics UVALive
    Walk Through the Forest UVA
    Airport Express UVA
    Guess UVALive
    Play on Words UVA
    The Necklace UVA
    Food Delivery ZOJ
    Brackets Sequence POJ
  • 原文地址:https://www.cnblogs.com/andypeker/p/4385802.html
Copyright © 2011-2022 走看看