zoukankan      html  css  js  c++  java
  • 浅析基于ARM的Linux下的系统调用的实现

    在Linux下系统调用是用软中断实现的,下面以一个简单的open例子简要分析一下应用层的open是如何调用到内核中的sys_open的。

    t8.c

       1:  #include <stdio.h>
       2:  #include <sys/types.h>
       3:  #include <sys/stat.h>
       4:  #include <fcntl.h>
       5:   
       6:  int main(int argc, const char *argv[])
       7:  {
       8:      int fd;
       9:   
      10:      fd = open(".", O_RDWR);
      11:   
      12:      close(fd);
      13:      return 0;
      14:  }

    这里需要注意的是:open是C库提供的库函数,并不是系统调用,系统调用是在内核空间实现的,应用空间无法直接调用。在《Linux内核设计与实现》中说:要访问系统调用(在Linux中常称作syscall),通常通过C库中定义的函数调用来进行。

    将t8.c进行静态编译,然后反汇编,看一下是如何调用open的?

       1:  arm-linux-gcc t8.c --static
       2:  arm-linux-objdump -D a.out >a.dis

    下面我们截取a.dis中的一部分进行说明:

       1:  ......
       2:  00008228 <main>:
       3:      8228:   e92d4800    push    {fp, lr}
       4:      822c:   e28db004    add fp, sp, #4  ; 0x4
       5:      8230:   e24dd010    sub sp, sp, #16 ; 0x10
       6:      8234:   e50b0010    str r0, [fp, #-16]
       7:      8238:   e50b1014    str r1, [fp, #-20]
       8:      823c:   e59f0028    ldr r0, [pc, #40]   ; 826c <main+0x44>
       9:      8240:   e3a01002    mov r1, #2  ; 0x2   ;  #define  O_RDWR  00000002 
      10:      8244:   eb002e7d    bl  13c40 <__libc_open>
      11:      8248:   e1a03000    mov r3, r0
      12:      824c:   e50b3008    str r3, [fp, #-8]
      13:      8250:   e51b0008    ldr r0, [fp, #-8]
      14:      8254:   eb002e9d    bl  13cd0 <__libc_close>
      15:      8258:   e3a03000    mov r3, #0  ; 0x0
      16:      825c:   e1a00003    mov r0, r3
      17:      8260:   e24bd004    sub sp, fp, #4  ; 0x4
      18:      8264:   e8bd4800    pop {fp, lr}
      19:      8268:   e12fff1e    bx  lr
      20:      826c:   00064b8c    .word   0x00064b8c
      21:  ......
      22:  00013c40 <__libc_open>:
      23:     13c40:   e51fc028    ldr ip, [pc, #-40]  ; 13c20 <___fxstat64+0x50>
      24:     13c44:   e79fc00c    ldr ip, [pc, ip]
      25:     13c48:   e33c0000    teq ip, #0  ; 0x0
      26:     13c4c:   1a000006    bne 13c6c <__libc_open+0x2c>
      27:     13c50:   e1a0c007    mov ip, r7
      28:     13c54:   e3a07005    mov r7, #5  ; 0x5   
    
    
      #在arch/arm/include/asm/unistd.h中:#define __NR_open  (__NR_SYSCALL_BASE+5)
                        其中,在EABI下__NR_SYSCALL_BASE是0(__NR_OABI_SYSCALL_BASE是0x900000,仅用于旧的OABI)
    
    
      29:     
    13c58: ef000000 svc 0x00000000 #产生软中断
      30:     13c5c:   e1a0700c    mov r7, ip
      31:     13c60:   e3700a01    cmn r0, #4096   ; 0x1000
      32:     13c64:   312fff1e    bxcc    lr
      33:     13c68:   ea0008d4    b   15fc0 <__syscall_error>
      34:  ......

    通过上面的代码注释,可以看到,系统调用sys_open的系统调用号是5,将系统调用号存放到寄存器R7当中,然后应用程序通过svc 0x00000000产生软中断,陷入内核空间。

    也许会好奇,ARM软中断不是用SWI吗,这里怎么变成了SVC了,请看下面一段话,是从ARM官网copy的:

    SVC

    超级用户调用。
    语法

    SVC{cond} #immed

    其中:

    cond

        是一个可选的条件代码(请参阅条件执行)。
    immed

        是一个表达式,其取值为以下范围内的一个整数:

            在 ARM 指令中为 0 到 $2^{24}-1$(24 位值)

            在 16 位 Thumb 指令中为 0-255(8 位值)。

    用法

    SVC 指令会引发一个异常。 这意味着处理器模式会更改为超级用户模式,CPSR 会保存到超级用户模式 SPSR,并且执行会跳转到 SVC 向量(请参阅《开发指南》中的第 6 章 处理处理器异常)。

    处理器会忽略 immed。 但异常处理程序会获取它,借以确定所请求的服务。
    Note

    作为 ARM 汇编语言开发成果的一部分,SWI 指令已重命名为 SVC。 在此版本的 RVCT 中,SWI 指令反汇编为 SVC,并提供注释以指明这是以前的 SWI。
    条件标记

    此指令不更改标记。
    体系结构

    此 ARM 指令可用于所有版本的 ARM 体系结构。

    在基于ARM的Linux中,异常向量表已经被放置在了0xFFFF0000这个位置。这个过程的完成:

    start_kernel ---> setup_arch ---> early_trap_init

       1:  void __init early_trap_init(void)
       2:  {
       3:      unsigned long vectors = CONFIG_VECTORS_BASE;  // 就是0xFFFF0000
       4:      extern char __stubs_start[], __stubs_end[];
       5:      extern char __vectors_start[], __vectors_end[];
       6:      extern char __kuser_helper_start[], __kuser_helper_end[];
       7:      int kuser_sz = __kuser_helper_end - __kuser_helper_start;
       8:   
       9:      /*
      10:       * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
      11:       * into the vector page, mapped at 0xffff0000, and ensure these
      12:       * are visible to the instruction stream.
      13:       */
      14:      memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
      15:      memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
      16:      memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
      17:   
      18:      /*
      19:       * Copy signal return handlers into the vector page, and
      20:       * set sigreturn to be a pointer to these.
      21:       */
      22:      memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
      23:             sizeof(sigreturn_codes));
      24:   
      25:      flush_icache_range(vectors, vectors + PAGE_SIZE);
      26:      modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
      27:  }

    关于上面这个函数的详细解释,参见:

    http://www.cnblogs.com/pengdonglin137/p/3603549.html

    把异常中断向量表的位置设置为0xffff0000的话,需要修改协处理器CP15的寄存器C1的第13位,将其设置为1。以TQ2440提供的内核2.6.30.4为例看一下:

     arch/arm/kernel/head.S

       1:      adr    lr, __enable_mmu        @ return (PIC) address
       2:      add    pc, r10, #PROCINFO_INITFUNC

    其中,PROCINFO_INITFUNC的值是16,r10的值是__arm920_proc_info的地址:

       1:  __arm920_proc_info:
       2:      .long    0x41009200
       3:      .long    0xff00fff0
       4:      .long   PMD_TYPE_SECT | \
       5:          PMD_SECT_BUFFERABLE | \
       6:          PMD_SECT_CACHEABLE | \
       7:          PMD_BIT4 | \
       8:          PMD_SECT_AP_WRITE | \
       9:          PMD_SECT_AP_READ
      10:      .long   PMD_TYPE_SECT | \
      11:          PMD_BIT4 | \
      12:          PMD_SECT_AP_WRITE | \
      13:          PMD_SECT_AP_READ
      14:      b    __arm920_setup
      15:      .long    cpu_arch_name
      16:      .long    cpu_elf_name
      17:       ......
      18:      .size    __arm920_proc_info, . - __arm920_proc_info

    看一下__arm920_setup的实现(proc-arm920.S (arch/arm/mm)):

       1:      .type    __arm920_setup, #function
       2:  __arm920_setup:
       3:      mov    r0, #0
       4:      mcr    p15, 0, r0, c7, c7        @ invalidate I,D caches on v4
       5:      mcr    p15, 0, r0, c7, c10, 4        @ drain write buffer on v4
       6:  #ifdef CONFIG_MMU
       7:      mcr    p15, 0, r0, c8, c7        @ invalidate I,D TLBs on v4
       8:  #endif
       9:      adr    r5, arm920_crval
      10:      ldmia    r5, {r5, r6}            @ 参看一下下面的arm920_crval的实现,本句话执行完后r5和r6分别为:0x3f3f和0x3135
      11:      mrc    p15, 0, r0, c1, c0        @ get control register v4   获取协处理器p15的寄存器c1
      12:      bic    r0, r0, r5
      13:      orr    r0, r0, r6        @ 我们只关注第13位,这里将r0的第13位设置为了1
      14:      mov    pc, lr
      15:      .size    __arm920_setup, . - __arm920_setup
      16:   
      17:      /*
      18:       *  R
      19:       * .RVI ZFRS BLDP WCAM
      20:       * ..11 0001 ..11 0101
      21:       * 
      22:       */
      23:      .type    arm920_crval, #object
      24:  arm920_crval:
      25:      crval    clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130

    再看一下crval的实现(proc-macros.S (arch/arm/mm)):

       1:      .macro    crval, clear, mmuset, ucset
       2:  #ifdef CONFIG_MMU
       3:      .word    clear
       4:      .word    mmuset
       5:  #else
       6:      .word    clear
       7:      .word    ucset
       8:  #endif
       9:      .endm

    在__arm920_setup中执行完 mov pc, lr后,便跳入了下面的语句:

       1:  __enable_mmu:
       2:  #ifdef CONFIG_ALIGNMENT_TRAP
       3:      orr    r0, r0, #CR_A
       4:  #else
       5:      bic    r0, r0, #CR_A
       6:  #endif
       7:  #ifdef CONFIG_CPU_DCACHE_DISABLE
       8:      bic    r0, r0, #CR_C
       9:  #endif
      10:  #ifdef CONFIG_CPU_BPREDICT_DISABLE
      11:      bic    r0, r0, #CR_Z
      12:  #endif
      13:  #ifdef CONFIG_CPU_ICACHE_DISABLE
      14:      bic    r0, r0, #CR_I
      15:  #endif
      16:      mov    r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
      17:                domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
      18:                domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
      19:                domain_val(DOMAIN_IO, DOMAIN_CLIENT))
      20:      mcr    p15, 0, r5, c3, c0, 0        @ load domain access register
      21:      mcr    p15, 0, r4, c2, c0, 0        @ load page table pointer
      22:      b    __turn_mmu_on
      23:  ENDPROC(__enable_mmu)

    看一下__turn_mmu_on的实现(head.S (arch/arm/kernel)):

       1:      .align    5
       2:  __turn_mmu_on:
       3:      mov    r0, r0
       4:      mcr    p15, 0, r0, c1, c0, 0        @ write control reg
       5:      mrc    p15, 0, r3, c0, c0, 0        @ read id reg
       6:      mov    r3, r3
       7:      mov    r3, r3
       8:      mov    pc, r13
       9:  ENDPROC(__turn_mmu_on)

    在__turn_mmu_on中,将寄存器r0的值写到了cp15协处理器的寄存器C1中。到这里便完成了将异常中断向量表的位置放到了0xffff0000.

    说完异常向量表的位置,接下来看看软中断的实现。

    ARM提供的中断类型:

    image

    ARM的异常处理模型:

    image

    entry-armv.S (arch/arm/kernel)

       1:  .LCvswi:
       2:     
    .word vector_swi
       3:   
       4:      .globl    __stubs_end
       5:  __stubs_end:
       6:   
       7:      .equ    stubs_offset, __vectors_start + 0x200 - __stubs_start
       8:   
       9:      .globl    __vectors_start
      10:  __vectors_start:
      11:      swi    SYS_ERROR0
      12:      b    vector_und + stubs_offset
      13:      ldr    pc, .LCvswi + stubs_offset     @发生软中断后先跳到这里
      14:      b    vector_pabt + stubs_offset
      15:      b    vector_dabt + stubs_offset
      16:      b    vector_addrexcptn + stubs_offset
      17:      b    vector_irq + stubs_offset
      18:      b    vector_fiq + stubs_offset
      19:   
      20:      .globl    __vectors_end
      21:  __vectors_end:
      22:   
      23:      .data
      24:   
      25:      .globl    cr_alignment
      26:      .globl    cr_no_alignment
      27:  cr_alignment:
      28:      .space    4
      29:  cr_no_alignment:
      30:      .space    4

    接下来看一下vector_swi的实现,根据实际的宏定义进行了简化

       1:  ENTRY(vector_swi)
       2:      sub    sp, sp, #S_FRAME_SIZE
       3:      stmia    sp, {r0 - r12}            @ Calling r0 - r12
       4:      add    r8, sp, #S_PC
       5:      stmdb    r8, {sp, lr}^            @ Calling sp, lr
       6:      mrs    r8, spsr            @ called from non-FIQ mode, so ok.
       7:      str    lr, [sp, #S_PC]            @ Save calling PC
       8:      str    r8, [sp, #S_PSR]        @ Save CPSR
       9:      str    r0, [sp, #S_OLD_R0]        @ Save OLD_R0
      10:      zero_fp
      11:   
      12:      /*
      13:       * Get the system call number.
      14:       */
      15:   
      16:      /*
      17:       * If we have CONFIG_OABI_COMPAT then we need to look at the swi
      18:       * value to determine if it is an EABI or an old ABI call.
      19:       */
      20:      ldr    r10, [lr, #-4]   
    
    
                @ get SWI instruction  r10中存放的就是引起软中断的那条指令的机器码
                  发生软中断的时候,系统自动将PC-4存放到了lr寄存器,由于是三级流水,
                  并且是ARM状态,还需要减4才能得到发生软中断的那条指令的机器码所在的地址
    
    
    
    
      21:    A710(    and    ip, r10, #0x0f000000        @ check for SWI        )
      22:    A710(    teq    ip, #0x0f000000                        )
      23:    A710(    bne    .Larm710bug                        )
      24:   
      25:      ldr    ip, __cr_alignment
      26:      ldr    ip, [ip]
      27:      mcr    p15, 0, ip, c1, c0        @ update control register
      28:      enable_irq   @在发生中断的时候,相应的中断线在所有CPU上都会被屏蔽掉
      29:   
      30:      
    get_thread_info tsk @ 参看下面的介绍

    31: adr tbl, sys_call_table

    @ load syscall table pointer 此时tbl(r8)中存放的就是sys_call_table的起始地址

      32:      ldr    ip, [tsk, #TI_FLAGS]        @ check for syscall tracing
      33:   
      34:      /*
      35:       * If the swi argument is zero, this is an EABI call and we do nothing.
      36:       *
      37:       * If this is an old ABI call, get the syscall number into scno and
      38:       * get the old ABI syscall table address.
      39:       */
      40:      bics    r10, r10, #0xff000000
      41:      eorne    scno, r10, #__NR_OABI_SYSCALL_BASE
      42:      ldrne    tbl, =sys_oabi_call_table
      43:   
      44:      stmdb    sp!, {r4, r5}            @ push fifth and sixth args
      45:      tst    ip, #_TIF_SYSCALL_TRACE        @ are we tracing syscalls?
      46:      bne    __sys_trace
      47:   
      48:      cmp    scno, #NR_syscalls        @ check upper syscall limit
      49:      adr    lr, ret_fast_syscall        @ return address
      50:      
    ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
      51:   
      52:      add    r1, sp, #S_OFF
      53:  2:    mov    why, #0                @ no longer a real syscall
      54:      cmp    scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
      55:      eor    r0, scno, #__NR_SYSCALL_BASE    @ put OS number back
      56:      bcs    arm_syscall
      57:      b    sys_ni_syscall            @ not private func
      58:  ENDPROC(vector_swi)

    entry-common.S (arch/arm/kernel)。下面是entry-header.S (arch/arm/kernel)的部分内容:

       1:  /*
       2:   * These are the registers used in the syscall handler, and allow us to
       3:   * have in theory up to 7 arguments to a function - r0 to r6.
       4:   *
       5:   * r7 is reserved for the system call number for thumb mode.
       6:   *
       7:   * Note that tbl == why is intentional.
       8:   *
       9:   * We must set at least "tsk" and "why" when calling ret_with_reschedule.
      10:   */
      11:  scno    .req    r7        @ syscall number
      12:  tbl    .req    r8        @ syscall table pointer
      13:  why    .req    r8        @ Linux syscall (!= 0)
      14:  tsk    .req    r9        @ current thread_info

    .req 是伪汇编,以 scno .req r7 为例,表示scno是寄存器r7的别名。

    • get_thread_info tsk

    其中,tsk是寄存器r9的别名,get_thread_info是一个宏定义,如下:

       1:      .macro    get_thread_info, rd
       2:      mov    \rd, sp, lsr #13
       3:      mov    \rd, \rd, lsl #13
       4:      .endm

    即:将sp进行8KB对齐后的值赋给寄存器r9,什么意思?

    这个就涉及到Linux的内核栈了。Linux为每个进程都分配了一个8KB的内核栈,在内核栈的尾端(即低地址端)存放有关于这个进程的struct thread_info结构:

       1:  struct thread_info {
       2:      unsigned long        flags;        /* low level flags */
       3:      int            preempt_count;    /* 0 => preemptable, <0 => bug */
       4:      mm_segment_t        addr_limit;    /* address limit */
       5:      struct task_struct    *task;        /* main task structure */
       6:      struct exec_domain    *exec_domain;    /* execution domain */
       7:      __u32            cpu;        /* cpu */
       8:      __u32            cpu_domain;    /* cpu domain */
       9:      struct cpu_context_save    cpu_context;    /* cpu context */
      10:      __u32            syscall;    /* syscall number */
      11:      __u8            used_cp[16];    /* thread used copro */
      12:      unsigned long        tp_value;
      13:      struct crunch_state    crunchstate;
      14:      union fp_state        fpstate __attribute__((aligned(8)));
      15:      union vfp_state        vfpstate;
      16:  #ifdef CONFIG_ARM_THUMBEE
      17:      unsigned long        thumbee_state;    /* ThumbEE Handler Base register */
      18:  #endif
      19:      struct restart_block    restart_block;
      20:  };

    通过上面的操作,寄存器r9中就是这个进程的thread_info结构的起始地址。

    • sys_call_table

    entry-common.S (arch/arm/kernel)

       1:      .type    sys_call_table, #object
       2:  ENTRY(sys_call_table)
       3:  #include "calls.S"
       4:  #undef ABI
       5:  #undef OBSOLETE

    其中,calls.S的内容如下:

       1:  /*
       2:   *  linux/arch/arm/kernel/calls.S
       3:   *
       4:   *  Copyright (C) 1995-2005 Russell King
       5:   *
       6:   * This program is free software; you can redistribute it and/or modify
       7:   * it under the terms of the GNU General Public License version 2 as
       8:   * published by the Free Software Foundation.
       9:   *
      10:   *  This file is included thrice in entry-common.S
      11:   */
      12:  /* 0 */        CALL(sys_restart_syscall)
      13:          CALL(sys_exit)
      14:          CALL(sys_fork_wrapper)
      15:          CALL(sys_read)
      16:          CALL(sys_write)
      17:  /* 5 */        CALL(sys_open)
      18:          CALL(sys_close)
      19:          CALL(sys_ni_syscall)        /* was sys_waitpid */
      20:          CALL(sys_creat)
      21:          CALL(sys_link)
      22:  /* 10 */    CALL(sys_unlink)
      23:          CALL(sys_execve_wrapper)
      24:          CALL(sys_chdir)
      25:          CALL(OBSOLETE(sys_time))    /* used by libc4 */
      26:          CALL(sys_mknod)
      27:  ......
      28:  /* 355 */    CALL(sys_signalfd4)
      29:          CALL(sys_eventfd2)
      30:          CALL(sys_epoll_create1)
      31:          CALL(sys_dup3)
      32:          CALL(sys_pipe2)
      33:  /* 360 */    CALL(sys_inotify_init1)
      34:          CALL(sys_preadv)
      35:          CALL(sys_pwritev)
      36:  #ifndef syscalls_counted
      37:  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
      38:  #define syscalls_counted
      39:  #endif
      40:  .rept syscalls_padding
      41:          CALL(sys_ni_syscall)
      42:  .endr

    关于这个部分的更多介绍参见:

    http://www.cnblogs.com/pengdonglin137/p/3714981.html

    • bics    r10, r10, #0xff000000

    执行这个操作的时候,r10中存放的是SWI instruction,在我们的例子中就是(a.dis):

    image

    即:r10 为 0xEF000000

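    显然,由于r10(0xEF000000)的低24位全为0,bics的运算结果为0,Z标志被置位,所以bics这条指令下面的两条带ne条件的语句(eorne和ldrne)由于条件不成立,无法获得执行。bics这条指令的作用是从SWI指令的机器码中取出低24位的立即数(在OABI下它包含系统调用号)。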

    可以参考这个手册,看一下svc执行的格式:

    http://files.cnblogs.com/pengdonglin137/DUI0203IC_rvct_developer_guide.pdf

    image

    可以看到,[23:0]存放的就是svc指令后面的那个立即数,也即系统调用号。

    不过需要注意的是:我们这里并没有这样做,我们的做法是(a.dis中可以看到):

    image

    使用的是svc 0,后面跟的并不是系统调用号,而是0,这里把系统调用号存放在了寄存器r7中(a.dis中):

    image

    可以看到,由于使用的sys_open系统调用,所以把它的系统调用号5存放到了寄存器r7当中

    • ldrcc    pc, [tbl, scno, lsl #2]        @ call sys_* routine

    这里的scno就是寄存器r7的别名,它的值是sys_open的系统调用号5,由于在calls.S中每个系统调用表项占用4个字节,所以这里将scno的值左移2位(即乘以4)然后再加上tbl,tbl是系统调用表sys_call_table的基地址。从这个地址取出的值就是sys_open的入口地址,把它加载到pc后便跳入sys_open开始执行了。

    asmlinkage long sys_open(const char __user *filename,
                    int flags, int mode);

    那么sys_open在哪呢?在内核源码中直接搜索sys_open,无法搜到它的实现代码,实际上它是在fs/open.c中实现的:

       1:  SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
       2:  {
       3:      long ret;
       4:   
       5:      if (force_o_largefile())
       6:          flags |= O_LARGEFILE;
       7:   
       8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
       9:      /* avoid REGPARM breakage on x86: */
      10:      asmlinkage_protect(3, ret, filename, flags, mode);
      11:      return ret;
      12:  }

    其中SYSCALL_DEFINE3是一个宏:

    syscalls.h (include/linux)

    #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

    SYSCALL_DEFINEx也是一个宏:

    syscalls.h (include/linux)

    #define SYSCALL_DEFINEx(x, sname, ...)                \
        __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

    __SYSCALL_DEFINEx仍然是个宏:

    syscalls.h (include/linux)

    #define __SYSCALL_DEFINEx(x, name, ...)                    \
        asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

    所以展开后的结果就是:

    asmlinkage long sys_open(__SC_DECL3(__VA_ARGS__))

    其中,__SC_DECL3定义如下:

    syscalls.h (include/linux)

       1:  #define __SC_DECL1(t1, a1)    t1 a1
       2:  #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
       3:  #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)

    所以最终的结果如下:

       1:  asmlinkage long sys_open(const char __user *filename, int flags, int mode)
       2:  {
       3:      long ret;
       4:   
       5:      if (force_o_largefile())
       6:          flags |= O_LARGEFILE;
       7:   
       8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
       9:      /* avoid REGPARM breakage on x86: */
      10:      asmlinkage_protect(3, ret, filename, flags, mode);
      11:      return ret;
      12:   
      13:  }

    关于sys_open本身的实现这里就不深入分析了。

    接下来看一下返回。

    • adr    lr, ret_fast_syscall        @ return address

    当sys_open中return后,便跳入ret_fast_syscall处开始执行:

       1:  /*
       2:   * This is the fast syscall return path.  We do as little as
       3:   * possible here, and this includes saving r0 back into the SVC
       4:   * stack.
       5:   */
       6:  ret_fast_syscall:
       7:   UNWIND(.fnstart    )
       8:   UNWIND(.cantunwind    )
       9:      disable_irq                @ disable interrupts
      10:      
    ldr r1, [tsk, #TI_FLAGS] @将thread_info中的flags成员存放到r1中
      11:      tst    r1, #_TIF_WORK_MASK
      12:      bne    fast_work_pending  
      13:   
      14:      /* perform architecture specific actions before user return */
      15:      arch_ret_to_user r1, lr
      16:   
      17:      @ fast_restore_user_regs
      18:      ldr    r1, [sp, #S_OFF + S_PSR]    @ get calling cpsr
      19:      ldr    lr, [sp, #S_OFF + S_PC]!    @ get pc
      20:      msr    spsr_cxsf, r1            @ save in spsr_svc
      21:      ldmdb    sp, {r1 - lr}^            @ get calling r1 - lr
      22:      mov    r0, r0
      23:      add    sp, sp, #S_FRAME_SIZE - S_PC
      24:      movs    pc, lr                @ return & move spsr_svc into cpsr
      25:   UNWIND(.fnend        )
      26:   
      27:  /*
      28:   * Ok, we need to do extra processing, enter the slow path.
      29:   */
      30:  fast_work_pending:
      31:      str    r0, [sp, #S_R0+S_OFF]!        @ returned r0
      32:  work_pending:
      33:      tst    r1, #_TIF_NEED_RESCHED       @判断是否需要进行进程调度
      34:      bne    work_resched
      35:      tst    r1, #_TIF_SIGPENDING
      36:      beq    no_work_pending
      37:      mov    r0, sp                @ 'regs'
      38:      mov    r2, why                @ 'syscall'
      39:      bl    do_notify_resume
      40:      b    ret_slow_syscall        @ Check work again
      41:   
      42:  
    work_resched:
      43:      bl    schedule
      44:  /*
      45:   * "slow" syscall return path.  "why" tells us if this was a real syscall.
      46:   */
      47:  ENTRY(ret_to_user)
      48:  ret_slow_syscall:
      49:      disable_irq                @ disable interrupts
      50:      ldr    r1, [tsk, #TI_FLAGS]
      51:      tst    r1, #_TIF_WORK_MASK
      52:      bne    work_pending
      53:  no_work_pending:
      54:      /* perform architecture specific actions before user return */
      55:      arch_ret_to_user r1, lr
      56:   
      57:      @ slow_restore_user_regs
      58:      ldr    r1, [sp, #S_PSR]        @ get calling cpsr
      59:      ldr    lr, [sp, #S_PC]!        @ get pc
      60:      msr    spsr_cxsf, r1            @ save in spsr_svc
      61:      ldmdb    sp, {r0 - lr}^            @ get calling r0 - lr
      62:      mov    r0, r0
      63:      add    sp, sp, #S_FRAME_SIZE - S_PC
      64:      movs    pc, lr                @ return & move spsr_svc into cpsr
      65:  ENDPROC(ret_to_user)

    在返回的时候要看是否要进行进程调度。

    image

    先分析到这里。

  • 相关阅读:
    Java 基本知识
    开源框架 Java
    Java 常用工具
    centos7设置静态IP
    VMnet1、VMnet8到底是什么?
    centos7修改主机名的方法
    防火墙阻止了虚拟机与主机之间互相ping通解决方案
    虚拟机centos与主机互相Ping通
    centos个性化命令行提示符
    更多的常用命令
  • 原文地址:https://www.cnblogs.com/pengdonglin137/p/3878316.html
Copyright © 2011-2022 走看看