zoukankan      html  css  js  c++  java
  • 从fork系统调用分析system_call中断处理过程

    1. 系统调用初始化流程

    内核在完成初始化后, 执行的第一个内核程序是init/main.c中定义的asmlinkage __visible void __init start_kernel(void), 由它启动内核; start_kernel()执行时, 又会调用arch/x86/kernel/traps.c中定义的void __init trap_init(void)初始化陷阱门及中断门; trap_init()通过执行set_system_trap_gate(SYSCALL_VECTOR, &system_call)完成系统调用的挂接。

    init/main.c:
    500 asmlinkage __visible void __init start_kernel(void)
    501 {
    502 char *command_line;
    503 char *after_dashes;
    ...
    560 sort_main_extable();
    561 trap_init(); // 初始化陷阱门及中断门
    562 mm_init();
    ...
    }
    
    arch/x86/kernel/traps.c:
    792 void __init trap_init(void)
    793 {
    794 int i;
    795 
    796 #ifdef CONFIG_EISA
    797 void __iomem *p = early_ioremap(0x0FFFD9, 4);
    798 
    799 if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
    800 EISA_bus = 1;
    801 early_iounmap(p, 4);
    802 #endif
    803 
    804 set_intr_gate(X86_TRAP_DE, divide_error);
    805 set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
    806 /* int4 can be called from all */
    807 set_system_intr_gate(X86_TRAP_OF, &overflow);
    ...
    837 
    838 #ifdef CONFIG_X86_32
    839 set_system_trap_gate(SYSCALL_VECTOR, &system_call); // 初始化系统调用
    840 set_bit(SYSCALL_VECTOR, used_vectors);
    841 #endif
    842
    ...
    }
    
    arch/x86/include/asm/irq_vectors.h: 
    49 #define IA32_SYSCALL_VECTOR 0x80
    50 #ifdef CONFIG_X86_32
    51 # define SYSCALL_VECTOR 0x80  // 系统调用中断号: 0x80
    52 #endif

    2. 系统调用执行过程

    在执行"int 0x80"汇编指令时, 便开始执行system_call: 首先, 切换到内核空间, 保护中断现场; 其次, 比较系统调用号是否在允许的范围内(有效的调用号必须小于宏NR_syscalls, 即系统调用表sys_call_table的项数; 由于使用无符号比较jae, 大于或等于NR_syscalls的调用号均视为无效), 若不在此范围内, 则跳转到syscall_badsys处执行; 若在此范围内, 则根据调用号获取sys_call_table中对应的服务程序地址, 调用该服务程序; 最后保存返回值, 恢复中断现场。

     488 
     489     # system call handler stub
     490 ENTRY(system_call)
     491     RING0_INT_FRAME         # can't unwind into user space anyway
     492     ASM_CLAC
     493     pushl_cfi %eax          # save orig_eax
     494     SAVE_ALL
     495     GET_THREAD_INFO(%ebp)
     496                     # system call tracing in operation / emulation
     497     testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
     498     jnz syscall_trace_entry
     499     cmpl $(NR_syscalls), %eax # 功能号是否有效
     500     jae syscall_badsys
     501 syscall_call:
     502     call *sys_call_table(,%eax,4) # 执行对应的服务程序
     503 syscall_after_call:
     504     movl %eax,PT_EAX(%esp)      # store the return value
     505 syscall_exit:
     506     LOCKDEP_SYS_EXIT
     507     DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
     508                     # setting need_resched or sigpending
     509                     # between sampling and the iret
     510     TRACE_IRQS_OFF
     511     movl TI_flags(%ebp), %ecx
     512     testl $_TIF_ALLWORK_MASK, %ecx  # current->work
     513     jne syscall_exit_work
     514 
     515 restore_all:
     516     TRACE_IRQS_IRET
     517 restore_all_notrace:
     518 #ifdef CONFIG_X86_ESPFIX32
     519     movl PT_EFLAGS(%esp), %eax  # mix EFLAGS, SS and CS
     520     # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
     521     # are returning to the kernel.
     522     # See comments in process.c:copy_thread() for details.
     523     movb PT_OLDSS(%esp), %ah
     524     movb PT_CS(%esp), %al
     525     andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
     526     cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
     527     CFI_REMEMBER_STATE
     528     je ldt_ss           # returning to user-space with LDT SS
     529 #endif
     530 restore_nocheck:
     531     RESTORE_REGS 4          # skip orig_eax/error_code
     532 irq_return:
     533     INTERRUPT_RETURN
     534 .section .fixup,"ax"
     535 ENTRY(iret_exc)
     536     pushl $0            # no error code
     537     pushl $do_iret_error
     538     jmp error_code
     539 .previous
     540     _ASM_EXTABLE(irq_return,iret_exc)
     541 
     542 #ifdef CONFIG_X86_ESPFIX32
     543     CFI_RESTORE_STATE
     544 ldt_ss:
     545 #ifdef CONFIG_PARAVIRT
     546     /*
     547      * The kernel can't run on a non-flat stack if paravirt mode
     548      * is active.  Rather than try to fixup the high bits of
     549      * ESP, bypass this code entirely.  This may break DOSemu
     550      * and/or Wine support in a paravirt VM, although the option
     551      * is still available to implement the setting of the high
     552      * 16-bits in the INTERRUPT_RETURN paravirt-op.
     553      */
     554     cmpl $0, pv_info+PARAVIRT_enabled
     555     jne restore_nocheck
     556 #endif
     557 
    RING0_INT_FRAME宏定义:
     256 
     257 .macro RING0_INT_FRAME
     258     CFI_STARTPROC simple
     259     CFI_SIGNAL_FRAME
     260     CFI_DEF_CFA esp, 3*4
     261     /*CFI_OFFSET cs, -2*4;*/
     262     CFI_OFFSET eip, -3*4
     263 .endm
     264 

    SAVE_ALL宏定义:

     186 .macro SAVE_ALL
     187     cld
     188     PUSH_GS
     189     pushl_cfi %fs
     190     /*CFI_REL_OFFSET fs, 0;*/
     191     pushl_cfi %es
     192     /*CFI_REL_OFFSET es, 0;*/
     193     pushl_cfi %ds
     194     /*CFI_REL_OFFSET ds, 0;*/
     195     pushl_cfi %eax
     196     CFI_REL_OFFSET eax, 0
     197     pushl_cfi %ebp
     198     CFI_REL_OFFSET ebp, 0
     199     pushl_cfi %edi
     200     CFI_REL_OFFSET edi, 0
     201     pushl_cfi %esi
     202     CFI_REL_OFFSET esi, 0
     203     pushl_cfi %edx
     204     CFI_REL_OFFSET edx, 0
     205     pushl_cfi %ecx
     206     CFI_REL_OFFSET ecx, 0
     207     pushl_cfi %ebx
     208     CFI_REL_OFFSET ebx, 0
     209     movl $(__USER_DS), %edx
     210     movl %edx, %ds
     211     movl %edx, %es
     212     movl $(__KERNEL_PERCPU), %edx
     213     movl %edx, %fs
     214     SET_KERNEL_GS %edx
     215 .endm

    sys_call_table定义:

    arch/x86/kernel/syscall_32.c:
    __visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
    /*
    * Smells like a compiler bug -- it doesn't work
    * when the & below is removed.
    */
    [0 ... __NR_syscall_max] = &sys_ni_syscall,
    #include <asm/syscalls_32.h>
    };
    
    arch/x86/include/generated/asm/syscalls_32.h:
    __SYSCALL_I386(0, sys_restart_syscall, sys_restart_syscall)
    __SYSCALL_I386(1, sys_exit, sys_exit)
    __SYSCALL_I386(2, sys_fork, stub32_fork)
    __SYSCALL_I386(3, sys_read, sys_read)
    __SYSCALL_I386(4, sys_write, sys_write)
    __SYSCALL_I386(5, sys_open, compat_sys_open)
    __SYSCALL_I386(6, sys_close, sys_close)
    __SYSCALL_I386(7, sys_waitpid, sys32_waitpid)
    __SYSCALL_I386(8, sys_creat, sys_creat)
    __SYSCALL_I386(9, sys_link, sys_link)
    __SYSCALL_I386(10, sys_unlink, sys_unlink)
    __SYSCALL_I386(11, sys_execve, stub32_execve)
    __SYSCALL_I386(12, sys_chdir, sys_chdir)
    __SYSCALL_I386(13, sys_time, compat_sys_time)
    __SYSCALL_I386(14, sys_mknod, sys_mknod)
    __SYSCALL_I386(15, sys_chmod, sys_chmod)
    __SYSCALL_I386(16, sys_lchown16, sys_lchown16)
    __SYSCALL_I386(18, sys_stat, sys_stat)
    __SYSCALL_I386(19, sys_lseek, compat_sys_lseek)
    __SYSCALL_I386(20, sys_getpid, sys_getpid)
    ...

    3. fork系统调用执行分析:

    在执行系统调用指令之前, 我们先设置了系统调用的功能号"mov $0x02, %eax", 然后执行"int $0x80"。根据在trap_init()中设置的系统陷阱门, 得到中断号0x80对应的中断服务程序的入口地址是system_call, 系统开始执行system_call。先通过RING0_INT_FRAME建立用于栈回溯的CFI帧信息(栈回溯不会进入用户空间), 保存orig_eax并用SAVE_ALL保护中断现场, 再确保系统调用的功能号有效, 并根据该功能号得到系统调用表sys_call_table中的偏移, 从而得到该功能号对应的服务程序入口地址, 即得到sys_fork。之后, 便调用sys_fork完成fork进程的任务。最后, 保存返回值, 恢复现场。

  • 相关阅读:
    nodejs + typescirpt + vs code
    NodeJs使用nodejs-websocket + protobuf
    Windows10环境下使用VisualSVN server搭建SVN服务器
    微信小游戏下socket.io的使用
    JS中实现种子随机数
    帧同步和状态同步
    EgretPaper学习笔记一 (安装环境,新建项目)
    反编译微信小游戏
    微信小游戏 小程序跳转修改 不支持动态更新,只能在发布时修改
    HTML5实现本地JSON文件的读写
  • 原文地址:https://www.cnblogs.com/long3216/p/4392841.html
Copyright © 2011-2022 走看看