简介
本文分析linux内核exec系统调用执行过程中可执行文件的加载过程和栈的设置,内核代码版本为2.6.32
分析
archia64kernelprocess.c中有sys_exec函数的实现,是exec的系统调用服务例程
long sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs) { char *fname; int error; //得到文件名字 fname = getname(filename); error = PTR_ERR(fname); if (IS_ERR(fname)) goto out; error = do_execve(fname, argv, envp, regs); putname(fname); out: return error; }
fs amei.c中有getname函数的实现,在getname中,会从slab分配器中分配空间,然后从用户空间读取名字。所以sys_execve的主要工作有do_execve来实现,do_execve实现在fsexec.c中,下面分析do_execve的实现
首先是共享打开文件描述符
retval = unshare_files(&displaced);
unshare是linux中名称空间的控制函数,files_struct是挂靠在进程文件描述符上的,表示一个进程打开文件的信息,包含打开文件列表等待信息。这里的unshare_files就是复制原打开文件列表,所以说,exec后,子进程是共享父进程的打开文件列表的,包括标准输入输出和错误输出
struct linux_binprm *bprm; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
这里动态分配了linux_binprm结构,linux_binprm是exec过程中信息的结构
/* * This structure is used to hold the arguments that are used when loading binaries. */ struct linux_binprm{ char buf[BINPRM_BUF_SIZE]; #ifdef CONFIG_MMU struct vm_area_struct *vma; #else # define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ unsigned int cred_prepared:1,/* true if creds already prepared (multiple * preps happen for interpreters) */ cap_effective:1;/* true if has elevated effective capabilities, * false if not; except for init which inherits * its parent's caps anyway */ #ifdef __alpha__ unsigned int taso:1; #endif unsigned int recursion_depth; struct file * file; struct cred *cred; /* new credentials */ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; char * filename; /* Name of binary as seen by procps */ char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; };
接下来的prepare_bprm_creds新建一个cred结构,设置linux_binprm中的cred结构,就是信任状相关内容,包含gid,uid等信息,经常用来提权
retval = prepare_bprm_creds(bprm);
然后打开文件,并初始化文件相关结构
file = open_exec(filename); bprm->file = file; bprm->filename = filename; bprm->interp = filename;
建立内存管理的mm结构
retval = bprm_mm_init(bprm);
bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out;
接着是prepare_binprm函数,prepare_binprm函数检查了文件是否可以执行,初始化了binprm中cred的几个字段,然后还从文件中读取了BINPRM_BUF_SIZE的内容到binprm的buf中
int prepare_binprm(struct linux_binprm *bprm) { umode_t mode; struct inode * inode = bprm->file->f_path.dentry->d_inode; int retval; mode = inode->i_mode; if (bprm->file->f_op == NULL) return -EACCES; /* clear any previous set[ug]id data from a previous binary */ bprm->cred->euid = current_euid(); bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { /* Set-uid? */ if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = inode->i_uid; } /* Set-gid? */ /* * If setgid is set but no group execute bit then this * is a candidate for mandatory locking, not a setgid * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = inode->i_gid; } } /* fill in binprm security blob */ retval = security_bprm_set_creds(bprm); if (retval) return retval; bprm->cred_prepared = 1; memset(bprm->buf, 0, BINPRM_BUF_SIZE); return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); }
下面是复制几个字符串的工作
retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out;
其中copy_string_kernel也是调用copy_string实现的,只不过是从你和中拷贝,具体实现就是使用set_fs设置段限制为内核数据段。
看copy_string函数之前,先看看linux_binprm中的两个字段,page和p,page表示的是存放参数的页面数组,而p表示的是在这些数组的顶部,因为这些字符串是按照栈的方式存放的,也就是说,先分配地址更高的数组,向低地址方向增长,p就指向栈顶部
下面copy_string的实现也就清楚了。
static int copy_strings(int argc, char __user * __user * argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; char *kaddr = NULL; unsigned long kpos = 0; int ret; /* 这里使用的是argc不断减少,也就是说get_user取得的是逆序的argv字符串 */ while (argc-- > 0) { char __user *str; int len; unsigned long pos; if (get_user(str, argv+argc) || !(len = strnlen_user(str, MAX_ARG_STRLEN))) { ret = -EFAULT; goto out; } if (!valid_arg_len(bprm, len)) { ret = -E2BIG; goto out; } /* p指向的是内存区域的最高长度,不断减少 pos也是指向字符串结尾处的偏移量 str指向用户态字符串结尾处 */ /* We're going to work our way backwords. */ pos = bprm->p; str += len; bprm->p -= len; while (len > 0) { int offset, bytes_to_copy; //offset表示的是在页内的偏移量的末尾 offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; bytes_to_copy = offset; if (bytes_to_copy > len) bytes_to_copy = len; //这一步让offset指向页内偏移的开始位置,此时字符串应该被拷贝进offset到offset+bytes_to_copy处 offset -= bytes_to_copy; pos -= bytes_to_copy; str -= bytes_to_copy; len -= bytes_to_copy; if (!kmapped_page || kpos != (pos & PAGE_MASK)) { struct page *page; //从bprm的page字段中取出第一个page,如果不存在则分配页 page = get_arg_page(bprm, pos, 1); if (!page) { ret = -E2BIG; goto out; } if (kmapped_page) { flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } //建立映射到永久内存映射区,虚拟地址是kmap kmapped_page = page; kaddr = kmap(kmapped_page); kpos = pos & PAGE_MASK; flush_arg_page(bprm, kpos, kmapped_page); } //这一步从用户空间拷贝数据进内核,这些数据存放在binprm的page中 if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ret = -EFAULT; goto out; } } } ret = 0; out: if (kmapped_page) { flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } return ret; }
接着就是寻找可执行文件的过程
retval = search_binary_handler(bprm,regs);
在linux内核中,有一个全局的链表formats,表示系统中所有的可执行文件格式,其中链上挂接的结构是linux_binfmt结构,表示一个可执行文件格式,包好了3个重要的函数,分别用来加载可执行文件、共享库和生成core_dump核心转储文件,search_binary_handler就是用这个链上的load_binary来执行。
struct linux_binfmt { struct list_head lh; struct module *module; int (*load_binary)(struct linux_binprm *, struct pt_regs * regs); int (*load_shlib)(struct file *); int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); unsigned long min_coredump; /* minimal dump size */ int hasvdso; };
其中elf的结构定义在fsinfmt_elf.c中,如下:
static struct linux_binfmt elf_format = { .module = THIS_MODULE, .load_binary = load_elf_binary, .load_shlib = load_elf_library, .core_dump = elf_core_dump, .min_coredump = ELF_EXEC_PAGESIZE, .hasvdso = 1 };
回头看do_execve,search_binary_handler下面的内容也没有什么了,清除一些分配的结构等等。所以主要的加载实现是在load_elf_binary,这个函数接受了之前初始化的linux_binprm和寄存器上下文。加载可执行文件
下面看load_elf_binary函数
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) { struct file *interpreter = NULL; /* to shut gcc up */ unsigned long load_addr = 0, load_bias = 0; int load_addr_set = 0; char * elf_interpreter = NULL; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int retval, i; unsigned int size; unsigned long elf_entry; unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc = 0; int executable_stack = EXSTACK_DEFAULT; unsigned long def_flags = 0; //直接在栈上分配两个elf头,表示可执行文件和动态链接器的头 struct { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; } *loc; //分配内存 loc = kmalloc(sizeof(*loc), GFP_KERNEL); if (!loc) { retval = -ENOMEM; goto out_ret; } //之前初始化bprm的时候从文件中读取了一些数据放到buf中 loc->elf_ex = *((struct elfhdr *)bprm->buf); retval = -ENOEXEC; //这里做一些简单的一致性检查 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out; if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; if (!bprm->file->f_op||!bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) goto out; if (loc->elf_ex.e_phnum < 1 || loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) goto out; //程序头表的大小 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); retval = -ENOMEM; //分配程序头表的内存空间 elf_phdata = kmalloc(size, GFP_KERNEL); if (!elf_phdata) goto out; //读入程序头表的内容 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *)elf_phdata, size); if (retval != size) { if (retval >= 0) retval = -EIO; goto out_free_ph; } elf_ppnt = elf_phdata; elf_bss = 0; elf_brk = 0; start_code = ~0UL; end_code = 0; start_data = 0; end_data = 0; //遍历所有的段,找到类型为PT_INTERP的段,这个段内存放的是动态链接器的地址 for (i = 0; i < loc->elf_ex.e_phnum; i++) { if (elf_ppnt->p_type == PT_INTERP) { /* This is the program interpreter used for * shared libraries - for now assume that this * is an a.out format binary */ retval = -ENOEXEC; //验证动态链接器的路径是否符合路径要求 if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) goto out_free_ph; retval = -ENOMEM; //分配内存存放动态链接器路径 elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); if (!elf_interpreter) goto out_free_ph; //读取动态链接器路径 retval = kernel_read(bprm->file, elf_ppnt->p_offset, elf_interpreter, elf_ppnt->p_filesz); if (retval != elf_ppnt->p_filesz) { if (retval >= 0) retval = -EIO; goto out_free_interp; } /* make sure path is NULL terminated */ retval = -ENOEXEC; //确认字符串路径最后一定是' '字符 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '