zoukankan      html  css  js  c++  java
  • linux内核之情景分析mmap操作

    进程可以通过mmap把一个已打开文件映射到用户空间.
    1. mmap(void*start,size_t length,int prot,int flags,int fd,off_t offset)
    start表示用户空间映射的起始地址,length表示映射区域的长度,offset表示映射内容在文件中的起始偏移.
    1. asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,//sys_mmap2: hands an mmap request over to do_mmap2()
    2. unsigned long prot, unsigned long flags,
    3. unsigned long fd, unsigned long pgoff)
    4. {
    5. return do_mmap2(addr, len, prot, flags, fd, pgoff);//all six arguments are forwarded unchanged; the result of do_mmap2() is returned as-is
    6. }
    其主体是do_mmap2,注意其标志MAP_ANONYMOUS表示匿名映射
    1. /* common code for old and new mmaps: resolves fd to a struct file (unless MAP_ANONYMOUS is set) and calls do_mmap_pgoff() while holding mmap_sem */
    2. static inline long do_mmap2(
    3. unsigned long addr, unsigned long len,
    4. unsigned long prot, unsigned long flags,
    5. unsigned long fd, unsigned long pgoff)
    6. {
    7. int error = -EBADF;//returned unchanged if fget() fails below
    8. struct file * file = NULL;//stays NULL for an anonymous mapping
    9. flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);//these two bits are always cleared from the caller's flags
    10. if (!(flags & MAP_ANONYMOUS)) {//MAP_ANONYMOUS clear: the mapping is backed by a file, so fd must be resolved
    11. file = fget(fd);//get the struct file for the already-open fd; the matching fput() is below
    12. if (!file)//no struct file for this fd: return -EBADF
    13. goto out;
    14. }
    15. down(&current->mm->mmap_sem);//take the mmap_sem semaphore of the current mm around the mapping operation
    16. error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);//the actual mapping work is done by do_mmap_pgoff()
    17. up(&current->mm->mmap_sem);//release mmap_sem
    18. if (file)
    19. fput(file);//pairs with the fget() above
    20. out:
    21. return error;//either -EBADF or whatever do_mmap_pgoff() returned
    22. }
    其主体为do_mmap_pgoff
    1. do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
    第一个参数为已打开的文件,第二个为映射的起始地址,第三个为长度,第四个为访问权限,第五个为其他控制标志,第六个为文件内的偏移量(以页为单位).
    1. unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
    2. unsigned long prot, unsigned long flags, unsigned long pgoff)
    3. {//validates the request, sets up a vm_area_struct for the range and inserts it into current->mm; returns the mapped address, or a negative errno value on failure
    4. struct mm_struct * mm = current->mm;//memory descriptor of the current process
    5. struct vm_area_struct * vma;
    6. int correct_wcount = 0;//set to 1 after deny_write_access() succeeds, so i_writecount is incremented again later
    7. int error;
    8. //a non-NULL file means a file mapping; the file must then provide an f_op->mmap method
    9. if (file && (!file->f_op || !file->f_op->mmap))
    10. return -ENODEV;
    11. //round len up to whole pages; a zero length does nothing and returns addr
    12. if ((len = PAGE_ALIGN(len)) == 0)
    13. return addr;
    14. //reject a length above TASK_SIZE, or a range addr..addr+len that would run past TASK_SIZE
    15. if (len > TASK_SIZE || addr > TASK_SIZE-len)
    16. return -EINVAL;
    17. //reject a page offset whose sum with the page count of len wraps around
    18. /* offset overflow? */
    19. if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
    20. return -EINVAL;
    21. //refuse once the mm already holds more than MAX_MAP_COUNT mappings
    22. /* Too many mappings? */
    23. if (mm->map_count > MAX_MAP_COUNT)
    24. return -ENOMEM;
    25. //if VM_LOCKED is in the mm's default flags, already-locked memory plus len must stay within RLIMIT_MEMLOCK
    26. /* mlock MCL_FUTURE? */
    27. if (mm->def_flags & VM_LOCKED) {
    28. unsigned long locked = mm->locked_vm << PAGE_SHIFT;
    29. locked += len;
    30. if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
    31. return -EAGAIN;
    32. }
    33. /* Do simple checking here so the lower-level routines won't have
    34. * to. we assume access permissions have been handled by the open
    35. * of the memory object, so we don't do any here.
    36. */
    37. if (file != NULL) { //file mapping: check the request against the file's open mode
    38. switch (flags & MAP_TYPE) {//mapping type: MAP_SHARED or MAP_PRIVATE, anything else is -EINVAL
    39. case MAP_SHARED://shared mapping
    40. if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
    41. return -EACCES;
    42. //a shared mapping of an append-only file that is open for writing is refused
    43. /* Make sure we don't allow writing to an append-only file.. */
    44. if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
    45. return -EACCES;
    46. //refuse if locks_verify_locked() reports a mandatory lock on the inode
    47. /* make sure there are no mandatory locks on the file. */
    48. if (locks_verify_locked(file->f_dentry->d_inode))
    49. return -EAGAIN;
    50. /* fall through */
    51. case MAP_PRIVATE://private mapping (also reached from MAP_SHARED): the file must be open for reading
    52. if (!(file->f_mode & FMODE_READ))
    53. return -EACCES;
    54. break;
    55. default:
    56. return -EINVAL;
    57. }
    58. }
    59. /* Obtain the address to map to. we verify (or select) it and ensure
    60. * that it represents a valid section of the address space.
    61. */
    62. if (flags & MAP_FIXED) {//MAP_FIXED set: addr is used as given and must be page-aligned
    63. if (addr & ~PAGE_MASK)
    64. return -EINVAL;
    65. } else {//MAP_FIXED clear: addr is only passed to get_unmapped_area(), which chooses the region
    66. addr = get_unmapped_area(addr, len);
    67. if (!addr)
    68. return -ENOMEM;
    69. }
    70. /* Determine the object being mapped and call the appropriate
    71. * specific mapper. the address has already been validated, but
    72. * not unmapped, but the maps are removed from the list.
    73. */
    74. vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);//allocate a vma structure from the vm_area_cachep cache
    75. if (!vma)
    76. return -ENOMEM;
    77. vma->vm_mm = mm;//owning memory descriptor
    78. vma->vm_start = addr;//start address of the mapped range
    79. vma->vm_end = addr + len;//end address of the mapped range
    80. vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;//flags derived from prot/flags plus the mm's default flags
    81. if (file) {//file mapping; the else branch below handles the anonymous case (file == NULL)
    82. VM_ClearReadHint(vma);//the following lines initialize the read-hint state and the VM_MAY* permission bits
    83. vma->vm_raend = 0;
    84. if (file->f_mode & FMODE_READ)
    85. vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
    86. if (flags & MAP_SHARED) {
    87. vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
    88. /* This looks strange, but when we don't have the file open
    89. * for writing, we can demote the shared mapping to a simpler
    90. * private mapping. That also takes care of a security hole
    91. * with ptrace() writing to a shared mapping without write
    92. * permissions.
    93. *
    94. * We leave the VM_MAYSHARE bit on, just to get correct output
    95. * from /proc/xxx/maps..
    96. */
    97. if (!(file->f_mode & FMODE_WRITE))
    98. vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
    99. }
    100. } else {
    101. vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
    102. if (flags & MAP_SHARED)
    103. vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
    104. }
    105. vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
    106. vma->vm_ops = NULL;
    107. vma->vm_pgoff = pgoff;//offset of the mapped content within the file, in pages (presumably used at page-fault time to locate the file page for a faulting address)
    108. vma->vm_file = NULL;
    109. vma->vm_private_data = NULL;
    110. /* Clear old maps */
    111. error = -ENOMEM;
    112. if (do_munmap(mm, addr, len))//remove any existing mappings covering addr..addr+len; a non-zero result means this failed
    113. goto free_vma;//do_munmap() failed: free the vma and return -ENOMEM
    114. //the total mapped size including len must stay within RLIMIT_AS
    115. /* Check against address space limit. */
    116. if ((mm->total_vm << PAGE_SHIFT) + len
    117. > current->rlim[RLIMIT_AS].rlim_cur)
    118. goto free_vma;
    119. //a private writable mapping without MAP_NORESERVE requires vm_enough_memory() to succeed
    120. /* Private writable mapping? Check memory availability.. */
    121. if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
    122. !(flags & MAP_NORESERVE) &&
    123. !vm_enough_memory(len >> PAGE_SHIFT))
    124. goto free_vma;
    125. if (file) {//file mapping: honour VM_DENYWRITE, then call the file's own mmap method
    126. if (vma->vm_flags & VM_DENYWRITE) {
    127. error = deny_write_access(file);
    128. if (error)
    129. goto free_vma;
    130. correct_wcount = 1;
    131. }
    132. vma->vm_file = file;
    133. get_file(file);//take a reference on file for vma->vm_file; the error path below drops it with fput()
    134. error = file->f_op->mmap(file, vma);//file-specific mmap method; on failure everything is undone via unmap_and_free_vma
    135. if (error)
    136. goto unmap_and_free_vma;
    137. } else if (flags & MAP_SHARED) {//shared anonymous mapping
    138. error = shmem_zero_setup(vma);
    139. if (error)
    140. goto free_vma;
    141. }
    142. /* Can addr have changed??
    143. * (flags and addr are re-read from the vma below in case they were changed.)
    144. * Answer: Yes, several device drivers can do it in their
    145. * f_op->mmap method. -DaveM
    146. */
    147. flags = vma->vm_flags;
    148. addr = vma->vm_start;
    149. insert_vm_struct(mm, vma);//insert the vma into the current process's memory descriptor
    150. if (correct_wcount)
    151. atomic_inc(&file->f_dentry->d_inode->i_writecount);
    152. mm->total_vm += len >> PAGE_SHIFT;//account the new pages (len >> PAGE_SHIFT) in total_vm
    153. if (flags & VM_LOCKED) {//locked mapping: account it in locked_vm and make its pages present now
    154. mm->locked_vm += len >> PAGE_SHIFT;
    155. make_pages_present(addr, addr + len);//going by its name, makes the pages of the range present immediately
    156. }
    157. return addr;
    158. unmap_and_free_vma:
    159. if (correct_wcount)
    160. atomic_inc(&file->f_dentry->d_inode->i_writecount);
    161. vma->vm_file = NULL;
    162. fput(file);
    163. /* Undo any partial mapping done by a device driver. */
    164. flush_cache_range(mm, vma->vm_start, vma->vm_end);
    165. zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
    166. flush_tlb_range(mm, vma->vm_start, vma->vm_end);
    167. free_vma:
    168. kmem_cache_free(vm_area_cachep, vma);
    169. return error;
    170. }
    以上只是在文件与虚拟区间之间建立了映射,但具体的映射(从虚拟地址映射到物理地址)还没有开始,而是把具体页面的映射推迟到真正需要的时候才进行.为具体映射的建立以及物理页面的换入和换出,分别准备了一些函数:filemap_nopage(),ext2_readpage(),ext2_writepage().
    什么时候调用呢
    (1)该区间中的一个页面首次受到访问时,会由于页面尚未映射而发生缺页异常,相应的异常处理程序会调用do_no_page().对于ext2文件系统,do_no_page()会通过ext2_readpage()分配一个空闲内存页面并从文件读入相应页面,然后建立映射.
    (2)建立映射后,对页面的写操作使页面变脏,但页面的内容并不会立即写回文件,而是由内核线程bdflush()周期性运行时通过page_launder()间接调用ext2_writepage(),将页面的内容写入文件.如果页面很长时间没有受到访问,就会被try_to_swap_out()解除映射而转入不活跃状态;如果页面是脏的,也会先调用ext2_writepage()写回文件,然后再解除映射.
    (3)解除了映射的页面再次受到访问时,又会因为页面无映射而发生缺页异常,再次进入do_no_page().
    对于mmap的文件映射,如果一个页面长期得不到访问,将直接把页表项设置为0;再次访问时将重新通过alloc_page分配一个新页面,把文件内容读入新页面,再建立映射.而对于普通的换入/换出,则是发生缺页异常后从swap分区找到被换出的页面,然后建立映射.















  • 相关阅读:
    Appium+python自动化20-查看iOS上app元素属性【转载】
    Appium+python自动化19-iOS模拟器(iOS Simulator)安装自家APP【转载】
    Appium+python自动化18-brew、carthage和appium-doctor【转载】
    Appium+python自动化17-启动iOS模拟器APP源码案例【转载】
    Appium+python自动化16-appium1.6在mac上环境搭建启动ios模拟器上Safari浏览器【转载】
    Appium+python自动化15-在Mac上环境搭建【转载】
    Centos-清屏命令-clear
    Centos-切换用户身份-su
    Centos-修改密码-passwd
    Centos-显示或修改系统时间与日期-date
  • 原文地址:https://www.cnblogs.com/zengyiwen/p/8cc1bc51a06eddb0cc99155dcaa513dc.html
Copyright © 2011-2022 走看看