zoukankan      html  css  js  c++  java
  • Linux3.10.0块IO子系统流程(1)-- 上层提交请求

    Linux通用块层提供给上层的接口函数是submit_bio。上层在构造好bio之后,调用submit_bio提交给通用块层处理。
     
    submit_bio函数如下:
     
    void submit_bio(int rw, struct bio *bio)
    {
        bio->bi_rw |= rw;    //记录读写方式
        /*
         * 执行有数据传输的读写或屏障请求统计,暂不关心
         */
        if (bio_has_data(bio)) {
            unsigned int count;
            if (unlikely(rw & REQ_WRITE_SAME))
                count = bdev_logical_block_size(bio->bi_bdev) >> 9;
            else
                count = bio_sectors(bio);
            if (rw & WRITE) {
                count_vm_events(PGPGOUT, count);
            } else {
                task_io_account_read(bio->bi_size);
                count_vm_events(PGPGIN, count);
            }
            if (unlikely(block_dump)) {
                char b[BDEVNAME_SIZE];
                printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)
    ",
                current->comm, task_pid_nr(current),
                    (rw & WRITE) ? "WRITE" : "READ",
                    (unsigned long long)bio->bi_sector,
                    bdevname(bio->bi_bdev, b),
                    count);
            }
        }
        //执行真实的IO处理
        generic_make_request(bio);
    }
    void generic_make_request(struct bio *bio)
    {
        struct bio_list bio_list_on_stack;
        if (!generic_make_request_checks(bio))
            return;
    
        if (current->bio_list) {
            bio_list_add(current->bio_list, bio);
            return;
        }
    
        BUG_ON(bio->bi_next);
        bio_list_init(&bio_list_on_stack);
        current->bio_list = &bio_list_on_stack;
        do {
            struct request_queue *q = bdev_get_queue(bio->bi_bdev);    //获取bio对应的请求队列
            q->make_request_fn(q, bio);                                //调用请求队列的回调函数来处理IO
            bio = bio_list_pop(current->bio_list);
        } while (bio);
        current->bio_list = NULL; /* deactivate */
    }
    在调用make_request_fn处理bio的时候,可能会产生新的bio,即make_request_fn会递归调用generic_make_request。最直观的例子就是“栈式块设备”。为了防止栈式块设备执行请求可能出现问题,在一个时刻只允许进程有一个generic_make_request被调用。为此,在进程结构中定义了一个bio等待处理链表:bio_list。同时区分“活动”和“非活动”状态。活动状态表示进程已经在调用generic_make_request。这时,所有后续产生的bio都链入bio_list链表,在当前bio完成的情况下,逐个处理。
     
    generic_make_request的执行过程:
    1. generic_make_request_checks
    2. 判断generic_make_request是否处于活动状态。如果current->bio_list不为NULL,则表明当前进程已经有generic_make_request在执行,这时候传进来的bio都将链接到当前进程等待处理的bio链表尾部
    3. 设置current->bio_list表明当前的generic_make_request为活动状态,让后来的bio有机会插入等待链表
    4. 处理bio。这里的bio可能是传入的bio,也可能是当前进程待处理bio链表中的bio。如果是前者,上层保证了其bi_next必然为NULL;如果是后者,则在将bio从链表中脱离的时候,已经设置了bi_next为NULL
    5. 调用make_request_fn回调处理bio
    6. 检查当前进程的等待链表中是否还有bio,如果有,跳到第四步
    7. 至此,generic_make_request的“本轮执行周期”已经完毕,清零current->bio_list,使得generic_make_request处于“非活动”状态
    这里再看下generic_make_request_checks
     
     1 static noinline_for_stack bool
     2 generic_make_request_checks(struct bio *bio)
     3 {
     4     struct request_queue *q;
     5     int nr_sectors = bio_sectors(bio);
     6     int err = -EIO;
     7     char b[BDEVNAME_SIZE];
     8     struct hd_struct *part;
     9 
    10     might_sleep();
    11 
    12     // 检查bio的扇区有没有超过块设备的扇区数
    13     if (bio_check_eod(bio, nr_sectors))
    14         goto end_io;
    15 
    16     // 检测块设备的请求队列是否为空
    17     q = bdev_get_queue(bio->bi_bdev);
    18     if (unlikely(!q)) {
    19         printk(KERN_ERR
    20                "generic_make_request: Trying to access "
    21             "nonexistent block-device %s (%Lu)
    ",
    22             bdevname(bio->bi_bdev, b),
    23             (long long) bio->bi_sector);
    24         goto end_io;
    25     }
    26     
    27     // 检测请求的扇区长度是否超过物理限制
    28     if (likely(bio_is_rw(bio) &&
    29            nr_sectors > queue_max_hw_sectors(q))) {
    30         printk(KERN_ERR "bio too big device %s (%u > %u)
    ",
    31                bdevname(bio->bi_bdev, b),
    32                bio_sectors(bio),
    33                queue_max_hw_sectors(q));
    34         goto end_io;
    35     }
    36 
    37     part = bio->bi_bdev->bd_part;
    38     if (should_fail_request(part, bio->bi_size) ||
    39         should_fail_request(&part_to_disk(part)->part0,
    40                 bio->bi_size))
    41         goto end_io;
    42 
    43     /*
    44      * If this device has partitions, remap block n of partition p to block n+start(p) of the disk.
    45      * 如果请求的块设备可能代表一个分区,这里重新映射到所在的磁盘设备
    46      */
    47     blk_partition_remap(bio);
    48 
    49     if (bio_check_eod(bio, nr_sectors))
    50         goto end_io;
    51 
    52     /*
    53      * Filter flush bio's early so that make_request based
    54      * drivers without flush support don't have to worry
    55      * about them.
    56      */
    57     if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
    58         bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
    59         if (!nr_sectors) {
    60             err = 0;
    61             goto end_io;
    62         }
    63     }
    64 
    65      // 检查设备对DISCARD命令的支持
    66     if ((bio->bi_rw & REQ_DISCARD) &&
    67         (!blk_queue_discard(q) ||
    68          ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
    69         err = -EOPNOTSUPP;
    70         goto end_io;
    71     }
    72 
    73     if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
    74         err = -EOPNOTSUPP;
    75         goto end_io;
    76     }
    77 
    78     /*
    79      * Various block parts want %current->io_context and lazy ioc
    80      * allocation ends up trading a lot of pain for a small amount of
    81      * memory.  Just allocate it upfront.  This may fail and block
    82      * layer knows how to live with it.
    83      */
    84     create_io_context(GFP_ATOMIC, q->node);
    85 
    86     if (blk_throtl_bio(q, bio))
    87         return false;    /* throttled, will be resubmitted later */
    88 
    89     trace_block_bio_queue(q, bio);
    90     return true;
    91 
    92 end_io:
    93     bio_endio(bio, err);
    94     return false;
    95 }
    generic_make_request_checks
  • 相关阅读:
    zookeeper编译环境搭建
    windows下zookeeper安装并发布成windows服务
    ScheduledThreadPoolExecutor源码
    AbstractExecutorService源码
    FutureTask源码2
    FutureTask源码
    ThreadPoolExecutor源码2
    ThreadPoolExecutor源码1
    二进制转10进制
    Android ANR Waiting because no window has focus问题分析
  • 原文地址:https://www.cnblogs.com/luxiaodai/p/9254779.html
Copyright © 2011-2022 走看看