zoukankan      html  css  js  c++  java
  • Linux3.10.0块IO子系统流程(2)-- 构造、排序、合并请求

    Linux块设备可以分为三类。分别针对顺序访问物理设备、随机访问物理设备和逻辑设备(即“栈式设备”)
     
    类型 | make_request_fn | request_fn | 备注
    SCSI 设备等 | 从bio构造request(经过合并和排序),返回0 | 逐个处理request | 调用blk_init_queue,使用默认的blk_queue_bio(旧内核中名为__make_request),提供策略例程
    SSD等 | 直接处理bio,返回0 | — | 调用blk_alloc_queue,提供make_request_fn
    RAID或Device Mapper设备 | 重定向bio,返回非零值 | — | 调用blk_alloc_queue,提供make_request_fn
     
    blk_init_queue原型:
     1 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)  /* convenience wrapper around blk_init_queue_node() */
     2 {
     3     return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);  /* forwards rfn and lock, passing NUMA_NO_NODE as node_id */
     4 }
     5 EXPORT_SYMBOL(blk_init_queue);
     6 
     7 struct request_queue *
     8 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)  /* allocate a queue, then initialise it; returns NULL on failure */
     9 {
    10     struct request_queue *uninit_q, *q;
    11     uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);  /* step 1: allocate the queue, passing node_id through */
    12     if (!uninit_q)
    13         return NULL;  /* allocation failed */
    14     q = blk_init_allocated_queue(uninit_q, rfn, lock);  /* step 2: initialise the allocated queue */
    15     if (!q)
    16         blk_cleanup_queue(uninit_q);  /* initialisation failed: clean up the queue allocated above */
    17     return q;  /* NULL here when initialisation failed */
    18 }
    19 EXPORT_SYMBOL(blk_init_queue_node);
    20 
    21 struct request_queue *
    22 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
    23              spinlock_t *lock)  /* initialise an already-allocated queue; returns q, or NULL on failure */
    24 {
    25     if (!q)
    26         return NULL;
    27     if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
    28         return NULL;  /* blk_init_rl() returned non-zero */
    29     q->request_fn        = rfn;
    30     q->prep_rq_fn        = NULL;
    31     q->unprep_rq_fn        = NULL;
    32     q->queue_flags        |= QUEUE_FLAG_DEFAULT;
    33     /* Override internal queue lock with supplied lock pointer */
    34     if (lock)
    35         q->queue_lock        = lock;
    36     /*
    37      * This also sets hw/phys segments, boundary and size
    38      */
    39     blk_queue_make_request(q, blk_queue_bio);  // queues set up through blk_init_queue get blk_queue_bio bound as their bio handler
    40     q->sg_reserved_size = INT_MAX;
    41     /* init elevator */
    42     if (elevator_init(q, NULL))    // initialise the I/O scheduler; a non-zero return is treated as failure
    43         return NULL;
    44     return q;
    45 }
    46 EXPORT_SYMBOL(blk_init_allocated_queue);

    下面来跟踪blk_queue_bio函数:

      1 void blk_queue_bio(struct request_queue *q, struct bio *bio)  /* merge bio into an existing request, or build a new request for it and queue it */
      2 {
      3     const bool sync = !!(bio->bi_rw & REQ_SYNC);
      4     struct blk_plug *plug;
      5     int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
      6     struct request *req;
      7     unsigned int request_count = 0;
      8     /*
      9      * low level driver can indicate that it wants pages above a
     10      * certain limit bounced to low memory (ie for highmem, or even
     11      * ISA dma in theory)
     12      */
     13     blk_queue_bounce(q, &bio);    // create a bounce buffer if one is needed
     14     if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
     15         bio_endio(bio, -EIO);
     16         return;
     17     }
     18     if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
     19         spin_lock_irq(q->queue_lock);
     20         where = ELEVATOR_INSERT_FLUSH;
     21         goto get_rq;
     22     }
     23     /*
     24      * Check if we can merge with the plugged list before grabbing any locks
     25      * (i.e. a merge is attempted first)
     26      */
     27     if (attempt_plug_merge(q, bio, &request_count))
     28         return;
     29     spin_lock_irq(q->queue_lock);
     30     el_ret = elv_merge(q, &req, bio);    // ask whether the bio can be merged into an existing request
     31     // a possible merge is either a back merge or a front merge
     32     if (el_ret == ELEVATOR_BACK_MERGE) {
     33         if (bio_attempt_back_merge(q, req, bio)) {
     34             elv_bio_merged(q, req, bio);    // reached only when bio_attempt_back_merge() accepted the bio into req
     35             if (!attempt_back_merge(q, req))    // after the bio merge, req itself may be mergeable with another request
     36                 elv_merged_request(q, req, el_ret);
     37             goto out_unlock;
     38         }
     39     } else if (el_ret == ELEVATOR_FRONT_MERGE) {
     40         if (bio_attempt_front_merge(q, req, bio)) {
     41             elv_bio_merged(q, req, bio);
     42             if (!attempt_front_merge(q, req))
     43                 elv_merged_request(q, req, el_ret);
     44             goto out_unlock;
     45         }
     46     }
     47 // no merge was possible: build a new request from the bio
     48 get_rq:
     49     /*
     50      * This sync check and mask will be re-done in init_request_from_bio(),
     51      * but we need to set it earlier to expose the sync flag to the
     52      * rq allocator and io schedulers.
     53      */
     54     rw_flags = bio_data_dir(bio);
     55     if (sync)
     56         rw_flags |= REQ_SYNC;
     57     /*
     58      * Grab a free request. This is might sleep but can not fail.
     59      * Returns with the queue unlocked.
     60      */
     61     req = get_request(q, rw_flags, bio, GFP_NOIO);    // obtain a request
     62     if (unlikely(!req)) {
     63         bio_endio(bio, -ENODEV);    /* @q is dead */
     64         goto out_unlock;
     65     }
     66     /*
     67      * After dropping the lock and possibly sleeping here, our request
     68      * may now be mergeable after it had proven unmergeable (above).
     69      * We don't worry about that case for efficiency. It won't happen
     70      * often, and the elevators are able to handle it.
     71      */
     72     init_request_from_bio(req, bio);    // fill in the request from the bio; it is queued further below
     73     if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
     74         req->cpu = raw_smp_processor_id();
     75     plug = current->plug;
     76     // plugging/unplugging: if the current task has a plug, the request is batched on its list
     77     if (plug) {
     78         /*
     79          * If this is the first request added after a plug, fire
     80          * of a plug trace. If others have been added before, check
     81          * if we have multiple devices in this plug. If so, make a
     82          * note to sort the list before dispatch.
     83          */
     84         if (list_empty(&plug->list))
     85             trace_block_plug(q);
     86         else {
     87             if (request_count >= BLK_MAX_REQUEST_COUNT) {
     88                 blk_flush_plug_list(plug, false);
     89                 trace_block_plug(q);
     90             }
     91         }
     92         list_add_tail(&req->queuelist, &plug->list);
     93         drive_stat_acct(req, 1);
     94     } else {
     95         spin_lock_irq(q->queue_lock);
     96         add_acct_request(q, req, where);  // insert the request at position `where` (ELEVATOR_INSERT_SORT, or ELEVATOR_INSERT_FLUSH for FLUSH/FUA bios)
     97         __blk_run_queue(q);
     98 out_unlock:  /* target of the `goto out_unlock` jumps above; note the label sits inside the else branch */
     99         spin_unlock_irq(q->queue_lock);
    100     }
    101 }
    102 EXPORT_SYMBOL_GPL(blk_queue_bio);    /* for device mapper only */

    第13行,blk_queue_bounce创建一个反弹缓冲区。通常只有当驱动尝试在外围设备不可达到的地址(例如高端内存)上执行DMA时,才需要这样做。创建反弹缓冲区后,数据要在原缓冲区和反弹缓冲区之间进行与读写方向对应的复制。毫无疑问,使用反弹缓冲区会降低性能,但也没有其他办法。

    所谓反弹,实际上是分配一个新的bio描述符,它和原始bio的segment一一对应。如果原始bio的segment使用的页面在DMA内存范围外,则分配一个在DMA范围内的页面,赋给新的bio对应的segment。对于写操作,需要将旧bio页面的内容复制到新的bio中。如果原始的bio的segment使用的页面在DMA范围内,则将新的bio指向同一地方。

    最后将原始bio保存在新的bio的bi_private域中,并设置新bio的完成回调函数。

    接下来交给IO调度器,由它负责合并和排序请求。合并是指将对磁盘上连续位置的请求合并为一个,通过一次SCSI命令完成。排序是将多个请求对磁盘上的访问位置顺序重新排列,使得磁头尽可能向一个方向移动。请求的合并和排序是在SCSI设备的请求队列描述符上进行的。

     1 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
     2 {
     3     struct elevator_queue *e = q->elevator;
     4     struct request *__rq;
     5     int ret;
     6 
     7     /*
     8      * Levels of merges:
     9      *     nomerges:  No merges at all attempted
    10      *     noxmerges: Only simple one-hit cache try
    11      *     merges:       All merge tries attempted
    12      */
    13     if (blk_queue_nomerges(q))    // queue is flagged nomerges: return without attempting any merge
    14         return ELEVATOR_NO_MERGE;
    15 
    16     /*
    17      * First try one-hit cache.
    18      */
    19     // if the queue has a cached last_merge request, try merging with it via blk_try_merge; on success that request is returned through *req
    20     if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
    21         ret = blk_try_merge(q->last_merge, bio);
    22         if (ret != ELEVATOR_NO_MERGE) {
    23             *req = q->last_merge;
    24             return ret;
    25         }
    26     }
    27 
    28      // queue is flagged noxmerges: skip the "extended" merge attempts
    29     if (blk_queue_noxmerges(q))
    30         return ELEVATOR_NO_MERGE;
    31 
    32     /*
    33      * See if our hash lookup can find a potential backmerge.
    34      * The code from here on is the so-called "extended" merge attempt, in two parts:
    35      * the first applies to every I/O scheduler, the second is scheduler-specific
    36      */
    37     __rq = elv_rqhash_find(q, bio->bi_sector);
    38     if (__rq && elv_rq_merge_ok(__rq, bio)) {
    39         *req = __rq;
    40         return ELEVATOR_BACK_MERGE;
    41     }
    42 
    43     /*
    44      * The scheduler-specific merge is done through the elevator_merge_fn callback in the elevator's ops table
    45      */
    46     if (e->type->ops.elevator_merge_fn)
    47         return e->type->ops.elevator_merge_fn(q, req, bio);
    48 
    49     return ELEVATOR_NO_MERGE;
    50 }

    如果我们的请求不能合并到现有的request中,那么就要新申请一个request描述符,根据bio对它进行初始化,并添加到IO调度器队列。

    最后Linux块设备层采用蓄流/泄流技术来改进吞吐量,蓄流是为了将请求合并和排序,然后一起泄流,泄流函数为__blk_run_queue(q)

    /**
    * __blk_run_queue - run a single device queue
    * @q:    The queue to run
    *
    * Description:
    *    See @blk_run_queue. This variant must be called with the queue lock
    *    held and interrupts disabled.
    */
    void __blk_run_queue(struct request_queue *q)
    {
        /* A stopped queue is left alone; otherwise defer to the unconditional variant. */
        if (unlikely(blk_queue_stopped(q)))
            return;
        __blk_run_queue_uncond(q);
    }
    
    
    
    /**
    * __blk_run_queue_uncond - run a queue whether or not it has been stopped
    * @q:    The queue to run
    *
    * Description:
    *    Invoke request handling on a queue if there are any pending requests.
    *    May be used to restart request handling after a request has completed.
    *    This variant runs the queue whether or not the queue has been
    *    stopped. Must be called with the queue lock held and interrupts
    *    disabled. See also @blk_run_queue.
    */
    inline void __blk_run_queue_uncond(struct request_queue *q)
    {
        /* A dead queue is never run. */
        if (unlikely(blk_queue_dead(q)))
            return;
        /*
         * Some request_fn implementations, e.g. scsi_request_fn(), unlock
         * the queue lock internally. As a result multiple threads may be
         * running such a request function concurrently. Keep track of the
         * number of active request_fn invocations such that blk_drain_queue()
         * can wait until all these request_fn calls have finished.
         */
        q->request_fn_active++;
        q->request_fn(q);    // for SCSI devices this callback is scsi_request_fn, the so-called SCSI strategy routine
        q->request_fn_active--;
    }
    __blk_run_queue

    对于SCSI设备,在为它分配请求队列时,将请求队列的request_fn回调函数实例化为scsi_request_fn,也就是通常所说的SCSI策略例程。

  • 相关阅读:
    简单理解桶排序
    实现js的类似alert效果的函数
    简单理解插入排序
    一个js简单的日历显示效果的函数
    详解一个自己原创的正则匹配IP的表达式
    一个简单的js实现倒计时函数
    简单理解冒泡排序
    简单理解js的this
    vue项目分享html页面(服务器只能内网访问)
    vue项目移动端查看、分享pdf(服务器只能内网访问)
  • 原文地址:https://www.cnblogs.com/luxiaodai/p/9257021.html
Copyright © 2011-2022 走看看