  • The sendfile flow on Linux NVMe

    When the sendfile system call is used on an NVMe disk, what path does it actually take through the kernel?

    do_sendfile--->do_splice_direct-->splice_direct_to_actor--->do_splice_to, which for xfs is actually xfs_file_splice_read

    xfs_file_splice_read--->generic_file_splice_read--->__generic_file_splice_read--->mapping->a_ops->readpage--->xfs_vm_readpage-->mpage_readpage--->submit_bio

    In the splice_direct_to_actor function there is a while loop that runs direct_splice_actor; once that returns, execution continues with do_splice_from-->generic_splice_sendpage-->splice_from_pipe-->__splice_from_pipe-->

    splice_from_pipe_feed-->pipe_to_sendpage-->sock_sendpage-->kernel_sendpage-->inet_sendpage-->udp_sendpage (I am using UDP here)

    The stack trace looks like this:

    0xffffffff816093ed : inet_sendpage+0x6d/0xe0 [kernel]
    0xffffffff8156b0bb : kernel_sendpage+0x1b/0x30 [kernel]
    0xffffffff8156b0f7 : sock_sendpage+0x27/0x30 [kernel]
    0xffffffff812329c3 : pipe_to_sendpage+0x63/0xa0 [kernel]
    0xffffffff812328be : splice_from_pipe_feed+0x7e/0x120 [kernel]
    0xffffffff81232e8e : __splice_from_pipe+0x6e/0x90 [kernel]
    0xffffffff8123483e : splice_from_pipe+0x5e/0x90 [kernel]
    0xffffffff81234905 : generic_splice_sendpage+0x15/0x20 [kernel]
    0xffffffff8123368d : do_splice_from+0xad/0xf0 [kernel]
    0xffffffff812336f0 : direct_splice_actor+0x20/0x30 [kernel]
    0xffffffff81233424 : splice_direct_to_actor+0xd4/0x200 [kernel]
    0xffffffff812335b2 : do_splice_direct+0x62/0x90 [kernel]
    0xffffffff81203518 : do_sendfile+0x1d8/0x3c0 [kernel]
    0xffffffff81204b6e : SyS_sendfile64+0x5e/0xb0 [kernel]
    0xffffffff816b78c9 : system_call_fastpath+0x16/0x1b [kernel]

    That is quite a long path.
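
    For reference, here is a minimal user-space sketch that drives this path: open a file on the NVMe-backed xfs mount, connect a UDP socket, and hand both descriptors to sendfile(2). The file path, address, and port below are made-up placeholders.

    /* Minimal sketch: ship a file over a connected UDP socket with sendfile(2).
     * The path and destination below are illustrative only. */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/sendfile.h>
    #include <sys/socket.h>
    #include <sys/stat.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    int main(void)
    {
        int fd = open("/mnt/nvme/test.dat", O_RDONLY);    /* file on the xfs/NVMe mount */
        if (fd < 0) { perror("open"); return 1; }

        struct stat st;
        fstat(fd, &st);

        int sock = socket(AF_INET, SOCK_DGRAM, 0);
        struct sockaddr_in dst = {
            .sin_family = AF_INET,
            .sin_port   = htons(9999),
        };
        inet_pton(AF_INET, "192.168.1.100", &dst.sin_addr);
        if (connect(sock, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
            perror("connect");
            return 1;
        }

        /* UDP caps a datagram at 64 KiB, so keep the per-call count modest. */
        off_t off = 0;
        ssize_t n = sendfile(sock, fd, &off, st.st_size < 1400 ? st.st_size : 1400);
        if (n < 0)
            perror("sendfile");
        else
            printf("sent %zd bytes\n", n);

        close(sock);
        close(fd);
        return 0;
    }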

    In 2.6 kernels, generic_make_request first calls __generic_make_request, and __generic_make_request then invokes the q->make_request_fn callback.

    In the 3.10 kernel, generic_make_request calls back into q->make_request_fn directly; for NVMe, which uses the multi-queue (blk-mq) path, that callback is blk_mq_make_request.

    submit_bio-->generic_make_request--->q->make_request_fn--->blk_mq_make_request
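
    A stripped-down sketch of that dispatch (not the verbatim 3.10 source; the real generic_make_request also queues recursive submissions on current->bio_list to avoid deep recursion through stacked drivers):

    /* Simplified sketch of the 3.10-style dispatch. */
    void generic_make_request(struct bio *bio)
    {
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);

        /* For a blk-mq device such as NVMe this ends up in blk_mq_make_request(). */
        q->make_request_fn(q, bio);
    }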

    The request is then carried out by blk_mq_make_request--->blk_mq_run_hw_queue, blk_mq_map_request, and so on. An excerpt of blk_mq_map_request:

    static struct request *blk_mq_map_request(struct request_queue *q,
                          struct bio *bio,
                          struct blk_map_ctx *data)
    {
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        struct request *rq;
        int rw = bio_data_dir(bio);
        struct blk_mq_alloc_data alloc_data;

        blk_queue_enter_live(q);
        /* Pick the software queue (ctx) of the CPU we are currently running on. */
        ctx = blk_mq_get_ctx(q);
        /* ... rest of the function (mapping ctx to a hardware queue and
         * allocating the request) elided in this excerpt ... */
    }

    /*
     * This assumes per-cpu software queueing queues. They could be per-node
     * as well, for instance. For now this is hardcoded as-is. Note that we don't
     * care about preemption, since we know the ctx's are persistent. This does
     * mean that we can't rely on ctx always matching the currently running CPU.
     */
    static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
    {
        /* get_cpu() disables preemption and returns the current CPU id. */
        return __blk_mq_get_ctx(q, get_cpu());
    }

    static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
                              unsigned int cpu)
    {
        /* One blk_mq_ctx (software queue) is allocated per CPU. */
        return per_cpu_ptr(q->queue_ctx, cpu);
    }
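
    Because blk_mq_get_ctx() calls get_cpu(), preemption stays disabled until the ctx is released. The matching release helper in the same header is roughly the following (a sketch from memory; the ctx itself is persistent, so "putting" it only undoes the get_cpu()):

    static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
    {
        put_cpu();
    }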

     

    In NVMe, how does the queue that a bio is inserted into get mapped to the submission queue (SQ) running on each CPU? This is done with the blk_mq_map_queue function:

    static struct blk_mq_ops nvme_mq_admin_ops = {
        .queue_rq     = nvme_queue_rq,           /* how blk-mq submits a request to the driver */
        .complete     = nvme_complete_rq,        /* completion-queue handling */
        .map_queue    = blk_mq_map_queue,        /* maps a software queue to a hardware queue */
        .init_hctx    = nvme_admin_init_hctx,    /* called when a hardware queue is created;
                                                    binds the NVMe queue to the hardware queue */
        .exit_hctx    = nvme_admin_exit_hctx,
        .init_request = nvme_admin_init_request, /* called when a request is allocated */
        .timeout      = nvme_timeout,            /* called when a request times out */
    };

    static struct blk_mq_ops nvme_mq_ops = {
        .queue_rq     = nvme_queue_rq,
        .complete     = nvme_complete_rq,
        .map_queue    = blk_mq_map_queue,        /* mapping function */
        .init_hctx    = nvme_init_hctx,
        .init_request = nvme_init_request,
        .timeout      = nvme_timeout,
    };
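
    These ops tables are handed to the block layer through a blk_mq_tag_set. A rough sketch of how the NVMe PCI driver registers the I/O ops (the helper name here is made up, and the exact fields and values vary between kernel versions):

    /* Rough sketch based on the mainline NVMe PCI driver; not verbatim. */
    static int nvme_register_io_queues_sketch(struct nvme_dev *dev)
    {
        dev->tagset.ops = &nvme_mq_ops;
        dev->tagset.nr_hw_queues = dev->online_queues - 1;  /* one hctx per NVMe SQ/CQ pair */
        dev->tagset.timeout = NVME_IO_TIMEOUT;
        dev->tagset.numa_node = dev_to_node(dev->dev);
        dev->tagset.queue_depth = dev->q_depth - 1;
        dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
        dev->tagset.driver_data = dev;

        return blk_mq_alloc_tag_set(&dev->tagset);
    }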

    In these ops tables, queue_rq is the function through which blk-mq submits a request to the driver; map_queue defines how a software queue is mapped to a hardware queue; init_hctx is called when a hardware queue is created (this is where an NVMe queue can be bound to the hardware queue); init_request is called when a request is allocated; and timeout is invoked when a request times out.
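
    blk_mq_map_queue itself is small: in kernels of this vintage it essentially indexes the per-CPU mq_map table that was filled in when the queues were set up. A sketch (not a verbatim copy):

    struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
    {
        /* Resolve the hardware queue for this CPU via the cpu -> hw-queue map
         * built during queue initialization. */
        return q->queue_hw_ctx[q->mq_map[cpu]];
    }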

    My understanding here is limited; if you spot any mistakes, please let me know.
  • Original article: https://www.cnblogs.com/10087622blog/p/8067443.html