zoukankan      html  css  js  c++  java
  • 磁盘性能统计

    iostat统计磁盘信息的时候,使用的是/proc/diskstats 。而/proc/diskstats是谁在写入呢?

    主要数据结构:

    //genhd.h
    /*
     * Per-partition I/O counters. diskstats_show() reads these through
     * part_stat_read() and prints them as the statistic fields of
     * /proc/diskstats. The two-element arrays are indexed by READ / WRITE.
     */
    struct disk_stats {
        unsigned long sectors[2];    /* READs and WRITEs */
        unsigned long ios[2]; // I/O counts (printed as fields 1 and 5)
        unsigned long merges[2]; // merged requests (printed as fields 2 and 6)
        unsigned long ticks[2]; // time spent on reads/writes, a jiffies delta; printed in ms as fields 4 and 8
        unsigned long io_ticks; // jiffies during which I/O was in progress; printed in ms as field 10
        unsigned long time_in_queue; // weighted I/O time; printed in ms as field 11
    };

    proc初始化:

    //block/genhd.c
    /*
     * Init hook that creates the two procfs entries "diskstats" and
     * "partitions" (mode 0, parent NULL), each bound to its own
     * file_operations table.
     *
     * Always returns 0; the proc_create() results are not checked.
     */
    static int __init proc_genhd_init(void)
    {
        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
        proc_create("partitions", 0, NULL, &proc_partitions_operations);
        return 0;
    }
    /*
     * File operations registered for the "diskstats" proc entry. Only
     * .open is specific to diskstats; read, llseek and release are
     * delegated to the seq_file helpers seq_read/seq_lseek/seq_release.
     */
    static const struct file_operations proc_diskstats_operations = {
        .open        = diskstats_open,
        .read        = seq_read,
        .llseek        = seq_lseek,
        .release    = seq_release,
    };
    /*
     * .open handler of proc_diskstats_operations: binds the diskstats_op
     * sequence operations to the file via seq_open() and returns
     * seq_open()'s result unchanged. The inode argument is not used.
     */
    static int diskstats_open(struct inode *inode, struct file *file)
    {
        return seq_open(file, &diskstats_op);
    }
    /*
     * Sequence operations handed to seq_open() by diskstats_open().
     * start/next/stop are the disk_seqf_* helpers (not shown in this
     * excerpt); show is diskstats_show(), which produces the actual text.
     */
    static const struct seq_operations diskstats_op = {
        .start    = disk_seqf_start,
        .next    = disk_seqf_next,
        .stop    = disk_seqf_stop,
        .show    = diskstats_show
    };

    看到,diskstats_show这个函数才是关键:

    static int diskstats_show(struct seq_file *seqf, void *v)
    {
        ......
         disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
        while ((hd = disk_part_iter_next(&piter))) {
            cpu = part_stat_lock();
            part_round_stats(cpu, hd);
            part_stat_unlock();
            seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
                   "%u %lu %lu %lu %u %u %u %u
    ",
                   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                   disk_name(gp, hd->partno, buf),
                   part_stat_read(hd, ios[READ]),
                   part_stat_read(hd, merges[READ]),
                   part_stat_read(hd, sectors[READ]),
                   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                   part_stat_read(hd, ios[WRITE]),
                   part_stat_read(hd, merges[WRITE]),
                   part_stat_read(hd, sectors[WRITE]),
                   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                   part_in_flight(hd),
                   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
                   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
                );
        }
        disk_part_iter_exit(&piter);
    }

    /proc/diskstats各列的具体含义参考下面:

    $cat /proc/diskstats

    22 0 hdc 159807 57894 6328277 1476593 179991 467858 5184662 2664218 0 886604 4140851

    $cat /sys/block/hdc/stat

    159807 57894 6328277 1476593 179989 467844 5184534 2664218 0 886604 4140851


    /proc/diskstats文件比/sys/block/hdc/stat文件多3个域,从左至右分别对应主设备号、次设备号和设备名称。后续的11个域在这两个文件里是相同的,它们的含义将在下面解释。除了第9个域,所有的域都是从启动时开始的累积值。

    第1个域:读磁盘的次数,成功完成读的总次数。

    第2个域:合并读次数;第6个域:合并写次数。为了提高效率,相邻的读或写请求可能会被合并。例如两次4K的读在最终提交给磁盘之前可能被合并成一次8K的读,此时只计为(并排队为)一次I/O操作。这两个域可以让你了解这种合并发生得有多频繁。

    第3个域:读扇区的次数,成功读过的扇区总次数。

    第4个域:读花费的毫秒数,这是所有读操作所花费的毫秒数(用__make_request()到end_that_request_last()测量)。

    第5个域:写完成的次数,成功写完成的总次数。

    第7个域:写扇区的次数,成功写扇区总次数。

    第8个域:写花费的毫秒数,这是所有写操作所花费的毫秒数(用__make_request()到end_that_request_last()测量)。

    第9个域:当前正在进行的I/O数量,这是唯一一个会归零的域。当请求被交给相应的request_queue_t时增加,请求完成时减小。

    第10个域:花在I/O操作上的毫秒数,只要第9个域不为0,这个域就会增长。

    第11个域:花在I/O操作上的加权毫秒数。在每次I/O开始、I/O结束、I/O合并时,这个域都会增加,增量为正在进行的I/O数(第9个域)乘以花在I/O上的毫秒数。它可以方便地同时衡量I/O完成时间和可能正在累积的积压请求量。

    而驱动层需要怎么提供这些数据呢?driver需要调用类似这样的一组函数:

    part_stat_inc、part_stat_add、__part_stat_add(其中part_stat_add是调用 __part_stat_add,只不过它同时操作partition)

    iostat是怎么根据/proc/diskstats来得到各项数据呢?

     1 //iostat.c function read_diskstats_stat
     2 if ((fp = fopen(DISKSTATS, "r")) == NULL)
     3       return;
     4 
     5    while (fgets(line, 256, fp) != NULL) {
     6 
     7       /* major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq */
     8       i = sscanf(line, "%u %u %s %lu %lu %llu %lu %lu %lu %llu %lu %lu %lu %lu",
     9          &major, &minor, dev_name,
    10          &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios, &rd_ticks_or_wr_sec,
    11          &wr_ios, &wr_merges, &wr_sec, &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks);
    12 
    13       if (i == 14) {
    14      /* Device */
    15      sdev.rd_ios     = rd_ios;
    16      sdev.rd_merges  = rd_merges_or_rd_sec;
    17      sdev.rd_sectors = rd_sec_or_wr_ios;
    18      sdev.rd_ticks   = rd_ticks_or_wr_sec;
    19      sdev.wr_ios     = wr_ios;
    20      sdev.wr_merges  = wr_merges;
    21      sdev.wr_sectors = wr_sec;
    22      sdev.wr_ticks   = wr_ticks;
    23      sdev.ios_pgr    = ios_pgr;
    24      sdev.tot_ticks  = tot_ticks;
    25      sdev.rq_ticks   = rq_ticks;
    26       }
    27       else if (i == 7) {
    28      /* Partition */
    29      if (DISPLAY_EXTENDED(flags) || (!dlist_idx && !DISPLAY_PARTITIONS(flags)))
    30         continue;
    31 
    32      sdev.rd_ios     = rd_ios;
    33      sdev.rd_sectors = rd_merges_or_rd_sec;
    34      sdev.wr_ios     = rd_sec_or_wr_ios;
    35      sdev.wr_sectors = rd_ticks_or_wr_sec;
    36       }
    37       else
    38      /* Unknown entry: Ignore it */
    39      continue;
     1 void write_ext_stat(int curr, unsigned long long itv, int flags, int fctr,
     2             struct io_hdr_stats *shi, struct io_stats *ioi,
     3             struct io_stats *ioj)
     4 {
     5    unsigned long long rd_sec, wr_sec;
     6    double tput, util, await, svctm, arqsz, nr_ios;
     7     
     8    /*
     9     * Counters overflows are possible, but don't need to be handled in
    10     * a special way: the difference is still properly calculated if the
    11     * result is of the same type as the two values.
    12     * Exception is field rq_ticks which is incremented by the number of
    13     * I/O in progress times the number of milliseconds spent doing I/O.
    14     * But the number of I/O in progress (field ios_pgr) happens to be
    15     * sometimes negative...
    16     */
    17    nr_ios = (ioi->rd_ios - ioj->rd_ios) + (ioi->wr_ios - ioj->wr_ios);
    18    tput = ((double) nr_ios) * HZ / itv;
    19    util = S_VALUE(ioj->tot_ticks, ioi->tot_ticks, itv);
    20    svctm = tput ? util / tput : 0.0;
    21    /*
    22     * Kernel gives ticks already in milliseconds for all platforms
    23     * => no need for further scaling.
    24     */
    25    await = nr_ios ?
    26       ((ioi->rd_ticks - ioj->rd_ticks) + (ioi->wr_ticks - ioj->wr_ticks)) /
    27       nr_ios : 0.0;
    28 
    29    rd_sec = ioi->rd_sectors - ioj->rd_sectors;
    30    if ((ioi->rd_sectors < ioj->rd_sectors) && (ioj->rd_sectors <= 0xffffffff))
    31       rd_sec &= 0xffffffff;
    32    wr_sec = ioi->wr_sectors - ioj->wr_sectors;
    33    if ((ioi->wr_sectors < ioj->wr_sectors) && (ioj->wr_sectors <= 0xffffffff))
    34       wr_sec &= 0xffffffff;
    35 
    36    arqsz = nr_ios ? (rd_sec + wr_sec) / nr_ios : 0.0;
    37 
    38    /*      DEV   rrq/s wrq/s   r/s   w/s  rsec  wsec  rqsz  qusz await svctm %util */
    39    printf("%-13s %8.2f %8.2f %7.2f %7.2f %8.2f %8.2f %8.2f %8.2f %7.2f %6.2f %6.2f
    ",
    40       shi->name,
    41       S_VALUE(ioj->rd_merges, ioi->rd_merges, itv),
    42       S_VALUE(ioj->wr_merges, ioi->wr_merges, itv),
    43       S_VALUE(ioj->rd_ios, ioi->rd_ios, itv),
    44       S_VALUE(ioj->wr_ios, ioi->wr_ios, itv),
    45       ll_s_value(ioj->rd_sectors, ioi->rd_sectors, itv) / fctr,
    46       ll_s_value(ioj->wr_sectors, ioi->wr_sectors, itv) / fctr,
    47       arqsz,
    48       S_VALUE(ioj->rq_ticks, ioi->rq_ticks, itv) / 1000.0,
    49       await,
    50       /* The ticks output is biased to output 1000 ticks per second */
    51       svctm,
    52       /* Again: Ticks in milliseconds */
    53       util / 10.0);
    54 }
  • 相关阅读:
    集合set
    字典
    元组
    列表
    for循环
    Windows调试2.异常产生详细流程
    双机环境搭建
    Windows调试1.WinDbg基本使用-异常基础知识
    PE基础7-HOOK练习
    PE基础6_远程线程注入-HOOK(消息-InLine-IAT)
  • 原文地址:https://www.cnblogs.com/hbt19860104/p/3457929.html
Copyright © 2011-2022 走看看