zoukankan      html  css  js  c++  java
  • Oprofile分析(android oprofile性能分析)

    一、内核支持:
    make menuconfig

    1、评测菜单中启用 Oprofile,在 .config 文件中设置 CONFIG_PROFILING=y 和 CONFIG_OPROFILE=y
    2、Kernel Feature->[]Enable hardware performance counter support for perf events不要勾选
    3、在boot options->console=ttyFIQ0 androidboot.console=ttyFIQ0 init=/init profile=1
    二、修改源码:将 kernel/arch/arm/oprofile 中的 common.c 文件修改如下

    /**                                
     * @file common.c                                
     *                                
     * @remark Copyright 2004 Oprofile Authors                                
     * @remark Copyright 2010 ARM Ltd.                                
     * @remark Read the file COPYING                                
     *                                
     * @author Zwane Mwaikambo                                
     * @author Will Deacon [move to perf]                                
     */                                
                                    
    #include <linux/cpumask.h>                                
    #include <linux/err.h>                                
    #include <linux/errno.h>                                
    #include <linux/init.h>                                
    #include <linux/mutex.h>                                
    #include <linux/oprofile.h>                                
    #include <linux/perf_event.h>                                
    #include <linux/platform_device.h>                                
    #include <linux/slab.h>                                
    #include <asm/stacktrace.h>                                
    #include <linux/uaccess.h>                                
                                    
    #include <asm/perf_event.h>                                
    #include <asm/ptrace.h>                                
                                    
    #ifdef CONFIG_HW_PERF_EVENTS                                
    /*
     * Per performance monitor configuration as set via oprofilefs.
     *
     * One entry exists per hardware counter; the array is allocated in
     * oprofile_arch_init() and indexed by counter number.
     */
    struct op_counter_config {
        unsigned long count;        /* copied into attr.sample_period by op_perf_setup() */
        unsigned long enabled;      /* zero: op_create_counter() skips this counter */
        unsigned long event;        /* copied into attr.config by op_perf_setup() */
        unsigned long unit_mask;    /* exposed via oprofilefs; not read elsewhere in this file */
        unsigned long kernel;       /* exposed via oprofilefs; not read elsewhere in this file */
        unsigned long user;         /* exposed via oprofilefs; not read elsewhere in this file */
        struct perf_event_attr attr;    /* rebuilt from the fields above by op_perf_setup() */
    };
                                    
    /* Non-zero while a profiling run is active; accessed under op_arm_mutex. */
    static int op_arm_enabled;
    /* Serialises the start, stop, suspend and resume paths. */
    static DEFINE_MUTEX(op_arm_mutex);

    /* Array of perf_num_counters_bak entries, allocated in oprofile_arch_init(). */
    static struct op_counter_config *counter_config;
    /* perf_events[cpu][counter]: the live perf event, or NULL if none exists. */
    static struct perf_event **perf_events[nr_cpumask_bits];
    /* Number of counters, as reported by armpmu_get_max_events(). */
    static int perf_num_counters_bak;
                                    
    /*
     * Overflow callback for oprofile.
     *
     * Looks up which oprofile counter on the current CPU owns @event and
     * records a sample for it. An event that matches no counter slot is
     * reported as spurious and otherwise ignored.
     *
     * @event:  perf event whose counter overflowed
     * @unused: not used by this handler
     * @data:   perf sample data (not used by this handler)
     * @regs:   register state passed on to oprofile_add_sample()
     */
    static void op_overflow_handler(struct perf_event *event, int unused,
                struct perf_sample_data *data, struct pt_regs *regs)
    {
        int id;
        u32 cpu = smp_processor_id();

        /* Map the perf event back to its oprofile counter index. */
        for (id = 0; id < perf_num_counters_bak; ++id)
            if (perf_events[cpu][id] == event)
                break;

        if (id != perf_num_counters_bak)
            oprofile_add_sample(regs, id);
        else
            pr_warning("oprofile: ignoring spurious overflow "
                    "on cpu %u\n", cpu);
    }
                                    
    /*                                
     * Called by op_arm_setup to create perf attributes to mirror the oprofile                                
     * settings in counter_config. Attributes are created as `pinned' events and                                
     * so are permanently scheduled on the PMU.                                
     */                                
    static void op_perf_setup(void)                                
    {                                
        int i;                            
        u32 size = sizeof(struct perf_event_attr);                            
        struct perf_event_attr *attr;                            
                                    
        for (i = 0; i < perf_num_counters_bak; ++i) {                            
            attr = &counter_config[i].attr;                        
            memset(attr, 0, size);                        
            attr->type        = PERF_TYPE_RAW;                
            attr->size        = size;                
            attr->config        = counter_config[i].event;                
            attr->sample_period    = counter_config[i].count;                    
            attr->pinned        = 1;                
        }                            
    }                                
                                    
    /*
     * Create the perf event backing oprofile counter @event on @cpu.
     *
     * Does nothing (and returns 0) when the counter is disabled in
     * counter_config or an event already exists in that slot.
     *
     * Returns 0 on success, the error encoded in the pointer returned by
     * perf_event_create_kernel_counter() on creation failure, or -EBUSY when
     * the event was created but is not in the ACTIVE state.
     */
    static int op_create_counter(int cpu, int event)
    {
        struct perf_event *pevent;

        if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
            return 0;

        pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
                              cpu, NULL,
                              op_overflow_handler);
        if (IS_ERR(pevent))
            return PTR_ERR(pevent);

        if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
            /*
             * The event exists but could not be scheduled. It is not stored
             * in perf_events, so release it here or it would be leaked.
             */
            perf_event_release_kernel(pevent);
            pr_warning("oprofile: failed to enable event %d "
                    "on CPU %d\n", event, cpu);
            return -EBUSY;
        }

        perf_events[cpu][event] = pevent;
        return 0;
    }
                                    
    /*
     * Release the perf event stored for counter @event on @cpu, if there is
     * one, and mark the slot as empty.
     */
    static void op_destroy_counter(int cpu, int event)
    {
        struct perf_event **slot = &perf_events[cpu][event];

        if (*slot == NULL)
            return;

        perf_event_release_kernel(*slot);
        *slot = NULL;
    }
                                    
    /*
     * Called by op_arm_start to create active perf events based on the
     * previously configured attributes.
     *
     * Walks every online CPU and every counter, stopping at the first
     * failure and returning its error code; counters created before the
     * failure are left in place. Returns 0 when all counters were handled.
     */
    static int op_perf_start(void)
    {
        int cpu, idx, err;

        for_each_online_cpu(cpu) {
            for (idx = 0; idx < perf_num_counters_bak; ++idx) {
                err = op_create_counter(cpu, idx);
                if (err)
                    return err;
            }
        }

        return 0;
    }
                                    
    /*                                
     * Called by op_arm_stop at the end of a profiling run.                                
     */                                
    static void op_perf_stop(void)                                
    {                                
        int cpu, event;                            
                                    
        for_each_online_cpu(cpu)                            
            for (event = 0; event < perf_num_counters_bak; ++event)                        
                op_destroy_counter(cpu, event);                    
    }                                
                                    
    char *op_name_from_perf_id(void)                                
    {                                
        enum arm_perf_pmu_ids id = armpmu_get_pmu_id();                            
                                    
        switch (id) {                            
        case ARM_PERF_PMU_ID_XSCALE1:                            
            return "arm/xscale1";                        
        case ARM_PERF_PMU_ID_XSCALE2:                            
            return "arm/xscale2";                        
        case ARM_PERF_PMU_ID_V6:                            
            return "arm/armv6";                        
        case ARM_PERF_PMU_ID_V6MP:                            
            return "arm/mpcore";                        
        case ARM_PERF_PMU_ID_CA8:                            
            return "arm/armv7";                        
        case ARM_PERF_PMU_ID_CA9:                            
            return "arm/armv7-ca9";                        
        default:                            
            return NULL;                        
        }                            
    }                                
                                    
    static int op_arm_create_files(struct super_block *sb, struct dentry *root)                                
    {                                
        unsigned int i;                            
                                    
        for (i = 0; i < perf_num_counters_bak; i++) {                            
            struct dentry *dir;                        
            char buf[4];                        
                                    
            snprintf(buf, sizeof buf, "%d", i);                        
            dir = oprofilefs_mkdir(sb, root, buf);                        
            oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);                        
            oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);                        
            oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);                        
            oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);                        
            oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);                        
            oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);                        
        }                            
                                    
        return 0;                            
    }                                
                                    
    /*
     * oprofile "setup" hook: rebuild the perf attributes of every counter
     * from the current counter_config values. Always returns 0.
     */
    static int op_arm_setup(void)
    {
        /* counter_config is read by op_perf_setup() with oprofilefs_lock held. */
        spin_lock(&oprofilefs_lock);
        op_perf_setup();
        spin_unlock(&oprofilefs_lock);
        return 0;
    }
                                    
    /*
     * oprofile "start" hook: begin a profiling run.
     *
     * Returns -EBUSY when a run is already active, otherwise 0.
     * NOTE(review): the result of op_perf_start() is ignored here, so a
     * counter that fails to start does not fail the run; op_arm_resume()
     * does check it. Confirm whether this is intentional.
     */
    static int op_arm_start(void)
    {
        int ret;

        mutex_lock(&op_arm_mutex);
        if (op_arm_enabled) {
            ret = -EBUSY;
        } else {
            op_perf_start();
            op_arm_enabled = 1;
            ret = 0;
        }
        mutex_unlock(&op_arm_mutex);

        return ret;
    }
                                    
    /*
     * oprofile "stop"/"shutdown" hook: tear down the counters of an active
     * run and mark profiling as disabled. Harmless when no run is active.
     */
    static void op_arm_stop(void)
    {
        mutex_lock(&op_arm_mutex);
        if (op_arm_enabled) {
            op_perf_stop();
            op_arm_enabled = 0;
        }
        mutex_unlock(&op_arm_mutex);
    }
                                    
    #ifdef CONFIG_PM                                
    /*
     * Platform suspend callback: destroy the counters of an active run.
     * op_arm_enabled is left untouched so op_arm_resume() knows to recreate
     * them. Always returns 0.
     */
    static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
    {
        mutex_lock(&op_arm_mutex);
        if (op_arm_enabled) {
            op_perf_stop();
        }
        mutex_unlock(&op_arm_mutex);

        return 0;
    }
                                    
    /*
     * Platform resume callback: recreate the counters if a run was active
     * when the system suspended. If they cannot be recreated, profiling is
     * marked as disabled. Always returns 0.
     */
    static int op_arm_resume(struct platform_device *dev)
    {
        mutex_lock(&op_arm_mutex);
        if (op_arm_enabled) {
            if (op_perf_start() != 0)
                op_arm_enabled = 0;
        }
        mutex_unlock(&op_arm_mutex);

        return 0;
    }
                                    
    /* Platform driver that supplies the suspend/resume callbacks above. */
    static struct platform_driver oprofile_driver = {
        .suspend = op_arm_suspend,
        .resume = op_arm_resume,
        .driver = {
            .name = "arm-oprofile",
        },
    };

    /* Device registered against oprofile_driver by init_driverfs(). */
    static struct platform_device *oprofile_pdev;
                                    
    /*
     * Register the oprofile platform driver and a matching device.
     *
     * Returns 0 on success or a negative error code. If the device cannot be
     * registered, the driver is unregistered again before returning.
     */
    static int __init init_driverfs(void)
    {
        int err = platform_driver_register(&oprofile_driver);

        if (err)
            return err;

        oprofile_pdev = platform_device_register_simple(
                    oprofile_driver.driver.name, 0, NULL, 0);
        if (!IS_ERR(oprofile_pdev))
            return 0;

        /* Device registration failed: roll back the driver registration. */
        err = PTR_ERR(oprofile_pdev);
        platform_driver_unregister(&oprofile_driver);
        return err;
    }
                                    
    /*
     * Undo init_driverfs(): unregister the platform device first, then the
     * driver it was registered against.
     */
    static void  exit_driverfs(void)
    {
        platform_device_unregister(oprofile_pdev);
        platform_driver_unregister(&oprofile_driver);
    }
    #else                                
    /* Without CONFIG_PM nothing is registered: init_driverfs() just reports success. */
    static int __init init_driverfs(void) { return 0; }
    /* Without CONFIG_PM exit_driverfs() expands to an empty statement. */
    #define exit_driverfs() do { } while (0)
    #endif /* CONFIG_PM */                                
                                    
    /*
     * Callback passed to walk_stackframe(): records the pc of @frame while
     * the remaining-depth counter pointed to by @d is non-zero.
     *
     * Returns non-zero once the counter has reached zero, zero otherwise.
     */
    static int report_trace(struct stackframe *frame, void *d)
    {
        unsigned int *remaining = d;

        if (*remaining != 0) {
            oprofile_add_trace(frame->pc);
            --*remaining;
        }

        return *remaining == 0;
    }
                                    
    /*
     * The registers we're interested in are at the end of the variable
     * length saved register structure. The fp points at the end of this
     * structure so the address of this struct is:
     * (struct frame_tail *)(xxx->fp)-1
     */
    struct frame_tail {
        struct frame_tail *fp;      /* saved frame pointer; user_backtrace() follows it */
        unsigned long sp;           /* saved stack pointer; not read in this file */
        unsigned long lr;           /* saved link register; reported as the trace entry */
    } __attribute__((packed));      /* packed: no padding between the three fields */
                                    
    /*
     * Record one user-space stack frame and step to the next one.
     *
     * Copies the frame tail at @tail (plus the one following it) from user
     * memory, adds the saved lr to the oprofile trace and returns the tail of
     * the next frame. Returns NULL when the memory cannot be read or the
     * saved frame pointer does not move towards higher addresses.
     */
    static struct frame_tail *user_backtrace(struct frame_tail *tail)
    {
        struct frame_tail frames[2];
        struct frame_tail *next_fp;

        /* Also check accessibility of one struct frame_tail beyond */
        if (!access_ok(VERIFY_READ, tail, sizeof(frames)))
            return NULL;
        if (__copy_from_user_inatomic(frames, tail, sizeof(frames)))
            return NULL;

        oprofile_add_trace(frames[0].lr);

        /*
         * Frame pointers should strictly progress back up the stack
         * (towards higher addresses); anything else ends the walk.
         */
        next_fp = frames[0].fp;
        if (tail + 1 >= next_fp)
            return NULL;

        return next_fp - 1;
    }
                                    
    /*
     * oprofile "backtrace" hook: record up to @depth stack frames for the
     * context described by @regs.
     *
     * Kernel-mode contexts are unwound with walk_stackframe(); user-mode
     * contexts are walked frame by frame through user_backtrace().
     */
    static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
    {
        struct frame_tail *tail;

        if (!user_mode(regs)) {
            struct stackframe frame;

            frame.fp = regs->ARM_fp;
            frame.sp = regs->ARM_sp;
            frame.lr = regs->ARM_lr;
            frame.pc = regs->ARM_pc;
            walk_stackframe(&frame, report_trace, &depth);
            return;
        }

        tail = ((struct frame_tail *) regs->ARM_fp) - 1;
        while (depth--) {
            /* Stop at a NULL or non-word-aligned frame tail. */
            if (!tail || ((unsigned long) tail & 3))
                break;
            tail = user_backtrace(tail);
        }
    }
                                    
    /*
     * Set up perf-based oprofile support and fill in @ops.
     *
     * Allocates counter_config and one perf_events slot array per possible
     * CPU, registers the driverfs hooks and installs the ARM callbacks.
     *
     * Returns 0 on success, -ENOMEM when an allocation fails, the error from
     * init_driverfs() when registration fails, or -ENODEV when the PMU has no
     * known oprofile cpu_type.
     *
     * On allocation or registration failure everything acquired so far is
     * released and the global pointers are reset to NULL, so a later
     * oprofile_arch_exit() neither frees them twice nor touches freed memory.
     * The -ENODEV case keeps its resources, as before; oprofile_arch_exit()
     * releases them.
     */
    int __init oprofile_arch_init(struct oprofile_operations *ops)
    {
        int cpu, ret = 0;

        perf_num_counters_bak = armpmu_get_max_events();

        counter_config = kcalloc(perf_num_counters_bak,
                sizeof(struct op_counter_config), GFP_KERNEL);

        if (!counter_config) {
            pr_info("oprofile: failed to allocate %d "
                    "counters\n", perf_num_counters_bak);
            return -ENOMEM;
        }

        ret = init_driverfs();
        if (ret)
            goto err_free_config;

        for_each_possible_cpu(cpu) {
            perf_events[cpu] = kcalloc(perf_num_counters_bak,
                    sizeof(struct perf_event *), GFP_KERNEL);
            if (!perf_events[cpu]) {
                pr_info("oprofile: failed to allocate %d perf events "
                        "for cpu %d\n", perf_num_counters_bak, cpu);
                ret = -ENOMEM;
                goto err_free_events;
            }
        }

        ops->backtrace        = arm_backtrace;
        ops->create_files    = op_arm_create_files;
        ops->setup        = op_arm_setup;
        ops->start        = op_arm_start;
        ops->stop        = op_arm_stop;
        ops->shutdown        = op_arm_stop;
        ops->cpu_type        = op_name_from_perf_id();

        if (!ops->cpu_type)
            ret = -ENODEV;
        else
            pr_info("oprofile: using %s\n", ops->cpu_type);

        return ret;

    err_free_events:
        /* Slots that were never allocated are still NULL; kfree(NULL) is a no-op. */
        for_each_possible_cpu(cpu) {
            kfree(perf_events[cpu]);
            perf_events[cpu] = NULL;
        }
        exit_driverfs();
    err_free_config:
        kfree(counter_config);
        counter_config = NULL;
        return ret;
    }
                                    
    void oprofile_arch_exit(void)                                
    {                                
        int cpu, id;                            
        struct perf_event *event;                            
                                    
        if (*perf_events) {                            
            exit_driverfs();                        
            for_each_possible_cpu(cpu) {                        
                for (id = 0; id < perf_num_counters_bak; ++id) {                    
                    event = perf_events[cpu][id];                
                    if (event != NULL)                
                        perf_event_release_kernel(event);            
                }                    
                kfree(perf_events[cpu]);                    
            }                        
        }                            
                                    
        if (counter_config)                            
            kfree(counter_config);                        
    }                                
                                    
    #else                                
    /*
     * Fallback used when CONFIG_HW_PERF_EVENTS is not set: there are no
     * hardware counters to drive, so initialisation always fails with
     * -ENODEV after logging the reason.
     */
    int __init oprofile_arch_init(struct oprofile_operations *ops)
    {
        pr_info("oprofile: hardware counters not available\n");
        return -ENODEV;
    }
    /* Nothing was set up in this configuration, so there is nothing to undo. */
    void oprofile_arch_exit(void) {}
    #endif /* CONFIG_HW_PERF_EVENTS */

    三、相关
    kernel/sched.c--------profile_hit(SCHED_PROFILING, __builtin_return_address(0));
    下面是对profile的解释:Linux -- profile内核版本2.6.18-RC7
    profile只是内核的一个调试性能的工具,这个可以通过menuconfig中的Instrumentation Support->profile打开。
    1. 如何使用profile:
    首先确认内核支持profile,然后在内核启动时加入以下参数:profile=1或者其它参数, 新的内核支持profile=schedule 1
    2. 内核启动后会创建/proc/profile文件,这个文件可以通过readprofile读取,
    如readprofile -m /proc/kallsyms | sort -nr > ~/cur_profile.log,
    或者readprofile -r -m /proc/kallsyms |sort -nr,
    或者readprofile -r && sleep 1 && readprofile -m /proc/kallsyms |sort -nr >~/cur_profile.log
    3. 读取/proc/profile可获得哪些内容?
    根据启动配置profile=?的不同,获取的内容不同:
    如果配置成profile=? 可以获得每个函数执行次数,用来调试函数性能很有用
    如果设置成profile=schedule ?可以获得每个函数调用schedule的次数,用来调试schedule很有用

    四、总结一下:
    opcontrol --init 加载模块, mount /dev/oprofile 创建必需的文件和目录
    opcontrol --no-vmlinux 或者 opcontrol --vmlinux=/boot/vmlinux-`uname -r` 决定是否对 kernel 进行 profiling
    opcontrol --list-events //命令可以查看此结构中支持的事件
    opcontrol --event=CPU_CLK_UNHALTED:5000 --event=DATA_CACHE_MISSES:1000 --event=INSTRUCTION_CACHE_MISSES:1000 --event=MEMORY_REQUESTS:1000
    opcontrol --reset 清除当前会话中的数据
    opcontrol --start 开始 profiling
    ./wls 运行应用程序, oprofile 会对它进行 profiling 通过opcontrol --event=L2_CACHE_MISS:500 --event=L1_DTLB_MISS_AND_L2_DTLB_HIT:500
    opcontrol --dump 把收集到的数据写入文件 --event=.........命令来进行事件的设置;
    opcontrol --stop 停止 profiling 此命令--event参数必须依次给出,无论有多少个,不可分行,切记
    opcontrol -h 关闭守护进程 oprofiled
    opcontrol --shutdown 停止 oprofiled
    opcontrol --deinit 卸载模块

    常用的是 3→7 这几个过程,得到性能数据之后,可以使用 opreport, opstack, opgprof, opannotate几个工具进行分析,我常用的是
    opreport, opannotate 进行分析。

    五、示例与常见问题解决
    Command format: $: target board, #: Host
    1.Extract opf.tar.gz and busybox.tar.gz ($: tar -zxvf ***.tar.gz)
    2.Use adb push opf to /data/opf and busybox to /data/busybox
    (#: adb push opf /data/opf and #: adb push busybox /data/busybox)
    3.Run $:export PATH=$PATH:/data/opf

    $: is command to begin
    4.Run $:opcontrol --init

    Remark: if the target board already has oprofile installed, please delete its files (path: /system/xbin)

    Question:
    grep: /etc/mtab: No such file or directory
    grep: /etc/mtab: No such file or directory
    Kernel support not available, missing opcontrol --init as root

    Answer:
    a.$:mount -o remount rw /
    b.$:mount -o rw,remount -t yaffs2 /dev/block/mmcblk0p9 /system
    c.$:touch /system/etc/mtab or touch /etc/mtab
    d.$:echo nodev /dev/oprofile oprofilefs rw 0 0 > /system/etc/mtab or /etc/mtab
    e.make sure mtab the nodev $:cat /system/etc/mtab

    5.$:opcontrol --init

    Question:
    cat: can't open '/dev/oprofile/cpu_type': No such file or directory
    Unable to open cpu_type file for reading
    Make sure you have done opcontrol --init
    cpu_type 'unset' is not valid
    you should upgrade oprofile or force the use of timer mode

    Answer:
    a.$:mkdir /dev/oprofile (if not exist)
    b.mount -t oprofilefs nodev /dev/oprofile

    6.$:opcontrol --session-dir=/data/first --no-vmlinux --callgraph=5

    7.$:opcontrol --start

    Question:
    oprofile: could not open unit mask description file /home/henry/workspace/opf/pc/build/oprofile/share/oprofile//arm/armv7-ca9/unit_masks
    Using default event: CPU_CYCLES:100000:0:1:1
    oprofile: could not open unit mask description file /home/henry/workspace/opf/pc/build/oprofile/share/oprofile//arm/armv7-ca9/unit_masks

    Answer:
    a.To target board running $:mkdir -p /home/henry/workspace/opf/pc/build/oprofile
    b.Extract share.tar.gz
    c.Use ADB push share (#:adb push share /home/henry/workspace/opf/pc/build/oprofile/share)

    8.Run App to profile
    9.$:opcontrol --dump
    10.$:opcontrol --stop
    11.$:opreport --session-dir=/data/first -l /data/test/test
    $:opreport --session-dir=/data/first -l >log
    $:opreport --session-dir=/data/first -l /data/data/Service/lib/libXXX.so >logXXX
    $:opreport --session-dir=/data/first -l /data/data/Service/lib/libBBB.so >logBBB
    12.$:opcontrol --reset
    13.opcontrol --shutdown

    Remark: $:opcontrol --shutdown and $:opcontrol --init only need to be executed once

  • 相关阅读:
    推荐系统 蒋凡译 第一章 引言 读书笔记
    神经网络与深度学习 邱锡鹏 第5章 卷积神经网络 读书笔记
    神经网络与深度学习 邱锡鹏 第4章 前馈神经网络 读书笔记
    神经网络与深度学习 邱锡鹏 第3章 线性模型 读书笔记
    神经网络与深度学习 邱锡鹏 第2章 机器学习概述 读书笔记
    神经网络与深度学习 邱锡鹏 第1章 绪论 作业
    神经网络与深度学习 邱锡鹏 第1章 绪论 读书笔记
    算法笔记 上机训练实战指南 第13章 专题扩展 学习笔记
    算法笔记 第13章 专题扩展 学习笔记
    算法笔记 上机训练实战指南 第11章 提高篇(5)--动态规划专题 学习笔记
  • 原文地址:https://www.cnblogs.com/wenrenhua08/p/3934300.html
Copyright © 2011-2022 走看看