zoukankan      html  css  js  c++  java
  • yolov2源码分析

    分析过程
    首先我们从yolo的训练命令开始分析(yolo所基于的darknet框架的源码是用C语言写的):

    ./darknet detector train cfg/voc.data cfg/yolo-voc.cfg darknet19_448.conv.23

    从这里我们可以看出yolo主函数main中的参数argv[]在其中对应的值分别是: argv[0] -> ./darknet, argv[1] -> detector, argv[2] -> train, argv[3] -> cfg/voc.data, argv[4] -> cfg/yolo-voc.cfg, argv[5] -> darknet19_448.conv.23。可执行文件名为darknet,它的主函数main在examples/darknet.c中,让我们来看一下主函数:

    /*
     * Entry point of the darknet command-line tool.
     *
     * The global gpu_index is set first: it is read from "-i" (default 0),
     * forced to -1 when "-nogpu" is present, and is always -1 when the program
     * is built without GPU defined. With GPU defined and a non-negative index,
     * cuda_set_device() is called.
     *
     * argv[1] then selects the sub-command; each branch forwards either the
     * whole argument vector or selected positional arguments to the matching
     * routine. An unknown sub-command is reported on stderr.
     *
     * Returns 0 on every path, including the usage and unknown-command paths.
     *
     * NOTE(review): several branches read argv[2]..argv[7] without checking
     * argc, so a sub-command given too few arguments reads past the argument
     * vector - confirm whether the callees tolerate NULL before adding guards.
     */
    int main(int argc, char **argv)
    {
        //test_resize("data/bad.jpg");
        //test_box();
        //test_convolutional_layer();
        if(argc < 2){
            fprintf(stderr, "usage: %s <function>\n", argv[0]);
            return 0;
        }
        gpu_index = find_int_arg(argc, argv, "-i", 0);
        if(find_arg(argc, argv, "-nogpu")) {
            gpu_index = -1;
        }

    #ifndef GPU
        gpu_index = -1;
    #else
        if(gpu_index >= 0){
            cuda_set_device(gpu_index);
        }
    #endif

        if (0 == strcmp(argv[1], "average")){
            average(argc, argv);
        } else if (0 == strcmp(argv[1], "yolo")){
            run_yolo(argc, argv);
        } else if (0 == strcmp(argv[1], "voxel")){
            run_voxel(argc, argv);
        } else if (0 == strcmp(argv[1], "super")){
            run_super(argc, argv);
        } else if (0 == strcmp(argv[1], "lsd")){
            run_lsd(argc, argv);
        } else if (0 == strcmp(argv[1], "detector")){
            run_detector(argc, argv);
        } else if (0 == strcmp(argv[1], "detect")){
            // Shortcut for detection with the COCO data file; the image
            // path (argv[4]) is optional.
            float thresh = find_float_arg(argc, argv, "-thresh", .24);
            char *filename = (argc > 4) ? argv[4]: 0;
            char *outfile = find_char_arg(argc, argv, "-out", 0);
            int fullscreen = find_arg(argc, argv, "-fullscreen");
            test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
        } else if (0 == strcmp(argv[1], "cifar")){
            run_cifar(argc, argv);
        } else if (0 == strcmp(argv[1], "go")){
            run_go(argc, argv);
        } else if (0 == strcmp(argv[1], "rnn")){
            run_char_rnn(argc, argv);
        } else if (0 == strcmp(argv[1], "vid")){
            run_vid_rnn(argc, argv);
        } else if (0 == strcmp(argv[1], "coco")){
            run_coco(argc, argv);
        } else if (0 == strcmp(argv[1], "classify")){
            predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
        } else if (0 == strcmp(argv[1], "classifier")){
            run_classifier(argc, argv);
        } else if (0 == strcmp(argv[1], "regressor")){
            run_regressor(argc, argv);
        } else if (0 == strcmp(argv[1], "segmenter")){
            run_segmenter(argc, argv);
        } else if (0 == strcmp(argv[1], "art")){
            run_art(argc, argv);
        } else if (0 == strcmp(argv[1], "tag")){
            run_tag(argc, argv);
        } else if (0 == strcmp(argv[1], "compare")){
            run_compare(argc, argv);
        } else if (0 == strcmp(argv[1], "dice")){
            run_dice(argc, argv);
        } else if (0 == strcmp(argv[1], "writing")){
            run_writing(argc, argv);
        } else if (0 == strcmp(argv[1], "3d")){
            composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
        } else if (0 == strcmp(argv[1], "test")){
            test_resize(argv[2]);
        } else if (0 == strcmp(argv[1], "captcha")){
            run_captcha(argc, argv);
        } else if (0 == strcmp(argv[1], "nightmare")){
            run_nightmare(argc, argv);
        } else if (0 == strcmp(argv[1], "rgbgr")){
            rgbgr_net(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "reset")){
            reset_normalize_net(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "denormalize")){
            denormalize_net(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "statistics")){
            statistics_net(argv[2], argv[3]);
        } else if (0 == strcmp(argv[1], "normalize")){
            normalize_net(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "rescale")){
            rescale_net(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "ops")){
            operations(argv[2]);
        } else if (0 == strcmp(argv[1], "speed")){
            speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
        } else if (0 == strcmp(argv[1], "oneoff")){
            oneoff(argv[2], argv[3], argv[4]);
        } else if (0 == strcmp(argv[1], "oneoff2")){
            oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
        } else if (0 == strcmp(argv[1], "partial")){
            partial(argv[2], argv[3], argv[4], atoi(argv[5]));
        } else if (0 == strcmp(argv[1], "visualize")){
            visualize(argv[2], (argc > 3) ? argv[3] : 0);
        } else if (0 == strcmp(argv[1], "mkimg")){
            mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
        } else if (0 == strcmp(argv[1], "imtest")){
            test_resize(argv[2]);
        } else {
            fprintf(stderr, "Not an option: %s\n", argv[1]);
        }
        return 0;
    }
    


    很简单可以看出,主函数就是对于参数argv[1]的一个判断,根据argv[1]的内容来启动不同的程序。让我们继续跟着训练命令走argv[1] = detector时,调用的函数是run_detector,而这个函数在examples/detector.c的最后,让我们再来看看这个函数吧:

    /*
     * Dispatcher for "darknet detector <action> ...".
     *
     * Positional arguments: argv[2] is the action ("test", "train", "valid",
     * "valid2", "recall" or "demo"), argv[3] the data cfg, argv[4] the network
     * cfg, argv[5] the optional weights file and argv[6] the optional input
     * file. Optional flags (-prefix, -thresh, -hier, -c, -s, -avg, -gpus,
     * -out, -clear, -fullscreen, -w, -h, -fps) are read with the find_*_arg
     * helpers and passed on to whichever action needs them.
     *
     * "-gpus" takes a comma-separated list of device ids; without it the
     * single global gpu_index is used. An unrecognised action does nothing.
     *
     * NOTE(review): the guard only requires argc >= 4, so argv[4] (cfg) is
     * NULL when exactly four arguments are given - confirm the callees cope.
     */
    void run_detector(int argc, char **argv)
    {
        char *prefix = find_char_arg(argc, argv, "-prefix", 0);
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
        int cam_index = find_int_arg(argc, argv, "-c", 0);
        int frame_skip = find_int_arg(argc, argv, "-s", 0);
        int avg = find_int_arg(argc, argv, "-avg", 3);
        if(argc < 4){
            fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
            return;
        }
        char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
        char *outfile = find_char_arg(argc, argv, "-out", 0);
        int *gpus = 0;
        int *gpus_owned = 0;    // heap array when -gpus is given, freed before returning
        int gpu = 0;
        int ngpus = 0;
        if(gpu_list){
            printf("%s\n", gpu_list);
            int len = strlen(gpu_list);
            ngpus = 1;
            int i;
            // One id per comma-separated field.
            for(i = 0; i < len; ++i){
                if (gpu_list[i] == ',') ++ngpus;
            }
            gpus_owned = calloc(ngpus, sizeof(int));
            if(!gpus_owned){
                fprintf(stderr, "run_detector: cannot allocate %d GPU ids\n", ngpus);
                return;
            }
            gpus = gpus_owned;
            const char *cursor = gpu_list;
            for(i = 0; i < ngpus; ++i){
                gpus[i] = atoi(cursor);
                // strchr returns NULL after the last field; stop there
                // instead of doing arithmetic on a null pointer.
                cursor = strchr(cursor, ',');
                if(!cursor) break;
                ++cursor;
            }
        } else {
            gpu = gpu_index;
            gpus = &gpu;
            ngpus = 1;
        }

        int clear = find_arg(argc, argv, "-clear");
        int fullscreen = find_arg(argc, argv, "-fullscreen");
        int width = find_int_arg(argc, argv, "-w", 0);
        int height = find_int_arg(argc, argv, "-h", 0);
        int fps = find_int_arg(argc, argv, "-fps", 0);

        char *datacfg = argv[3];
        char *cfg = argv[4];
        char *weights = (argc > 5) ? argv[5] : 0;
        char *filename = (argc > 6) ? argv[6]: 0;
        if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
        else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
        else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
        else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
        else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
        else if(0==strcmp(argv[2], "demo")) {
            // The demo needs the class count and label names from the data cfg.
            list *options = read_data_cfg(datacfg);
            int classes = option_find_int(options, "classes", 2);
            char *name_list = option_find_str(options, "names", "data/names.list");
            char **names = get_labels(name_list);
            demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
        }
        free(gpus_owned);
    }
    

     
    在这里 run_detector的主要作用还是在根据argv[]的值执行不同的函数,其他关于gpu啊,threshold啊之类的我们都可以不用管,这里最重要的是argv[2]的值,根据其值的不同,执行不同函数,这里的test_detector,train_detector这些函数在detector.c中都有定义,并且从名字上我们就可以看出这些函数是干什么的。这里我们依旧跟随之前的训练命令,argv[2] = train,这里让我们来看一下train_detector函数(注:这里是我修改过一部分的,不是原来的代码):

    void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
    {
        list *options = read_data_cfg(datacfg);
        char *train_images = option_find_str(options, "train", "scripts/train.txt");    //训练集路径
        char *backup_directory = option_find_str(options, "backup", "/backup/");        //备份训练结果路径
    
        srand(time(0));
        char *base = basecfg(cfgfile);
        printf("%s
    ", base);
        float avg_loss = -1;
        network *nets = calloc(ngpus, sizeof(network));
    
        srand(time(0));
        int seed = rand();
        int i;
        for(i = 0; i < ngpus; ++i){
            srand(seed);
    #ifdef GPU
            cuda_set_device(gpus[i]);
    #endif
            nets[i] = load_network(cfgfile, weightfile, clear);        //载入网络
            nets[i].learning_rate *= ngpus;
        }
        srand(time(0));
        network net = nets[0];
    
        int imgs = net.batch * net.subdivisions * ngpus;
        printf("Learning Rate: %g, Momentum: %g, Decay: %g
    ", net.learning_rate, net.momentum, net.decay);
        data train, buffer;
    
        layer l = net.layers[net.n - 1];
    
        int classes = l.classes;
        float jitter = l.jitter;
    
        list *plist = get_paths(train_images);
        //int N = plist->size;
        char **paths = (char **)list_to_array(plist);
    
        load_args args = {0};
        args.w = net.w;
        args.h = net.h;
        args.paths = paths;
        args.n = imgs;
        args.m = plist->size;
        args.classes = classes;
        args.jitter = jitter;
        args.num_boxes = l.max_boxes;
        args.d = &buffer;
        args.type = DETECTION_DATA;
        args.threads = 8;
    
        args.angle = net.angle;
        args.exposure = net.exposure;
        args.saturation = net.saturation;
        args.hue = net.hue;
    
        pthread_t load_thread = load_data(args);
        clock_t time;
        int count = 0;
        //while(i*imgs < N*120){
        while(get_current_batch(net) < net.max_batches){
            if(l.random && count++%10 == 0){
                printf("Resizing
    ");
                int dim = (rand() % 10 + 10) * 32;
                if (get_current_batch(net)+200 > net.max_batches) dim = 608;
                //int dim = (rand() % 4 + 16) * 32;
                printf("%d
    ", dim);
                args.w = dim;
                args.h = dim;
    
                pthread_join(load_thread, 0);
                train = buffer;
                free_data(train);
                load_thread = load_data(args);
    
                for(i = 0; i < ngpus; ++i){
                    resize_network(nets + i, dim, dim);
                }
                net = nets[0];
            }
            time=clock();
            pthread_join(load_thread, 0);
            train = buffer;
            load_thread = load_data(args);
    
            /*
            int k;
            for(k = 0; k < l.max_boxes; ++k){
                box b = float_to_box(train.y.vals[10] + 1 + k*5);
                if(!b.x) break;
                printf("loaded: %f %f %f %f
    ", b.x, b.y, b.w, b.h);
            }
            */
            /*
            int zz;
            for(zz = 0; zz < train.X.cols; ++zz){
                image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
                int k;
                for(k = 0; k < l.max_boxes; ++k){
                    box b = float_to_box(train.y.vals[zz] + k*5);
                    printf("%f %f %f %f
    ", b.x, b.y, b.w, b.h);
                    draw_bbox(im, b, 1, 1,0,0);
                }
                show_image(im, "truth11");
                cvWaitKey(0);
                save_image(im, "truth11");
            }
            */
    
            printf("Loaded: %lf seconds
    ", sec(clock()-time));
    
            time=clock();
            float loss = 0;
    #ifdef GPU
            if(ngpus == 1){
                loss = train_network(net, train);
            } else {
                loss = train_networks(nets, ngpus, train, 4);
            }
    #else
            loss = train_network(net, train);
    #endif
            if (avg_loss < 0) avg_loss = loss;
            avg_loss = avg_loss*.9 + loss*.1;
    
            i = get_current_batch(net);
            printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images
    ", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
            if(i%1000==0){
    #ifdef GPU
                if(ngpus != 1) sync_nets(nets, ngpus, 0);
    #endif
                char buff[256];
                sprintf(buff, "%s/%s.backup", backup_directory, base);
                save_weights(net, buff);
            }
            if(i%10000==0 || (i < 1000 && i%100 == 0)){
    #ifdef GPU
                if(ngpus != 1) sync_nets(nets, ngpus, 0);
    #endif
                char buff[256];
                sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
                save_weights(net, buff);
            }
            free_data(train);
        }
    #ifdef GPU
        if(ngpus != 1) sync_nets(nets, ngpus, 0);
    #endif
        char buff[256];
        sprintf(buff, "%s/%s_final.weights", backup_directory, base);
        save_weights(net, buff);
    }
    

     
    这里我们主要关注的是函数开头的read_data_cfg函数、train_images和backup_directory这两个变量,以及for循环中调用的load_network函数:
    read_data_cfg中的参数datacfg在run_detector中可以看出就是argv[3],在本例中对应的就是cfg/voc.data
    train_images是用来指定所要训练的图片集的路径的。
    backup_directory是用来指定训练出来的权值的保存路径的。
    而load_network是用来载入所要训练的网络结构和参数的,从run_detector中可以看出load_network的参数之一cfgfile就是argv[4],在我们这个例子中也就是yolo-voc.cfg

    这里我们先看一下cfg/voc.data(注:这里是我修改过了的,不是原来的)

    classes= 2
    train  = /home/iair339-04/darknet/scripts/train.txt
    valid  = /home/iair339-04/darknet/scripts/2007_test.txt
    names = data/kitti.names
    backup = backup
    


    这里可以看出voc.data是用来指定类别数classes,训练集路径train,测试集路径valid和类别名称names和备份文件路径backup的(so easy)。

    接下来我们来看一下yolo-voc.cfg文件(注:修改过)

    [net]
    # Testing
    #batch=1
    #subdivisions=1
    # Training
     batch=64
     subdivisions=8
    height=416
    width=416
    channels=3
    momentum=0.9
    decay=0.0005
    angle=0
    saturation = 1.5
    exposure = 1.5
    hue=.1
    
    learning_rate=0.001
    burn_in=1000
    max_batches = 80200
    policy=steps
    steps=40000,60000
    scales=.1,.1
    
    [convolutional]
    batch_normalize=1
    filters=32
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [maxpool]
    size=2
    stride=2
    
    [convolutional]
    batch_normalize=1
    filters=64
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [maxpool]
    size=2
    stride=2
    
    [convolutional]
    batch_normalize=1
    filters=128
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=64
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=128
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [maxpool]
    size=2
    stride=2
    
    [convolutional]
    batch_normalize=1
    filters=256
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=128
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=256
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [maxpool]
    size=2
    stride=2
    
    [convolutional]
    batch_normalize=1
    filters=512
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=256
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=512
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=256
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=512
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [maxpool]
    size=2
    stride=2
    
    [convolutional]
    batch_normalize=1
    filters=1024
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=512
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=1024
    size=3
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=512
    size=1
    stride=1
    pad=1
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    filters=1024
    size=3
    stride=1
    pad=1
    activation=leaky
    
    
    #######
    
    [convolutional]
    batch_normalize=1
    size=3
    stride=1
    pad=1
    filters=1024
    activation=leaky
    
    [convolutional]
    batch_normalize=1
    size=3
    stride=1
    pad=1
    filters=1024
    activation=leaky
    
    [route]
    layers=-9
    
    [convolutional]
    batch_normalize=1
    size=1
    stride=1
    pad=1
    filters=64
    activation=leaky
    
    [reorg]
    stride=2
    
    [route]
    layers=-1,-4
    
    [convolutional]
    batch_normalize=1
    size=3
    stride=1
    pad=1
    filters=1024
    activation=leaky
    
    [convolutional]
    size=1
    stride=1
    pad=1
    filters=35    #此处修改:取 num*(classes+coords+1),即 5*(2+4+1)
    activation=linear
    
    
    [region]
    anchors =  1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
    bias_match=1
    classes=2    #此处修改种类
    coords=4
    num=5
    softmax=1
    jitter=.3
    rescore=1
    
    object_scale=5
    noobject_scale=1
    class_scale=1
    coord_scale=1
    
    absolute=1
    thresh = .6
    random=1
    


    这里[net]里面是网络的超参数的设置,而之后的便是yolo v2的网络结构了。

  • 相关阅读:
    android系列7.单元测试学习
    DELPHI DATASNAP 2010 入门操作(2)不写一行代码,绿色三层我也行
    基于Delphi的融合DLL中的窗口
    Delphi环境中编写调用DLL的方法和技巧 【转】
    利用Cookies实现ASP.NET跨域单点登录
    为项目安装添加WEB调用本地应用程序功能
    利用C#动态编译功能实现像Javascript中的Eval的功能来将一段字符串进行数学运算
    多音节单词的重读音节的位置飘雪搜狐博客
    NDK开发详细讲解转载自c101zxg的博客_赤松子耶_新浪博客
    Computer Vision and Image Processing Journal
  • 原文地址:https://www.cnblogs.com/MY0213/p/9856278.html
Copyright © 2011-2022 走看看