zoukankan      html  css  js  c++  java
  • caffe-----使用C++ 提取网络中间层特征数据

    最近实验,想要在c++下知道网络中间某一层的特征数据情况,查找了相关资料,记录一下。

    其实在caffe框架里面是包含这种操作的,可以模仿tools/extract_features.cpp中的操作来得到网络中间的特征数据。

    首先看下extract_features.cpp是如何写的

    template<typename Dtype>
    int feature_extraction_pipeline(int argc, char** argv) {
      ::google::InitGoogleLogging(argv[0]);
      const int num_required_args = 7;
      if (argc < num_required_args) {
        LOG(ERROR)<<
        "This program takes in a trained network and an input data layer, and then"
        " extract features of the input data produced by the net.
    "
        "Usage: extract_features  pretrained_net_param"
        "  feature_extraction_proto_file  extract_feature_blob_name1[,name2,...]"
        "  save_feature_dataset_name1[,name2,...]  num_mini_batches  db_type"
        "  [CPU/GPU] [DEVICE_ID=0]
    "
        "Note: you can extract multiple features in one pass by specifying"
        " multiple feature blob names and dataset names separated by ','."
        " The names cannot contain white space characters and the number of blobs"
        " and datasets must be equal.";
        return 1;
      }
      int arg_pos = num_required_args;
    
      arg_pos = num_required_args;
      if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) {
        LOG(ERROR)<< "Using GPU";
        int device_id = 0;
        if (argc > arg_pos + 1) {
          device_id = atoi(argv[arg_pos + 1]);
          CHECK_GE(device_id, 0);
        }
        LOG(ERROR) << "Using Device_id=" << device_id;
        Caffe::SetDevice(device_id);
        Caffe::set_mode(Caffe::GPU);
      } else {
        LOG(ERROR) << "Using CPU";
        Caffe::set_mode(Caffe::CPU);
      }
    
      arg_pos = 0;  // the name of the executable
      std::string pretrained_binary_proto(argv[++arg_pos]);
    
      // Expected prototxt contains at least one data layer such as
      //  the layer data_layer_name and one feature blob such as the
      //  fc7 top blob to extract features.
      /*
       layers {
         name: "data_layer_name"
         type: DATA
         data_param {
           source: "/path/to/your/images/to/extract/feature/images_leveldb"
           mean_file: "/path/to/your/image_mean.binaryproto"
           batch_size: 128
           crop_size: 227
           mirror: false
         }
         top: "data_blob_name"
         top: "label_blob_name"
       }
       layers {
         name: "drop7"
         type: DROPOUT
         dropout_param {
           dropout_ratio: 0.5
         }
         bottom: "fc7"
         top: "fc7"
       }
       */
      std::string feature_extraction_proto(argv[++arg_pos]);
      boost::shared_ptr<Net<Dtype> > feature_extraction_net(
          new Net<Dtype>(feature_extraction_proto, caffe::TEST));
      feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto);//初始化网络
    
      std::string extract_feature_blob_names(argv[++arg_pos]);
      std::vector<std::string> blob_names;
      boost::split(blob_names, extract_feature_blob_names, boost::is_any_of(","));
    
      std::string save_feature_dataset_names(argv[++arg_pos]);
      std::vector<std::string> dataset_names;
      boost::split(dataset_names, save_feature_dataset_names,
                   boost::is_any_of(","));
      CHECK_EQ(blob_names.size(), dataset_names.size()) <<
          " the number of blob names and dataset names must be equal";
      size_t num_features = blob_names.size();
    
      for (size_t i = 0; i < num_features; i++) {
        CHECK(feature_extraction_net->has_blob(blob_names[i]))
            << "Unknown feature blob name " << blob_names[i]
            << " in the network " << feature_extraction_proto;
      }
    
      int num_mini_batches = atoi(argv[++arg_pos]);
    
      std::vector<boost::shared_ptr<db::DB> > feature_dbs;
      std::vector<boost::shared_ptr<db::Transaction> > txns;
      const char* db_type = argv[++arg_pos];
      for (size_t i = 0; i < num_features; ++i) {
        LOG(INFO)<< "Opening dataset " << dataset_names[i];
        boost::shared_ptr<db::DB> db(db::GetDB(db_type));
        db->Open(dataset_names.at(i), db::NEW);
        feature_dbs.push_back(db);
        boost::shared_ptr<db::Transaction> txn(db->NewTransaction());
        txns.push_back(txn);
      }
    
      LOG(ERROR)<< "Extracting Features";
    
      Datum datum;
      std::vector<int> image_indices(num_features, 0);
      for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) {
        feature_extraction_net->Forward();//首先进行前传 这样才能有中间数据
        for (int i = 0; i < num_features; ++i) {
          const boost::shared_ptr<Blob<Dtype> > feature_blob =
            feature_extraction_net->blob_by_name(blob_names[i]);//通过名字查找blob
          int batch_size = feature_blob->num();
          int dim_features = feature_blob->count() / batch_size;
          const Dtype* feature_blob_data;
          for (int n = 0; n < batch_size; ++n) {
            datum.set_height(feature_blob->height());
            datum.set_width(feature_blob->width());
            datum.set_channels(feature_blob->channels());
            datum.clear_data();
            datum.clear_float_data();
            feature_blob_data = feature_blob->cpu_data() +
                feature_blob->offset(n);
            for (int d = 0; d < dim_features; ++d) {
              datum.add_float_data(feature_blob_data[d]);//将feature_blob的数据都保存到datum里
            }
            string key_str = caffe::format_int(image_indices[i], 10);
    
            string out;
            CHECK(datum.SerializeToString(&out));//将datum保存到本地
            txns.at(i)->Put(key_str, out);
            ++image_indices[i];
            if (image_indices[i] % 1000 == 0) {
              txns.at(i)->Commit();
              txns.at(i).reset(feature_dbs.at(i)->NewTransaction());
              LOG(ERROR)<< "Extracted features of " << image_indices[i] <<
                  " query images for feature blob " << blob_names[i];
            }
          }  // for (int n = 0; n < batch_size; ++n)
        }  // for (int i = 0; i < num_features; ++i)
      }  // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index)
      // write the last batch
      for (int i = 0; i < num_features; ++i) {
        if (image_indices[i] % 1000 != 0) {
          txns.at(i)->Commit();
        }
        LOG(ERROR)<< "Extracted features of " << image_indices[i] <<
            " query images for feature blob " << blob_names[i];
        feature_dbs.at(i)->Close();
      }
    
      LOG(ERROR)<< "Successfully extracted the features!";
      return 0;
    }

    主要三个核心步骤:

    1.初始化网络,并进行一次前向传播(Forward),这样中间层的blob里才会有数据:

    net->Forward()

    2.通过blob的名字(即prototxt中层的top字段所指定的名字,而不是层自身的name)来得到blob数据:

    const boost::shared_ptr<Blob<Dtype> > feature_blob = net->blob_by_name(blob_names[i])

    3.blob里面已经保存了所有的特征数据,按照需求取出来就好了。

    count = feature_blob->channels() * feature_blob->height() *  feature_blob->width();
    float* feature_array = new float[count]; 
    const float* feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); // feature data generated from 
                                 // the nth input image within a batch 
    memcpy(feature_array, feature_blob_data, count * sizeof(float)); 
    ...// other operations
    delete [] feature_array;  

    如下是做实验时候的一个例子,提取出了blstm_input中的数据,并保存到了txt里。

    Blob<float>* input_layer = m_net->input_blobs()[0];
      input_layer->Reshape(1, m_channelNum, m_inputGeometry.height, m_inputGeometry.width);
      m_net->Reshape();
      std::vector<cv::Mat> input_channels;
      wrapInputLayer(&input_channels);
      preprocess(img, &input_channels);
      m_net->Forward();
      Blob<float>* output_layer = m_net->output_blobs()[0];  
      int alphabet_size=output_layer->shape(2);
      int time_step=output_layer->shape(0);
    
     vector<int> shape;
    
    const boost::shared_ptr<Blob<float> > blstm_input = m_net->blob_by_name("blstm_input");
        shape = blstm_input->shape();
        for(int i = 0; i < shape.size(); i++)
        {
            cout<<" blstm_input shape:"<<i<<"    :"<<shape[i]<<endl;
        }
        
        const boost::shared_ptr<Blob<float> > lstm1 = m_net->blob_by_name("lstm1");
        shape = lstm1->shape();
        for(int i = 0; i < shape.size(); i++)
        {
            cout<<" lstm1 shape:"<<i<<"    :"<<shape[i]<<endl;
        }
    
        cout<<"==============blob info======="<<endl;
        ofstream of("blstm.txt");
        for(int h = 0; h < 192; h++)
        {
            int count = blstm_input->channels() * blstm_input->height() * blstm_input->width();
            // cout<<"blstm_input->channels():"<<blstm_input->channels()<<"    blstm_input->height():"<<blstm_input->height()
            //     <<"    blstm_input->width():"<<blstm_input->width()<<endl;
            float* feature_array = new float[count]; 
            const float* feature_blob_data = blstm_input->cpu_data() +
                blstm_input->offset(h); // feature data generated from the nth input image within a batch 
            memcpy(feature_array, feature_blob_data, count * sizeof(float)); 
            
            
            for(int i = 0; i < count; i++ )
            {
                if(i && i % 512 == 0)
                {
                    of<<endl;
                }
                of<<"    ["<< h<< ","<<i % 512<< "]:"<<feature_blob_data[i];
            }
            of<<endl;
            delete [] feature_array;
        }
        of.close();

    参考:

    https://stackoverflow.com/questions/40938372/how-to-get-features-from-several-layers-using-c-in-caffe

  • 相关阅读:
    Ubuntu Mysql
    Ubuntu配置大全
    MyEclipse 手动安装 Subclipse 插件
    解决 Ubuntu 11.10 在 RTL8111/8168B 网卡下速度慢的问题
    Ubuntu 多硬盘 LVM 方式安装
    关于编码转换
    Ubuntu 安装时(initramfs) Unable to find a medium containing a live file system错误的解决
    关于 DirectShow 中各个例子的编译转换问题
    ubuntu 中文设置
    javascript 处理鼠标右键事件
  • 原文地址:https://www.cnblogs.com/hellowooorld/p/11348440.html
Copyright © 2011-2022 走看看