zoukankan      html  css  js  c++  java
  • 【神经网络与深度学习】Caffe部署中的几个train-test-solver-prototxt-deploy等说明

    一,train_val.prototxt

    复制代码
    name: "CIFAR10_quick"
    layer {
      name: "cifar"
      type: "Data"
      top: "data"
      top: "label"
      include {
        phase: TRAIN
      }
      transform_param {
        # mirror: true
        # mean_file: "examples/cifar10/mean.binaryproto"
        mean_file: "myself/00b/00bmean.binaryproto" 
      }
      data_param {
        # source: "examples/cifar10/cifar10_train_lmdb"
        source: "myself/00b/00b_train_lmdb"
        batch_size: 50
        backend: LMDB
      }
    }
    layer {
      name: "cifar"
      type: "Data"
      top: "data"
      top: "label"
      include {
        phase: TEST
      }
      transform_param {
        # mean_file: "examples/cifar10/mean.binaryproto"
        mean_file: "myself/00b/00bmean.binaryproto"
      }
      data_param {
        # source: "examples/cifar10/cifar10_test_lmdb"
        source: "myself/00b/00b_val_lmdb"
        batch_size: 50
        backend: LMDB
      }
    }
    layer {
      name: "conv1"
      type: "Convolution"
      bottom: "data"
      top: "conv1"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      convolution_param {
        num_output: 32
        # pad: 1
        kernel_size: 4
        stride: 1
        weight_filler {
          type: "gaussian"
          std: 0.0001
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "pool1"
      type: "Pooling"
      bottom: "conv1"
      top: "pool1"
      pooling_param {
        pool: MAX
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "relu1"
      type: "ReLU"
      bottom: "pool1"
      top: "pool1"
    }
    layer {
      name: "conv2"
      type: "Convolution"
      bottom: "pool1"
      top: "conv2"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      convolution_param {
        num_output: 32
        # pad: 2
        kernel_size: 4
        stride: 1
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "relu2"
      type: "ReLU"
      bottom: "conv2"
      top: "conv2"
    }
    layer {
      name: "pool2"
      type: "Pooling"
      bottom: "conv2"
      top: "pool2"
      pooling_param {
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "conv3"
      type: "Convolution"
      bottom: "pool2"
      top: "conv3"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      convolution_param {
        num_output: 32
        # pad: 2
        kernel_size: 4
        stride: 1
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "relu3"
      type: "ReLU"
      bottom: "conv3"
      top: "conv3"
    }
    layer {
      name: "pool3"
      type: "Pooling"
      bottom: "conv3"
      top: "pool3"
      pooling_param {
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "conv4"
      type: "Convolution"
      bottom: "pool3"
      top: "conv4"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      convolution_param {
        num_output: 32
        # pad: 2
        kernel_size: 4
        stride: 1
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "relu4"
      type: "ReLU"
      bottom: "conv4"
      top: "conv4"
    }
    layer {
      name: "pool4"
      type: "Pooling"
      bottom: "conv4"
      top: "pool4"
      pooling_param {
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "ip1"
      type: "InnerProduct"
      bottom: "pool4"
      top: "ip1"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      inner_product_param {
        num_output: 200
        weight_filler {
          type: "gaussian"
          std: 0.1
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "ip2"
      type: "InnerProduct"
      bottom: "ip1"
      top: "ip2"
      param {
        lr_mult: 1
      }
      param {
        lr_mult: 2
      }
      inner_product_param {
        num_output: 3
        weight_filler {
          type: "gaussian"
          std: 0.1
        }
        bias_filler {
          type: "constant"
        }
      }
    }
    layer {
      name: "accuracy"
      type: "Accuracy"
      bottom: "ip2"
      bottom: "label"
      top: "accuracy"
      include {
        phase: TEST
      }
    }
    layer {
      name: "loss"
      type: "SoftmaxWithLoss"
      bottom: "ip2"
      bottom: "label"
      top: "loss"
    }
    复制代码

    二,solver.prototxt

    复制代码
    # The learning rate stays at base_lr (lr_policy: "fixed"); gamma and stepsize below only take effect if lr_policy is switched to "step"
    
    # The train/test net protocol buffer definition
    net: "myself/00b/train_val.prototxt"
    # test_iter specifies how many forward passes the test should carry out.
    # With the TEST batch size of 50 set in train_val.prototxt, 10 test iterations
    # cover 500 validation images.
    test_iter: 10
    # Carry out testing every 70 training iterations.
    test_interval: 70
    # The base learning rate, momentum and the weight decay of the network.
    base_lr: 0.001
    momentum: 0.9
    weight_decay: 0.004
    # The learning rate policy
    lr_policy: "fixed"
    # lr_policy: "step"
    gamma: 0.1
    stepsize: 100
    # Display every 10 iterations
    display: 10
    # The maximum number of iterations
    max_iter: 2000
    # snapshot intermediate results
    # snapshot: 3000
    # snapshot_format: HDF5
     snapshot_prefix: "myself/00b/00b"
    # solver mode: CPU or GPU
    solver_mode: CPU
    复制代码

    三,deploy.prototxt

    复制代码
    name: "CIFAR10_quick"
    layer {
      name: "data"
      type: "Input"
      top: "data"
      input_param { shape: { dim: 1 dim: 3 dim: 101 dim: 101 } }
    }
    layer {
      name: "conv1"
      type: "Convolution"
      bottom: "data"
      top: "conv1"
      convolution_param {
        num_output: 32
        kernel_size: 4
        stride: 1
      }
    }
    layer {
      name: "relu1"
      type: "ReLU"
      bottom: "conv1"
      top: "conv1"
    }
    layer {
      name: "pool1"
      type: "Pooling"
      bottom: "conv1"
      top: "pool1"
      pooling_param {
        pool: MAX
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "conv2"
      type: "Convolution"
      bottom: "pool1"
      top: "conv2"
      convolution_param {
        num_output: 32
        kernel_size: 4
        stride: 1
      }
    }
    layer {
      name: "relu2"
      type: "ReLU"
      bottom: "conv2"
      top: "conv2"
    }
    layer {
      name: "pool2"
      type: "Pooling"
      bottom: "conv2"
      top: "pool2"
      pooling_param {
        # Average pooling, same as the pool2 layer in train_val.prototxt: the
        # deployed net must compute the function the weights were trained with.
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "conv3"
      type: "Convolution"
      bottom: "pool2"
      top: "conv3"
      convolution_param {
        num_output: 32
        kernel_size: 4
        stride: 1
      }
    }
    layer {
      name: "relu3"
      type: "ReLU"
      bottom: "conv3"
      top: "conv3"
    }
    layer {
      name: "pool3"
      type: "Pooling"
      bottom: "conv3"
      top: "pool3"
      pooling_param {
        # Average pooling, same as the pool3 layer in train_val.prototxt: the
        # deployed net must compute the function the weights were trained with.
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "conv4"
      type: "Convolution"
      bottom: "pool3"
      top: "conv4"
      convolution_param {
        num_output: 32
        kernel_size: 4
        stride: 1
      }
    }
    layer {
      name: "relu4"
      type: "ReLU"
      bottom: "conv4"
      top: "conv4"
    }
    layer {
      name: "pool4"
      type: "Pooling"
      bottom: "conv4"
      top: "pool4"
      pooling_param {
        # Average pooling, same as the pool4 layer in train_val.prototxt: the
        # deployed net must compute the function the weights were trained with.
        pool: AVE
        kernel_size: 2
        stride: 2
      }
    }
    layer {
      name: "ip1"
      type: "InnerProduct"
      bottom: "pool4"
      top: "ip1"
      inner_product_param {
        num_output: 200
      }
    }
    layer {
      name: "ip2"
      type: "InnerProduct"
      bottom: "ip1"
      top: "ip2"
      inner_product_param {
        num_output: 3
      }
    }
    layer {
      #name: "loss"
    
      name: "prob"
      type: "Softmax" 
      bottom: "ip2"
      top: "prob"
    
      #top: "loss"
    }
    复制代码

    参考一:

    模型就用程序自带的caffenet模型,位置在 models/bvlc_reference_caffenet/文件夹下, 将需要的两个配置文件,复制到myfile文件夹内

    复制代码
    # sudo cp models/bvlc_reference_caffenet/solver.prototxt examples/myfile/
    # sudo cp models/bvlc_reference_caffenet/train_val.prototxt examples/myfile/ 

    修改train_val.prototxt,只需要修改两个阶段的data层就可以了,其它可以不用管。

    
    
    复制代码
    复制代码
    name: "CaffeNet"
    layer {
      name: "data"
      type: "Data"
      top: "data"
      top: "label"
      include {
        phase: TRAIN
      }
      transform_param {
        mirror: true
        crop_size: 227
        mean_file: "examples/myfile/mean.binaryproto"
      }
      data_param {
        source: "examples/myfile/img_train_lmdb"
        batch_size: 256
        backend: LMDB
      }
    }
    layer {
      name: "data"
      type: "Data"
      top: "data"
      top: "label"
      include {
        phase: TEST
      }
      transform_param {
        mirror: false
        crop_size: 227
        mean_file: "examples/myfile/mean.binaryproto"
      }
      data_param {
        source: "examples/myfile/img_test_lmdb"
        batch_size: 50
        backend: LMDB
      }
    }
    复制代码
     

    实际上就是修改两个data layer的mean_file和source这两个地方,其它都没有变化 。

    修改其中的solver.prototxt

    # sudo vi examples/myfile/solver.prototxt
    复制代码
    复制代码
    net: "examples/myfile/train_val.prototxt"
    test_iter: 2
    test_interval: 50
    base_lr: 0.001
    lr_policy: "step"
    gamma: 0.1
    stepsize: 100
    display: 20
    max_iter: 500
    momentum: 0.9
    weight_decay: 0.005
    solver_mode: GPU
    复制代码
    复制代码

    100个测试数据,batch_size为50,因此test_iter设置为2,就能全cover了。在训练过程中,调整学习率,逐步变小。

    复制代码

    参考二:

    前面做好了lmdb和均值文件,下面以Googlenet为例修改网络并训练模型。

     

    我们将caffe-master\models下的bvlc_googlenet文件夹复制到caffe-master\examples\imagenet下。(因为我们的lmdb和均值都在这里,放一起方便些)

    打开train_val.prototxt,修改:

    1.修改data层:

     

    1. layer {  
    2.   name: "data"  
    3.   type: "Data"  
    4.   top: "data"  
    5.   top: "label"  
    6.   include {  
    7.     phase: TRAIN  
    8.   }  
    9.   transform_param {  
    10.     mirror: true  
    11.     crop_size: 224  
    12.     mean_file: "examples/imagenet/mydata_mean.binaryproto" #均值文件  
    13.     #mean_value: 104 #这些注释掉  
    14.     #mean_value: 117  
    15.     #mean_value: 123  
    16.   }  
    17.   data_param {  
    18.     source: "examples/imagenet/mydata_train_lmdb" #训练集的lmdb  
    19.     batch_size: 32 #根据GPU修改  
    20.     backend: LMDB  
    21.   }  
    22. }  
     
    1. layer {  
    2.   name: "data"  
    3.   type: "Data"  
    4.   top: "data"  
    5.   top: "label"  
    6.   include {  
    7.     phase: TEST  
    8.   }  
    9.   transform_param {  
    10.     mirror: false  
    11.     crop_size: 224  
    12.     mean_file: "examples/imagenet/mydata_mean.binaryproto" #均值文件  
    13.     #mean_value: 104  
    14.     #mean_value: 117  
    15.     #mean_value: 123  
    16.   }  
    17.   data_param {  
    18.     source: "examples/imagenet/mydata_val_lmdb" #验证集lmdb  
    19.     batch_size: 50 #和solver中的test_iter相乘约等于验证集大小  
    20.     backend: LMDB  
    21.   }  
    22. }  
     

    2.修改输出:

    由于Googlenet有三个输出,所以改三个地方,其他网络一般只有一个输出,则改一个地方即可。

    如果是微调,那么输出层的层名也要修改。(参数根据层名来初始化,由于输出改了,该层参数就不对应了,因此要改名)

    layer {
      name: "loss1/classifier"
      type: "InnerProduct"
      bottom: "loss1/fc"
      top: "loss1/classifier"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 1000 #改成你的数据集类别数
        weight_filler {
          type: "xavier"
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "loss2/classifier"
      type: "InnerProduct"
      bottom: "loss2/fc"
      top: "loss2/classifier"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 1000 #改成你的数据集类别数
        weight_filler {
          type: "xavier"
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "loss3/classifier"
      type: "InnerProduct"
      bottom: "pool5/7x7_s1"
      top: "loss3/classifier"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 1000 #改成你的数据集类别数
        weight_filler {
          type: "xavier"
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }

    3.打开deploy.prototxt,修改:

    layer {
      name: "loss3/classifier"
      type: "InnerProduct"
      bottom: "pool5/7x7_s1"
      top: "loss3/classifier"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 1000 #改成你的数据集类别数
        weight_filler {
          type: "xavier"
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }

    如果是微调,该层层名和train_val.prototxt修改一致。

    接着,打开solver,修改:

    net: "examples/imagenet/bvlc_googlenet/train_val.prototxt" #路径不要错
    test_iter: 1000 #前面已说明该值
    test_interval: 4000 #迭代多少次测试一次
    test_initialization: false
    display: 40
    average_loss: 40
    base_lr: 0.01
    lr_policy: "step"
    stepsize: 320000 #迭代多少次改变一次学习率
    gamma: 0.96
    max_iter: 10000000 #迭代次数
    momentum: 0.9
    weight_decay: 0.0002
    snapshot: 40000
    snapshot_prefix: "examples/imagenet/bvlc_googlenet" #生成的caffemodel保存在imagenet下,形如bvlc_googlenet_iter_***.caffemodel
    solver_mode: GPU
    

    这时,我们回到caffe-master\examples\imagenet下,打开train_caffenet.sh,修改:

    (如果是微调,在脚本里加入-weights **/**/**.caffemodel即可,即用来微调的caffemodel路径)

    [plain]
    #!/usr/bin/env sh
    # Start Caffe training with the GoogLeNet solver on GPU 0.
    # The paths are relative, so run this script from the caffe-master root.

    # The trailing backslash continues the command onto the next line; without
    # it, "caffe train" would be invoked with no solver argument.
    ./build/tools/caffe train \
        -solver examples/imagenet/bvlc_googlenet/solver.prototxt -gpu 0
    

    (如果有多个GPU,可自行选择) 然后,在caffe-master下执行该脚本即可开始训练:$caffe-master ./examples/imagenet/train_caffenet.sh

    训练得到的caffemodel就可以用来做图像分类了,此时,需要(1)得到的labels.txt,(2)得到的mydata_mean.binaryproto,(3)得到的caffemodel以及已经修改过的deploy.prototxt,共四个文件,具体过程看:http://blog.csdn.net/sinat_30071459/article/details/50974695

    参考三:

    *_train_test.prototxt,*_deploy.prototxt,*_solver.prototxt文件编写时注意

    1、*_train_test.prototxt文件

    这是训练与测试网络配置文件

    (1)在数据层中 参数include{

                                     phase:TRAIN/TEST

                                 }

    TRAIN与TEST是枚举值,不能加引号(例如写成"TRAIN"),否则会报错。还好提示信息里会提示哪一行出现了问题,如下图:

    数字8就代表配置文件的第8行出现了错误

    (2)卷积层和全连接层相似:卷积层(Convolution),全连接层(InnerProduct,容易翻译成内积层)相似处有两个【1】:都有两个param{lr_mult:1

                                               decay_mult:1                            

                                   }

                                 param{lr_mult: 2

                                            decay_mult: 0            

                                  }

    【2】:convolution_param{}与inner_product_param{}里面的参数相似,甚至相同

    今天有事,明天再续!

    续上!

    (3)平均值文件*_mean.binaryproto要放在transform_param{}里,训练与测试数据集放在data_param{}里

    2.*_deploy.prototxt文件

    【1】*_deploy.prototxt文件的构造和*_train_test.prototxt文件的构造稍有不同首先没有test网络中的test模块,只有训练模块

    【2】数据层的写法和原来也有不同,更加简洁:

    input: "data" input_dim: 1 input_dim: 3 input_dim: 32 input_dim: 32

    注意红色部分,那是数据层的名字,没有这个的话,第一卷积层无法找到数据,我一开始没有加这句就报错。下面的四个参数有点类似batch_size(1,3,32,32)里四个参数

    【3】卷积层和全连接层中weight_filler{}与bias_filler{}两个参数不用再填写,因为这两个参数的值由已经训练好的模型*.caffemodel文件提供

    【4】输出层的变化(1)没有了test模块测试精度(2)输出层

    *_train_test.prototxt文件:

    layer{   name: "loss"   type: "SoftmaxWithLoss"#注意此处与下面的不同   bottom: "ip2"   bottom: "label"#注意标签项在下面没有了,因为下面的预测属于哪个标签,因此不能提供标签   top: "loss" }

    *_deploy.prototxt文件:

    layer {   name: "prob"   type: "Softmax"   bottom: "ip2"   top: "prob" }

    ***注意:两个文件中输出层的类型不同,一个是SoftmaxWithLoss,另一个是Softmax。另外,为了方便区分训练与应用时的输出,训练时输出是loss,应用时输出是prob。

    3、*_solver.prototxt

    net: "test.prototxt" #训练网络的配置文件
    test_iter: 100 #test_iter 指明在测试阶段有多少个前向过程(也就是有多少个batch)被执行。在MNIST例子里,在网络配置文件里已经设置test网络的batch size=100,这里test_iter 设置为100,那在测试阶段共有100*100=10000 图片被处理
    test_interval: 500 #每500次训练迭代后,执行一次test
    base_lr: 0.01 #学习率初始化为0.01
    momentum: 0.9 #u=0.9
    weight_decay: 0.0005 #权重衰减系数
    lr_policy: "inv"
    gamma: 0.0001
    power: 0.75 #以上三个参数都和降低学习率有关,详细的学习策略和计算公式见下面
    // The learning rate decay policy. The currently implemented learning rate  

    // policies are as follows:  

    //    - fixed: always return base_lr.  

    //    - step: return base_lr * gamma ^ (floor(iter / step))  

    //    - exp: return base_lr * gamma ^ iter

    //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)  

    //    - multistep: similar to step but it allows non uniform steps defined by  

    //      stepvalue  

    //    - poly: the effective learning rate follows a polynomial decay, to be  

    //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)  

    //    - sigmoid: the effective learning rate follows a sigmod decay  

    //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))  

    // where base_lr, max_iter, gamma, step, stepvalue and power are defined  

    // in the solver parameter protocol buffer, and iter is the current iteration.
    display: 100 #每100次迭代,显示结果
    snapshot: 5000 #每5000次迭代,保存一次快照
    snapshot_prefix: "path_prefix" #快照保存前缀:更准确的说是快照保存路径+前缀,因为文件名后面的部分是固定的
    solver_mode: GPU #选择解算器是用cpu还是gpu

    批处理文件编写:

    rem Train with the given solver file, using all GPUs (--gpu=all).
    F:/caffe/caffe-windows-master/bin/caffe.exe train --solver=C:/Users/Administrator/Desktop/caffe_test/cifar-10/cifar10_slover_prototxt --gpu=all
    rem Keep the console window open after training so the output can be read.
    pause

    参考四:

     

    06

     

     

    将train_val.prototxt 转换成deploy.prototxt

     

                             

                                                                                                                                                  

     

    1.删除输入数据(如:type:data...include{phase: TRAIN}),然后添加一个数据维度描述。

     

    1. input: "data"   
    2. input_dim: 1   
    3. input_dim: 3   
    4. input_dim: 224   
    5. input_dim: 224  
    6. force_backward: true  
    input: "data" 
    input_dim: 1 
    input_dim: 3 
    input_dim: 224 
    input_dim: 224
    force_backward: true

     

    
    

     

    2.移除最后的“loss” 和“accuracy” 层,加入“prob”层。

     

    1. layers {  
    2.   name: "prob"  
    3.   type: SOFTMAX  
    4.   bottom: "fc8"  
    5.   top: "prob"  
    6. }  
    layers {
      name: "prob"
      type: SOFTMAX
      bottom: "fc8"
      top: "prob"
    }
    如果train_val文件中还有其他的预处理层,就稍微复杂点。如下,在‘data’层和‘conv1’层(bottom:”data” / top:”conv1”)之间插入了一个层来计算输入数据的均值。

     

     

    1. layer {  
    2. name: “mean”  
    3. type: “Convolution”  
    4. bottom: “data”  
    5. top: “data”  
    6. param {  
    7. lr_mult: 0  
    8. decay_mult: 0  
    9. }  
    10.   
    11. …}  
    在deploy.prototxt文件中,“mean” 层必须保留,只是容器改变,相应的‘conv1’也要改变 ( bottom:”mean” / top:”conv1” )。
    1. layer {  
    2. name: “mean”  
    3. type: “Convolution”  
    4. bottom: “data”  
    5. top: “mean”  
    6. param {  
    7. lr_mult: 0  
    8. decay_mult: 0  
    9. }  
    10.   
    11. …}  

     

  • 相关阅读:
    HTTP权威指南笔记-1.概述
    C# 设计模式之工厂模式(一)
    C# 读取Excel内容
    C# 反射
    C# 分部类与分部方法
    图像处理
    mysql 使用问题?
    第一节mysql 安装
    软件包管理
    第四节基础篇
  • 原文地址:https://www.cnblogs.com/huty/p/8518086.html
Copyright © 2011-2022 走看看