zoukankan      html  css  js  c++  java
  • Lua 中将未分类数据划分为训练集和测试集

    require 'torch'
    require 'image'
    -- Script configuration. `parent_root` is the directory whose entries are
    -- listed by the code below; each entry is treated as one class, and its
    -- 1-based position in that listing is used as the class label.
    local  setting = {parent_root = '/home/pxu/image'}
    
    --- List the entries of a directory, hidden entries included.
    -- Runs `ls -a` on `path` and returns every entry name except the `.` and
    -- `..` pseudo-entries.
    -- @param path string: directory to list
    -- @return table: array of entry names, in the order `ls` printed them
    function list_children_root(path)
            local names = {}
            -- Single-quote the path (escaping embedded quotes) so that spaces
            -- or shell metacharacters in it reach `ls` literally.
            local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
            local pipe = assert(io.popen('ls -a ' .. quoted))
            for file_name in pipe:lines() do
                    -- Filter `.` and `..` by name rather than by position:
                    -- depending on the collation order, `ls` may print other
                    -- entries (e.g. names starting with `!`) before them.
                    if file_name ~= '.' and file_name ~= '..' then
                            names[#names + 1] = file_name
                    end
            end
            -- Release the pipe; the handle would otherwise stay open until
            -- it is garbage collected.
            pipe:close()
            return names
    end
    
    --- List the JPEG files directly inside a directory.
    -- Runs `ls -a` on `path` and keeps the names that end in `.jpg`.
    -- @param path string: directory to scan
    -- @return table: array of matching file names (names only, no directory
    --   prefix), in the order `ls` printed them
    function list_img(path)
            local images = {}
            -- Single-quote the path (escaping embedded quotes) so that spaces
            -- or shell metacharacters in it reach `ls` literally.
            local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
            local pipe = assert(io.popen('ls -a ' .. quoted))
            for file_name in pipe:lines() do
                    -- Match the extension only; a plain substring search for
                    -- "jpg" would also accept names such as `jpg_notes.txt`
                    -- or `a.jpg.bak`.
                    if file_name:match('%.jpg$') then
                            images[#images + 1] = file_name
                    end
            end
            -- Release the pipe; the handle would otherwise stay open until
            -- it is garbage collected.
            pipe:close()
            return images
    end
    -- Stage 1: build the train/test file lists.
    -- Every entry of `setting.parent_root` is treated as one class whose label
    -- is its 1-based position in the listing. The images of each class are
    -- shuffled; the first 60% (rounded down) go to the train lists and the
    -- rest to the test lists.
    print('obtain children root path ...')
    -- The result tables and counters stay global, as in the original script;
    -- the copy stage further down reads the four tables.
    train_paths,train_labels = {},{}
    test_paths,test_labels = {}, {}
    children_paths = list_children_root(setting.parent_root)
    print(children_paths)
    num_train,num_test =1,1
    print('spit data begin')
    for i = 1, #children_paths do
            local children_root = setting.parent_root .. '/' .. children_paths[i]
            print(children_root)
            local img_names = list_img(children_root)
            local n_imgs = #img_names
            -- Skip entries without images: torch.randperm needs n > 0, so an
            -- empty class directory would otherwise abort the whole run.
            if n_imgs > 0 then
                    -- Size of the train share for this class, computed once.
                    local n_train = math.floor(0.6 * n_imgs)
                    local ranIdx = torch.randperm(n_imgs)
                    for j = 1, n_imgs do
                            -- Indexing a 1-D tensor with a number yields the
                            -- stored value, here a shuffled image index.
                            local img_path = children_root .. '/' .. img_names[ranIdx[j]]
                            if j <= n_train then
                                    train_paths[num_train] = img_path
                                    train_labels[num_train] = i
                                    num_train = num_train + 1
                            else
                                    test_paths[num_test] = img_path
                                    test_labels[num_test] = i
                                    num_test = num_test + 1
                            end
                    end
            end
    end
    -- Stage 2: copy the selected images into per-label directories.
    -- Train images go to /home/yqcui/image/train/<label>/<i>.jpg and test
    -- images to /home/yqcui/image/test/<label>/<i>.jpg, where <i> is the
    -- image's position in its list.
    print('begin copy')

    -- Wrap a string in single quotes (escaping embedded quotes) so the shell
    -- receives it as one literal argument.
    local function shell_quote(s)
            return "'" .. s:gsub("'", "'\\''") .. "'"
    end

    -- Copy paths[i] to <dest_root>/<labels[i]>/<i>.jpg for every i.
    local function copy_split(paths, labels, dest_root)
            local made_dirs = {}
            for i = 1, #paths do
                    local dest_dir = dest_root .. '/' .. labels[i]
                    -- `cp` does not create missing directories, so make each
                    -- label directory once before the first copy into it.
                    if not made_dirs[dest_dir] then
                            os.execute('mkdir -p ' .. shell_quote(dest_dir))
                            made_dirs[dest_dir] = true
                    end
                    local aimpath = dest_dir .. '/' .. i .. '.jpg'
                    local todo = 'cp ' .. shell_quote(paths[i]) .. ' ' .. shell_quote(aimpath)
                    print(todo)
                    -- os.execute returns 0 on success in Lua 5.1/LuaJIT and
                    -- true in Lua 5.2+; report anything else, keep going.
                    local status = os.execute(todo)
                    if status ~= 0 and status ~= true then
                            io.stderr:write('copy failed: ' .. todo .. '\n')
                    end
            end
    end

    copy_split(train_paths, train_labels, '/home/yqcui/image/train')
    -- The test split gets its own directory: writing it under train/ with the
    -- same <i>.jpg numbering would overwrite the train images.
    copy_split(test_paths, test_labels, '/home/yqcui/image/test')

    将数据按 6:4 的比例随机划分为训练集和测试集。

  • 相关阅读:
    web.xml中的contextConfigLocation在spring中的作用
    folder、source folder、package 区别与联系
    mysql
    十六进制浮点转十进制浮点型
    float浮点数的二进制存储方式及转换
    API -- java.lang.Integer
    MyISAM与InnoDB区别
    mysql timestamp类型字段的CURRENT_TIMESTAMP与ON UPDATE CURRENT_TIMESTAMP属性
    刷新当前页面
    正则表达式
  • 原文地址:https://www.cnblogs.com/cyq041804/p/5737374.html
Copyright © 2011-2022 走看看