Only 200 samples from the dataset were used.
The above are some notes recorded while reproducing the code.
The classification and segmentation code have now been merged into one project; the organization of the classification code was re-planned beforehand.
Folder:
configuration.py

import torch

class config():
    batch_size = 4
    num_epochs = 10
    num_classes = 40
    num_seg = 50
    num_workers = 0
    clacheckpoints_root = 'C:/Users/Dell/PycharmProjects/PointNet/clacheckpoints'
    segcheckpoints_root = 'C:/Users/Dell/PycharmProjects/PointNet/segcheckpoints'
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    clalog_dir = 'C:/Users/Dell/PycharmProjects/PointNet/clacheckpoints/clalog'
    seglog_dir = 'C:/Users/Dell/PycharmProjects/PointNet/segcheckpoints/seglog'
my_Dataset.py

import numpy as np
import h5py
import torch
import random
from torch.utils import data
import os

# the data needs to be transposed and converted to a tensor

# classification dataset
class Dataset(data.Dataset):
    def __init__(self, root):
        super(Dataset, self).__init__()
        file = h5py.File(root, 'r')
        self.data = file['data'][:]
        # flatten the (N, 1) label array into a 1-D array
        self.label = file['label'][:].reshape(-1)

    def __getitem__(self, index):
        # A point cloud comes in as n * 3; unlike an image (n * m * 3) it has no height
        # dimension, and x, y, z play the role of the three channels.
        # An image is turned into 3 * n * m by ToTensor; for a point cloud it is enough
        # to transpose it to 3 * n and convert it to a tensor.
        return torch.tensor(self.data[index].T), self.label[index]

    def __len__(self):
        return len(self.label)

# segmentation dataset
class Seg_Dataset(data.Dataset):
    def __init__(self, data_root, label_root):
        super().__init__()
        self.data_root = data_root
        self.label_root = label_root
        self.data_file = sorted(os.listdir(data_root), key=lambda x: int(x.split('.')[0]))
        self.label_file = sorted(os.listdir(label_root), key=lambda x: int(x.split('.')[0]))

    def __getitem__(self, index):
        data_arr = np.loadtxt(os.path.join(self.data_root, self.data_file[index]))
        label_arr = np.loadtxt(os.path.join(self.label_root, self.label_file[index]))
        # Sample 2500 points; if the cloud has fewer, pad it with randomly duplicated points.
        if data_arr.shape[0] >= 2500:
            sample_list = random.sample(range(data_arr.shape[0]), 2500)
            data_arr = data_arr[sample_list, :]
            label_arr = label_arr[sample_list]
        else:
            # sample with replacement, so this also works when fewer than half of the
            # missing points are available (random.sample would raise an error there)
            sample_list = np.random.choice(data_arr.shape[0], 2500 - data_arr.shape[0], replace=True)
            data_arr = np.concatenate([data_arr, data_arr[sample_list, :]], 0)
            label_arr = np.concatenate([label_arr, label_arr[sample_list]], 0)
        # labels must be LongTensor, data must be float32
        label = torch.tensor(label_arr).type(torch.LongTensor)
        data_t = torch.tensor(data_arr.T).to(torch.float32)
        return data_t, label

    def __len__(self):
        return len(self.label_file)
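As a quick sanity check of both datasets, here is a minimal sketch (my addition, assuming the same paths as in train.py; adjust them to your own machine):

from my_Dataset import Dataset, Seg_Dataset

cla_set = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_train0.h5')
data, label = cla_set[0]
print(data.shape, label)            # expected: torch.Size([3, 2048]) and a class index

seg_set = Seg_Dataset('H:/DataSet/shapenet/data', 'H:/DataSet/shapenet/label')
data, label = seg_set[0]
print(data.shape, label.shape)      # expected: torch.Size([3, 2500]) torch.Size([2500])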
Model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from configuration import config

con = config()

# T-Net: takes a point cloud, produces a transform matrix, adds E and returns it
class T_Net(nn.Module):
    def __init__(self, k):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(self.k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, self.k * self.k)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        bs = x.size(0)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool1d(x, x.size(-1))
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # Build a k * k identity matrix E and expand it to bs copies. The two 1s in
        # repeat are the per-dimension factors: rows * 1 and columns * 1, with bs
        # copies along the batch dimension.
        # Note: .to() is not in-place for tensors, so the result must be assigned back.
        E = torch.eye(self.k, requires_grad=True).repeat(bs, 1, 1).to(x.device)
        matrix = x.view(-1, self.k, self.k) + E
        return matrix

# Backbone Net
class PNet(nn.Module):
    def __init__(self, flag):
        super(PNet, self).__init__()
        self.flag = flag
        self.input_transform = T_Net(k=3)
        self.feature_transform = T_Net(k=64)
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, x):
        input_matrix = self.input_transform(x)
        x = torch.bmm(torch.transpose(x, 1, 2), input_matrix).transpose(1, 2)
        x = F.relu(self.bn1(self.conv1(x)))
        feature_matrix = self.feature_transform(x)
        x = torch.bmm(x.transpose(1, 2), feature_matrix).transpose(1, 2)
        local_feature = x
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        # After the convolutions the 3 * n cloud has become 1024 * n.
        # max_pool takes the maximum over each of the 1024 dimensions, giving 1024 * 1.
        # The second argument of max_pool1d is the pooling window, which is n here;
        # -1 indexes the last entry of size().
        x = F.max_pool1d(x, x.size(-1))
        if self.flag == 'Seg':
            x = x.view(-1, 1024, 1).repeat(1, 1, local_feature.size(-1))
            x = torch.cat([x, local_feature], 1)
        return x, input_matrix, feature_matrix

class Cla_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.PNet = PNet('Cla')
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, con.num_classes)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x, input_matrix, feature_matrix = self.PNet(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.dropout(self.fc2(x))))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1), input_matrix, feature_matrix

class Seg_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.PNet = PNet('Seg')
        self.conv1 = nn.Conv1d(1088, 512, 1)
        self.conv2 = nn.Conv1d(512, 256, 1)
        self.conv3 = nn.Conv1d(256, 128, 1)
        self.conv4 = nn.Conv1d(128, con.num_seg, 1)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.bn3 = nn.BatchNorm1d(128)

    def forward(self, x):
        x, input_matrix, feature_matrix = self.PNet(x)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.conv4(x)
        # x is now bs * k * n. log_softmax here is applied row-wise over a 2-D input,
        # so first transpose to bs * n * k, flatten to (-1, k), apply log_softmax,
        # and reshape back to bs * n * k where needed.
        x = x.transpose(2, 1).contiguous()
        x = F.log_softmax(x.view(-1, con.num_seg), dim=1)
        return x, input_matrix, feature_matrix
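A minimal shape check with random input (my own sketch, not part of the original files): feed a batch of 4 clouds with 2500 points each through both heads and confirm the output sizes.

import torch
from Model import Cla_Net, Seg_Net

x = torch.rand(4, 3, 2500)

cla = Cla_Net().eval()   # eval() so BatchNorm uses running stats
out, mat3, mat64 = cla(x)
print(out.shape, mat3.shape, mat64.shape)   # [4, 40], [4, 3, 3], [4, 64, 64]

seg = Seg_Net().eval()
out, mat3, mat64 = seg(x)
print(out.shape)                            # [10000, 50], i.e. (4 * 2500, num_seg)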
train.py

import torch
import torch.nn as nn
import torch.utils.data.dataloader as Dataloader
from tqdm import tqdm
from my_Dataset import Dataset, Seg_Dataset
from Model import Cla_Net, Seg_Net
from configuration import config
import os
from tensorboardX import SummaryWriter
import numpy as np

con = config()

def loss_function(output, label, input_matrix, feature_matrix, alpha=0.0001):
    loss = nn.NLLLoss()
    # use the actual batch size rather than con.batch_size, so the last,
    # possibly smaller, batch does not break the identity expansion
    bs = input_matrix.size(0)
    E_3 = torch.eye(3, requires_grad=True).repeat(bs, 1, 1).to(input_matrix.device)
    E_64 = torch.eye(64, requires_grad=True).repeat(bs, 1, 1).to(feature_matrix.device)
    diff3 = E_3 - torch.bmm(input_matrix, input_matrix.transpose(1, 2))
    diff64 = E_64 - torch.bmm(feature_matrix, feature_matrix.transpose(1, 2))
    # note that the labels must be int64
    label = label.long()
    return loss(output, label) + alpha * (torch.norm(diff3) + torch.norm(diff64)) / float(bs)

def Cla_train():
    data_path = 'H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_train0.h5'
    dataset = Dataset(data_path)
    dataloader = Dataloader.DataLoader(dataset, batch_size=con.batch_size,
                                       shuffle=True, num_workers=con.num_workers)
    model = Cla_Net()
    model.to(con.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    tbwriter = SummaryWriter(logdir=con.clalog_dir)
    for epoch in range(con.num_epochs):
        total_loss = 0
        total_true = 0
        cnt = 0
        total_img = 0
        for data, label in tqdm(dataloader):
            # .to() is not in-place for tensors, the result must be assigned back
            data = data.to(con.device)
            label = label.long().to(con.device)
            optimizer.zero_grad()
            output, input_matrix, feature_matrix = model(data)
            loss_value = loss_function(output, label, input_matrix, feature_matrix)
            loss_value.backward()
            optimizer.step()
            pred = torch.max(output, 1)[1]
            total_true += torch.sum(pred == label).item()
            total_loss += loss_value.item()
            cnt += 1
            total_img += len(label)
        tbwriter.add_scalar('Loss', total_loss / float(cnt), epoch)
        tbwriter.add_scalar('Accuracy', total_true / float(total_img), epoch)
        print('Loss:{:.4f}, Accuracy:{:.2f}'.format(total_loss / float(cnt), total_true / float(total_img)))
        if (epoch + 1) % 10 == 0:
            state = {'model': model.state_dict()}
            torch.save(state, os.path.join(con.clacheckpoints_root, 'clacheckpoint_{}.pkl'.format(epoch + 1)))
    print('Train Accepted')

def Seg_train():
    tbwriter = SummaryWriter(logdir=con.seglog_dir)
    data_root = 'H:/DataSet/shapenet/data'
    label_root = 'H:/DataSet/shapenet/label'
    dataset = Seg_Dataset(data_root, label_root)
    dataloader = Dataloader.DataLoader(dataset, batch_size=con.batch_size,
                                       shuffle=True, num_workers=con.num_workers)
    model = Seg_Net()
    model.to(con.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    for epoch in range(con.num_epochs):
        total_true = 0
        total_loss = 0
        cnt = 0
        for data, label in tqdm(dataloader):
            # gradients must be cleared here; calling optimizer.step() at the top
            # of the loop (as in the original version) was a bug
            optimizer.zero_grad()
            data = data.to(con.device)
            label = label.to(con.device)
            output, input_matrix, feature_matrix = model(data)
            # flatten the 2-D label tensor into 1-D (part labels start at 1, hence -1)
            label = label.view(-1) - 1
            loss_value = loss_function(output, label, input_matrix, feature_matrix)
            loss_value.backward()
            optimizer.step()
            pred = torch.max(output, 1)[1]
            total_loss += loss_value.item()
            total_true += torch.sum(pred == label).item()
            cnt += 1
        loss_mean = total_loss / float(cnt)
        accuracy = total_true / float(len(dataset) * 2500)
        tbwriter.add_scalar('Loss', loss_mean, epoch)
        tbwriter.add_scalar('Accuracy', accuracy, epoch)
        print('Loss:{:.4f}, Accuracy:{:.4f}'.format(loss_mean, accuracy))
        if (epoch + 1) % con.num_epochs == 0:
            state = {'model': model.state_dict()}
            torch.save(state, os.path.join(con.segcheckpoints_root, 'segcheckpoint_{}.pkl'.format(epoch + 1)))

    model.eval()
    shape_iou = []
    with torch.no_grad():
        for data, label in tqdm(dataloader):
            data = data.to(con.device)
            output, input_matrix, feature_matrix = model(data)
            # use the actual batch size so the last, possibly smaller, batch works
            output = output.view(label.size(0), -1, con.num_seg)
            output = output.cpu().data.numpy()
            label = label.cpu().data.numpy() - 1
            output = np.argmax(output, 2)
            for i in range(label.shape[0]):
                part_iou = []
                for part in range(con.num_seg):
                    I = np.sum(np.logical_and(output[i] == part, label[i] == part))
                    U = np.sum(np.logical_or(output[i] == part, label[i] == part))
                    if U == 0:
                        iou = 1
                    else:
                        iou = I / float(U)
                    part_iou.append(iou)
                shape_iou.append(np.mean(part_iou))
    print('mIOU:{:.2f}'.format(np.mean(shape_iou)))

if __name__ == '__main__':
    # Cla_train()
    Seg_train()
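For reference, the matrix term in loss_function is the transform regularizer from the PointNet paper: it pushes each predicted transform A towards an orthogonal matrix (the paper applies it to the 64 x 64 feature transform; the code above applies it to the 3 x 3 input transform as well):

L_{reg} = \lVert I - A A^{\top} \rVert_F^2

One small deviation worth noting: torch.norm(diff) returns the plain (unsquared) Frobenius norm, so the code effectively uses \lVert I - A A^{\top} \rVert_F rather than its square; with a small alpha this mostly just rescales the regularization strength.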
test.py

import torch
import torch.utils.data.dataloader as Dataloader
from configuration import config
from my_Dataset import Dataset, Seg_Dataset
from Model import Cla_Net, Seg_Net
import os
import numpy as np
from to_use import show_3d

con = config()

def Cla_test():
    model = Cla_Net()
    checkpoint = torch.load(os.path.join(con.clacheckpoints_root, 'clacheckpoint_10.pkl'))
    model.load_state_dict(checkpoint['model'])
    model.to(con.device)
    model.eval()
    dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_test0.h5')
    dataloader = Dataloader.DataLoader(dataset, batch_size=2, shuffle=True)
    cnt = 0
    for data, label in dataloader:
        data = data.to(con.device)
        output = model(data)[0]
        pred = torch.max(output, 1)[1].cpu()
        print(pred, label)
        cnt += 1
        if cnt == 20:
            break

def Seg_test():
    model = Seg_Net()
    checkpoint = torch.load(os.path.join(con.segcheckpoints_root, 'segcheckpoint_10.pkl'))
    model.load_state_dict(checkpoint['model'])
    model.to(con.device)
    model.eval()
    dataset = Seg_Dataset('H:/DataSet/shapenet/data', 'H:/DataSet/shapenet/label')
    # for i in range(len(dataset)):
    #     data, label = dataset[i]
    #     data = data.unsqueeze(0)
    #     output = model(data)[0]
    #     pred = torch.max(output, 1)[1]
    #     print(pred.numpy()[0])
    #     print(label)
    data, label = dataset[0]
    data_p = data
    data = data.unsqueeze(0).to(con.device)
    output = model(data)[0]
    pred = torch.max(output, 1)[1].cpu()
    data_p = data_p.T
    show_3d(np.array(data_p[:, 0]), np.array(data_p[:, 1]), np.array(data_p[:, 2]), np.array(pred))

if __name__ == '__main__':
    Seg_test()
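Beyond printing predictions side by side, here is a minimal sketch for an overall classification accuracy (my addition, reusing the same checkpoint and test file as Cla_test):

import torch
import torch.utils.data.dataloader as Dataloader
from configuration import config
from my_Dataset import Dataset
from Model import Cla_Net
import os

con = config()

def Cla_accuracy():
    # hypothetical helper: reports one accuracy number over the whole test file
    model = Cla_Net()
    checkpoint = torch.load(os.path.join(con.clacheckpoints_root, 'clacheckpoint_10.pkl'))
    model.load_state_dict(checkpoint['model'])
    model.to(con.device)
    model.eval()
    dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_test0.h5')
    dataloader = Dataloader.DataLoader(dataset, batch_size=con.batch_size)
    correct, total = 0, 0
    with torch.no_grad():
        for data, label in dataloader:
            data = data.to(con.device)
            label = label.long().to(con.device)
            pred = torch.max(model(data)[0], 1)[1]
            correct += torch.sum(pred == label).item()
            total += len(label)
    print('Test Accuracy: {:.4f}'.format(correct / float(total)))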
Visualization code
Running test.py will display the result.
to_use.py

import numpy as np
import mayavi.mlab as mlab

def show_3d(x, y, z, pred):
    # color each point by its predicted part label
    s = pred
    p3d = mlab.points3d(x, y, z, s, colormap='hsv', scale_mode='none')
    mlab.show()
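mayavi can be awkward to install; as an alternative (my own sketch, not part of the original code), matplotlib's 3-D scatter gives a similar, if less interactive, view:

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection on older matplotlib)

def show_3d_plt(x, y, z, pred):
    # hypothetical drop-in replacement for show_3d, colored by part label
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=pred, cmap='hsv', s=5)
    plt.show()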
Dataset link: https://pan.baidu.com/s/1YN8k8Ra5whSFzcKyZu-T-Q
Extraction code: 89cd