只用了modelnet40的第一个部分训练的
大致思路和图像一样,只不过用的conv1d,在数据集处理上也稍有不同
my_Dataset.py:

import numpy as np
import h5py
import torch
import random
import torchvision.transforms as transforms
from torch.utils import data


# Open question from the original author: should the dataset itself
# transpose each sample and turn it into a tensor?  (It does, in __getitem__.)
class Dataset(data.Dataset):
    """Point-cloud dataset backed by a single HDF5 file.

    The file must contain a ``'data'`` dataset (the point clouds) and a
    ``'label'`` dataset (one class id per cloud).  Both are read fully into
    memory when the dataset is constructed.

    Args:
        root: Path to the ``.h5`` file.
    """

    def __init__(self, root):
        super(Dataset, self).__init__()
        # Everything is copied into numpy arrays with ``[:]``, so the file
        # can be closed right away instead of leaking an open handle for the
        # lifetime of the dataset object.
        with h5py.File(root, 'r') as file:
            self.data = file['data'][:]
            raw_label = file['label'][:]
        # The labels have to be flattened into a 1-D array.
        self.label = raw_label.reshape(-1, raw_label.shape[0]).squeeze(0)

    def __getitem__(self, index):
        """Return ``(points, label)`` for the sample at ``index``.

        Per the original author's note, a point cloud comes in as n * 3
        (unlike an image, which is n * m * 3): there is no height axis, and
        x, y, z play the role of the three channels.  Where ``ToTensor``
        would turn an image into 3 * n * m, a point cloud only needs a
        transpose to become 3 * n before being converted to a tensor.
        """
        return torch.tensor(self.data[index].T), self.label[index]

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.label)
configuration.py:

import torch


class config:
    """Hyper-parameters and paths shared by the training and test scripts.

    All settings are plain class attributes, so they can be read either from
    the class itself or from an instance (``config()``).
    """

    # --- data loading -----------------------------------------------------
    batch_size = 4
    num_workers = 8

    # --- training ---------------------------------------------------------
    num_epochs = 10
    num_classes = 40

    # --- hardware ---------------------------------------------------------
    # 'cuda' when torch reports an available CUDA device, otherwise 'cpu'.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # --- output locations -------------------------------------------------
    checkpoints_root = 'C:/Users/Dell/PycharmProjects/PointNet/checkpoints'
    log_dir = 'C:/Users/Dell/PycharmProjects/PointNet/checkpoints/log'
Model.py:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from configuration import config

device = 'cuda' if torch.cuda.is_available() else 'cpu'
con = config()


class T_Net(nn.Module):
    """T-Net: takes a point cloud (or feature map) and predicts a k x k
    matrix per sample, returned with the identity matrix added.

    Args:
        k: Number of input channels; also the side length of the predicted
            square matrix.
    """

    def __init__(self, k):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(self.k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, self.k * self.k)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Predict one ``k x k`` matrix per sample.

        Args:
            x: Tensor of shape ``(batch, k, n)``, the layout ``Conv1d``
                expects for ``k`` input channels.

        Returns:
            Tensor of shape ``(batch, k, k)``: the raw prediction plus the
            identity matrix.
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Pool over the whole last axis, leaving one value per channel.
        x = F.max_pool1d(x, x.size(-1))
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # Build the k x k identity directly on the device (and in the dtype)
        # of the activations.  ``Tensor.to`` is NOT in-place, so creating it
        # on the CPU and calling ``E.to(device)`` without re-assigning left
        # it on the CPU and broke the addition on CUDA.  Broadcasting adds
        # the same identity to every sample, so no explicit repeat is needed.
        identity = torch.eye(self.k, dtype=x.dtype, device=x.device)
        matrix = x.view(-1, self.k, self.k) + identity
        return matrix


class PNet(nn.Module):
    """PointNet-style classifier with an input transform (3 x 3) and a
    feature transform (64 x 64), both predicted by ``T_Net``.

    The number of output classes is read from ``con.num_classes``.
    """

    def __init__(self):
        super(PNet, self).__init__()
        self.input_transform = T_Net(k = 3)
        self.feature_transform = T_Net(k = 64)
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, con.num_classes)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Classify a batch of point clouds.

        Args:
            x: Tensor of shape ``(batch, 3, n)``.

        Returns:
            A tuple ``(logits, input_matrix, feature_matrix)`` where
            ``logits`` is ``(batch, con.num_classes)`` and the two matrices
            are the ``T_Net`` outputs, ``(batch, 3, 3)`` and
            ``(batch, 64, 64)``.
        """
        input_matrix = self.input_transform(x)
        # (batch, n, 3) @ (batch, 3, 3), then back to channels-first.
        x = torch.bmm(torch.transpose(x, 1, 2), input_matrix).transpose(1, 2)
        x = F.relu(self.bn1(self.conv1(x)))
        feature_matrix = self.feature_transform(x)
        x = torch.bmm(x.transpose(1, 2), feature_matrix).transpose(1, 2)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        # After the conv stack the 3 * n point cloud has become 1024 * n.
        # Max-pooling takes the maximum of each of the 1024 channels, giving
        # 1024 * 1.  The second argument of max_pool1d is the pooling window,
        # which is n here; x.size(-1) is the last dimension.
        x = F.max_pool1d(x, x.size(-1))
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.dropout(self.fc2(x))))
        x = self.fc3(x)
        return x, input_matrix, feature_matrix
test.py:

import torch
import torch.nn
import torch.utils.data.dataloader as Dataloader
from configuration import config
from my_Dataset import Dataset
from Model import PNet
import os

# Smoke test: load the epoch-10 checkpoint and print predictions next to the
# ground-truth labels for the first 20 batches of the test split.
if __name__ == '__main__':
    con = config()
    model = PNet()
    # map_location lets a checkpoint saved on a GPU machine load on a
    # CPU-only one (and vice versa).
    checkpoint = torch.load(os.path.join(con.checkpoints_root, 'checkpoint_10.pkl'),
                            map_location=con.device)
    model.load_state_dict(checkpoint['model'])
    model.to(con.device)
    # Inference mode: BatchNorm must use its running statistics (batch
    # statistics over 2 samples are meaningless) and Dropout must be off.
    model.eval()

    dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_test0.h5')
    dataloader = Dataloader.DataLoader(dataset, batch_size=2, shuffle = True)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for cnt, (data, label) in enumerate(dataloader, start=1):
            # Tensor.to is not in-place: the result has to be re-assigned,
            # otherwise the batch stays on the CPU while the model is on GPU.
            data = data.to(con.device)
            output = model(data)[0]
            pred = torch.max(output, 1)[1]
            print(pred, label)
            if cnt == 20:
                break
train.py:

import torch
import torch.nn as nn
import torch.utils.data.dataloader as Dataloader
from tqdm import tqdm
from my_Dataset import Dataset
from Model import PNet
from configuration import config
import os
from tensorboardX import SummaryWriter

con = config()


def loss_funtion(output, label, input_matrix, featrue_matrix, alpha = 0.0001):
    """Cross-entropy loss plus a regulariser on the two transform matrices.

    The regulariser is ``alpha * (||I - A A^T|| + ||I - F F^T||) / batch``,
    where ``A`` is ``input_matrix`` and ``F`` is ``featrue_matrix`` and each
    norm is taken over the whole batched difference tensor.

    Args:
        output: Class logits, ``(batch, num_classes)``.
        label: Class ids, ``(batch,)``; any integer dtype, any device.
        input_matrix: ``(batch, 3, 3)`` transform matrices.
        featrue_matrix: ``(batch, 64, 64)`` transform matrices.
        alpha: Weight of the regulariser.

    Returns:
        A scalar tensor on the same device as ``output``.
    """
    loss = nn.CrossEntropyLoss()
    bs = output.size(0)
    # Create the identities on the device of the matrices they are compared
    # with.  ``Tensor.to`` is not in-place, so the earlier
    # ``E_3.to(con.device)`` calls had no effect and failed on CUDA.  The
    # 2-D identities broadcast over the batch dimension.
    E_3 = torch.eye(3, dtype=input_matrix.dtype, device=input_matrix.device)
    E_64 = torch.eye(64, dtype=featrue_matrix.dtype, device=featrue_matrix.device)
    diff3 = E_3 - torch.bmm(input_matrix, input_matrix.transpose(1, 2))
    diff64 = E_64 - torch.bmm(featrue_matrix, featrue_matrix.transpose(1, 2))
    # Note: the labels must be int64.  Convert dtype and device together;
    # ``label.type(torch.LongTensor)`` would force them onto the CPU.
    label = label.to(device=output.device, dtype=torch.long)
    return loss(output, label) + alpha * (torch.norm(diff3) + torch.norm(diff64)) / float(bs)


if __name__ == '__main__':
    data_path = 'H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_train0.h5'
    dataset = Dataset(data_path)
    dataloader = Dataloader.DataLoader(dataset, batch_size = con.batch_size, shuffle = True, num_workers = con.num_workers)
    model = PNet()
    model.to(con.device)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    tbwriter = SummaryWriter(logdir = con.log_dir)
    # torch.save does not create missing directories.
    os.makedirs(con.checkpoints_root, exist_ok=True)

    for epoch in range(con.num_epochs):
        total_loss = 0.0
        total_true = 0
        cnt = 0
        total_img = 0
        for data, label in tqdm(dataloader):
            # Tensor.to returns a new tensor; the result must be re-assigned.
            data = data.to(con.device)
            label = label.to(con.device)
            optimizer.zero_grad()
            output, input_matrix, feature_matrix = model(data)
            loss_value = loss_funtion(output, label, input_matrix, feature_matrix)
            loss_value.backward()
            optimizer.step()
            pred = torch.max(output, 1)[1]
            # .item() turns the running statistics into plain Python numbers
            # so the autograd graph of every batch is not kept alive.
            total_true += torch.sum(pred == label).item()
            total_loss += loss_value.item()
            cnt += 1
            total_img += len(label)

        # Per-epoch averages: mean loss per batch, accuracy per sample.
        tbwriter.add_scalar('Loss', total_loss / float(cnt), epoch)
        tbwriter.add_scalar('Accuracy', total_true / float(total_img), epoch)
        print('Loss:{:.4f}, Accuracy:{:.2f}'.format(total_loss / float(cnt), total_true / float(total_img)))
        if (epoch + 1) % 10 == 0:
            state = {
                'model': model.state_dict()
            }
            torch.save(state, os.path.join(con.checkpoints_root, 'checkpoint_{}.pkl'.format(epoch + 1)))

    # Flush pending events to disk.
    tbwriter.close()
    print('Train Accepted')