"""Train a ResNet18-based multi-digit classifier on SVHN-style street-number crops.

Each image is labelled with up to five digits.  The network has five
independent 11-way heads (digits 0-9 plus a "no digit" padding class), one per
character position.
"""
import os
import sys
import glob
import shutil
import json

# Set before torch is imported so that only GPU 0 is visible to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm, tqdm_notebook
import torch

torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

# Number of character positions predicted per image.
NUM_DIGITS = 5
# Class index used to pad labels shorter than NUM_DIGITS ("no digit here").
PAD_CLASS = 10
# Classes per head: digits 0-9 plus PAD_CLASS.
NUM_CLASSES = 11

# Per-channel mean / std passed to transforms.Normalize for both splits.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


class SVHNDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for multi-digit recognition.

    Args:
        img_path: Sequence of image file paths.
        img_label: Sequence of per-image digit sequences, aligned with
            ``img_path`` by index.
        transform: Optional callable applied to the PIL image after loading.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Return the image at ``index`` and its fixed-length int64 label.

        The label is truncated or right-padded with ``PAD_CLASS`` to exactly
        ``NUM_DIGITS`` entries.
        """
        img = Image.open(self.img_path[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        digits = [int(d) for d in self.img_label[index]][:NUM_DIGITS]
        digits += [PAD_CLASS] * (NUM_DIGITS - len(digits))
        # int64 is the dtype nn.CrossEntropyLoss expects for class-index
        # targets; building it here avoids the removed ``np.int`` alias and
        # any platform-dependent default integer width.
        return img, torch.tensor(digits, dtype=torch.long)

    def __len__(self):
        return len(self.img_path)


class SVHN_Model1(nn.Module):
    """ResNet18 feature extractor followed by five parallel linear heads."""

    def __init__(self):
        super(SVHN_Model1, self).__init__()
        backbone = models.resnet18(pretrained=True)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Drop the final fully-connected layer; keep everything up to pooling.
        self.cnn = nn.Sequential(*list(backbone.children())[:-1])

        # One head per character position.  The attribute names are part of
        # the saved state_dict, so they are kept as fc1..fc5.
        self.fc1 = nn.Linear(512, NUM_CLASSES)
        self.fc2 = nn.Linear(512, NUM_CLASSES)
        self.fc3 = nn.Linear(512, NUM_CLASSES)
        self.fc4 = nn.Linear(512, NUM_CLASSES)
        self.fc5 = nn.Linear(512, NUM_CLASSES)

    def forward(self, img):
        """Return a 5-tuple of logits, one ``(batch, 11)`` tensor per position."""
        feat = self.cnn(img)
        feat = feat.view(feat.shape[0], -1)
        return (self.fc1(feat), self.fc2(feat), self.fc3(feat),
                self.fc4(feat), self.fc5(feat))


def _multi_digit_loss(criterion, outputs, target):
    """Sum ``criterion`` over the heads, pairing head ``k`` with ``target[:, k]``."""
    return sum(criterion(out, target[:, k]) for k, out in enumerate(outputs))


def train(train_loader_, model_, criterion_, optimizer_, epoch_, use_cuda_):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        train_loader_: Iterable of ``(input, target)`` batches.
        model_: Model returning one logit tensor per character position.
        criterion_: Loss applied to each head and summed.
        optimizer_: Optimizer stepping ``model_``'s parameters.
        epoch_: Current epoch index (unused; kept for interface stability).
        use_cuda_: When true, each batch is moved to the GPU.
    """
    model_.train()
    batch_losses = []

    for input_, target_ in train_loader_:
        if use_cuda_:
            input_ = input_.cuda()
            target_ = target_.cuda()
        # Keep the target on the same device as the input (decided only by
        # ``use_cuda_``) and make sure it is int64 for CrossEntropyLoss.
        target_ = target_.long()

        loss_ = _multi_digit_loss(criterion_, model_(input_), target_)

        optimizer_.zero_grad()
        loss_.backward()
        optimizer_.step()

        batch_losses.append(loss_.item())
    return np.mean(batch_losses)


def validate(val_loader, model, criterion, use_cuda):
    """Evaluate ``model`` on ``val_loader`` and return the mean per-batch loss.

    Runs in eval mode with gradient tracking disabled.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for images, target in val_loader:
            if use_cuda:
                images = images.cuda()
                target = target.cuda()
            target = target.long()

            loss = _multi_digit_loss(criterion, model(images), target)
            batch_losses.append(loss.item())
    return np.mean(batch_losses)


def predict(test_loader, model, tta=10):
    """Return summed logits over ``tta`` passes through ``test_loader``.

    Args:
        test_loader: Iterable of ``(input, target)`` batches; targets are ignored.
        model: Model returning one logit tensor per character position.
        tta: Number of passes whose outputs are added together.

    Returns:
        A NumPy array of shape ``(num_samples, 5 * 11)`` with the heads
        concatenated along axis 1, or ``None`` when ``tta`` is not positive.
    """
    model.eval()
    # Follow the model's own device instead of relying on a module-level
    # ``use_cuda`` flag, so the function works when imported elsewhere.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    summed = None
    for _ in range(tta):
        chunks = []
        with torch.no_grad():
            for images, _target in test_loader:
                outputs = model(images.to(device))
                chunks.append(torch.cat(outputs, dim=1).cpu().numpy())

        pass_pred = np.vstack(chunks)
        if summed is None:
            summed = pass_pred
        else:
            summed += pass_pred

    return summed


def _load_split(image_glob, annotation_path):
    """Return ``(sorted image paths, labels)`` for one dataset split.

    NOTE(review): labels are taken in the JSON file's key order and matched to
    the sorted image paths purely by position -- confirm the annotation keys
    are stored in sorted filename order.
    """
    image_paths = sorted(glob.glob(image_glob))
    with open(annotation_path, encoding='utf-8') as fh:
        annotations = json.load(fh)
    labels = [annotations[name]['label'] for name in annotations]
    print(len(image_paths), len(labels))
    return image_paths, labels


def _decode_predictions(logits):
    """Turn ``(N, 5 * 11)`` concatenated logits into digit strings.

    Positions predicted as ``PAD_CLASS`` are dropped from the string.
    """
    digit_ids = np.stack(
        [logits[:, k * NUM_CLASSES:(k + 1) * NUM_CLASSES].argmax(1)
         for k in range(NUM_DIGITS)],
        axis=1,
    )
    return [''.join(map(str, row[row != PAD_CLASS])) for row in digit_ids]


def main():
    """Build the data loaders, train for 500 epochs and keep the best checkpoint."""
    # ------------------------------ training data ------------------------------
    train_path, train_label = _load_split(
        'F:/Kaggle1/mchar_train/mchar_train/*.png',
        'F:/Kaggle1/mchar_train.json',
    )

    # Training-time augmentation: random crop, colour jitter, small rotation.
    train_loader = torch.utils.data.DataLoader(
        SVHNDataset(train_path, train_label,
                    transforms.Compose([
                        transforms.Resize((64, 128)),
                        transforms.RandomCrop((60, 120)),
                        transforms.ColorJitter(0.3, 0.3, 0.2),
                        transforms.RandomRotation(10),
                        transforms.ToTensor(),
                        transforms.Normalize(_NORM_MEAN, _NORM_STD),
                    ])),
        batch_size=40,
        shuffle=True,
        num_workers=10,
    )

    # ----------------------------- validation data -----------------------------
    val_path, val_label = _load_split(
        'F:/Kaggle1/mchar_val/mchar_val/*.png',
        'F:/Kaggle1/mchar_val.json',
    )

    # No augmentation for validation; resize straight to the crop size.
    val_loader = torch.utils.data.DataLoader(
        SVHNDataset(val_path, val_label,
                    transforms.Compose([
                        transforms.Resize((60, 120)),
                        transforms.ToTensor(),
                        transforms.Normalize(_NORM_MEAN, _NORM_STD),
                    ])),
        batch_size=40,
        shuffle=False,
        num_workers=10,
    )

    model = SVHN_Model1()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), 0.001)
    best_loss = float('inf')

    # Use the GPU when one is available instead of assuming it.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    # Ground-truth strings never change, so build them once.
    val_label_str = [''.join(map(str, x)) for x in val_loader.dataset.img_label]

    for epoch in range(500):
        train_loss = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
        print("train_loss = ", train_loss)
        val_loss = validate(val_loader, model, criterion, use_cuda)
        print("val_loss = ", val_loss)

        # Whole-string accuracy: a sample counts only if every digit matches.
        val_label_pred = _decode_predictions(predict(val_loader, model, 1))
        val_char_acc = np.mean(np.array(val_label_pred) == np.array(val_label_str))

        print('Epoch: {0}, Train loss: {1} Val loss: {2}'.format(epoch, train_loss, val_loss))
        print('Val acc: {0}'.format(val_char_acc))

        # Checkpoint whenever the validation loss improves.
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), './model.pt')


if __name__ == '__main__':
    main()