  • PyTorch Training Template

    Reference: wfnian

    1 Import packages and set random seeds

    import numpy as np
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import DataLoader, Dataset
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import time
    import random
    
    seed = 2
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
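    If you also train on a GPU, full reproducibility needs a few more switches. A minimal optional extension (not part of the original template; the cuDNN flags trade speed for determinism):

    torch.cuda.manual_seed_all(seed)            # seed all visible GPUs
    torch.backends.cudnn.deterministic = True   # force deterministic cuDNN kernels
    torch.backends.cudnn.benchmark = False      # disable nondeterministic autotuning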

    2 Define hyperparameters as a class

    class argparse():
        # bare container that mimics an argparse.Namespace
        pass
    
    args = argparse()
    args.epochs, args.learning_rate, args.patience = [30, 0.001, 4]
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
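    The same hyperparameters can also come from the standard argparse module when launching from the command line. A sketch of the equivalent (flag names are illustrative; note that importing the real argparse clashes with the class of the same name above, so use one or the other):

    import argparse
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--patience', type=int, default=4)
    args = parser.parse_args()
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")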

    3 Define your model

    class MyModel(nn.Module):
        def __init__(self, Input_size, Output_size):
            super(MyModel, self).__init__()
            self.fc1 = nn.Sequential(
                nn.Linear(Input_size, 64),
                nn.BatchNorm1d(64),
                nn.ReLU(True)
            )
    
            self.fc2 = nn.Sequential(
                nn.Linear(64, Output_size),
                nn.BatchNorm1d(Output_size),
                nn.Sigmoid()
            )
    
        def forward(self, x):
            x = self.fc1(x)
            x = self.fc2(x)
            
            return x
    
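    A quick shape check helps catch wiring mistakes before training. A small sketch with made-up sizes (16 inputs, 2 outputs):

    model = MyModel(Input_size=16, Output_size=2)
    dummy = torch.randn(4, 16)    # batch of 4; BatchNorm1d needs batch size > 1 in train mode
    print(model(dummy).shape)     # expected: torch.Size([4, 2])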

    4 Define an early-stopping class

    class EarlyStopping():
        def __init__(self, patience=20, verbose=False, delta=0):
            self.patience = patience
            self.verbose = verbose
            self.counter = 0
            self.best_score = None
            self.early_stop = False
            self.val_loss_min = np.inf
            self.delta = delta
    
        def __call__(self, val_loss, model, path):
            print("val_loss={:.4f}".format(val_loss))
            score = -val_loss
            if self.best_score is None:
                self.best_score = score
                self.save_checkpoint(val_loss, model, path)
            elif score < self.best_score + self.delta:
                # no sufficient improvement: count towards patience
                self.counter += 1
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
                if self.counter >= self.patience:
                    self.early_stop = True
            else:
                # improvement: save a checkpoint and reset the counter
                self.best_score = score
                self.save_checkpoint(val_loss, model, path)
                self.counter = 0
    
        def save_checkpoint(self, val_loss, model, path):
            if self.verbose:
                print(f'Validation loss decreased ({self.val_loss_min:.4f} --> {val_loss:.4f}).  Saving model to {path}')
            torch.save(model.state_dict(), path)
            self.val_loss_min = val_loss
    
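    The stopping logic can be checked in isolation with a synthetic loss series and a throwaway model (the checkpoint file name is illustrative):

    es = EarlyStopping(patience=2, verbose=True)
    tiny = nn.Linear(1, 1)
    for v in [1.0, 0.9, 0.95, 0.96, 0.97]:    # improves once, then stalls
        es(v, model=tiny, path='tmp_checkpoint.pth')
        if es.early_stop:
            print("stopped")                  # triggers on the second non-improving epoch
            break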

    5 Define the dataset and the loss function

    Define your own dataset and feed it through DataLoader

    Other parameters of DataLoader include (a usage sketch follows the list):

    • num_workers: how many subprocesses load the data; the default of 0 means loading happens in the main process
    • pin_memory: if True, the data loader will copy tensors into CUDA pinned memory before returning them, which speeds up host-to-GPU transfer
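    A sketch of passing these options, using the train_dataset constructed below (the worker count is illustrative, and pin_memory only pays off when batches are copied to a GPU):

    loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True,
                        num_workers=4, pin_memory=True)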
    class Dataset_name(Dataset):
        def __init__(self, flag='train'):
            assert flag in ['train', 'test', 'valid']
            self.flag = flag
            self.__load_data__()
    
        def __getitem__(self, index):
            pass
    
        def __len__(self):
            pass
    
        def __load_data__(self, csv_paths: list = None):
            # load and split your data here, filling self.train_X / self.train_Y etc.
            pass
            print(
                "train_X.shape:{}\ntrain_Y.shape:{}\nvalid_X.shape:{}\nvalid_Y.shape:{}\n"
                .format(self.train_X.shape, self.train_Y.shape, self.valid_X.shape, self.valid_Y.shape))
    
    train_dataset = Dataset_name(flag='train')
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    valid_dataset = Dataset_name(flag='valid')
    valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=64, shuffle=False)  # no need to shuffle validation data
    
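    For reference, a minimal concrete dataset backed by random tensors (shapes are made up; replace them with your real loading logic):

    class RandomDataset(Dataset):
        def __init__(self, n=512, input_size=16):
            self.X = torch.randn(n, input_size)
            self.Y = torch.randint(0, 2, (n, 1)).float()
    
        def __getitem__(self, index):
            return self.X[index], self.Y[index]
    
        def __len__(self):
            return len(self.X)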

    Define the loss function

    Option 1

    '''
    Define the forward computation in PyTorch;
    backpropagation through it is computed automatically during training.
    '''
    class MyLoss(torch.nn.Module):
        # do not forget to inherit from Module
        def __init__(self):
            super(MyLoss, self).__init__()
    
        def forward(self, output, target):
            """
            1. output and target must have shapes consistent with the operations below.
            2. the return value is a scalar.
            """
            loss = 1 - torch.mul(output, target)
            loss[loss < 0] = 0
            # do not forget to return a scalar
            return torch.mean(loss)
    

    Option 2

    ''' 
    Define a plain function; there are no parameters or gradients of its own to maintain.
    Note that all math operations must be performed on tensors.
    '''
    def MyLoss(output, target):
        return torch.mean(torch.pow((output - target), 2))
    
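    Both options plug into the training loop the same way. A quick sketch checking that the function version returns a differentiable scalar (random tensors; option 1 would be called as MyLoss()(output, target)):

    output = torch.randn(8, 1, requires_grad=True)
    target = torch.ones(8, 1)
    loss = MyLoss(output, target)
    loss.backward()               # gradients flow back through the custom loss
    print(loss.item())            # a single scalar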

    6 Instantiate the model and set up the loss

    Define the model and the basic machinery

    model = MyModel(Input_size, Output_size).to(args.device)  # pass the input/output sizes of your data
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    model_path = 'xxx.pth'
    
    train_loss = []
    valid_loss = []
    train_epochs_loss = []
    valid_epochs_loss = []
    
    # set up early stopping
    early_stopping = EarlyStopping(patience=args.patience, verbose=True)
    

    Set up automatic learning-rate adjustment

    # adjust automatically based on a monitored metric (a toy demo follows the parameter list)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
         optimizer, 
         mode='min', 
         factor=0.33, 
         patience=10,
         verbose=False, 
         threshold=0.0001, 
         threshold_mode='rel', 
         cooldown=0, 
         min_lr=0, 
         eps=1e-08
         )
    
    • optimizer (Optimizer) – the wrapped optimizer
    • mode (str) – one of 'min', 'max'. In 'min' mode, lr is reduced when the monitored quantity stops decreasing; in 'max' mode, when it stops increasing. Default: 'min'
    • factor (float) – factor by which the learning rate is reduced: new_lr = lr * factor. Default: 0.1
    • patience (int) – number of epochs with no improvement after which the learning rate is reduced. Default: 10
    • verbose (bool) – if True, prints a message to stdout for each update. Default: False
    • threshold (float) – threshold for measuring the new optimum, to focus only on significant changes. Default: 1e-4
    • threshold_mode (str) – one of 'rel', 'abs'. In 'rel' mode, dynamic_threshold = best * (1 + threshold) in 'max' mode or best * (1 - threshold) in 'min' mode. In 'abs' mode, dynamic_threshold = best + threshold in 'max' mode or best - threshold in 'min' mode. Default: 'rel'
    • cooldown (int) – number of epochs to wait before resuming normal operation after the lr has been reduced. Default: 0
    • min_lr (float or list) – a scalar or a list of scalars; a lower bound on the learning rate of all param groups or of each group respectively. Default: 0
    • eps (float) – minimal decay applied to the lr. If the difference between new and old lr is smaller than eps, the update is ignored. Default: 1e-8
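    A toy run showing the scheduler kick in once the metric stops improving (standalone, with a dummy parameter and illustrative values):

    opt = torch.optim.Adam([torch.zeros(1, requires_grad=True)], lr=0.01)
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.33, patience=2)
    for metric in [1.0, 1.0, 1.0, 1.0, 1.0]:    # the monitored loss never improves
        sched.step(metric)
        print(opt.param_groups[0]['lr'])        # drops to 0.01 * 0.33 once patience is exhausted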

    7 Train and adjust the learning rate

    for epoch in range(args.epochs):
        start = time.time()
        model.train()
        train_epoch_loss = []
        for idx,(data_x,data_y) in enumerate(train_dataloader,0):
            data_x = data_x.to(args.device)
            data_y = data_y.to(args.device)
            outputs = model(data_x)
            optimizer.zero_grad()
            loss = criterion(outputs,data_y)
            loss.backward()
            optimizer.step()
            train_epoch_loss.append(loss.item())
            train_loss.append(loss.item())
            if idx%(len(train_dataloader)//2)==0:
                print("epoch={}/{},{}/{} of train, loss={:.4f}".format(
                    epoch+1, args.epochs, idx, len(train_dataloader),loss.item()))
        end = time.time()
        train_epochs_loss.append(np.average(train_epoch_loss))
        print('Epoch:{}/{} | train_loss:{:.4f} | time:{:.4f}s'.format(epoch+1, args.epochs, train_epochs_loss[-1],(end-start)))
        
        #=====================valid============================
        with torch.no_grad():
            model.eval()
            valid_epoch_loss = []
            for idx,(data_x,data_y) in enumerate(valid_dataloader,0):
                data_x = data_x.to(args.device)
                data_y = data_y.to(args.device)
                outputs = model(data_x)
                loss = criterion(outputs,data_y)
                valid_epoch_loss.append(loss.item())
                valid_loss.append(loss.item())
        valid_epochs_loss.append(np.average(valid_epoch_loss))
        #==================early stopping======================
        early_stopping(valid_epochs_loss[-1],model=model,path=model_path)
        if early_stopping.early_stop:
            print("Early stopping")
            break
        #====================adjust lr========================
        ''' 
        1. Adjust automatically based on the monitored metric.
        (Use either scheme 1 or scheme 2 below, not both; they would fight each other.)
        '''
        scheduler.step(valid_epochs_loss[-1])
        
        '''
        2. Manual, piecewise learning-rate schedule.
        '''
        lr_adjust = {
                2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
                10: 5e-7, 15: 1e-7, 20: 5e-8
            }
        if epoch in lr_adjust.keys():
            lr = lr_adjust[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('Updating learning rate to {}'.format(lr))
    

    8 Plotting

    plt.figure(figsize=(12,4))
    plt.subplot(121)
    plt.plot(train_loss[:])
    plt.title("train_loss")
    plt.subplot(122)
    plt.plot(train_epochs_loss[1:],'-o',label="train_loss")
    plt.plot(valid_epochs_loss[1:],'-o',label="valid_loss")
    plt.title("epochs_loss")
    plt.legend()
    plt.show()
    

    9 Prediction

    # You can define a DataLoader for the prediction set here, or simply reshape
    # your data into a single batch (batch_size=1); a DataLoader sketch follows this block.
    model = MyModel(Input_size, Output_size)  # same sizes as used in training
    model.load_state_dict(torch.load(model_path))
    with torch.no_grad():
        model.eval()
        predict = model(test_data)
    
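    A sketch of the DataLoader route mentioned in the comment, assuming Dataset_name has been fleshed out to return (x, y) pairs for the test split:

    model = model.to(args.device)
    test_dataloader = DataLoader(dataset=Dataset_name(flag='test'), batch_size=64, shuffle=False)
    predictions = []
    with torch.no_grad():
        model.eval()
        for data_x, _ in test_dataloader:
            predictions.append(model(data_x.to(args.device)).cpu())
    predict = torch.cat(predictions)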