zoukankan      html  css  js  c++  java
  • otto-group-product-classification-challenge(Pytorch处理多分类问题)

    参考一:《Pytorch深度学习实践》(第九集)

    参考二:Otto-Neural-Net

    注意:使用的数据来自kaggle,链接

    由于上面给出的两个参考链接,对代码的讲解都已经很详细,所以这里不再赘述,下面按自己的理解整理了代码如下:

    Imports

    import numpy as np
    import pandas as pd
    import os
    import time
    from xgboost import XGBClassifier
    import matplotlib.pyplot as plt
    import torch
    from torch.utils.data import Dataset, DataLoader
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim

    Prepare Data

    # 处理数据
    csv_path = os.path.join('train.csv')
    datas = pd.read_csv(csv_path)
    datas = datas.copy() 
    datas = datas.drop(columns='id')
    datas = datas.sample(frac=1)
    
    # 将category转化为数字
    datas.target = datas.target.astype('category').cat.codes
    # 划分数据集
    rows, _ = datas.shape  # type(datas) 为 <class 'pandas.core.frame.DataFrame'>
    train_rows = int(rows*0.7)
    train_datas = datas.iloc[:train_rows, :]
    val_datas = datas.iloc[train_rows:, :]
    
    # 得到features和targets
    train_features = train_datas.loc[:, train_datas.columns!='target'].values  # values 得到一个矩阵
    train_targets = train_datas.target.values
    
    val_features = val_datas.loc[:, val_datas.columns!='target'].values
    val_targets = val_datas.target.values
    # 封装 继承dataset
    class CustomDataset(Dataset):
        def __init__(self, features, targets):
            super(CustomDataset, self).__init__()
            
            self.features = features
            self.targets = targets
        
        def __len__(self):
            return len(self.features)
        
        def __getitem__(self, idx):
            return self.features[idx], self.targets[idx]
    
    
    # 得到Dataset类
    train_ds = CustomDataset(train_features, train_targets)
    val_ds = CustomDataset(val_features, val_targets)
    
    # 得到DataLoader
    batch_size = 64
    train_loader = DataLoader(train_ds, batch_size=batch_size)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    Device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    Design Model

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            
            self.l1 = torch.nn.Linear(93, 64)
            self.bn = torch.nn.BatchNorm1d(num_features=64)
            self.l2 = torch.nn.Linear(64, 32)
            self.dropout = torch.nn.Dropout(p=0.1)
            self.l3 = torch.nn.Linear(32, 16)
            self.l4 = torch.nn.Linear(16, 9)
        
        def forward(self, x):
            # first layer
            x = self.l1(x)
            x = self.bn(x)
            x = torch.relu(x)
            # second layer
            x = self.l2(x)
            x = self.dropout(x)
            x = torch.relu(x)
            
            x = torch.relu(self.l3(x))
            return self.l4(x)  # 注意 这里不要加relu
    
    model = Net().to(device)

    Construct Loss and Optimizer

    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    Train and Validate

    def train():
        losses = 0.0
        accs = 0.0
        for idx, data in enumerate(train_loader, 0):
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            
            # forward + backward + update
            outputs = model(inputs.float())
            loss = criterion(outputs, targets.long())
            loss.backward()
            optimizer.step()
            
            losses += loss.item()
            _, predicted = torch.max(outputs.data, dim=1)
            accs += (predicted==targets).sum().item()
            
        losses /= len(train_loader)
        accs = accs * 100 / len(train_ds)
        print('Training:  loss: %.2f    accuracy: %.2f %%' %(losses, accs))
        
        return losses, accs
    def val():
        losses = 0.0
        accs = 0.0
        with torch.no_grad():
            for data in val_loader:
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs.float())
                losses += criterion(outputs, labels.long()).item()
                _, predicted = torch.max(outputs.data, dim=1)
                accs += (predicted == labels).sum().item()
        losses /= len(val_loader)
        accs = accs * 100 / len(val_ds)
        print('Validating:   loss: %.2f    accuracy: %.2f %%' %(losses, accs))
        
        return losses, accs
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    maxAcc = 0  # 记录最高准确率
    
    for epoch in range(100):
        print('[epoch: %d]' % (epoch+1))
        
        losses, accs = train()
        train_losses.append(losses)
        train_accs.append(accs)
        
        losses, accs = val()
        val_losses.append(losses)
        val_accs.append(accs)
        
        if maxAcc < accs:
            maxAcc = accs
        
        check = np.greater(maxAcc, val_accs[-5:])
        if (check.all()==True)  and (epoch>20):
            print('Convergence met!
    ')
            break
    
    print('Maximum validation accuracy: %.2f %%' % (maxAcc))

    Plot Losses

    # 这里采用了指数平滑
    import math
    import matplotlib.pyplot as plt
    
    tmp_train_losses = [math.e**i for i in train_losses]
    tmp_val_losses = [math.e**i for i in val_losses]
    plt.figure(figsize=(10, 5))
    plt.plot(tmp_train_losses, 'r', label='Training')
    plt.plot(tmp_val_losses, 'b', label='Validating')
    plt.xlabel('Epoch')
    plt.ylabel('Loss per epoch')
    plt.legend()
    plt.show()
  • 相关阅读:
    Java第七次作业
    JAVA第六次作业
    JAVA第五次作业
    JAVA第四次作业
    JAVA第三次作业
    JAVA第二次作业
    Java第一次作业
    2017《Java》预备作业02 计科1501刘喆
    2017《JAVA》预备作业 计科1501刘喆
    Java第十次作业--多线程
  • 原文地址:https://www.cnblogs.com/heyour/p/13466077.html
Copyright © 2011-2022 走看看