zoukankan      html  css  js  c++  java
  • otto-group-product-classification-challenge(Pytorch处理多分类问题)

    参考一:《Pytorch深度学习实践》(第九集)

    参考二:Otto-Neural-Net

    注意:使用的数据来自kaggle,链接

    由于上面给出的两个参考链接,对代码的讲解都已经很详细,所以这里不再赘述,下面按自己的理解整理了代码如下:

    Imports

    import numpy as np
    import pandas as pd
    import os
    import time
    from xgboost import XGBClassifier
    import matplotlib.pyplot as plt
    import torch
    from torch.utils.data import Dataset, DataLoader
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim

    Prepare Data

    # 处理数据
    csv_path = os.path.join('train.csv')
    datas = pd.read_csv(csv_path)
    datas = datas.copy() 
    datas = datas.drop(columns='id')
    datas = datas.sample(frac=1)
    
    # 将category转化为数字
    datas.target = datas.target.astype('category').cat.codes
    # 划分数据集
    rows, _ = datas.shape  # type(datas) 为 <class 'pandas.core.frame.DataFrame'>
    train_rows = int(rows*0.7)
    train_datas = datas.iloc[:train_rows, :]
    val_datas = datas.iloc[train_rows:, :]
    
    # 得到features和targets
    train_features = train_datas.loc[:, train_datas.columns!='target'].values  # values 得到一个矩阵
    train_targets = train_datas.target.values
    
    val_features = val_datas.loc[:, val_datas.columns!='target'].values
    val_targets = val_datas.target.values
    # 封装 继承dataset
    class CustomDataset(Dataset):
        def __init__(self, features, targets):
            super(CustomDataset, self).__init__()
            
            self.features = features
            self.targets = targets
        
        def __len__(self):
            return len(self.features)
        
        def __getitem__(self, idx):
            return self.features[idx], self.targets[idx]
    
    
    # 得到Dataset类
    train_ds = CustomDataset(train_features, train_targets)
    val_ds = CustomDataset(val_features, val_targets)
    
    # 得到DataLoader
    batch_size = 64
    train_loader = DataLoader(train_ds, batch_size=batch_size)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    Device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    Design Model

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            
            self.l1 = torch.nn.Linear(93, 64)
            self.bn = torch.nn.BatchNorm1d(num_features=64)
            self.l2 = torch.nn.Linear(64, 32)
            self.dropout = torch.nn.Dropout(p=0.1)
            self.l3 = torch.nn.Linear(32, 16)
            self.l4 = torch.nn.Linear(16, 9)
        
        def forward(self, x):
            # first layer
            x = self.l1(x)
            x = self.bn(x)
            x = torch.relu(x)
            # second layer
            x = self.l2(x)
            x = self.dropout(x)
            x = torch.relu(x)
            
            x = torch.relu(self.l3(x))
            return self.l4(x)  # 注意 这里不要加relu
    
    model = Net().to(device)

    Construct Loss and Optimizer

    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    Train and Validate

    def train():
        losses = 0.0
        accs = 0.0
        for idx, data in enumerate(train_loader, 0):
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            
            # forward + backward + update
            outputs = model(inputs.float())
            loss = criterion(outputs, targets.long())
            loss.backward()
            optimizer.step()
            
            losses += loss.item()
            _, predicted = torch.max(outputs.data, dim=1)
            accs += (predicted==targets).sum().item()
            
        losses /= len(train_loader)
        accs = accs * 100 / len(train_ds)
        print('Training:  loss: %.2f    accuracy: %.2f %%' %(losses, accs))
        
        return losses, accs
    def val():
        losses = 0.0
        accs = 0.0
        with torch.no_grad():
            for data in val_loader:
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs.float())
                losses += criterion(outputs, labels.long()).item()
                _, predicted = torch.max(outputs.data, dim=1)
                accs += (predicted == labels).sum().item()
        losses /= len(val_loader)
        accs = accs * 100 / len(val_ds)
        print('Validating:   loss: %.2f    accuracy: %.2f %%' %(losses, accs))
        
        return losses, accs
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    maxAcc = 0  # 记录最高准确率
    
    for epoch in range(100):
        print('[epoch: %d]' % (epoch+1))
        
        losses, accs = train()
        train_losses.append(losses)
        train_accs.append(accs)
        
        losses, accs = val()
        val_losses.append(losses)
        val_accs.append(accs)
        
        if maxAcc < accs:
            maxAcc = accs
        
        check = np.greater(maxAcc, val_accs[-5:])
        if (check.all()==True)  and (epoch>20):
            print('Convergence met!
    ')
            break
    
    print('Maximum validation accuracy: %.2f %%' % (maxAcc))

    Plot Losses

    # 这里采用了指数平滑
    import math
    import matplotlib.pyplot as plt
    
    tmp_train_losses = [math.e**i for i in train_losses]
    tmp_val_losses = [math.e**i for i in val_losses]
    plt.figure(figsize=(10, 5))
    plt.plot(tmp_train_losses, 'r', label='Training')
    plt.plot(tmp_val_losses, 'b', label='Validating')
    plt.xlabel('Epoch')
    plt.ylabel('Loss per epoch')
    plt.legend()
    plt.show()
  • 相关阅读:
    ArcGIS for JavaScript继承TiledMapServiceLayer来实现“动态切图”
    Commvault Oracle备份常用命令
    Commvault逻辑架构及组件说明
    数据保护平台如何为新一代应用,人工智能和数据科学提供动力
    SQL Server SSL/TLS 加密
    curl传递大json文件报错解决方法
    MySQL5.5 安装mcafee mysql-audit插件 不成功
    CentOS 6.5安装配置NFS服务器
    利用dump函数理解oracle如何存储各种类型数据
    xfs文件系统挂载大容量磁盘
  • 原文地址:https://www.cnblogs.com/heyour/p/13466077.html
Copyright © 2011-2022 走看看