zoukankan      html  css  js  c++  java
  • otto-group-product-classification-challenge(Pytorch处理多分类问题)

    参考一:《Pytorch深度学习实践》(第九集)

    参考二:Otto-Neural-Net

    注意:使用的数据来自 Kaggle 的 Otto Group Product Classification Challenge 比赛数据集(train.csv),可在该比赛页面下载。

    由于上面给出的两个参考链接,对代码的讲解都已经很详细,所以这里不再赘述,下面按自己的理解整理了代码如下:

    Imports

    import numpy as np
    import pandas as pd
    import os
    import time
    from xgboost import XGBClassifier
    import matplotlib.pyplot as plt
    import torch
    from torch.utils.data import Dataset, DataLoader
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim

    Prepare Data

    # Load the Kaggle Otto training data and split it into train/val
    # feature and target arrays.
    frame = pd.read_csv(os.path.join('train.csv'))
    frame = frame.copy()
    frame = frame.drop(columns='id')
    # sample(frac=1) draws every row in random order, i.e. a full shuffle.
    frame = frame.sample(frac=1)

    # Encode the categorical target labels as integer class codes.
    frame.target = frame.target.astype('category').cat.codes

    # 70/30 train/validation split over the shuffled rows.
    n_rows = frame.shape[0]
    split = int(n_rows * 0.7)
    train_datas = frame.iloc[:split, :]
    val_datas = frame.iloc[split:, :]

    # Separate features (every column except 'target') from targets;
    # .values yields plain numpy arrays.
    train_features = train_datas.loc[:, train_datas.columns != 'target'].values
    train_targets = train_datas.target.values

    val_features = val_datas.loc[:, val_datas.columns != 'target'].values
    val_targets = val_datas.target.values
    # 封装 继承dataset
    class CustomDataset(Dataset):
        """Minimal Dataset over two parallel arrays of features and targets."""

        def __init__(self, features, targets):
            super().__init__()
            # Keep references to the arrays; indexing is delegated to them.
            self.features = features
            self.targets = targets

        def __len__(self):
            # One sample per feature row.
            return len(self.features)

        def __getitem__(self, idx):
            # (feature_row, target) pair for sample idx.
            return self.features[idx], self.targets[idx]
    
    
    # Wrap the numpy splits in Dataset objects.
    train_ds = CustomDataset(train_features, train_targets)
    val_ds = CustomDataset(val_features, val_targets)

    # DataLoaders.
    # BUG FIX: the training loader now reshuffles every epoch; without
    # shuffle=True each epoch iterates the exact same batch order (the one
    # pre-training shuffle via sample(frac=1) is not repeated per epoch).
    # Validation order is irrelevant, so it stays unshuffled.
    batch_size = 64
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    Device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    Design Model

    class Net(torch.nn.Module):
        """MLP classifier: 93 input features -> 9 class logits.

        Layers: 93 -> 64 (BatchNorm + ReLU) -> 32 (Dropout + ReLU)
        -> 16 (ReLU) -> 9.  The output layer returns raw logits with no
        activation, because CrossEntropyLoss applies log-softmax itself.
        """

        def __init__(self):
            super(Net, self).__init__()
            self.l1 = torch.nn.Linear(93, 64)
            self.bn = torch.nn.BatchNorm1d(num_features=64)
            self.l2 = torch.nn.Linear(64, 32)
            self.dropout = torch.nn.Dropout(p=0.1)
            self.l3 = torch.nn.Linear(32, 16)
            self.l4 = torch.nn.Linear(16, 9)

        def forward(self, x):
            # First layer: linear -> batch-norm -> ReLU.
            h = torch.relu(self.bn(self.l1(x)))
            # Second layer: linear -> dropout -> ReLU.
            h = torch.relu(self.dropout(self.l2(h)))
            # Third layer: linear -> ReLU.
            h = torch.relu(self.l3(h))
            # Output layer: raw logits (no activation on purpose).
            return self.l4(h)
    
    model = Net().to(device)

    Construct Loss and Optimizer

    # Loss: cross-entropy over the 9 class logits (expects raw logits).
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.to(device)
    # Optimizer: plain SGD with momentum over all trainable parameters.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    Train and Validate

    def train():
        """Run one training epoch over train_loader.

        Returns:
            (mean batch loss, accuracy in percent) for the epoch.
        """
        # BUG FIX: explicitly enter training mode so Dropout is active and
        # BatchNorm updates its running statistics. Without this, the first
        # call to val() would leave the model in eval mode for all later
        # training epochs.
        model.train()
        losses = 0.0
        accs = 0.0
        for idx, data in enumerate(train_loader, 0):
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()

            # forward + backward + update
            outputs = model(inputs.float())
            loss = criterion(outputs, targets.long())
            loss.backward()
            optimizer.step()

            losses += loss.item()
            # Predicted class = index of the largest of the 9 logits.
            _, predicted = torch.max(outputs.data, dim=1)
            accs += (predicted == targets).sum().item()

        losses /= len(train_loader)  # mean loss per batch
        accs = accs * 100 / len(train_ds)  # percent correct over the split
        print('Training:  loss: %.2f    accuracy: %.2f %%' %(losses, accs))

        return losses, accs
    def val():
        """Evaluate the model on val_loader.

        Returns:
            (mean batch loss, accuracy in percent) on the validation split.
        """
        # BUG FIX: switch to eval mode so Dropout is disabled and BatchNorm
        # uses its running statistics; otherwise validation metrics are
        # noisy and depend on the validation batch composition.
        model.eval()
        losses = 0.0
        accs = 0.0
        with torch.no_grad():  # no gradients needed for evaluation
            for data in val_loader:
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs.float())
                losses += criterion(outputs, labels.long()).item()
                _, predicted = torch.max(outputs.data, dim=1)
                accs += (predicted == labels).sum().item()
        losses /= len(val_loader)
        accs = accs * 100 / len(val_ds)
        print('Validating:   loss: %.2f    accuracy: %.2f %%' %(losses, accs))

        return losses, accs
    # Per-epoch metric history (used later for plotting) and the best
    # validation accuracy observed so far.
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    maxAcc = 0  # best validation accuracy so far

    for epoch in range(100):
        print('[epoch: %d]' % (epoch+1))

        losses, accs = train()
        train_losses.append(losses)
        train_accs.append(accs)

        losses, accs = val()
        val_losses.append(losses)
        val_accs.append(accs)

        if maxAcc < accs:
            maxAcc = accs

        # Early stopping: after epoch 20, stop once the best accuracy is
        # strictly above every one of the last 5 validation accuracies —
        # i.e. validation has not set a new best for 5 epochs.
        check = np.greater(maxAcc, val_accs[-5:])
        if check.all() and (epoch > 20):
            # BUG FIX: the string literal was broken across a raw newline
            # (a SyntaxError in Python); use an escaped '\n' instead.
            print('Convergence met!\n')
            break

    print('Maximum validation accuracy: %.2f %%' % (maxAcc))

    Plot Losses

    # Plot the two loss curves. Each loss value x is mapped to e**x before
    # plotting, which exaggerates small differences between the curves
    # (note: this is exponentiation, not smoothing).
    import math
    import matplotlib.pyplot as plt

    tmp_train_losses = [math.e ** loss for loss in train_losses]
    tmp_val_losses = [math.e ** loss for loss in val_losses]
    plt.figure(figsize=(10, 5))
    plt.plot(tmp_train_losses, 'r', label='Training')
    plt.plot(tmp_val_losses, 'b', label='Validating')
    plt.xlabel('Epoch')
    plt.ylabel('Loss per epoch')
    plt.legend()
    plt.show()
  • 相关阅读:
    工资是用来支付给责任的,责任越大,工资越高。 涨工资,是因为承担了更大的责任。
    水平分库分表的关键问题及解决思路
    APP多版本共存,服务端如何兼容?
    ListView动态加载数据分页(使用Handler+线程和AsyncTask两种方法)
    Java 并发专题 :闭锁 CountDownLatch 之一家人一起吃个饭
    Java进阶 创建和销毁对象
    Java OCR tesseract 图像智能字符识别技术
    网页信息抓取进阶 支持Js生成数据 Jsoup的不足之处
    从原理角度解析Android (Java) http 文件上传
    android pop3与imap方式接收邮件(javamail)
  • 原文地址:https://www.cnblogs.com/heyour/p/13466077.html
Copyright © 2011-2022 走看看