zoukankan      html  css  js  c++  java
  • Experimenrs on Foursquare

     1 import pandas as pd
     2 import numpy as np
     3 import codecs
     4 import math
     5 
     6 doc1 = codecs.open('dataset_TIST2015/dataset_TIST2015_Checkins.txt','rU','latin-1')
     7 checkins = pd.read_csv(doc1, delimiter='	')
     8 checkins.columns=['userId', 'venueId', 'timeUTC', 'timeOffset']
     9 
    10 len(checkins['userId'].unique())
    11 len(checkins['venueId'].unique())
    12 
    13 from sklearn.preprocessing import LabelEncoder
    14 from scipy.sparse import csr_matrix
    15 
    16 venueIdencoder = LabelEncoder().fit(checkins['venueId'])
    17 userIdencoder = LabelEncoder().fit(checkins['userId'])
    18 
    19 checkins['venueIdencoded'] = venueIdencoder.transform(checkins['venueId'])
    20 n_venues = len(venueIdencoder.classes_)
    21 
    22 from sklearn.cross_validation import train_test_split
    23 
    24 train_df, test_df = train_test_split(checkins, train_size = 0.8)
    25 
    26 train = csr_matrix((np.ones(train_df.shape[0]), (train_df.userId, train_df.venueIdencoded)), shape=((train_df.userId.max()+1),n_venues))
    27 
    28 test = csr_matrix((np.ones(test_df.shape[0]), (test_df.userId, test_df.venueIdencoded)), shape=((test_df.userId.max()+1),n_venues))
    29 
    30 #print(test.nnz)
    31 #print(train.nnz)
    32 
    33 #print(test.max())
    34 #print(train.max())
    35 
    36 from lightfm import LightFM
    37 from lightfm.evaluation import auc_score
    38 
    39 NUM_THREADS = 1
    40 NUM_COMPONENTS = 30
    41 NUM_EPOCHS = 1
    42 ITEM_ALPHA = math.exp(-6)
    43 
    44 model = LightFM(loss='warp',
    45                 item_alpha=ITEM_ALPHA,
    46                 no_components=NUM_COMPONENTS)
    47 
    48 model.fit(train,epochs=NUM_EPOCHS,num_threads=NUM_THREADS)
    49 
    50 
    51 train_auc = auc_score(model, train,num_threads=NUM_THREADS).mean()
    52 test_auc = auc_score(model, test,train_interactions=train,num_threads=NUM_THREADS).mean()
    53 
    54 print("Train_auc is %f" %train_auc)
    55 print("Test_aus is %f" %test_auc)

    Some problems :

    Expect to get a binary marix but no...

    Here is the code in console:

     1 train
     2 Out[6]: 
     3 <266910x3680125 sparse matrix of type '<class 'numpy.float64'>'
     4     with 12774460 stored elements in Compressed Sparse Row format>
     5 train.data.max()
     6 Out[7]: 520.0
     7 train.data.min()
     8 Out[8]: 1.0
     9 test.data.max()
    10 Out[9]: 140.0
    11 test.data.mean()
    12 Out[10]: 1.533210711390105
    13 test.data.min()
    14 Out[11]: 1.0        

     

    and Running on cluster for one night but got no results showed...

     

     

  • 相关阅读:
    Log4Net使用
    4月博文
    论坛题目练习
    职场冷笑话两则
    初识管理的一些心得
    Project中分清楚挣值项
    预留规划项
    小感触
    好事多磨,好事成双
    忧郁
  • 原文地址:https://www.cnblogs.com/fassy/p/7268682.html
Copyright © 2011-2022 走看看