zoukankan      html  css  js  c++  java
  • 代码片段

    shell:

    1,处理locationProbe的linux shell脚本:

    # Keep columns 3, 20 and 23 of the comma-separated probe export, drop the
    # first line (presumably the CSV header), then sort numerically and
    # de-duplicate the result into location_dealed.csv.
    # NOTE(review): combined with -n, -u treats lines whose leading number
    # compares equal as duplicates even if the other columns differ -- confirm
    # that this is the intended de-duplication.
    cut -d ',' -f 3,20,23 LocationProbe.csv | sed '1d' | sort -u -n > location_dealed.csv

     2,将时间戳转换为可读时间的方法:

    其中 @ 后面跟的是要转换的 Unix 时间戳(自 1970-01-01 00:00:00 UTC 起的秒数)。

    # GNU date syntax: "@N" makes -d read N as seconds since the Unix epoch; the
    # format string prints it as YYYY/MM/DD HH:MM:SS in the local time zone.
    date -d "@1385991753" "+%Y/%m/%d %H:%M:%S"

     shell:

    sort

    uniq

    cut

    grep

    sed

    awk

    wc

    python:

     import sys
    import codecs
    import pickle
    #from operator import itemgetter
    #from random import shuffle
    from exceptions import ValueError
    from numpy.random import shuffle
    
    from recsys.algorithm import VERBOSE
    
    class Data:
        """
        Handles the relationships among users and items.

        The dataset is kept as a plain list of ``(value, row_id, col_id)``
        tuples, e.g. ``(25, "ocelma", "u2")`` for "ocelma has played u2 25 times".
        """

        # Text type of the running interpreter (``unicode`` on Python 2, ``str``
        # on Python 3), so the class runs unchanged on both.
        _TEXT_TYPE = type(u'')
        # Everything that counts as "a string" for validation purposes
        # (``bytes`` is an alias of ``str`` on Python 2).
        _STRING_TYPES = (_TEXT_TYPE, bytes)

        def __init__(self):
            # List of (value, row_id, col_id) tuples.
            self._data = []

        def __repr__(self):
            rows = self.get()
            s = '%d rows.' % len(rows)
            if rows:
                # str() keeps the sample tuple from being expanded as %-arguments.
                s += '\nE.g: %s' % str(rows[0])
            return s

        def __len__(self):
            return len(self.get())

        def __getitem__(self, i):
            """
            :returns: the i-th tuple, or None when ``i`` is past the end
                (no IndexError is raised)
            """
            if i < len(self._data):
                return self._data[i]
            return None

        def __iter__(self):
            return iter(self.get())

        def set(self, data, extend=False):
            """
            Sets data to the dataset

            :param data: a list of tuples
            :type data: list
            :param extend: append ``data`` to the current content instead of replacing it
            :type extend: Boolean
            """
            if extend:
                self._data.extend(data)
            else:
                self._data = data

        def get(self):
            """
            :returns: a list of tuples
            """
            return self._data

        def add_tuple(self, tuple):
            """
            Validates and appends one tuple to the dataset

            :param tuple: a tuple containing <rating, user, item> information (e.g.  <value, row, col>)
            :raises ValueError: if the tuple does not have 3 fields, the value is
                empty or a string, or the row/col id is None or ''
            """
            #E.g: tuple = (25, "ocelma", "u2") -> "ocelma has played u2 25 times"
            if len(tuple) != 3:
                raise ValueError('Tuple format not correct (should be: <value, row_id, col_id>)')
            value, row_id, col_id = tuple
            # 0 is a legitimate value; only None / '' / other empties are rejected.
            if not value and value != 0:
                raise ValueError('Value is empty %s' % (tuple,))
            if isinstance(value, self._STRING_TYPES):
                raise ValueError('Value %s is a string (must be an int or float) %s' % (value, tuple,))
            if row_id is None or row_id == '':
                raise ValueError('Row id is empty %s' % (tuple,))
            if col_id is None or col_id == '':
                raise ValueError('Col id is empty %s' % (tuple,))
            self._data.append(tuple)

        def split_train_test(self, percent=80, shuffle_data=True):
            """
            Splits the data in two disjunct datasets: train and test

            :param percent: % of training set to be used (test set size = 100-percent)
            :type percent: int
            :param shuffle_data: shuffle dataset? (the stored list is shuffled in place)
            :type shuffle_data: Boolean

            :returns: a tuple <Data, Data>
            """
            if shuffle_data:
                shuffle(self._data)
            length = len(self._data)
            # A single cut point keeps both parts disjoint and exhaustive. Slicing
            # the test set from the end with a negative index returned the whole
            # dataset for percent=100 ([-0:]) and could overlap with the training
            # set when both sizes were rounded independently.
            cut = int(round(length * percent / 100.0))
            cut = max(0, min(length, cut))
            train = Data()
            train.set(self._data[:cut])
            test = Data()
            test.set(self._data[cut:])

            return train, test

        def load(self, path, force=True, sep='\t', format=None, pickle=False):
            """
            Loads data from a file

            Lines that cannot be parsed (wrong number of fields, ids that are not
            int when int ids are requested, values that are not numbers) are
            reported and skipped.

            :param path: filename
            :type path: string
            :param force: Cleans already added data
            :type force: Boolean
            :param sep: Separator among the fields of the file content
            :type sep: string
            :param format: Format of the file content.
                Default format is 'value': 0 (first field), then 'row': 1, and 'col': 2.
                E.g: format={'row':0, 'col':1, 'value':2}. The row is in position 0,
                then there is the column value, and finally the rating.
                So, it resembles to a matrix in plain format
            :type format: dict()
            :param pickle: is input file in  pickle format?
            :type pickle: Boolean
            """
            # NOTE: the ``pickle`` and ``format`` parameters shadow the module and
            # the builtin inside this method; their names are part of the public
            # interface and therefore kept.
            if VERBOSE:
                sys.stdout.write('Loading %s\n' % path)
            if force:
                self._data = []
            if pickle:
                self._load_pickle(path)
                return

            i = 0
            # The context manager closes the file even if a line raises.
            with codecs.open(path, 'r', 'utf8') as handle:
                for line in handle:
                    # Strip the line terminator (CR and/or LF) before splitting.
                    data = line.strip('\r\n').split(sep)
                    if not data:
                        raise TypeError('Data is empty or None!')
                    fields = self._parse_fields(data, format)
                    if fields is None:
                        continue  # Malformed line, already reported
                    value, row_id, col_id = fields
                    # Try to convert ids to int
                    try:
                        row_id = int(row_id)
                    except (TypeError, ValueError):
                        pass
                    try:
                        col_id = int(col_id)
                    except (TypeError, ValueError):
                        pass
                    # Add tuple
                    try:
                        self.add_tuple((float(value), row_id, col_id))
                    except (TypeError, ValueError):
                        if VERBOSE:
                            sys.stdout.write('\nError while reading (%s, %s, %s). Skipping this tuple\n' % (value, row_id, col_id))
                    i += 1
                    if VERBOSE:
                        # Progress marks: '.' every 100K lines, '|' every 1M,
                        # and a running total every 10M.
                        if i % 100000 == 0:
                            sys.stdout.write('.')
                        if i % 1000000 == 0:
                            sys.stdout.write('|')
                        if i % 10000000 == 0:
                            sys.stdout.write(' (%d M)\n' % (i // 1000000))
            if VERBOSE:
                sys.stdout.write('\n')

        def _parse_fields(self, data, format):
            """
            Extracts <value, row_id, col_id> from the fields of one input line

            :param data: the fields of the line
            :type data: list
            :param format: positions of 'value', 'row' and 'col' (see load()), or None
            :type format: dict()

            :returns: a tuple <value, row_id, col_id>, or None if the line has to be skipped
            """
            if not format:
                if len(data) == 3:
                    value, row_id, col_id = data
                    return value, row_id, col_id
                if len(data) == 2:
                    # Default value is 1
                    row_id, col_id = data
                    return 1, row_id, col_id
                # E.g. a blank line: skip it instead of aborting the whole load
                # with an unpacking error.
                sys.stdout.write('Error while reading: %s\n' % data)
                return None

            try:
                # Default value is 1
                try:
                    value = data[format['value']]
                except (KeyError, ValueError):
                    value = 1
                try:
                    row_id = data[format['row']]
                except KeyError:
                    row_id = data[1]
                try:
                    col_id = data[format['col']]
                except KeyError:
                    col_id = data[2]
            except IndexError:
                sys.stdout.write('Error while reading: %s\n' % data)  # Just ignore that line
                return None
            row_id = row_id.strip()
            col_id = col_id.strip()
            if format.get('ids') in (int, 'int'):
                # Int ids were requested explicitly: a non-int id invalidates the line.
                try:
                    row_id = int(row_id)
                    col_id = int(col_id)
                except ValueError:
                    sys.stdout.write('Error (ID is not int) while reading: %s\n' % data)  # Just ignore that line
                    return None
            return value, row_id, col_id

        def _load_pickle(self, path):
            """
            Loads data from a pickle file

            :param path: input filename
            :type path: string
            """
            # SECURITY: unpickling executes code chosen by whoever wrote the file;
            # only load pickles from trusted sources.
            # Pickle streams are binary, so the file is opened in 'rb' mode.
            with open(path, 'rb') as handle:
                self._data = pickle.load(handle)

        def save(self, path, pickle=False):
            """
            Saves data in output file

            :param path: output filename
            :type path: string
            :param pickle: save in pickle format?
            :type pickle: Boolean
            """
            if VERBOSE:
                sys.stdout.write('Saving data to %s\n' % path)
            if pickle:
                self._save_pickle(path)
                return
            # One tab-separated <value, row_id, col_id> line per tuple, UTF-8 encoded.
            with codecs.open(path, 'w', 'utf8') as out:
                for value, row_id, col_id in self._data:
                    fields = [self._to_text(field) for field in (value, row_id, col_id)]
                    out.write(u'\t'.join(fields) + u'\n')

        def _to_text(self, field):
            """
            Converts one field of a tuple to a string that can be written by save()

            :returns: the field itself if it is already text, its UTF-8 decoding if
                it is a byte string, str(field) otherwise
            """
            if isinstance(field, self._TEXT_TYPE):
                return field
            if isinstance(field, bytes):
                try:
                    return field.decode('utf8')
                except UnicodeDecodeError:
                    pass  # Not UTF-8: fall back to str() below
            return str(field)

        def _save_pickle(self, path):
            """
            Saves data in output file, using pickle format

            :param path: output filename
            :type path: string
            """
            # Binary mode is required for pickle streams; the context manager
            # makes sure the data is flushed and the file closed.
            with open(path, 'wb') as handle:
                pickle.dump(self._data, handle)
    datamodel
  • 相关阅读:
    LeetCode:33. Search in Rotated Sorted Array
    重拾 ACM-HDU 2000-2009
    hdu 1022 数据结构 stack模拟
    画椭圆
    声控灯
    VC++调用R语言
    Setup Factory打包时注册com dll
    折腾树莓派的一些总结
    老调重弹
    制作cpprefernce.chm
  • 原文地址:https://www.cnblogs.com/bobodeboke/p/3454732.html
Copyright © 2011-2022 走看看