zoukankan      html  css  js  c++  java
  • 数据处理02

    建立4个txt记录数据,james.txt/julie.txt/mikey.txt/sarah.txt
    # For each athlete, read only the first line of the data file, trim the
    # surrounding whitespace and split it on commas into a list of strings.
    with open('james.txt') as james_file:
        james = james_file.readline().strip().split(',')
    with open('julie.txt') as julie_file:
        julie = julie_file.readline().strip().split(',')
    with open('mikey.txt') as mikey_file:
        mikey = mikey_file.readline().strip().split(',')
    with open('sarah.txt') as sarah_file:
        sarah = sarah_file.readline().strip().split(',')
    
    
    def sanitize(time_string):
        """Return time_string with its '-' or ':' separator replaced by '.'.

        A dash is looked for first, then a colon. The string is split on the
        first kind of separator found and the two parts are rejoined with a
        dot. A string containing neither separator is returned unchanged.
        """
        for splitter in ('-', ':'):
            if splitter in time_string:
                # Exactly two parts are expected: minutes and seconds.
                mins, secs = time_string.split(splitter)
                return mins + '.' + secs
        # Nothing to clean up.
        return time_string
    
    # Normalise every time to "mins.secs" form and rebind each athlete's name
    # to the sorted result. NOTE: the values are strings, so the ordering is
    # lexicographic rather than numeric.
    james = sorted(sanitize(t) for t in james)
    julie = sorted(sanitize(t) for t in julie)
    mikey = sorted(sanitize(t) for t in mikey)
    sarah = sorted(sanitize(t) for t in sarah)

    # Each list is already sorted, so sorted(set(...)) drops the duplicates and
    # yields the same ascending order as an append-if-missing loop would, without
    # the repeated linear membership scans.
    unique_james = sorted(set(james))
    print(unique_james[0:3])  # first three entries: indices 0, 1, 2 (index 3 excluded)

    unique_julie = sorted(set(julie))
    print(unique_julie[0:3])

    unique_mikey = sorted(set(mikey))
    print(unique_mikey[0:3])

    unique_sarah = sorted(set(sarah))
    print(unique_sarah[0:3])

    输出

    ['2.01', '2.22', '2.34']
    ['2.11', '2.23', '2.59']
    ['2.22', '2.38', '2.49']
    ['2.18', '2.25', '2.39']

    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------

     数据理解

    # Author kevin_hou
    def get_coach_data(filename):
        """Read the first line of filename and return it split on commas.

        The line is stripped of surrounding whitespace before splitting. If the
        file cannot be opened or read (IOError), a 'File error:' message is
        printed and None is returned.
        """
        try:
            with open(filename) as source:
                first_line = source.readline()
        except IOError as ioerr:
            print('File error:' + str(ioerr))
            return None
        return first_line.strip().split(',')
    # sarah = get_coach_data('sarah.txt')
    def sanitize(time_string):
        """Normalise a 'mins-secs' or 'mins:secs' string to 'mins.secs'.

        A dash takes precedence over a colon when choosing the separator.
        Strings that contain neither are handed back untouched.
        """
        separator = (
            '-' if '-' in time_string
            else ':' if ':' in time_string
            else None
        )
        if separator is None:
            return time_string
        # The split must produce exactly a minutes part and a seconds part.
        minutes, seconds = time_string.split(separator)
        return '.'.join((minutes, seconds))
    
    # Load each athlete's times, in the same order as the names on the left.
    james, julie, mikey, sarah = map(
        get_coach_data,
        ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt'),
    )
    
    # with open('james.txt') as jaf:  #等价于 james = get_coach_data('james.txt')
    #     data = jaf.readline()
    # james = data.strip().split(',')
    # with open('julie.txt') as juf:
    #     data = juf.readline()
    # julie = data.strip().split(',')
    # with open('mikey.txt') as mif:
    #     data = mif.readline()
    # mikey = data.strip().split(',')
    # with open('sarah.txt') as saf:
    #     data = saf.readline()
    # sarah = data.strip().split(',')
    
    # Print the three smallest distinct sanitized times per athlete (the values
    # are strings, so the order is lexicographic). get_coach_data() returns None
    # when a file cannot be read -- it has already printed the error -- so that
    # athlete is skipped instead of failing with a TypeError on iteration.
    for times in (james, julie, mikey, sarah):
        if times is None:
            continue
        print(sorted({sanitize(t) for t in times})[0:3])
    
    输出

    ['2.01', '2.22', '2.34']
    ['2.11', '2.23', '2.59']
    ['2.22', '2.38', '2.49']
    ['2.18', '2.25', '2.39']

      

      

  • 相关阅读:
    软件需求与分析课堂讨论
    Axios
    lodash
    table行拖拽
    js addDays ,addYears
    所在周的第一天
    Inner join case when
    npm 淘宝镜像安装以及安装报错window_nt 6.1.7601 解决
    jsTree
    Bootstrap Multiselect
  • 原文地址:https://www.cnblogs.com/kevin-hou1991/p/13641207.html
Copyright © 2011-2022 走看看