zoukankan      html  css  js  c++  java
  • 数据处理

    # Author kevin_hou
    
    # Read only the first line of each file, strip surrounding whitespace,
    # and split it on commas into a list of raw time strings.
    with open('james.txt') as james_file:
        james = james_file.readline().strip().split(',')
    with open('julie.txt') as julie_file:
        julie = julie_file.readline().strip().split(',')
    with open('mikey.txt') as mikey_file:
        mikey = mikey_file.readline().strip().split(',')
    with open('sarah.txt') as sarah_file:
        sarah = sarah_file.readline().strip().split(',')
    
    # print(james)
    # print(julie)
    # print(mikey)
    # print(sarah)
    
    '''
    ['2:34', '3:21', '2:34', '2.45', '3.01', '2:01', '2:01', '3:10', '2:22']
    ['2.59', '2.11', '2:11', '2:23', '3:10', '2:23', '3:10', '3:21', '3-21']
    ['2:22', '3.01', '3:01', '3.02', '3:02', '3.02', '3:22', '2.49', '2:38']
    ['2:58', '2.58', '2:39', '2-25', '2-25', '2:54', '2.18', '2:55', '2:55']
    '''
    
    # data = [1,9,4,2,6,7,0]
    # print(data) #[1, 9, 4, 2, 6, 7, 0]
    
    # data.sort()  # in-place sort: data itself becomes [0, 1, 2, 4, 6, 7, 9]
    # print(data)
    
    # data2 = sorted(data)
    # print(data)   # sorted() sorts a copy, so the original is unchanged: [1, 9, 4, 2, 6, 7, 0]
    # print(data2)  # the sorted copy: [0, 1, 2, 4, 6, 7, 9]
    
    
    def sanitize(time_string):
        """Return time_string with its '-' or ':' separator replaced by '.'.

        The separators are tried in the order '-' then ':'; the first one
        found in the string is used to split it into a minutes part and a
        seconds part, which are re-joined with a dot. A string containing
        neither separator needs no cleaning and is returned unchanged.

        Raises:
            ValueError: if splitting on the chosen separator does not yield
                exactly two parts (e.g. '1:2:3').
        """
        for separator in ('-', ':'):
            if separator in time_string:
                # Unpacking enforces exactly one occurrence of the separator.
                minutes, seconds = time_string.split(separator)
                return minutes + '.' + seconds
        return time_string
    
    
    # Build four new lists of cleaned times: map() applies sanitize() to every
    # item of the source list and list() collects the results.
    clean_james = list(map(sanitize, james))
    clean_julie = list(map(sanitize, julie))
    clean_mikey = list(map(sanitize, mikey))
    clean_sarah = list(map(sanitize, sarah))


    # Print each cleaned list in ascending order. sorted() returns a sorted
    # copy, so the clean_* lists keep their original ordering.
    for cleaned_times in (clean_james, clean_julie, clean_mikey, clean_sarah):
        print(sorted(cleaned_times))
    
    '''
    ['2.01', '2.01', '2.22', '2.34', '2.34', '2.45', '3.01', '3.10', '3.21']
    ['2.11', '2.11', '2.23', '2.23', '2.59', '3.10', '3.10', '3.21', '3.21']
    ['2.22', '2.38', '2.49', '3.01', '3.01', '3.02', '3.02', '3.02', '3.22']
    ['2.18', '2.25', '2.25', '2.39', '2.54', '2.55', '2.55', '2.58', '2.58']
    '''
    # By default, both the sort() method and the sorted() BIF sort data in ascending order.
    # To sort in descending order, pass reverse=True to sort() or sorted(); Python takes care of the rest.

    # List comprehension: apply sanitize() to every item of mikey and collect
    # the results in a new list.
    clean_mikey = [sanitize(raw_time) for raw_time in mikey]
    
    # Minutes to seconds.
    mins = [1, 2, 3]
    secs = [minute * 60 for minute in mins]
    print(secs)  # [60, 120, 180]

    # Meters to feet.
    meters = [1, 10, 3]
    feet = [meter * 3.281 for meter in meters]
    print(feet)  # [3.281, 32.81, 9.843]

    # Upper-case every string in a list.
    lower = ["I", "don't", "like", "span"]
    upper = [word.upper() for word in lower]
    print(upper)  # ['I', "DON'T", 'LIKE', 'SPAN']
    
    # Clean a list of differently formatted time strings.
    dirty = ['2-22', '2:22', '2.22']
    clean = [sanitize(t) for t in dirty]
    print(clean)    # ['2.22', '2.22', '2.22']

    # Convert the cleaned strings to floats.
    clean = [float(s) for s in clean]
    print(clean)    # [2.22, 2.22, 2.22]

    # Sanitize and convert in a single comprehension. float() must wrap only
    # the sanitize() call: wrapping the whole "... for t in ..." clause would
    # hand float() a generator object and raise TypeError.
    clean = [float(sanitize(t)) for t in ['2-22', '3:33', '4.44']]
    print(clean)    # [2.22, 3.33, 4.44]
    

      


      

  • 相关阅读:
    NFC读写电子便签总结
    对字符串md5加密
    把ArrayList集合中的字符串内容写到文本文件中
    【原创】关于jquery实现格式化时间
    jQuery插件之ajaxFileUpload
    jxl读取excel实现导入excel写入数据库
    jxl写入excel实现数据导出功能
    多个Jar包的合并操作
    基于git的源代码管理模型——git flow
    Google Gson 使用简介
  • 原文地址:https://www.cnblogs.com/kevin-hou1991/p/13636200.html
Copyright © 2011-2022 走看看