建立4个txt文件记录数据:james.txt、julie.txt、mikey.txt、sarah.txt。每个文件只读取第一行,该行是以逗号分隔的成绩,分钟与秒之间可以用"-"、":"或"."分隔(例如 2-34,3:21,2.34)。
# Load each athlete's raw times.  Only the first line of every file is
# read; it is trimmed and split on commas into a list of strings.
with open('james.txt') as james_file:
    first_line = james_file.readline()
james = first_line.strip().split(',')

with open('julie.txt') as julie_file:
    first_line = julie_file.readline()
julie = first_line.strip().split(',')

with open('mikey.txt') as mikey_file:
    first_line = mikey_file.readline()
mikey = first_line.strip().split(',')

with open('sarah.txt') as sarah_file:
    first_line = sarah_file.readline()
sarah = first_line.strip().split(',')
def sanitize(time_string):
    """Return *time_string* normalised to the 'mins.secs' form.

    A time written as 'mins-secs' or 'mins:secs' is rewritten with a dot
    between the two parts.  A string containing neither '-' nor ':' is
    returned as is.  Leading and trailing whitespace is always removed, so
    that ' 3:21' and '3:21' clean up to the same value and therefore sort
    and de-duplicate together.

    Raises:
        ValueError: if the chosen separator occurs more than once, because
            the split then does not yield exactly two parts.
    """
    time_string = time_string.strip()
    # Use the "in" operator to find which separator is present; '-' is
    # checked first, so it wins when both characters occur.
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        # Nothing to clean up.
        return time_string
    # Break the string into its minutes and seconds parts.
    mins, secs = time_string.split(splitter)
    return mins + '.' + secs
# Normalise every raw time with sanitize() and sort ascending, so the
# fastest times come first; the cleaned lists replace the raw ones.
# NOTE: the values stay strings, so the ordering is lexicographic -- a time
# of ten minutes or more (e.g. '10.05') would sort before '2.01'.
james = sorted(sanitize(t) for t in james)
julie = sorted(sanitize(t) for t in julie)
mikey = sorted(sanitize(t) for t in mikey)
sarah = sorted(sanitize(t) for t in sarah)

# Drop duplicate times.  sorted(set(...)) produces the same ordered,
# duplicate-free list as appending each unseen item one by one, without
# repeating a linear "not in" scan for every element.
unique_james = sorted(set(james))
unique_julie = sorted(set(julie))
unique_mikey = sorted(set(mikey))
unique_sarah = sorted(set(sarah))

# Show each athlete's three fastest times: the slice [0:3] covers indexes
# 0, 1 and 2 (index 3 itself is excluded).
print(unique_james[0:3])
print(unique_julie[0:3])
print(unique_mikey[0:3])
print(unique_sarah[0:3])
输出
['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
数据理解
# Author kevin_hou
def get_coach_data(filename):
    """Read the first line of *filename* as a list of comma-separated fields.

    Only the first line is used; it is stripped of surrounding whitespace
    and split on ','.  A missing or blank first line gives an empty list.

    Returns:
        The list of field strings, or None when the file cannot be opened
        or read (the error is printed, not raised), so callers must check
        for None before iterating.
    """
    # Keep the try body down to the I/O that can actually raise IOError.
    try:
        with open(filename) as f:
            first_line = f.readline()
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None
    stripped = first_line.strip()
    if not stripped:
        # ''.split(',') would give [''], i.e. one bogus empty field.
        return []
    return stripped.split(',')
# sarah = get_coach_data('sarah.txt')
def sanitize(time_string):
    """Return *time_string* normalised to the 'mins.secs' form.

    'mins-secs' and 'mins:secs' are rewritten with a dot between the two
    parts; a string containing neither '-' nor ':' is returned as is.
    Surrounding whitespace is always removed, so that ' 3:21' and '3:21'
    clean up to the same value and sort and de-duplicate together.

    Raises:
        ValueError: if the chosen separator occurs more than once, because
            the split then does not yield exactly two parts.
    """
    cleaned = time_string.strip()
    # '-' is tried before ':', so it wins when both characters occur.
    for splitter in ('-', ':'):
        if splitter in cleaned:
            mins, secs = cleaned.split(splitter)
            return mins + '.' + secs
    # No separator to replace: nothing to clean up.
    return cleaned
# Load each athlete's times.  get_coach_data() replaces the repeated
# open/readline/split stanza; for james the call is equivalent to:
#     with open('james.txt') as jaf:
#         data = jaf.readline()
#     james = data.strip().split(',')
# (and likewise for julie, mikey and sarah).
james = get_coach_data('james.txt')
julie = get_coach_data('julie.txt')
mikey = get_coach_data('mikey.txt')
sarah = get_coach_data('sarah.txt')

# Print the three fastest distinct times per athlete: the set removes
# duplicates, sorted() orders them ascending and [0:3] keeps indexes 0-2.
for times in (james, julie, mikey, sarah):
    # get_coach_data() gives None after reporting an unreadable file; skip
    # that athlete instead of failing with a TypeError on iteration.
    if times is None:
        continue
    print(sorted({sanitize(t) for t in times})[0:3])
输出
'''
['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']
'''