练习一
要求:使用csv模块,从给定的csv文件中获取name和data
name的值在表头的第二列,data就是表数据
import csv import os def parse_file(datafile): name='' data = [] #以读的模式打开csv文件 with open(datafile,'rb') as f: #将文件读进缓存中 csv_data = csv.reader(f) #使用next方法获默认获取表头的标题,[1]表示获取第2个单元格的值 name = csv_data.next()[1] #跳过表头,下移一行来获取表格的内容 csv_data.next() #循环数据,将每一行添加到data中 for line in csv_data: data.append(line) return (name,data)
测试函数
def test():
#如果执行过程中不报错,即为正确 datafile = os.path.join(DATADIR,DATAFILE) name,data = parse_file(datafile) assert name == "MOUNTAIN VIEW MOFFETT FLD NAS" assert data[0][1] == "01:00" assert data[2][0] == "01/01/2005" assert data[2][5] == "2" if __name__ == '__main__': test()
2.Excel至csv
要求:从excel中将COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST的最大的负载量和所对应的时间取出,
然后把这些值存到新的excel表中
import xlrd import csv import os #源文件 datafile = '2013_ERCOT_Hourly_Load_Data.xls' #目标文件 outfile = '2013_Max_loads.xls' def parse_file(datafile): #打开excel文件 workbook = xlrd.open_workbook(datafile) #获取sheet页 sheet = workbook.sheet_by_index(0) data = {}#存放结果集 #循环需要获取的列名的下标,一共是8个 for i in range(1,9): #获取列的名称,作为字典的key,第一个值是row,第二个值是column station = sheet.cell_value(0,i) #获取[1,8]的列值 cv = sheet.col_values(i,start_rowx=1,end_rowx=None) #获取该列的最大值 maxval = max(cv) #获取最大值的索引,因为索引从0开始计数,所以要+1 maxpos = cv.index(maxval)+1 #获取最大值所对应的时间 maxtime = sheet.cell_value(maxpos,0) #将excel的时间转为python中的时间 realtime = xlrd.xldate_as_tuple(maxtime,0) #将数据封装成字典,key是列头的名称,每一个key的value包含最大值和最大值的时间 data[station] = { 'maxval':maxval, 'maxtime':realtime } return data
def save_file(data,filename): #以写文件的状态打开csv文件 with open(filename,'w') as f: #写文件,打开文件句柄,字段与字段之间用'|'分割 w = csv.writer(f,delimiter='|') #写表头 w.writerow(['Station','Year','Month','Day','Hour','Max Load']) #根据传入的data写内容 for line in data:
#获取时间的值 year,month,day,hour,_,_=data[line]['maxtime']
#写数据 w.writerow([line,year,month,day,hour,data[line]['maxval']])
测试函数
def test(): data = parse_file(datafile) save_file(data, outfile) number_of_rows = 0 stations = [] ans = {'FAR_WEST': {'Max Load': '2281.2722140000024', 'Year': '2013', 'Month': '6', 'Day': '26', 'Hour': '17'}} correct_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH', 'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST'] fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load'] with open(outfile) as of: csvfile = csv.DictReader(of, delimiter="|") for line in csvfile: station = line['Station'] if station == 'FAR_WEST': for field in fields: # Check if 'Max Load' is within .1 of answer if field == 'Max Load': max_answer = round(float(ans[station][field]), 1) max_line = round(float(line[field]), 1) assert max_answer == max_line # Otherwise check for equality else: assert ans[station][field] == line[field] number_of_rows += 1 stations.append(station) # Output should be 8 lines not including header assert number_of_rows == 8 # Check Station Names assert set(stations) == set(correct_stations) if __name__ == "__main__": test()
如果测试通过会生成'2013_Max_loads.xls'的excel文件同时函数的值不会报错
3.读取json
要求:从json文件中提取出相应的值,提取文章的标题和url,标题按照模块分组,url按照标准格式分组
import json import codecs def get_from_file(kind, period): '''返回json格式的数据''' #获取json文件名 filename = "popular-{0}-{1}.json".format(kind, period) #以读的形式打开此文件,同时转为json数据格式 with open(filename, "r") as f: return json.loads(f.read()) def article_overview(kind,period): #获取json格式的数据 data = get_from_file(kind, period) titles = [] #存放需获取的标题 urls = [] #存放需要获取的url for article in data: title = article['title'] #根据key来获取title section = article['section'] #根据key获取section #封装titles,key为section,value为title titles.append({section:title}) #找media下的metadata下的format是标准格式的url if 'media' in article: for media in article['media']:#找到media for metadata in media['media-metadata']:#找到media下的media-metadata if metadata['format'] == 'Standard Thumbnail':#判断是否是标准格式 urls.append(metadata['url'])#是就加入到列表中最后返回 return (titles,urls)
测试函数
def test():
#如果执行过程中不报错,即为正确 titles, urls = article_overview("viewed", 1) assert len(titles) == 20 assert len(urls) == 30 assert titles[2] == {'Opinion': 'Professors, We Need You!'} assert urls[20] == 'http://graphics8.nytimes.com/images/2014/02/17/sports/ICEDANCE/ICEDANCE-thumbStandard.jpg' if __name__ == "__main__": test()