天一直阴沉沉的,真想早点下班啊……
# encoding=utf-8
import random


def copyfile(srcfile, dstfile, linenum):
    """Write ``linenum`` distinct, randomly chosen lines of srcfile to dstfile.

    Lines are compared by content, so a line that occurs several times in
    ``srcfile`` is written at most once.  If ``srcfile`` holds fewer than
    ``linenum`` distinct lines, every distinct line is written (the request
    is capped rather than retried forever).  A ``linenum`` of zero or less
    produces an empty ``dstfile``.

    Both files are handled as bytes, so the sampling works regardless of the
    text encoding of ``srcfile``.

    Args:
        srcfile: Path of the file to sample from.
        dstfile: Path of the file to create or overwrite.
        linenum: Number of lines wanted; an int or a string holding one.

    Returns:
        True when ``dstfile`` was written, False when either file could not
        be opened (a message is printed in that case).

    Raises:
        ValueError: If ``linenum`` is a string that is not an integer.
    """
    # Validate the count first so a bad value never truncates dstfile.
    wanted = int(linenum)

    try:
        srcfd = open(srcfile, 'rb')
    except IOError:
        print('srcfile doesnot exist!')
        return False
    with srcfd:
        srclines = srcfd.readlines()

    # Only the last line can lack a newline; terminate it so it neither
    # counts as different from an identical earlier line nor fuses with
    # whatever line happens to be written after it.
    if srclines and not srclines[-1].endswith(b'\n'):
        srclines[-1] += b'\n'

    # random.sample() needs a sequence, not a set; sorting also makes the
    # result reproducible under random.seed() despite hash randomisation.
    candidates = sorted(set(srclines))
    count = max(0, min(wanted, len(candidates)))
    chosen = random.sample(candidates, count)

    try:
        dstfd = open(dstfile, 'wb')
    except IOError:
        print('dstfile doesnot exist!')
        return False
    with dstfd:
        dstfd.writelines(chosen)
    return True


if __name__ == "__main__":
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3
    srcpath = ask('input srcfile path')
    dstpath = ask('input dstfile path')
    linenum = ask('input linenum')
    print(copyfile(srcpath, dstpath, linenum))
Hadoop 快点跑吧……