http://www.cnblogs.com/wuyuegb2312/archive/2013/01/11/2856772.html
开始学Python,这篇文章来自于应用需求。
os.walk很方便,下面写了两个版本的函数进行遍历,分别是不使用walk和使用walk的。
import sys
import string
import os
def detect_nowalk(dir_path):
    """Recursively print every entry under *dir_path* without os.walk.

    Every name returned by ``os.listdir`` is reported as ``file:<name>``.
    A name that refers to a directory is additionally reported as
    ``file folds:<name>`` and is then descended into, depth first.

    Args:
        dir_path: Directory whose contents are listed.

    Raises:
        OSError: If *dir_path* (or a nested directory) cannot be listed.
    """
    for filename in os.listdir(dir_path):
        # The message itself ends with a newline and print adds one more,
        # so each report is followed by an empty line.
        print("file:%s\n" % filename)
        # listdir yields bare names; join with the parent so the directory
        # test and the recursion do not depend on the working directory.
        child_path = os.path.join(dir_path, filename)
        if os.path.isdir(child_path):
            print("file folds:%s\n" % filename)
            detect_nowalk(child_path)
if __name__ == "__main__":
    # Script entry point: list everything below the current directory.
    detect_nowalk(".")
import sys
import os
def detect_walk(dir_path):
    """Print the name of every file and directory found under *dir_path*.

    The tree is traversed with ``os.walk``. For each visited directory the
    contained files are reported first as ``file:<name>``, followed by the
    contained sub-directories as ``dir:<name>``.

    Args:
        dir_path: Directory at which the traversal starts.
    """
    for _root, dirs, files in os.walk(dir_path):
        # Each message ends with a newline and print adds another, so every
        # report is followed by an empty line.
        for filename in files:
            print("file:%s\n" % filename)
        for dirname in dirs:
            print("dir:%s\n" % dirname)
if __name__ == "__main__":
    # Script entry point: walk everything below the current directory.
    detect_walk(".")
另外附上使用第一种方法转换文件编码的源码,有的文件转换后用gedit打开是乱码,但用vi查看是正确的。
import sys
import string
import codecs
import os
import shutil
def _is_valid_utf8(data):
    """Return True when the byte string *data* decodes cleanly as UTF-8."""
    try:
        data.decode("utf-8")
    except UnicodeError:
        return False
    return True


def gbkToUtf8(path, source_encoding="gbk"):
    """Convert every ``.txt`` file under *path* from GBK to UTF-8 in place.

    Directories are processed recursively. Files whose name does not end in
    ``.txt`` are ignored. A file whose bytes already form valid UTF-8 (which
    includes pure ASCII) is left untouched, so running the conversion twice
    does not corrupt previously converted files. This check is a heuristic:
    a legacy-encoded file whose bytes happen to be valid UTF-8 is skipped.

    A file that cannot be read, decoded or written is reported with an
    ``error <name>`` line and keeps its original content, because decoding
    happens before the file is reopened for writing.

    Args:
        path: Directory whose text files are converted.
        source_encoding: Codec used to decode the existing content. Defaults
            to ``"gbk"``; ``"gb18030"`` may be passed for files that use the
            larger character set.

    Raises:
        OSError: If *path* itself (or a nested directory) cannot be listed.
    """
    for filename in os.listdir(path):
        # listdir yields bare names; every file-system call below must use
        # the joined path or it would be resolved against the working
        # directory instead of *path*.
        full_path = os.path.join(path, filename)
        if os.path.isdir(full_path):
            print("file folds:%s\n" % filename)
            gbkToUtf8(full_path, source_encoding)
            continue
        if not filename.endswith(".txt"):
            continue
        try:
            # Work on raw bytes: the encoding is handled explicitly rather
            # than through the platform default.
            with open(full_path, "rb") as src:
                raw = src.read()
            if _is_valid_utf8(raw):
                continue
            print("Encode Converting (GBK to UTF-8) :  %s" % filename)
            converted = raw.decode(source_encoding).encode("utf-8")
            with open(full_path, "wb") as dst:
                dst.write(converted)
        except (IOError, OSError, UnicodeError):
            print("error %s" % filename)
if __name__ == "__main__":
    # Script entry point: convert the text files below the current directory.
    gbkToUtf8(".")
1.14更新:发现Linux自带的 iconv -f gb18030 -t utf8 a.txt >> b.txt 更好用,之前有些文件用decode("gb18030")(换成"gbk"也一样)转换后出现乱码的情况也不再存在。在Python脚本中调用这个命令并不难,就不详细写了。
