zoukankan      html  css  js  c++  java
  • Python Quick list dir

    昨天 Python 发布了 3.5,添加了 os.scandir。根据文档(Docs),该 API 比 os.listdir 更快:

    which speeds it up by 3-5 times on POSIX systems and by 7-20 times on Windows systems

    以前因为目录太大(文件数过万),listdir又太慢,写了一个自己的listdir,发布一下 (仅支持Linux)

    #!/usr/bin/python
    
    import os
    import ctypes
    from ctypes.util import find_library
    
    # Handle to the C runtime library that provides opendir/readdir/closedir.
    # find_library() takes the library's short name, which for libc is the
    # lowercase 'c'. If the lookup yields None, CDLL(None) falls back to the
    # symbols already loaded into the running process.
    # use_errno=True makes ctypes.get_errno() report the errno left behind by
    # a failed call into this library.
    clib = ctypes.CDLL(find_library('c'), use_errno=True)
    
    class c_dir(ctypes.Structure):
    	"""Opaque stand-in for the C ``DIR`` stream type.

    	No fields are declared: instances are only ever handled through
    	pointers to this type.
    	"""
    	
    class c_dirent(ctypes.Structure):
    	"""ctypes layout of the directory entry record returned by readdir().

    	Fields:
    		d_ino:    inode number (unsigned, so large inode numbers do not
    		          read back as negative values).
    		d_off:    offset value stored in the record.
    		d_reclen: length of this record in bytes.
    		d_type:   file type code (unsigned char).
    		d_name:   NUL-terminated entry name as bytes.
    	"""
    	_fields_ = (
    		('d_ino', ctypes.c_ulong),
    		('d_off', ctypes.c_long), # offset
    		('d_reclen', ctypes.c_ushort), # record length
    		('d_type', ctypes.c_ubyte),
    		# Reading a c_char array field stops at the first NUL byte, so the
    		# declared capacity is only an upper bound on the name length.
    		# NOTE(review): 4096 is larger than the 256 that glibc declares;
    		# presumably chosen so long names are never truncated - confirm.
    		('d_name', ctypes.c_char * 4096),
    	)
    	
    # Pointer types used in the libc prototypes below.
    c_dir_p = ctypes.POINTER(c_dir)
    c_dirent_p = ctypes.POINTER(c_dirent)

    # DIR *opendir(const char *name);
    opendir = clib.opendir
    opendir.argtypes = [ ctypes.c_char_p ]
    opendir.restype = c_dir_p

    # struct dirent *readdir(DIR *dirp);
    readdir = clib.readdir
    readdir.argtypes = [ c_dir_p ]
    readdir.restype = c_dirent_p

    # int closedir(DIR *dirp);
    closedir = clib.closedir
    closedir.argtypes = [ c_dir_p ]
    closedir.restype = ctypes.c_int
    
    def countdir(path):
    	"""Count the entries of a directory by walking it with readdir().

    	Args:
    		path: directory to scan, given as str or bytes.

    	Returns:
    		A dict with the keys:
    		"count"          - number of records read minus 2 (for '.' and '..');
    		"total_filename" - summed byte length of every name read
    		                   ('.' and '..' included);
    		"total_metasize" - summed d_reclen of every record read;
    		"dirsize"        - os.path.getsize(path).

    	Raises:
    		ValueError: path is not a directory.
    		OSError: opendir() could not open the directory.
    	"""
    	if not os.path.isdir(path):
    		# fsdecode keeps the message buildable when path is bytes.
    		raise ValueError('arg error, not a dir: ' + os.fsdecode(path))
    	# opendir is declared with a c_char_p parameter, which only takes bytes.
    	dirp = opendir(os.fsencode(path))
    	if not dirp:
    		# A NULL stream must never reach readdir/closedir. get_errno() is
    		# 0 unless libc was loaded with use_errno=True, hence the fallback
    		# message.
    		err = ctypes.get_errno()
    		raise OSError(err, os.strerror(err) if err else 'opendir failed',
    			os.fsdecode(path))
    	total_num, total_filename, total_metasize = 0, 0, 0
    	try:
    		while True:
    			entry = readdir(dirp)
    			if not entry:
    				# NULL pointer: no more records.
    				break
    			record = entry.contents
    			total_filename += len(record.d_name)
    			total_metasize += record.d_reclen
    			total_num += 1
    	finally:
    		closedir(dirp)
    	return {"count":total_num-2, "total_filename":total_filename, "total_metasize":total_metasize,"dirsize":os.path.getsize(path)}
    
    def listdir(path):
    	"""Lazily yield one dict per directory entry, '.' and '..' included.

    	This is a generator, so nothing (including the directory check) runs
    	until the first item is requested. The directory stream is closed when
    	the generator finishes or is closed.

    	Args:
    		path: directory to scan, given as str or bytes.

    	Yields:
    		dict with the keys "name" (same type as path: bytes for a bytes
    		path, otherwise str), "inode" (d_ino) and "metasize" (d_reclen).

    	Raises:
    		ValueError: path is not a directory.
    		OSError: opendir() could not open the directory.
    	"""
    	if not os.path.isdir(path):
    		# fsdecode keeps the message buildable when path is bytes.
    		raise ValueError('arg error, not a dir: ' + os.fsdecode(path))
    	want_bytes = isinstance(path, bytes)
    	# opendir is declared with a c_char_p parameter, which only takes bytes.
    	dirp = opendir(os.fsencode(path))
    	if not dirp:
    		# A NULL stream must never reach readdir/closedir. get_errno() is
    		# 0 unless libc was loaded with use_errno=True, hence the fallback
    		# message.
    		err = ctypes.get_errno()
    		raise OSError(err, os.strerror(err) if err else 'opendir failed',
    			os.fsdecode(path))
    	try:
    		while True:
    			entry = readdir(dirp)
    			if not entry:
    				# NULL pointer: no more records.
    				break
    			record = entry.contents
    			raw_name = record.d_name
    			yield {"name": raw_name if want_bytes else os.fsdecode(raw_name),
    				"inode": record.d_ino,
    				"metasize": record.d_reclen}
    	finally:
    		closedir(dirp)
    		
    		
    if __name__ == '__main__':
    	# Command-line entry point: print the countdir() summary for the
    	# directory named by the first argument.
    	import sys
    	if len(sys.argv) < 2:
    		# Report a missing argument instead of failing with IndexError.
    		sys.stderr.write('usage: %s DIRECTORY\n' % sys.argv[0])
    		sys.exit(2)
    	path = sys.argv[1]
    	print( countdir(path) )
    	# To inspect individual entries instead, iterate listdir(path) and read
    	# each entry's 'name' and 'metasize'; the summed 'metasize' can be
    	# compared with os.path.getsize(path).
    
    
  • 相关阅读:
    【Nginx】url 带有 “https://” 双斜杠特殊处理
    【layui】tepmlet 格式化 table 数据
    于二零二零年:终章
    【Golang】练习-Web 处理 form 表单请求失败不刷新页面并保存输入的数据
    实现纸牌游戏的随机抽牌洗牌过程(item系列几个内置方法的实例)
    面向对象的进阶(item系列,__new__,__hash__,__eq__)
    面向对象阶段复习
    计算器实例
    反射
    静态方法staticmethod和类方法classmethod
  • 原文地址:https://www.cnblogs.com/i2u9/p/python-listdir.html
Copyright © 2011-2022 走看看