在 iOS 应用的开发过程中，随着项目的发展，工程中很容易出现内容相同的重复图片。
下面这段 Python 脚本会递归扫描指定目录下的 png、jpg、jpeg 图片，通过比较文件内容的 SHA-1 摘要和文件大小来找出工程中的重复图片。
1 #!/usr/bin/env python
2 #coding=utf-8
3
4 """Usage: check_project_duplicate.py project_image_dir_path"""
5
6 import os
7 import sys
8 import fnmatch
9 import hashlib
10
def find_file(dir, pattern):
    """Recursively collect files below ``dir`` whose names match ``pattern``.

    Args:
        dir: Root directory to walk. (The name shadows the builtin ``dir``;
            it is kept so existing keyword callers keep working.)
        pattern: Shell-style wildcard applied to each file name, not to the
            full path (for example ``'*.png'``).

    Returns:
        A list of paths, each built by joining the directory reported by
        ``os.walk`` with the matching file name. The order follows
        ``os.walk`` and is therefore not sorted.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(dir)
        for filename in fnmatch.filter(filenames, pattern)
    ]
17
18
def chunk_reader(fobj, chunk_size=1024):
    """Yield successive chunks read from a file-like object.

    Args:
        fobj: Object exposing ``read(size)``.
        chunk_size: Size passed to each ``read`` call; defaults to 1024.

    Yields:
        Each non-empty value returned by ``fobj.read(chunk_size)``. The
        generator finishes at the first empty read and does not close
        ``fobj``; that remains the caller's responsibility.
    """
    chunk = fobj.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fobj.read(chunk_size)
25
26
def main(me, args):
    """Report duplicate images found below a project directory.

    Collects every ``*.png``, ``*.jpg`` and ``*.jpeg`` file under
    ``args[0]``, identifies each one by the pair (SHA-1 digest of its
    content, size in bytes) and prints one line for every file whose
    identity matches a file seen earlier in the scan.

    Args:
        me: Name of the running script; used only in the usage message.
        args: Command-line arguments; ``args[0]`` is the directory to scan.

    Returns:
        0 after a scan, or 2 when no directory argument was supplied.
    """
    if not args:
        # Without this guard a missing argument surfaced as an IndexError.
        sys.stderr.write("Usage: %s project_image_dir_path\n" % me)
        return 2

    images = []
    for pattern in ('*.png', '*.jpg', '*.jpeg'):
        images.extend(find_file(args[0], pattern))

    # Maps (digest, size) -> first path seen with that identity.
    hashes = {}
    for file_path in images:
        hashobj = hashlib.sha1()
        # Close each image as soon as it is hashed; a large project would
        # otherwise keep one open handle per scanned file.
        with open(file_path, 'rb') as fobj:
            for chunk in chunk_reader(fobj):
                hashobj.update(chunk)
        file_id = (hashobj.digest(), os.path.getsize(file_path))
        duplicate = hashes.get(file_id)
        if duplicate is not None:
            # Parenthesised form runs as a statement on Python 2 and as a
            # function call on Python 3.
            print("发现重复图片: '%s' 与 '%s'" % (file_path, duplicate))
        else:
            hashes[file_id] = file_path

    return 0
46
47
if __name__ == '__main__':
    # Hand the script name and the remaining arguments to main() separately
    # and use its return value as the process exit status.
    sys.exit(main(sys.argv[0], sys.argv[1:]))