注:部分内容来自书籍或者网络,如有侵权,请联系删除。
#!/usr/bin/python
# -*- coding:UTF-8 -*-
from __future__ import print_function
import hashlib
import sys
import os
import fnmatch
# Number of bytes requested per read when get_chunk streams a file.
CHUNK_SIZE = 8192
def is_file_match(filename, patterns):
    """Return True if *filename* matches at least one glob pattern.

    Args:
        filename: Name to test.
        patterns: Iterable of ``fnmatch``-style patterns, e.g. ``['*.py']``.

    Returns:
        True when any pattern matches, False otherwise (including when
        *patterns* is empty).
    """
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)
def find_specific_files(path, patterns=('*',), exclude_dirs=()):
    """Walk *path* recursively and yield the files matching *patterns*.

    Args:
        path: Root directory to walk.
        patterns: Iterable of ``fnmatch``-style patterns tested against each
            file name; a file is yielded when any pattern matches.
            Defaults to matching every file.
        exclude_dirs: Directory names (not paths) that must not be
            descended into, at any depth.

    Yields:
        The path of each matching file, formed by joining the directory
        currently being visited with the file name.
    """
    # The defaults are tuples rather than lists so that no mutable object
    # is shared between calls.
    for dirpath, dirnames, filenames in os.walk(path):
        # In top-down mode os.walk consults ``dirnames`` again after this
        # step, so editing the list in place keeps it out of excluded dirs.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if any(fnmatch.fnmatch(filename, pattern) for pattern in patterns):
                yield os.path.join(dirpath, filename)
def get_chunk(filename, chunk_size=None):
    """Yield the contents of *filename* as successive blocks of bytes.

    Args:
        filename: Path of the file to read; it is opened in binary mode.
        chunk_size: Maximum number of bytes per yielded block. When
            omitted (None), the module-level ``CHUNK_SIZE`` is used.

    Yields:
        Non-empty ``bytes`` objects in file order. An empty file yields
        nothing.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    with open(filename, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns the
        # empty bytes object, which is how a binary read signals EOF.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            yield chunk
def get_file_checksum(filename):
    """Return the hexadecimal MD5 digest of the contents of *filename*.

    The file is fed to the hash one chunk at a time via ``get_chunk``
    rather than being read in a single call.

    Args:
        filename: Path of the file to hash.

    Returns:
        The digest as a hexadecimal string.
    """
    # NOTE: MD5 serves here only as a content fingerprint for spotting
    # identical files; it is not suitable where collision resistance
    # against deliberately crafted input matters.
    h = hashlib.md5()
    for chunk in get_chunk(filename):
        h.update(chunk)
    return h.hexdigest()
def main():
    """Report files with identical content under a directory.

    The directory is taken from the first command-line argument. Every
    file beneath it is hashed; the first file seen with a given checksum
    is remembered, and each later file with the same checksum is printed
    as a duplicate of that first one.

    Raises:
        SystemExit: If no directory argument was supplied, or if the
            argument is not an existing directory.
    """
    # Check the argument count explicitly instead of appending a dummy
    # entry to sys.argv, which would alter global state for other code.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else '<script>'
        raise SystemExit("usage: {0} <directory>".format(prog))
    directory = sys.argv[1]
    if not os.path.isdir(directory):
        raise SystemExit("{0} is not a directory".format(directory))

    record = {}  # checksum -> path of the first file seen with that checksum
    for item in find_specific_files(directory):
        try:
            checksum = get_file_checksum(item)
        except (IOError, OSError) as err:
            # A broken symlink or permission problem on one file should
            # not abort the scan of everything else.
            print('skip unreadable file: {0} ({1})'.format(item, err),
                  file=sys.stderr)
            continue
        # Membership test: the original identity check (``is``) could never
        # be true for a string against a dict, so nothing was ever reported.
        if checksum in record:
            print('find duplicate file: {0} vs {1}'.format(record[checksum], item))
        else:
            record[checksum] = item
# Run the duplicate scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
实现效果
[root@Ansible Python]# python search.py /root/Python
find duplicate file: /root/Python/cmd1.py vs /root/Python/cmd2.py
find duplicate file: /root/Python/cmd1.py vs /root/Python/11.py
「 文章如果对你有帮助,请点个赞哦^^ 」 
0
若无特殊注明,文章均为本站原创或整理发布。
转载请注明本文地址:https://om.fangxiaoxiong.com/2738.html