import hashlib
import os
class DirProc:
    """Delete byte-identical duplicate files inside each sub-directory of a root.

    Duplicates are detected by MD5 digest and only among files that live in
    the same immediate sub-directory of ``main_dir``; files in different
    sub-directories are never compared with each other.
    """

    # Root scanned when the caller does not pass one (the historical default).
    DEFAULT_MAIN_DIR = "C://software//导数据//mtm//imgs"

    def __init__(self, main_dir=None):
        """Create a processor.

        Args:
            main_dir: Directory whose immediate sub-directories are scanned.
                Defaults to ``DEFAULT_MAIN_DIR`` when omitted or ``None``.
        """
        self.main_dir = self.DEFAULT_MAIN_DIR if main_dir is None else main_dir

    def md5(self, fname):
        """Return the hex MD5 digest of the file at *fname*.

        Returns ``None`` when *fname* is not an existing regular file (missing
        path or a directory), so callers can treat "nothing to hash" uniformly.
        """
        if not os.path.isfile(fname):
            return None
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            # Read in fixed-size chunks so large files are not loaded whole.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    @staticmethod
    def _same_path(path_a, path_b):
        """Return True when both arguments name the same filesystem path."""
        norm_a = os.path.normcase(os.path.abspath(path_a))
        norm_b = os.path.normcase(os.path.abspath(path_b))
        return norm_a == norm_b

    def remove(self, fpath1, fpath2):
        """Delete *fpath1* if its content is identical to that of *fpath2*.

        Nothing happens when the two arguments are the same path (deleting it
        would destroy the only copy), when either file cannot be hashed, or
        when the digests differ.
        """
        if self._same_path(fpath1, fpath2):
            return
        md51 = self.md5(fpath1)
        # A missing file hashes to None; two None values must not count as a match.
        if md51 is None or md51 != self.md5(fpath2):
            return
        os.remove(fpath1)
        print("去重", fpath1)

    def _dedupe_dir(self, sub_dir):
        """Remove duplicate regular files directly inside *sub_dir*."""
        hashed = []
        for fname in os.listdir(sub_dir):
            fpath = os.path.join(sub_dir, fname)
            digest = self.md5(fpath)  # None for nested directories etc.
            if digest is not None:
                hashed.append((fpath, digest))

        # Later entries overwrite earlier ones, so for every digest the LAST
        # file in listing order is the one that survives.
        survivor = {digest: fpath for fpath, digest in hashed}

        for fpath, digest in hashed:
            if survivor[digest] != fpath:
                os.remove(fpath)
                print("去重", fpath)

    def remove_duplicate(self):
        """De-duplicate every immediate sub-directory of ``self.main_dir``.

        Each file is hashed exactly once. Entries of ``main_dir`` that are not
        directories are skipped.

        Raises:
            OSError: if ``main_dir`` cannot be listed or a file cannot be
                read or removed.
        """
        for name in os.listdir(self.main_dir):
            sub_dir = os.path.join(self.main_dir, name)
            if os.path.isdir(sub_dir):
                self._dedupe_dir(sub_dir)
# Script entry: build a DirProc with no arguments and immediately run its
# duplicate-removal pass. These statements sit at module top level with no
# `if __name__ == "__main__"` guard visible here, so they also execute (and
# may delete files) whenever this module is imported.
dirProc = DirProc()
dirProc.remove_duplicate()