forked from cw-somil/Duplicate-Remover
-
Notifications
You must be signed in to change notification settings - Fork 0
/
DuplicateRemover.py
68 lines (49 loc) · 2.45 KB
/
DuplicateRemover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from PIL import Image
import imagehash
import os
import numpy as np
class DuplicateRemover:
def __init__(self,dirname,hash_size = 8):
self.dirname = dirname
self.hash_size = hash_size
def find_duplicates(self):
"""
Find and Delete Duplicates
"""
fnames = os.listdir(self.dirname)
hashes = {}
duplicates = []
print("Finding Duplicates Now!\n")
for image in fnames:
with Image.open(os.path.join(self.dirname,image)) as img:
temp_hash = imagehash.average_hash(img, self.hash_size)
if temp_hash in hashes:
print("Duplicate {} \nfound for Image {}!\n".format(image,hashes[temp_hash]))
duplicates.append(image)
else:
hashes[temp_hash] = image
if len(duplicates) != 0:
a = input("Do you want to delete these {} Images? Press Y or N: ".format(len(duplicates)))
space_saved = 0
if(a.strip().lower() == "y"):
for duplicate in duplicates:
space_saved += os.path.getsize(os.path.join(self.dirname,duplicate))
os.remove(os.path.join(self.dirname,duplicate))
print("{} Deleted Succesfully!".format(duplicate))
print("\n\nYou saved {} mb of Space!".format(round(space_saved/1000000),2))
else:
print("Thank you for Using Duplicate Remover")
else:
print("No Duplicates Found :(")
def find_similar(self,location,similarity=80):
fnames = os.listdir(self.dirname)
threshold = 1 - similarity/100
diff_limit = int(threshold*(self.hash_size**2))
with Image.open(location) as img:
hash1 = imagehash.average_hash(img, self.hash_size).hash
print("Finding Similar Images to {} Now!\n".format(location))
for image in fnames:
with Image.open(os.path.join(self.dirname,image)) as img:
hash2 = imagehash.average_hash(img, self.hash_size).hash
if np.count_nonzero(hash1 != hash2) <= diff_limit:
print("{} image found {}% similar to {}".format(image,similarity,location))