master
Sergey Marinkevich 3 years ago
parent f379ffcc7e
commit 51a8b6afbe

@ -4,11 +4,14 @@ import subprocess
import itertools import itertools
import os import os
import sys import sys
import threading
GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'" GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'"
COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null" COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null"
PICS_MAP_SQUSHED = {}
def main(): def main():
out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING], out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@ -16,38 +19,49 @@ def main():
all_pics = all_pics_string.split('\n') all_pics = all_pics_string.split('\n')
print(f"Gathered {len(all_pics)} photos.") print(f"Gathered {len(all_pics)} photos.")
duplicate_map = {} # I. basenames
scanned_files = [] pics_map = {}
for pic in all_pics: for pic in all_pics:
if not pic: key = os.path.basename('.'.join(pic.split('.')[:-1]))
if key not in pics_map:
pics_map.update({key: [pic]})
continue continue
name = os.path.basename('.'.join(pic.split('.')[:-1])) pics_map[key].append(pic)
if not name: print(f"Hashtable for {len(pics_map)} keys.")
print("WTF? name is empty")
# II. sort
pics_map_keys = sorted(pics_map.keys(), key=len)
# III. squash
pics_map_squashed = {}
squash_marker = ['-', ' ', '(']
while pics_map_keys:
key = pics_map_keys.pop(0)
if len(key) < 7:
continue continue
if pic in scanned_files: if not key:
continue continue
for other_pic in all_pics: squashed_keys = []
if pic == other_pic: for other_key in pics_map_keys:
continue if other_key.startswith(key) and other_key[len(key)] in squash_marker:
if other_pic in scanned_files: squashed_keys.append(other_key)
continue if not squashed_keys:
if os.path.basename(other_pic).startswith(name): continue
if pic not in duplicate_map: pics_map_squashed.update({key: pics_map[key]})
duplicate_map.update({pic: []}) for squashed_key in squashed_keys:
scanned_files.append(pic) pics_map_squashed[key] += pics_map[squashed_key]
duplicate_map[pic].append(other_pic) pics_map_keys.remove(squashed_key)
scanned_files.append(other_pic)
origs = len(duplicate_map.keys()) print(f"Squashed hashtable to {len(pics_map_squashed)} keys.")
clones = sum([len(values) for values in duplicate_map.values()])
candidates = origs + clones
print(f"Found {origs} + {clones} = {candidates} delete candidate.")
sys.exit(0)
for pic, dups in duplicate_map.items(): estimated = len(pics_map_squashed)
for pics in pics_map_squashed.values():
if estimated % 10 == 0:
print(f"Estimated: {estimated}")
print(f"Do {len(pics)} photos: '{pics}'")
estimated -= 1
delete_list = [] delete_list = []
pics = [pic] + dups compars_count = 0
for pic_a, pic_b in itertools.combinations(pics, 2): for pic_a, pic_b in itertools.combinations(pics, 2):
if pic_b in delete_list or pic_a in delete_list: if pic_b in delete_list or pic_a in delete_list:
continue continue
@ -57,6 +71,7 @@ def main():
if not os.path.exists(pic_b): if not os.path.exists(pic_b):
print(f"pic_b doesnt exist: '{pic_b}'") print(f"pic_b doesnt exist: '{pic_b}'")
continue continue
compars_count += 1
out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)], out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
value_string, stderr = out.communicate() value_string, stderr = out.communicate()
@ -68,8 +83,10 @@ def main():
if out.returncode == 0: if out.returncode == 0:
delete_list.append(pic_b) delete_list.append(pic_b)
print(f"In list {pics} i will delete") print(f"Cmps: {compars_count}")
print(f"this pics: {delete_list}") if not delete_list:
continue
print(f"Delete: {delete_list}")
for delete_elem in delete_list: for delete_elem in delete_list:
os.remove(delete_elem) os.remove(delete_elem)

Loading…
Cancel
Save