From 51a8b6afbe59da26f0819599c839fad1cb1e6ea5 Mon Sep 17 00:00:00 2001 From: GRayHook Date: Sat, 9 Jul 2022 22:14:52 +0700 Subject: [PATCH] hz --- ddp.py | 71 +++++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/ddp.py b/ddp.py index d1b6b7d..cb24dbb 100755 --- a/ddp.py +++ b/ddp.py @@ -4,11 +4,14 @@ import subprocess import itertools import os import sys +import threading GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'" COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null" +PICS_MAP_SQUSHED = {} + def main(): out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @@ -16,38 +19,49 @@ def main(): all_pics = all_pics_string.split('\n') print(f"Gathered {len(all_pics)} photos.") - duplicate_map = {} - scanned_files = [] + # I. basenames + pics_map = {} for pic in all_pics: - if not pic: + key = os.path.basename('.'.join(pic.split('.')[:-1])) + if key not in pics_map: + pics_map.update({key: [pic]}) continue - name = os.path.basename('.'.join(pic.split('.')[:-1])) - if not name: - print("WTF? name is empty") + pics_map[key].append(pic) + print(f"Hashtable for {len(pics_map)} keys.") + + # II. sort + pics_map_keys = sorted(pics_map.keys(), key=len) + + # III. squash + pics_map_squashed = {} + squash_marker = ['-', ' ', '('] + while pics_map_keys: + key = pics_map_keys.pop(0) + if len(key) < 7: continue - if pic in scanned_files: + if not key: continue - for other_pic in all_pics: - if pic == other_pic: - continue - if other_pic in scanned_files: - continue - if os.path.basename(other_pic).startswith(name): - if pic not in duplicate_map: - duplicate_map.update({pic: []}) - scanned_files.append(pic) - duplicate_map[pic].append(other_pic) - scanned_files.append(other_pic) + squashed_keys = [] + for other_key in pics_map_keys: + if other_key.startswith(key) and other_key[len(key)] in squash_marker: + squashed_keys.append(other_key) + if not squashed_keys: + continue + pics_map_squashed.update({key: pics_map[key]}) + for squashed_key in squashed_keys: + pics_map_squashed[key] += pics_map[squashed_key] + pics_map_keys.remove(squashed_key) - origs = len(duplicate_map.keys()) - clones = sum([len(values) for values in duplicate_map.values()]) - candidates = origs + clones - print(f"Found {origs} + {clones} = {candidates} delete candidate.") - sys.exit(0) + print(f"Squashed hashtable to {len(pics_map_squashed)} keys.") - for pic, dups in duplicate_map.items(): + estimated = len(pics_map_squashed) + for pics in pics_map_squashed.values(): + if estimated % 10 == 0: + print(f"Estimated: {estimated}") + print(f"Do {len(pics)} photos: '{pics}'") + estimated -= 1 delete_list = [] - pics = [pic] + dups + compars_count = 0 for pic_a, pic_b in itertools.combinations(pics, 2): if pic_b in delete_list or pic_a in delete_list: continue @@ -57,6 +71,7 @@ def main(): if not os.path.exists(pic_b): print(f"pic_b doesnt exist: '{pic_b}'") continue + compars_count += 1 out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) value_string, stderr = out.communicate() @@ -68,8 +83,10 @@ def main(): if out.returncode == 0: delete_list.append(pic_b) - print(f"In list {pics} i will delete") - print(f"this pics: {delete_list}") + print(f"Cmps: {compars_count}") + if not delete_list: + continue + print(f"Delete: {delete_list}") for delete_elem in delete_list: os.remove(delete_elem)