#!/usr/bin/python3
"""Find and delete duplicate pictures below the current directory.

Pictures are located with ``find``, grouped by base name (file name without
directory and extension), and groups whose names look like copies of each
other ("IMG_1234567", "IMG_1234567 (1)", "IMG_1234567-2", ...) are merged.
Inside every merged group each pair of pictures is compared with
ImageMagick's ``compare``; when a pair is reported as matching, the second
picture of the pair is deleted.

WARNING: this script removes files from disk without asking.
"""
import itertools
import os
import subprocess
import sys
import threading

GET_ALL_PICS_LIST_STRING = "find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'"
# Kept for backward compatibility only.  It is no longer executed, because
# formatting file names into a shell string breaks (or runs arbitrary shell
# code) for names containing quotes, `$` or backticks.
COMPARE_PICS_CMD = "compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null"
PICS_MAP_SQUSHED = {}

# Same comparison as COMPARE_PICS_CMD, as an argv prefix run without a shell.
_COMPARE_ARGV = ('compare', '-metric', 'AE', '-fuzz', '0.1%')
# Characters that may directly follow a base name in the name of a copy.
_SQUASH_MARKERS = ('-', ' ', '(')
# Base names shorter than this are never used as the head of a group.
_MIN_KEY_LENGTH = 7


def _list_pictures():
    """Return the picture paths printed by ``find``, one per output line."""
    # stderr is deliberately not merged into stdout: diagnostics from find
    # (e.g. "Permission denied") must not be mistaken for picture paths.
    result = subprocess.run(['sh', '-c', GET_ALL_PICS_LIST_STRING],
                            stdout=subprocess.PIPE)
    # os.fsdecode round-trips names that are not valid UTF-8; the filter
    # drops the empty entry that follows the final newline.
    return [line for line in os.fsdecode(result.stdout).split('\n') if line]


def _group_by_basename(all_pics):
    """Map each base name (no directory, no extension) to its picture paths."""
    pics_map = {}
    for pic in all_pics:
        key = os.path.splitext(os.path.basename(pic))[0]
        pics_map.setdefault(key, []).append(pic)
    return pics_map


def _squash_groups(pics_map):
    """Merge every base name with the longer names that extend it.

    Names are visited from shortest to longest.  A name of at least
    ``_MIN_KEY_LENGTH`` characters absorbs every later name that starts with
    it and continues with one of ``_SQUASH_MARKERS``.  Absorbed names never
    head a group of their own.

    NOTE(review): a name without any such variant is dropped, even when it
    maps to several pictures (same name in different directories or with a
    different extension), so those are never compared.  This matches the
    previous behaviour -- confirm that it is intended.
    """
    ordered_keys = sorted(pics_map, key=len)
    absorbed = set()
    squashed = {}
    for position, key in enumerate(ordered_keys):
        if key in absorbed or len(key) < _MIN_KEY_LENGTH:
            continue
        marker_at = len(key)
        variants = [
            other for other in ordered_keys[position + 1:]
            if other not in absorbed
            and other.startswith(key)
            and other[marker_at:marker_at + 1] in _SQUASH_MARKERS
        ]
        if not variants:
            continue
        # Copy the list so that the lists stored in pics_map stay untouched.
        group = list(pics_map[key])
        for variant in variants:
            group.extend(pics_map[variant])
        absorbed.update(variants)
        squashed[key] = group
    return squashed


def _run_compare(pic_a, pic_b):
    """Run ``compare`` on two pictures.

    Returns ``(returncode, output)``.  ``returncode`` is ``None`` when the
    program could not be started at all; ``output`` then holds the reason.
    """
    argv = [*_COMPARE_ARGV, pic_a, pic_b, '/dev/null']
    try:
        proc = subprocess.run(argv, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)
    except OSError as err:
        return None, str(err)
    return proc.returncode, proc.stdout.decode('utf-8', errors='replace')


def _find_duplicates(pics):
    """Return the pictures in ``pics`` that match an earlier kept picture.

    The first picture of a matching pair is kept and the second one is
    reported.  Pairs involving an already reported or a missing picture are
    skipped.
    """
    delete_list = []
    doomed = set()  # same content as delete_list, for fast membership tests
    compars_count = 0
    for pic_a, pic_b in itertools.combinations(pics, 2):
        if pic_a in doomed or pic_b in doomed:
            continue
        if not os.path.exists(pic_a):
            print(f"pic_a doesnt exist: '{pic_a}'")
            continue
        if not os.path.exists(pic_b):
            print(f"pic_b doesnt exist: '{pic_b}'")
            continue
        compars_count += 1
        returncode, output = _run_compare(pic_a, pic_b)
        if returncode == 0:
            # Exit status 0: compare considers the two pictures the same.
            delete_list.append(pic_b)
            doomed.add(pic_b)
        elif returncode != 1:
            # Exit status 1 means "different"; anything else is a failure.
            print(f"compare failed for '{pic_a}' and '{pic_b}' "
                  f"(return code: {returncode}): {output}")
    print(f"Cmps: {compars_count}")
    return delete_list


def main() -> None:
    """Scan the current directory tree and delete duplicate pictures."""
    all_pics = _list_pictures()
    print(f"Gathered {len(all_pics)} photos.")

    # I. basenames
    pics_map = _group_by_basename(all_pics)
    print(f"Hashtable for {len(pics_map)} keys.")

    # II. sort + III. squash
    pics_map_squashed = _squash_groups(pics_map)
    print(f"Squashed hashtable to {len(pics_map_squashed)} keys.")

    estimated = len(pics_map_squashed)
    for pics in pics_map_squashed.values():
        if estimated % 10 == 0:
            print(f"Estimated: {estimated}")
        print(f"Do {len(pics)} photos: '{pics}'")
        estimated -= 1

        delete_list = _find_duplicates(pics)
        if not delete_list:
            continue
        print(f"Delete: {delete_list}")
        for delete_elem in delete_list:
            try:
                os.remove(delete_elem)
            except OSError as err:
                # One undeletable file must not abort the remaining work.
                print(f"Could not delete '{delete_elem}': {err}")


if __name__ == '__main__':
    main()