diff --git a/ddp.py b/ddp.py index cb24dbb..4046ef4 100755 --- a/ddp.py +++ b/ddp.py @@ -5,12 +5,22 @@ import itertools import os import sys import threading +import time GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'" COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null" +IDENTIFY_PIC_CMD="identify \"{}\"" PICS_MAP_SQUSHED = {} +PICS_MAP_SQUSHED_KEYS = [] + +THREADS_CTX = { + "keys": [], + "estimated": 0, + "num": 2, + "done": [] +} def main(): out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING], @@ -18,6 +28,18 @@ def main(): all_pics_string = out.communicate()[0].decode('utf-8') all_pics = all_pics_string.split('\n') print(f"Gathered {len(all_pics)} photos.") + count = 0 + try: + with open('./.ddp.csv', 'r') as done_file: + for line in done_file.readlines(): + line = line.strip('\n') + if not line: + continue + THREADS_CTX["done"].append(line) + count += 1 + except FileNotFoundError as err: + print(err) + print(f"Exclude list for {count} keys") # I. basenames pics_map = {} @@ -53,42 +75,73 @@ def main(): pics_map_keys.remove(squashed_key) print(f"Squashed hashtable to {len(pics_map_squashed)} keys.") + PICS_MAP_SQUSHED.update(pics_map_squashed) + THREADS_CTX["keys"] = [key for key in list(PICS_MAP_SQUSHED.keys()) if key not in THREADS_CTX["done"]] + THREADS_CTX["estimated"] = len(THREADS_CTX["keys"]) + threads = [None] * THREADS_CTX["num"] + for i in range(THREADS_CTX["num"]): + threads[i] = threading.Thread(target=thread_function, args=(f"thread_{i}",)) + threads[i].start() + try: + while THREADS_CTX["keys"]: + time.sleep(.1) + except KeyboardInterrupt as err: + print("Wait for threads...") + THREADS_CTX["keys"] = [] + for i in range(THREADS_CTX["num"]): + threads[i].join() + with open('./.ddp.csv', 'w') as done_file: + for key in THREADS_CTX["done"]: + done_file.write(key + "\n") - estimated = len(pics_map_squashed) - for pics in pics_map_squashed.values(): - if estimated % 10 == 0: - print(f"Estimated: {estimated}") - print(f"Do {len(pics)} photos: '{pics}'") - estimated -= 1 - delete_list = [] - compars_count = 0 - for pic_a, pic_b in itertools.combinations(pics, 2): - if pic_b in delete_list or pic_a in delete_list: - continue - if not os.path.exists(pic_a): - print(f"pic_a doesnt exist: '{pic_a}'") - continue - if not os.path.exists(pic_b): - print(f"pic_b doesnt exist: '{pic_b}'") - continue - compars_count += 1 - out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)], - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - value_string, stderr = out.communicate() - if out.returncode >= 2: - print(f"HERE!!! stdout: {value_string}\nstderr: {stderr}\nreturn code: {out.returncode}") + +def thread_function(name): + try: + prev_key = None + while key := THREADS_CTX["keys"].pop(0): + if prev_key: + THREADS_CTX["done"].append(prev_key) + prev_key = key + pics = PICS_MAP_SQUSHED[key] + if THREADS_CTX["estimated"] % 10 == 0: + print(f"Estimated: {THREADS_CTX['estimated']}") + print(f"{name}: Do {len(pics)} photos: '{pics}'") + THREADS_CTX["estimated"] -= 1 + delete_list = [] + compars_count = 0 + for pic_a, pic_b in itertools.combinations(pics, 2): + if pic_b in delete_list or pic_a in delete_list: + continue + if not os.path.exists(pic_a): + print(f"pic_a doesnt exist: '{pic_a}'") + continue + if not os.path.exists(pic_b): + print(f"pic_b doesnt exist: '{pic_b}'") + continue + compars_count += 1 + out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + value_string, stderr = out.communicate() + if out.returncode >= 2: + print(f"HERE!!! stdout: {value_string}\nstderr: {stderr}\nreturn code: {out.returncode}") + continue + #diff = float(value_string.decode('utf-8')) + #if diff == 0.: + if out.returncode == 0: + delete_list.append(pic_b) + + print(f"{name}: {compars_count} cmps") + if not delete_list: continue - #diff = float(value_string.decode('utf-8')) - #if diff == 0.: - if out.returncode == 0: - delete_list.append(pic_b) + print(f"Delete: {delete_list}") + for delete_elem in delete_list: + os.remove(delete_elem) + except BaseException as err: + print(f"{name}: {err}") - print(f"Cmps: {compars_count}") - if not delete_list: - continue - print(f"Delete: {delete_list}") - for delete_elem in delete_list: - os.remove(delete_elem) + # Handle last key + if prev_key: + THREADS_CTX["done"].append(prev_key) if __name__ == '__main__':