#!/usr/bin/python3
import itertools
import os
import shlex
import subprocess
import sys
import threading
import time
|
|
|
|
|
|
# Shell command that lists every *.jpg / *.png / *.jpeg file (case-insensitive)
# below the current working directory, one path per line.
GET_ALL_PICS_LIST_STRING = "find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'"
# Command template comparing two pictures; the two "{}" placeholders receive
# the picture paths.  The workers treat exit status 0 as "identical" and
# exit status >= 2 as a failed comparison.
COMPARE_PICS_CMD = 'compare -metric AE -fuzz 0.1% "{}" "{}" /dev/null'
# Command template for inspecting a single picture (not referenced by the
# code visible in this file).
IDENTIFY_PIC_CMD = 'identify "{}"'

# Squashed groups: base key -> list of picture paths to compare pairwise.
# Filled by main() and read by the worker threads.
PICS_MAP_SQUSHED = {}
# Not referenced by the code visible in this file.
PICS_MAP_SQUSHED_KEYS = []

# State shared between main() and the worker threads.
THREADS_CTX = {
    "keys": [],      # group keys still waiting to be processed
    "estimated": 0,  # number of groups left, used for progress output
    "num": 2,        # number of worker threads to start
    "done": [],      # keys already processed (persisted to ./.ddp.csv)
}
|
|
|
|
def main():
    """Find groups of likely-duplicate pictures and let worker threads prune them.

    Steps:
      1. List all pictures below the current directory.
      2. Load the keys finished by an earlier run from ``./.ddp.csv``.
      3. Group pictures by basename (without extension) and merge ("squash")
         every group whose key extends a shorter key with ``-``, a space or
         ``(`` into that shorter key's group.
      4. Publish the groups in ``PICS_MAP_SQUSHED`` / ``THREADS_CTX`` and
         start ``THREADS_CTX["num"]`` workers running ``thread_function``.
      5. Wait for the workers; Ctrl-C empties the pending key list so the
         workers stop once their current group is finished.
      6. Write all finished keys back to ``./.ddp.csv``.
    """
    done_path = './.ddp.csv'

    all_pics = _list_pictures()
    print(f"Gathered {len(all_pics)} photos.")

    done_keys = _load_done_keys(done_path)
    THREADS_CTX["done"].extend(done_keys)
    print(f"Exclude list for {len(done_keys)} keys")

    # I. basenames
    pics_map = _group_by_basename(all_pics)
    print(f"Hashtable for {len(pics_map)} keys.")

    # II. sort + III. squash
    pics_map_squashed = _squash_groups(pics_map)
    print(f"Squashed hashtable to {len(pics_map_squashed)} keys.")

    PICS_MAP_SQUSHED.update(pics_map_squashed)
    # A set makes the "already done?" test O(1) per key.
    already_done = set(THREADS_CTX["done"])
    THREADS_CTX["keys"] = [key for key in PICS_MAP_SQUSHED if key not in already_done]
    THREADS_CTX["estimated"] = len(THREADS_CTX["keys"])

    workers = [
        threading.Thread(target=thread_function, args=(f"thread_{i}",))
        for i in range(THREADS_CTX["num"])
    ]
    for worker in workers:
        worker.start()

    try:
        try:
            # Poll worker liveness rather than the key list: if every worker
            # stops early while keys remain, the key list never empties and
            # polling it would spin forever.
            while any(worker.is_alive() for worker in workers):
                time.sleep(.1)
        except KeyboardInterrupt:
            print("Wait for threads...")
            # Workers re-read this entry before each group, so an empty list
            # makes them stop after the group they are working on.
            THREADS_CTX["keys"] = []
        for worker in workers:
            worker.join()
    finally:
        # Persist progress even if a second Ctrl-C interrupts the join above.
        with open(done_path, 'w') as done_file:
            done_file.writelines(f"{key}\n" for key in THREADS_CTX["done"])


def _list_pictures():
    """Return the picture paths printed by ``GET_ALL_PICS_LIST_STRING``."""
    # stderr stays attached to the terminal: merging it into stdout would turn
    # find's error messages (e.g. "Permission denied") into bogus file names.
    result = subprocess.run(['sh', '-c', GET_ALL_PICS_LIST_STRING],
                            stdout=subprocess.PIPE)
    # os.fsdecode tolerates file names that are not valid UTF-8.  Split on
    # '\n' only and drop empty entries (the output ends with a newline).
    return [path for path in os.fsdecode(result.stdout).split('\n') if path]


def _load_done_keys(path):
    """Return the non-empty lines of the done-file, or [] if it is missing."""
    try:
        with open(path, 'r') as done_file:
            stripped = (line.strip('\n') for line in done_file)
            return [line for line in stripped if line]
    except FileNotFoundError as err:
        print(err)
        return []


def _group_by_basename(all_pics):
    """Map each basename without extension to the list of paths sharing it."""
    pics_map = {}
    for pic in all_pics:
        key = os.path.splitext(os.path.basename(pic))[0]
        pics_map.setdefault(key, []).append(pic)
    return pics_map


def _squash_groups(pics_map, min_key_len=7, markers=('-', ' ', '(')):
    """Merge groups whose key is ``<shorter key><marker>...`` into the shorter key.

    Keys are visited shortest first.  Keys shorter than ``min_key_len`` and
    keys without any longer variant are dropped from the result; a key that
    was merged into a shorter one is not visited again.
    """
    ordered_keys = sorted(pics_map, key=len)
    consumed = set()
    squashed = {}
    for index, key in enumerate(ordered_keys):
        if key in consumed or len(key) < min_key_len:
            continue
        variants = [
            other for other in ordered_keys[index + 1:]
            if other not in consumed
            and len(other) > len(key)
            and other.startswith(key)
            and other[len(key)] in markers
        ]
        if not variants:
            continue
        # Copy so the merged list does not alias (and mutate) pics_map[key].
        merged = list(pics_map[key])
        for variant in variants:
            merged.extend(pics_map[variant])
        consumed.update(variants)
        squashed[key] = merged
    return squashed
|
|
|
|
|
|
def thread_function(name):
    """Worker loop: take group keys from ``THREADS_CTX["keys"]`` and de-duplicate them.

    For every key the pictures in ``PICS_MAP_SQUSHED[key]`` are compared
    pairwise and the second picture of each identical pair is deleted.  A key
    is appended to ``THREADS_CTX["done"]`` only after its group was processed
    without an error, so failed groups are retried on the next run.

    Args:
        name: Label used to prefix this worker's log output.
    """
    # Turn the shell-style template into an argument vector once.  Running it
    # without a shell keeps quotes, '$' or backticks in file names from
    # breaking (or injecting into) the command line.
    import shlex
    compare_argv = shlex.split(COMPARE_PICS_CMD)

    while True:
        try:
            # Look the list up on every pass: main() rebinds this entry to an
            # empty list to request shutdown.
            key = THREADS_CTX["keys"].pop(0)
        except IndexError:
            break  # nothing left to do (or shutdown requested)

        pics = PICS_MAP_SQUSHED[key]
        # The counter is updated without a lock, so the figure is approximate.
        if THREADS_CTX["estimated"] % 10 == 0:
            print(f"Estimated: {THREADS_CTX['estimated']}")
        print(f"{name}: Do {len(pics)} photos: '{pics}'")
        THREADS_CTX["estimated"] -= 1

        try:
            _dedupe_group(name, pics, compare_argv)
        except Exception as err:
            # Keep the worker alive for the remaining groups and leave this
            # key out of the done list.
            print(f"{name}: {err}")
            continue
        THREADS_CTX["done"].append(key)


def _dedupe_group(name, pics, compare_argv):
    """Compare the pictures of one group pairwise and delete the duplicates."""
    delete_list = []
    compars_count = 0
    for pic_a, pic_b in itertools.combinations(pics, 2):
        if pic_a in delete_list or pic_b in delete_list:
            continue
        if not os.path.exists(pic_a):
            print(f"pic_a doesnt exist: '{pic_a}'")
            continue
        if not os.path.exists(pic_b):
            print(f"pic_b doesnt exist: '{pic_b}'")
            continue
        compars_count += 1

        # Fill the two "{}" placeholders of the template, in order.
        paths = iter((pic_a, pic_b))
        argv = [next(paths) if part == "{}" else part for part in compare_argv]
        result = subprocess.run(argv, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        if result.returncode >= 2:
            # The comparison itself failed; stderr is merged into stdout.
            print(f"HERE!!! stdout: {result.stdout}\nstderr: {result.stderr}\nreturn code: {result.returncode}")
            continue
        # Exit status 0 marks the pair as identical; keep pic_a, drop pic_b.
        if result.returncode == 0:
            delete_list.append(pic_b)

    print(f"{name}: {compars_count} cmps")
    if not delete_list:
        return
    print(f"Delete: {delete_list}")
    for delete_elem in delete_list:
        os.remove(delete_elem)
|
|
|
|
|
|
# Run the de-duplication only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|