hz
This commit is contained in:
@@ -4,11 +4,14 @@ import subprocess
|
|||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
|
|
||||||
|
|
||||||
GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'"
|
GET_ALL_PICS_LIST_STRING="find . -iname '*.jpg' -o -iname '*.png' -o -iname '*.jpeg'"
|
||||||
COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null"
|
COMPARE_PICS_CMD="compare -metric AE -fuzz 0.1% \"{}\" \"{}\" /dev/null"
|
||||||
|
|
||||||
|
PICS_MAP_SQUSHED = {}
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING],
|
out = subprocess.Popen(['sh', '-c', GET_ALL_PICS_LIST_STRING],
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||||
@@ -16,38 +19,49 @@ def main():
|
|||||||
all_pics = all_pics_string.split('\n')
|
all_pics = all_pics_string.split('\n')
|
||||||
print(f"Gathered {len(all_pics)} photos.")
|
print(f"Gathered {len(all_pics)} photos.")
|
||||||
|
|
||||||
duplicate_map = {}
|
# I. basenames
|
||||||
scanned_files = []
|
pics_map = {}
|
||||||
for pic in all_pics:
|
for pic in all_pics:
|
||||||
if not pic:
|
key = os.path.basename('.'.join(pic.split('.')[:-1]))
|
||||||
|
if key not in pics_map:
|
||||||
|
pics_map.update({key: [pic]})
|
||||||
continue
|
continue
|
||||||
name = os.path.basename('.'.join(pic.split('.')[:-1]))
|
pics_map[key].append(pic)
|
||||||
if not name:
|
print(f"Hashtable for {len(pics_map)} keys.")
|
||||||
print("WTF? name is empty")
|
|
||||||
continue
|
|
||||||
if pic in scanned_files:
|
|
||||||
continue
|
|
||||||
for other_pic in all_pics:
|
|
||||||
if pic == other_pic:
|
|
||||||
continue
|
|
||||||
if other_pic in scanned_files:
|
|
||||||
continue
|
|
||||||
if os.path.basename(other_pic).startswith(name):
|
|
||||||
if pic not in duplicate_map:
|
|
||||||
duplicate_map.update({pic: []})
|
|
||||||
scanned_files.append(pic)
|
|
||||||
duplicate_map[pic].append(other_pic)
|
|
||||||
scanned_files.append(other_pic)
|
|
||||||
|
|
||||||
origs = len(duplicate_map.keys())
|
# II. sort
|
||||||
clones = sum([len(values) for values in duplicate_map.values()])
|
pics_map_keys = sorted(pics_map.keys(), key=len)
|
||||||
candidates = origs + clones
|
|
||||||
print(f"Found {origs} + {clones} = {candidates} delete candidate.")
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
for pic, dups in duplicate_map.items():
|
# III. squash
|
||||||
|
pics_map_squashed = {}
|
||||||
|
squash_marker = ['-', ' ', '(']
|
||||||
|
while pics_map_keys:
|
||||||
|
key = pics_map_keys.pop(0)
|
||||||
|
if len(key) < 7:
|
||||||
|
continue
|
||||||
|
if not key:
|
||||||
|
continue
|
||||||
|
squashed_keys = []
|
||||||
|
for other_key in pics_map_keys:
|
||||||
|
if other_key.startswith(key) and other_key[len(key)] in squash_marker:
|
||||||
|
squashed_keys.append(other_key)
|
||||||
|
if not squashed_keys:
|
||||||
|
continue
|
||||||
|
pics_map_squashed.update({key: pics_map[key]})
|
||||||
|
for squashed_key in squashed_keys:
|
||||||
|
pics_map_squashed[key] += pics_map[squashed_key]
|
||||||
|
pics_map_keys.remove(squashed_key)
|
||||||
|
|
||||||
|
print(f"Squashed hashtable to {len(pics_map_squashed)} keys.")
|
||||||
|
|
||||||
|
estimated = len(pics_map_squashed)
|
||||||
|
for pics in pics_map_squashed.values():
|
||||||
|
if estimated % 10 == 0:
|
||||||
|
print(f"Estimated: {estimated}")
|
||||||
|
print(f"Do {len(pics)} photos: '{pics}'")
|
||||||
|
estimated -= 1
|
||||||
delete_list = []
|
delete_list = []
|
||||||
pics = [pic] + dups
|
compars_count = 0
|
||||||
for pic_a, pic_b in itertools.combinations(pics, 2):
|
for pic_a, pic_b in itertools.combinations(pics, 2):
|
||||||
if pic_b in delete_list or pic_a in delete_list:
|
if pic_b in delete_list or pic_a in delete_list:
|
||||||
continue
|
continue
|
||||||
@@ -57,6 +71,7 @@ def main():
|
|||||||
if not os.path.exists(pic_b):
|
if not os.path.exists(pic_b):
|
||||||
print(f"pic_b doesnt exist: '{pic_b}'")
|
print(f"pic_b doesnt exist: '{pic_b}'")
|
||||||
continue
|
continue
|
||||||
|
compars_count += 1
|
||||||
out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)],
|
out = subprocess.Popen(['sh', '-c', COMPARE_PICS_CMD.format(pic_a, pic_b)],
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||||
value_string, stderr = out.communicate()
|
value_string, stderr = out.communicate()
|
||||||
@@ -68,8 +83,10 @@ def main():
|
|||||||
if out.returncode == 0:
|
if out.returncode == 0:
|
||||||
delete_list.append(pic_b)
|
delete_list.append(pic_b)
|
||||||
|
|
||||||
print(f"In list {pics} i will delete")
|
print(f"Cmps: {compars_count}")
|
||||||
print(f"this pics: {delete_list}")
|
if not delete_list:
|
||||||
|
continue
|
||||||
|
print(f"Delete: {delete_list}")
|
||||||
for delete_elem in delete_list:
|
for delete_elem in delete_list:
|
||||||
os.remove(delete_elem)
|
os.remove(delete_elem)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user