Update find-duplicates.py
This commit is contained in:
parent
f15d1cd5c2
commit
59d2d46d50
|
@@ -2,15 +2,23 @@ import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
file_name_by_name = defaultdict(list)
|
||||||
file_name_by_link = defaultdict(list)
|
file_name_by_link = defaultdict(list)
|
||||||
|
|
||||||
for file_name in sorted(os.listdir("_data/signed")):
|
for file_name in sorted(os.listdir("_data/signed")):
|
||||||
with open(f"_data/signed/{file_name}") as f:
|
with open(f"_data/signed/{file_name}") as f:
|
||||||
contents = f.read().replace("\r", "")
|
contents = f.read().replace("\r", "")
|
||||||
|
name = next(line for line in contents.split("\n") if line.startswith("name:"))[5:].strip()
|
||||||
link = next(line for line in contents.split("\n") if line.startswith("link:"))[5:].strip()
|
link = next(line for line in contents.split("\n") if line.startswith("link:"))[5:].strip()
|
||||||
if link == "/#":
|
if name[0] == name[0].lower() or " " in name: # looks like a nickname or a full name
|
||||||
|
file_name_by_name[name].append(file_name)
|
||||||
|
if link != "/#":
|
||||||
|
file_name_by_link[link].append(file_name)
|
||||||
|
|
||||||
|
for name, file_names in file_name_by_name.items():
|
||||||
|
if len(file_names) == 1:
|
||||||
continue
|
continue
|
||||||
file_name_by_link[link].append(file_name)
|
print(name, "duplicates:", file_names)
|
||||||
|
|
||||||
for link, file_names in file_name_by_link.items():
|
for link, file_names in file_name_by_link.items():
|
||||||
if len(file_names) == 1:
|
if len(file_names) == 1:
|
||||||
|
|
Loading…
Reference in New Issue