This commit is contained in:
bparodi@lezzo.org 2024-11-05 16:34:28 +01:00
parent 7708dafafe
commit 1b65eb13d4
2 changed files with 42 additions and 3 deletions

2
17.txt
View file

@ -167,7 +167,7 @@
[ [
"34", "34",
"15 febbraio 2015", "15 febbraio 2015",
"La pace di Mirab\u00e0" "La pace di Mirabà"
], ],
[ [
"35", "35",

View file

@ -1,5 +1,5 @@
import json, sys, os, glob import json, sys, os, glob
from rapidfuzz import fuzz from rapidfuzz import process, fuzz
from collections import namedtuple from collections import namedtuple
from dateutil import parser from dateutil import parser
@ -141,6 +141,45 @@ from copy import copy
all_episodes = copy(acc) all_episodes = copy(acc)
all_titles = [a.title for a in acc] all_titles = [a.title for a in acc]
# Re-index the episodes by title so that a matched title maps straight back to
# its episode record.
# NOTE(review): episodes that share a title collapse onto the last one seen.
all_episodes = {a.title: a for a in all_episodes}
# from IPython import embed as fuck; fuck()
# Quick probe of the plain-ratio scorer. `all_episodes` is now a dict, so
# iterating it yields the title strings themselves; the previous `x.title`
# resolved to the bound `str.title` method instead of the episode title, so
# the scorer was handed a method object rather than text.
max(all_episodes, key=lambda title: fuzz.ratio(title, "la missione di fata"))
# Sample inputs (mostly downloaded file names) used to exercise the matcher.
targets = [
    "la missione di fata",
    "La Melevisione 1999 - Con la carta si può - E1 [3183cb06-5276-4093-bf96-16f7455cb4ff].mp4",
    "melevisone 2010la genietta del cuore",
    "melevisone 2010la genietta del cuore [pTy6WlKEUIM].mp4",
    "melevisione i classici i tre desideri [0luzDKwhu7Y].mp4",
    "melevisione 2015 la pace di miraba [oyXErd8BNCQ].mp4",
    "melevisione 2015 una balia per il lupo [PiAX2fnm6ps].webm",
    "melevisione i claasici i dolori di nina [kaAaYYSbln8].mp4",
    "melevisione 2010 la perla dei sette mari [gm_nIC-zfOg].mp4",
    "melevisione 2010 l orcoccodrillo [M9TVqW1adS4].mp4",
    "2010 l orcoccodrillo [M9TVqW1adS4].mp4",
    "Melevisione 2015 il Natale dei bambini cattivi [BdDUTs1-nHY].webm",
]
def preprocess(t):
    """Strip known show-name prefixes from the start of *t*.

    Prefixes are tried in order and each is removed at most once, so
    "melevisione i claasici ..." loses both "melevisione " and "i claasici".
    Matching ignores letter case, and whitespace left at the front after a
    removal is dropped so the remainder starts with real text.

    Args:
        t: Raw title or file name.

    Returns:
        The input with any matching leading prefixes removed; unchanged when
        nothing matches.
    """
    # "i claasici" is a misspelling that occurs in real file names; the
    # correctly spelled variant is listed as well.
    starters = ["melevisione ", "La Melevisione ", "i claasici", "i classici"]
    for s in starters:
        # Compare only the leading slice: lower-casing the whole string could
        # change its length for some non-ASCII characters and skew the cut.
        if t[:len(s)].lower() == s.lower():
            t = t[len(s):].lstrip()
    return t
# For every sample input: shortlist five candidate titles by partial ratio,
# re-score the shortlist with the token ratio on lower-cased text, and print
# the episode behind the best re-scored title.
for target in targets:
    query = preprocess(target)
    shortlist = process.extract(
        query, all_titles, scorer=fuzz.partial_ratio, limit=5
    )
    winner, top_score = None, 0
    for candidate, *_ in shortlist:
        rescored = fuzz.token_ratio(candidate.lower(), query.lower())
        if rescored < top_score:
            continue
        # On a tie the later shortlist entry wins.
        winner, top_score = candidate, rescored
    found = all_episodes[winner]
    separator = '---------------------'
    print(separator)
    print(f'\t{query} -> {found}')
    print(separator)