This commit is contained in:
bparodi@lezzo.org 2024-11-05 16:34:28 +01:00
parent 7708dafafe
commit 1b65eb13d4
2 changed files with 42 additions and 3 deletions

2
17.txt
View file

@ -167,7 +167,7 @@
[
"34",
"15 febbraio 2015",
"La pace di Mirab\u00e0"
"La pace di Mirabà"
],
[
"35",

View file

@ -1,5 +1,5 @@
import json, sys, os, glob
from rapidfuzz import fuzz
from rapidfuzz import process, fuzz
from collections import namedtuple
from dateutil import parser
@ -141,6 +141,45 @@ from copy import copy
# Work on a shallow copy of `acc` (built earlier in the file, outside this view).
all_episodes = copy(acc)
# Titles of every entry in `acc`; used below as the fuzzy-matching choices.
all_titles = [a.title for a in acc]
# Re-key the episodes by title for direct lookup; entries that share a title
# collapse to the last one seen.
all_episodes = {a.title: a for a in all_episodes}
# from IPython import embed as fuck; fuck()
# NOTE(review): `all_episodes` is a dict keyed by title at this point, so `x`
# below is a title string and `x.title` is the bound `str.title` method rather
# than an episode title; the result of `max` is also discarded. This looks
# like a leftover first attempt (possibly the line this diff removes) --
# confirm and drop.
max(all_episodes, key=lambda x: fuzz.ratio(x.title, "la missione di fata"))
# Sample inputs for the matcher: one bare title plus downloaded-video file
# names, some with misspellings, a year and a bracketed ID.
targets = [
"la missione di fata",
"La Melevisione 1999 - Con la carta si può - E1 [3183cb06-5276-4093-bf96-16f7455cb4ff].mp4",
"melevisone 2010la genietta del cuore",
"melevisone 2010la genietta del cuore [pTy6WlKEUIM].mp4",
"melevisione i classici i tre desideri [0luzDKwhu7Y].mp4",
"melevisione 2015 la pace di miraba [oyXErd8BNCQ].mp4",
"melevisione 2015 una balia per il lupo [PiAX2fnm6ps].webm",
"melevisione i claasici i dolori di nina [kaAaYYSbln8].mp4",
"melevisione 2010 la perla dei sette mari [gm_nIC-zfOg].mp4",
"melevisione 2010 l orcoccodrillo [M9TVqW1adS4].mp4",
"2010 l orcoccodrillo [M9TVqW1adS4].mp4",
"Melevisione 2015 il Natale dei bambini cattivi [BdDUTs1-nHY].webm",
]
def preprocess(t):
    """Strip known leading boilerplate from a file name or title.

    Each entry of ``starters`` is tried once, in order, so an input that
    begins with several of them back to back (for example
    ``"melevisione i claasici ..."``) has all of them removed.

    Prefixes are compared case-insensitively: the later scoring lowercases
    both sides anyway, and inputs such as ``"Melevisione 2015 ..."`` would
    otherwise keep their prefix.

    Args:
        t: Raw title or file name.

    Returns:
        ``t`` without the recognised prefixes and without leading whitespace.
    """
    starters = ["melevisione ", "La Melevisione ", "i claasici"]
    for s in starters:
        # Compare only the leading slice so the rest of the string keeps
        # its original casing.
        if t[:len(s)].lower() == s.lower():
            t = t[len(s):]
    # Removing a prefix that has no trailing space (e.g. "i claasici")
    # leaves a stray leading blank; drop it.
    return t.lstrip()
# For every sample input, find the best-matching known episode and print it.
for target in targets:
    t = preprocess(target)
    # First pass: shortlist up to five titles by partial (substring-like) ratio.
    # NOTE(review): this pass compares the raw strings while the re-scoring
    # below lowercases both sides -- confirm whether that asymmetry is wanted.
    res = process.extract(t, all_titles, scorer=fuzz.partial_ratio, limit=5)
    if not res:
        # No candidate at all (e.g. no titles loaded): report it instead of
        # failing on the `all_episodes[None]` lookup.
        print('---------------------')
        print(f'\t{t} -> no match')
        print('---------------------')
        continue
    # Second pass: re-rank the shortlist with a case-insensitive token ratio.
    query = t.lower()  # loop-invariant, computed once per target
    best_title, best_score = None, 0
    for r in res:
        score = fuzz.token_ratio(r[0].lower(), query)
        # `>=` keeps the original tie-breaking: on equal scores the later
        # shortlist entry wins.
        if score >= best_score:
            best_title, best_score = r[0], score
    found = all_episodes[best_title]
    print('---------------------')
    print(f'\t{t} -> {found}')
    print('---------------------')