Añadir mu-plugins y scripts de feadulta
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
lecturas_apply.py — Casa las lecturas ES sin traducir contra el índice del leccionario
|
||||
(build_lectionary_index.py) POR REFERENCIA bíblica y vuelca las traducciones a crear.
|
||||
|
||||
Entrada: /tmp/lectionary_index.json , /tmp/lecturas_todo.json
|
||||
Salida: /tmp/lecturas_creadas.json (para que un wp eval cree+asocie+publique)
|
||||
/tmp/lecturas_skip.json
|
||||
|
||||
Uso: python3 lecturas_apply.py [--limit N]
|
||||
"""
|
||||
import sys, re, json, unicodedata
|
||||
from collections import Counter
|
||||
|
||||
# Book-name aliases: normalized feadulta spelling -> book token used by the
# evangelizo index (per the original note: the last token of the Spanish
# full_title). Keys are in the form produced by norm(): upper-case ASCII
# letters only.
ALIAS = {
    # Alternative book names
    "HECHOS": "APOSTOLES",
    "HCH": "APOSTOLES",
    "CANTAR": "CANTARES",
    "APOC": "APOCALIPSIS",
    "AP": "APOCALIPSIS",
    "QOHELET": "ECLESIASTES",
    # Liturgical abbreviations: Gospels
    "MT": "MATEO",
    "MC": "MARCOS",
    "LC": "LUCAS",
    "JN": "JUAN",
    # Liturgical abbreviations: letters
    "RM": "ROMANOS",
    "GA": "GALATAS",
    "EF": "EFESIOS",
    "FLP": "FILIPENSES",
    "COL": "COLOSENSES",
    "HB": "HEBREOS",
    "ST": "SANTIAGO",
    # Liturgical abbreviations: Old Testament
    "IS": "ISAIAS",
    "JR": "JEREMIAS",
    "EZ": "EZEQUIEL",
    "GN": "GENESIS",
    "EX": "EXODO",
    "DT": "DEUTERONOMIO",
    "SAL": "SALMOS",
    "PR": "PROVERBIOS",
}
|
||||
|
||||
|
||||
def norm(s):
    """Reduce a book name to an accent-free, upper-case, letters-only token.

    The string is NFKD-decomposed so accents become separate combining
    marks, anything that cannot be encoded as ASCII is dropped, the result
    is upper-cased, and every character other than A-Z is removed.

    Because digits are removed as well, a leading book number is discarded
    ("1 Corintios" and "2 Corintios" both become "CORINTIOS").
    """
    decomposed = unicodedata.normalize("NFKD", s)
    ascii_only = decomposed.encode("ascii", "ignore").decode()
    shouted = ascii_only.upper()
    # Letters only: spaces, dots and the book number all disappear here.
    return re.sub(r"[^A-Z]", "", shouted)
|
||||
|
||||
|
||||
def title_keys(title):
    """Turn a post title into a list of "BOOK|chapter|verse" lookup keys.

    The title is split on "/" and each part must contain a reference of the
    form "<book> <chapter>, <verse>". The book text is normalized with
    norm() and then mapped through ALIAS; chapter and verse are converted
    to int, so leading zeros are dropped.

    Returns the list of keys in title order, or None as soon as any part
    has no recognizable reference (the whole post is then left unmatched).
    """
    reference = r"([0-9]?\s*[A-Za-zÀ-ÿ][A-Za-zÀ-ÿ.\s]+?)\s+(\d{1,3})\s*,\s*(\d{1,3})"
    collected = []
    for segment in re.split(r"\s*/\s*", title):
        found = re.search(reference, segment)
        if found is None:
            # One unparseable part invalidates the entire title.
            return None
        raw_book, chapter, verse = found.groups()
        token = norm(raw_book)
        token = ALIAS.get(token, token)
        collected.append("|".join((token, str(int(chapter)), str(int(verse)))))
    return collected if collected else None
|
||||
|
||||
|
||||
def _parse_limit(argv):
    """Return N from ``--limit N`` in *argv*, or 0 when the flag is absent.

    Exits with a usage message when the value is missing, not an integer,
    or negative (a negative slice bound would silently drop items from the
    end of the list instead of limiting it).
    """
    if "--limit" not in argv:
        return 0
    try:
        limit = int(argv[argv.index("--limit") + 1])
    except (IndexError, ValueError):
        raise SystemExit("Uso: python3 lecturas_apply.py [--limit N]") from None
    if limit < 0:
        raise SystemExit("Uso: python3 lecturas_apply.py [--limit N]")
    return limit


def _read_json(path):
    """Load a JSON document from *path*, decoding it as UTF-8."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _write_json(path, payload):
    """Write *payload* to *path* as UTF-8 JSON, keeping non-ASCII text as is."""
    # Explicit UTF-8: with ensure_ascii=False the locale default encoding
    # may be unable to represent accented characters.
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, ensure_ascii=False)


def main():
    """Match pending readings against the lectionary index and dump results.

    Reads /tmp/lectionary_index.json and /tmp/lecturas_todo.json, optionally
    keeps only the first N pending items (``--limit N``; 0 means no limit),
    and for every item:

    * skips it when title_keys() cannot parse its title;
    * skips it when any of its keys is absent from the index, recording the
      absent keys under "missing";
    * otherwise concatenates, per language, the index texts of all its keys.

    Matched items go to /tmp/lecturas_creadas.json, skipped ones to
    /tmp/lecturas_skip.json, and a short summary is printed.

    Assumes every index entry maps each of "en", "fr", "it", "pt" to a
    string; an entry lacking one of them raises KeyError.
    """
    limit = _parse_limit(sys.argv)
    idx = _read_json("/tmp/lectionary_index.json")
    todo = _read_json("/tmp/lecturas_todo.json")
    if limit:
        todo = todo[:limit]

    creadas, skip = [], []
    for t in todo:
        keys = title_keys(t["title"])
        if not keys:
            skip.append({**t, "why": "título no parseable"})
            continue
        # Single pass: the list doubles as the "all keys present" test.
        missing = [k for k in keys if k not in idx]
        if missing:
            skip.append({**t, "why": "ref no en índice", "missing": missing})
            continue
        langs = {
            wl: "".join(idx[k][wl] for k in keys)
            for wl in ("en", "fr", "it", "pt")
        }
        creadas.append({"es_id": t["id"], "title": t["title"], "langs": langs})

    _write_json("/tmp/lecturas_creadas.json", creadas)
    _write_json("/tmp/lecturas_skip.json", skip)

    print(f"CASADAS: {len(creadas)} / {len(todo)} SKIP: {len(skip)}")
    print("motivos skip:", dict(Counter(s["why"] for s in skip)))
    # Sample of the books whose references were not found, to guide
    # extending the aliases or the indexed range.
    missing_books = Counter(
        k.split("|")[0] for s in skip for k in s.get("missing", [])
    )
    print("libros con más misses:", dict(missing_books.most_common(12)))
|
||||
|
||||
|
||||
# Run the matching pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user