#!/usr/bin/env python3
"""
lecturas_apply.py — Match the untranslated ES readings against the lectionary
index (build_lectionary_index.py) BY biblical REFERENCE and dump the
translations that have to be created.

Input:   /tmp/lectionary_index.json , /tmp/lecturas_todo.json
Output:  /tmp/lecturas_creadas.json  (for a wp eval to create+link+publish)
         /tmp/lecturas_skip.json
Usage:   python3 lecturas_apply.py [--limit N]
"""
import sys
import re
import json
import unicodedata
from collections import Counter
from typing import List, Optional

INDEX_PATH = "/tmp/lectionary_index.json"
TODO_PATH = "/tmp/lecturas_todo.json"
CREATED_PATH = "/tmp/lecturas_creadas.json"
SKIP_PATH = "/tmp/lecturas_skip.json"

# Target languages read from every index entry.
LANGS = ("en", "fr", "it", "pt")

# Book-name aliases: feadulta spelling -> token used by evangelizo
# (last token of the Spanish full_title).
ALIAS = {
    "HECHOS": "APOSTOLES",
    "HCH": "APOSTOLES",
    "CANTAR": "CANTARES",
    "APOC": "APOCALIPSIS",
    "AP": "APOCALIPSIS",
    "QOHELET": "ECLESIASTES",
    # liturgical abbreviations
    "MT": "MATEO",
    "MC": "MARCOS",
    "LC": "LUCAS",
    "JN": "JUAN",
    "RM": "ROMANOS",
    "GA": "GALATAS",
    "EF": "EFESIOS",
    "FLP": "FILIPENSES",
    "COL": "COLOSENSES",
    "HB": "HEBREOS",
    "ST": "SANTIAGO",
    "IS": "ISAIAS",
    "JR": "JEREMIAS",
    "EZ": "EZEQUIEL",
    "GN": "GENESIS",
    "EX": "EXODO",
    "DT": "DEUTERONOMIO",
    "SAL": "SALMOS",
    "PR": "PROVERBIOS",
}

# Compiled once at import time instead of on every call.
_NON_LETTER_RE = re.compile(r"[^A-Z]")
_PART_SEP_RE = re.compile(r"\s*/\s*")
_REF_RE = re.compile(
    r"([0-9]?\s*[A-Za-zÀ-ÿ][A-Za-zÀ-ÿ.\s]+?)\s+(\d{1,3})\s*,\s*(\d{1,3})"
)


def norm(s: str) -> str:
    """Return *s* reduced to its unaccented, upper-case ASCII letters.

    Accents are removed through NFKD decomposition; every character that is
    not A-Z is then dropped, which also discards a leading book number
    (so "1 Juan" and "2 Juan" both become "JUAN").
    """
    ascii_upper = (
        unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode().upper()
    )
    # letters only -> drops the book number
    return _NON_LETTER_RE.sub("", ascii_upper)


def title_keys(title: str) -> Optional[List[str]]:
    """Turn a post title into index keys of the form ``BOOK|chapter|verse``.

    The title is split on "/" and each part must contain a reference shaped
    like ``<book> <chapter>,<verse>``; the book name goes through ``norm``
    and then ``ALIAS``.

    Returns:
        The list of keys, one per "/"-separated part, or ``None`` as soon as
        any part cannot be parsed (the whole post is then left unmatched).
    """
    # Compose accents first: the character class in _REF_RE only covers
    # precomposed letters, so a decomposed "E" + U+0301 would otherwise cut
    # the book name short ("Éxodo" -> "XODO").
    title = unicodedata.normalize("NFC", title)

    keys = []
    for part in _PART_SEP_RE.split(title):
        m = _REF_RE.search(part)
        if not m:
            return None  # unparseable part -> do not match the post at all
        book = norm(m.group(1))
        book = ALIAS.get(book, book)
        keys.append(f"{book}|{int(m.group(2))}|{int(m.group(3))}")
    return keys or None


def _parse_limit(argv) -> int:
    """Return the N of ``--limit N`` in *argv*, or 0 when the flag is absent."""
    if "--limit" not in argv:
        return 0
    try:
        return int(argv[argv.index("--limit") + 1])
    except (IndexError, ValueError):
        # Clear usage error instead of a bare traceback.
        raise SystemExit("uso: lecturas_apply.py [--limit N]") from None


def _load_json(path: str):
    """Read and decode the UTF-8 JSON file at *path*, closing it afterwards."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _dump_json(obj, path: str) -> None:
    """Write *obj* as UTF-8 JSON (non-ASCII kept verbatim) to *path*."""
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(obj, fh, ensure_ascii=False)


def main(argv=None) -> None:
    """Match pending readings against the index and write both result files.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv``. Only
            ``--limit N`` is looked at; a falsy N (0) means "no limit".

    Each item of the to-do list is either appended to the "created" output
    (with the concatenated text of all its references for every language in
    ``LANGS``) or to the "skip" output together with a ``why`` reason.
    A summary is printed to stdout.
    """
    argv = sys.argv if argv is None else argv
    limit = _parse_limit(argv)

    idx = _load_json(INDEX_PATH)
    todo = _load_json(TODO_PATH)
    if limit:
        todo = todo[:limit]

    creadas, skip = [], []
    for t in todo:
        keys = title_keys(t["title"])
        if not keys:
            skip.append({**t, "why": "título no parseable"})
            continue

        missing = [k for k in keys if k not in idx]
        if missing:
            skip.append({**t, "why": "ref no en índice", "missing": missing})
            continue

        # One string per language: the texts of all references, in title order.
        langs = {wl: "".join(idx[k][wl] for k in keys) for wl in LANGS}
        creadas.append({"es_id": t["id"], "title": t["title"], "langs": langs})

    _dump_json(creadas, CREATED_PATH)
    _dump_json(skip, SKIP_PATH)

    print(f"CASADAS: {len(creadas)} / {len(todo)} SKIP: {len(skip)}")
    print("motivos skip:", dict(Counter(s["why"] for s in skip)))

    # Sample of the missing references, by book (to extend aliases/ranges).
    missing_books = Counter()
    for s in skip:
        for k in s.get("missing", []):
            missing_books[k.split("|")[0]] += 1
    print("libros con más misses:", dict(missing_books.most_common(12)))


if __name__ == "__main__":
    main()