#!/usr/bin/env python3
"""
download_lecturas.py — Download liturgical Bible readings (official Catholic
text) from evangelizo.org in es/en/fr/it/pt for a given liturgical date.

Source: feed.evangelizo.org/v2/reader.php (readings of the day, Catholic
lectionary).
evangelizo language codes: SP=es, AM=en, FR=fr, IT=it, PT=pt.

Usage:  python3 download_lecturas.py 2026-06-21 [--books Jeremías,Romanos]
Output: JSON on stdout shaped as {book: {lang: {title, html, header}}}.
"""

import argparse
import datetime
import html
import json
import re
import sys
import urllib.parse
import urllib.request

# evangelizo language code -> language key used in the JSON output.
LANGS = {"SP": "es", "AM": "en", "FR": "fr", "IT": "it", "PT": "pt"}

# Book name in each language (used to pick the matching reading block).
BOOK_ALIASES = {
    "Jeremías": ["Jeremías", "Jeremiah", "Jérémie", "Geremia", "Jeremias"],
    "Romanos": ["Romanos", "Romans", "Romains", "Romani"],
    "Mateo": ["Mateo", "Matthew", "Matthieu", "Matteo", "Mateus"],
    "Marcos": ["Marcos", "Mark", "Marc", "Marco", "Marcos"],
    "Lucas": ["Lucas", "Luke", "Luc", "Luca", "Lucas"],
    "Juan": ["Juan", "John", "Jean", "Giovanni", "João"],
}

# Books extracted when --books is not given.
DEFAULT_BOOKS = ["Jeremías", "Romanos"]

FEED_URL = "https://feed.evangelizo.org/v2/reader.php"

# A trailing "chapter,verses" reference such as "5,12-15." marks a header line.
REF_RE = re.compile(r"(\d{1,3}\s*,[\d.\-\s]+)\.?$")
# Same reference, preceded by the last word of the header (the book name).
TITLE_RE = re.compile(r"([A-Za-zÀ-ÿ]+)\s+(\d{1,3}\s*,[\d.\-\s]+)\.?$")
BR_RE = re.compile(r"<br\s*/?>", re.IGNORECASE)
TAG_RE = re.compile(r"<[^>]+>")


def html_to_lines(raw):
    """Convert an HTML fragment into a list of non-empty, stripped text lines.

    Line-break tags become newlines, every other tag is removed and HTML
    entities are decoded.
    """
    # NOTE(review): assumes the feed separates lines with <br> tags — confirm
    # against a live response.
    text = BR_RE.sub("\n", raw)
    text = TAG_RE.sub("", text)
    text = html.unescape(text)
    stripped = (line.strip() for line in text.split("\n"))
    return [line for line in stripped if line]


def fetch(date, lang_code):
    """Download the readings for *date* in *lang_code* as a list of text lines.

    *lang_code* is an evangelizo language code (a key of ``LANGS``). Network
    and HTTP errors raised by ``urllib`` propagate to the caller.
    """
    # NOTE(review): the date is forwarded exactly as given; confirm that the
    # feed accepts the dashed ISO form.
    query = urllib.parse.urlencode(
        {"date": date, "lang": lang_code, "type": "all"}
    )
    req = urllib.request.Request(
        f"{FEED_URL}?{query}", headers={"User-Agent": "fea-lecturas/1.0"}
    )
    with urllib.request.urlopen(req, timeout=30) as resp:
        raw = resp.read().decode("utf-8", "replace")
    return html_to_lines(raw)


def is_header(line):
    """Return True when *line* is short and ends with a chapter,verse reference."""
    return len(line) < 110 and bool(REF_RE.search(line))


def parse_blocks(lines):
    """Return [(header, [paragraphs])], skipping the first line (day title).

    Lines that appear before the first header are discarded.
    """
    blocks = []
    cur_header, cur_paras = None, []
    for line in lines[1:]:
        if is_header(line):
            if cur_header is not None:
                blocks.append((cur_header, cur_paras))
            cur_header, cur_paras = line, []
        elif cur_header is not None:
            cur_paras.append(line)
    if cur_header is not None:
        blocks.append((cur_header, cur_paras))
    return blocks


def short_title(header):
    """'Carta de San Pablo a los Romanos 5,12-15.' -> 'ROMANOS 5,12-15'.

    Headers without a recognisable reference are returned with trailing
    dots removed.
    """
    match = TITLE_RE.search(header)
    if not match:
        return header.rstrip(".")
    reference = re.sub(r"\s+", "", match.group(2)).rstrip(".")
    return match.group(1).upper() + " " + reference


def find_block(blocks, aliases):
    """Return the first (header, paragraphs) whose header contains an alias.

    Matching is a case-insensitive substring test; returns None when no
    header matches.
    """
    lowered = [alias.lower() for alias in aliases]
    for header, paras in blocks:
        header_lower = header.lower()
        if any(alias in header_lower for alias in lowered):
            return header, paras
    return None


def render_html(paras):
    """Render paragraphs as HTML, one <p> element per paragraph."""
    # The text was entity-decoded while parsing, so it must be re-escaped
    # before being embedded in markup.
    # NOTE(review): the <p>…</p> wrapper is reconstructed from the "html"
    # output key; confirm it matches what consumers expect.
    return "".join(f"<p>{html.escape(p, quote=False)}</p>\n" for p in paras)


def iso_date(value):
    """argparse type: accept a YYYY-MM-DD date and return it unchanged."""
    try:
        datetime.datetime.strptime(value, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}, expected YYYY-MM-DD"
        ) from None
    return value


def main(argv=None):
    """Command-line entry point: fetch the readings and print JSON to stdout.

    *argv* defaults to ``sys.argv[1:]``. Books that cannot be found for a
    language are left out of that book's mapping and reported on stderr.
    """
    parser = argparse.ArgumentParser(
        description="Download liturgical readings from evangelizo.org as JSON."
    )
    parser.add_argument("date", type=iso_date, help="liturgical date, YYYY-MM-DD")
    parser.add_argument(
        "--books",
        default=",".join(DEFAULT_BOOKS),
        help="comma-separated book names (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    books = [name.strip() for name in args.books.split(",") if name.strip()]
    if not books:
        parser.error("--books needs at least one book name")

    result = {book: {} for book in books}
    for code, lang in LANGS.items():
        blocks = parse_blocks(fetch(args.date, code))
        for book in books:
            # Unknown books are matched by their own name.
            found = find_block(blocks, BOOK_ALIASES.get(book, [book]))
            if found is None:
                print(
                    f"warning: no reading found for {book!r} in {lang}",
                    file=sys.stderr,
                )
                continue
            header, paras = found
            result[book][lang] = {
                "title": short_title(header),
                "html": render_html(paras),
                "header": header,
            }
    json.dump(result, sys.stdout, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()