#!/usr/bin/env python3
"""
translate_cartas.py

Translate the Spanish articles of the weekly letters listed in SPANISH_IDS
using Jan (Gemma 12B). Creates the translated posts in the local WordPress
(Docker) and links them to their originals with Polylang.

Usage:
  1. Start Jan with Gemma 12B
  2. python3 translate_cartas.py --check-api   # verify the connection to Jan
  3. python3 translate_cartas.py --dry-run     # show what would be translated
  4. python3 translate_cartas.py               # translate everything
  5. python3 translate_cartas.py --lang en     # a single language
  6. python3 translate_cartas.py --id 42579    # a single article
"""

import argparse
import base64
import json
import re
import subprocess
import sys
import time
import urllib.error
import urllib.request

# ── Configuration ─────────────────────────────────────────────────────────────

JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"  # overwritten in main() by get_jan_model()

DB_HOST = "172.18.0.2"
DB_PORT = 3306
DB_NAME = "wordpress_db"
DB_USER = "wordpress_user"
DB_PASS = "wordpress_pass"

WP_CONTAINER = "wordpress-web"

TARGET_LANGS = {
    "en": "English",
    "fr": "French",
    "it": "Italian",
    "pt": "Portuguese",
}

# IDs of the Spanish articles of every 2026 letter.
# 26899 (~42k chars) is listed but skipped at runtime by the MAX_CONTENT_LEN check.
SPANISH_IDS = [
    # Letter 2026-03-05 (Agua Viva) — the last 2 are already translated and
    # will be skipped automatically
    42732, 42731, 42730, 42729, 42728, 42727, 42726, 42590,
    42579, 42578, 42577, 42576, 42575, 42574, 42573, 42572,
    42571, 42570, 42569, 42568, 42567, 42566, 42565, 42564,
    42563, 42562, 42561, 42560, 42559, 42558, 42557, 42556,
    # Letter 2026-02-26 (¿Creemos en el evangelio?)
    42594, 42555, 42554, 42553, 42552, 42551, 42550, 42549,
    42548, 42547, 42546, 42545, 42544, 42543, 42542, 42541,
    42540, 42539, 42538, 42537, 42536, 42535, 42534, 42533,
    42532, 42531, 42530, 42529, 42528, 42527, 42526, 42525,
    42524, 42523,
    # Letter 2026-02-19 (Seres limitados)
    42589, 42517, 42516, 42515, 42514, 42513, 42512, 42511,
    42510, 42509, 42508, 42507, 42506, 42518, 42505, 42504,
    42503, 42502, 42501,
    # Letter 2026-02-12 (Más allá de la ley)
    42588, 42500, 42499, 42498, 42497, 42496, 42495, 42490,
    42489, 42488, 42487, 42486, 42485, 42484, 42587, 42478,
    # Letter 2026-02-05 (Ser sal, ser luz)
    42477, 42476, 42475, 42474, 42473, 42472, 42471, 42470,
    42469, 42468, 42467, 42466, 42465, 42464, 42586, 42479,
    # Letter 2026-01-29 (Bienaventurados)
    42459, 42458, 42457, 42456, 42455, 42454, 42453, 42452,
    42451, 42585, 42450, 42463, 42462, 42461, 42460, 42445,
    42444,
    # Letter 2026-01-22 (Nuevos caminos)
    42584, 42443, 42442, 42441, 42440, 42439, 42438, 42437,
    42436, 42431, 42430, 42429, 42428, 42427, 42426, 42425,
    42424,
    # Letter 2026-01-15 (La ley del Oeste)
    26899,  # 42k chars — will be skipped because of its size
    26898, 26897, 26896, 26895, 26894, 26893, 26892, 26714,
    26713, 26712, 26711, 26710, 26717, 26887, 26716, 26886,
    26715,
    # Letter 2026-01-08 (Hakuna / Avivando ilusiones)
    26885, 26884, 26883, 26882, 26881, 26880, 26875, 26708,
    26707, 26706, 26705, 26704, 26703, 26702, 26874, 26873,
    26872, 26871, 26870, 26869, 26868, 26867, 26866, 26865,
    # Letter 2026-01-01
    26864, 26863, 26862, 26861, 26860, 26859, 26858, 26857,
    26856, 26855, 26709,
]

# Maximum content size accepted for automatic translation (chars)
MAX_CONTENT_LEN = 35000

# Footer appended to every translated post.
# NOTE(review): this literal was broken across lines in the source, which
# suggests HTML markup around the text was lost — confirm the intended footer.
AI_FOOTER = "\n\nTraducido con IA\n"

# Title pairs taken from existing human translations (Pagola); sent to the
# model as few-shot examples to guide the style of translated titles.
FEW_SHOT_TITLES = {
    "en": [
        ("NO SABEMOS SABOREAR LA FE", "WE DON'T KNOW HOW TO SAVOR FAITH"),
        ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "LISTENING TO JESUS IN TODAY'S SOCIETY"),
        ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FAITHFUL TO JESUS IN TEMPTATIONS"),
    ],
    "fr": [
        ("NO SABEMOS SABOREAR LA FE", "NOUS NE SAVONS PAS APPRÉCIER LA FOI"),
        ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ÉCOUTER JÉSUS DANS LA SOCIÉTÉ ACTUELLE"),
        ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIDÈLES À JÉSUS AU MILIEU DES TENTATIONS"),
    ],
    "it": [
        ("NO SABEMOS SABOREAR LA FE", "NON SAPPIAMO ASSAPORARE LA FEDE"),
        ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ASCOLTARE GESÙ NELLA SOCIETÀ ATTUALE"),
        ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FEDELI A GESÙ NELLE TENTAZIONI"),
    ],
    "pt": [
        ("NO SABEMOS SABOREAR LA FE", "NÃO SABEMOS SABOREAR A FÉ"),
        ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "OUVIR JESUS NA SOCIEDADE ATUAL"),
        ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIÉIS A JESUS NO MEIO DAS TENTAÇÕES"),
    ],
}


# ── Jan model detection ───────────────────────────────────────────────────────

def get_jan_model():
    """Return the id of the first model reported by Jan's ``/models`` endpoint.

    Prints a diagnostic and exits the process with status 1 when the request
    or the parsing of its response fails. Returns ``"gemma"`` when Jan answers
    but reports no models.
    """
    models_url = JAN_URL.replace("/chat/completions", "/models")
    request = urllib.request.Request(models_url, headers={"Authorization": "Bearer dummy"})
    try:
        with urllib.request.urlopen(request, timeout=5) as response:
            data = json.loads(response.read())
        models = data.get("data", [])
        if models:
            return models[0]["id"]
    except Exception as e:  # top-level boundary: report and abort the script
        print(f"ERROR: No se puede conectar a Jan en {JAN_URL}")
        print(f" {e}")
        print(" Asegúrate de que Jan está corriendo con Gemma 12B cargado.")
        sys.exit(1)
    return "gemma"


# ── Translation through Jan ───────────────────────────────────────────────────

def chat_completion(messages, temperature, max_tokens, timeout):
    """POST one chat-completion request to Jan and return the stripped reply text.

    Uses the module-level JAN_URL and the current value of JAN_MODEL.
    Network errors and malformed responses propagate to the caller.
    """
    payload = json.dumps({
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }).encode("utf-8")
    request = urllib.request.Request(
        JAN_URL,
        data=payload,
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        result = json.loads(response.read())
    return result["choices"][0]["message"]["content"].strip()


def split_title_and_content(full):
    """Split a model reply into ``(title, content)``.

    The first line is the title; a leading ``Title:`` marker is removed from
    it (only the prefix, so a title that itself contains "Title:" is kept
    intact). Everything after the first line is the content. When the reply
    is a single line without the marker, the whole reply is also returned as
    the content.
    """
    first_line, newline, rest = full.partition("\n")
    if first_line.startswith("Title:"):
        return first_line[len("Title:"):].strip(), rest.strip()
    return first_line.strip(), (rest.strip() if newline else full)


def translate(title, content, lang_code, lang_name):
    """Translate an article's title and HTML content from Spanish.

    Args:
        title: Spanish title.
        content: Spanish post content (HTML).
        lang_code: Target language code; selects the few-shot title examples.
        lang_name: English name of the target language, used in the prompts.

    Returns:
        A ``(translated_title, translated_content)`` tuple.

    Raises:
        RuntimeError: When a request to Jan fails at the network level.
    """
    example_lines = "\n".join(
        f" ES: {e}\n {lang_code.upper()}: {t}"
        for e, t in FEW_SHOT_TITLES.get(lang_code, [])
    )
    example_block = (
        f"\n\nTitle translation examples (be exactly this literal):\n{example_lines}"
        if example_lines else ""
    )

    system_prompt = f"""You are a professional translator specializing in theological and religious texts.
Translate from Spanish to {lang_name}.
Rules:
- Preserve all HTML tags exactly as they appear
- Translate the title LITERALLY — never paraphrase or summarize it
- Keep the full title including everything after colons and quoted subtitles
- Titles must be in ALL CAPS
- Maintain formal theological register
- Standard religious proper nouns: translate them (e.g. "Jesús" → "Jesus" in English)
- Other proper nouns (person names, place names): keep as-is
- Return ONLY the translation, starting with 'Title:'{example_block}"""

    try:
        full = chat_completion(
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Title: {title}\n\n{content}"},
            ],
            temperature=0.3,
            max_tokens=4096,
            timeout=300,
        )
        translated_title, translated_content = split_title_and_content(full)

        # The title came back identical to the original (untranslated):
        # retry with a request for the title alone.
        if translated_title.strip().upper() == title.strip().upper():
            translated_title = chat_completion(
                [
                    {"role": "user", "content": f"Translate this title from Spanish to {lang_name}. Return ONLY the translated title in ALL CAPS, nothing else: {title}"},
                ],
                temperature=0.2,
                max_tokens=50,
                timeout=30,
            ).strip('"')

        # The translated content is empty or very short although the source
        # is not: retry with a more direct prompt.
        if len(translated_content.strip()) < 50 and len(content.strip()) > 50:
            translated_content = chat_completion(
                [
                    {"role": "system", "content": f"You are a professional translator. Translate the following text from Spanish to {lang_name}. Preserve all HTML tags. Return ONLY the translated text, no preamble."},
                    {"role": "user", "content": content},
                ],
                temperature=0.3,
                max_tokens=4096,
                timeout=300,
            )

        return translated_title, translated_content
    except OSError as e:
        # URLError is an OSError; catching the base class also covers read
        # timeouts and connection resets, which urllib does not wrap.
        raise RuntimeError(f"Error llamando a Jan: {e}") from e


# ── WordPress database ────────────────────────────────────────────────────────

def get_db():
    """Open a connection to the WordPress database whose cursors return dict rows."""
    # Imported here because this is the only place that needs the driver;
    # --check-api and the translation code then work without it installed.
    import pymysql

    return pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASS,
        database=DB_NAME,
        charset="utf8mb4",
        cursorclass=pymysql.cursors.DictCursor,
    )


def get_article(db, wp_id):
    """Fetch one post row plus its category term ids.

    Returns the row selected for ``wp_id`` (ID, title, content, author, date,
    slug and a comma-joined ``term_ids`` column), or None when no post has
    that ID.
    """
    with db.cursor() as c:
        c.execute("""
            SELECT p.ID, p.post_title, p.post_content, p.post_author,
                   p.post_date, p.post_name,
                   GROUP_CONCAT(t.term_id) as term_ids
            FROM wp_posts p
            LEFT JOIN wp_term_relationships tr ON p.ID=tr.object_id
            LEFT JOIN wp_term_taxonomy tt
                   ON tr.term_taxonomy_id=tt.term_taxonomy_id AND tt.taxonomy='category'
            LEFT JOIN wp_terms t ON tt.term_id=t.term_id
            WHERE p.ID=%s
            GROUP BY p.ID
        """, (wp_id,))
        return c.fetchone()


def get_existing_translation(db, original_id, lang_code):
    """Return the WP ID of the ``lang_code`` translation of a post, or None."""
    with db.cursor() as c:
        # Polylang stores translation groups in wp_term_relationships under
        # the 'post_translations' taxonomy: every other object sharing the
        # original's group term is a candidate translation.
        c.execute("""
            SELECT tr2.object_id as translated_id
            FROM wp_term_relationships tr1
            JOIN wp_term_relationships tr2 ON tr1.term_taxonomy_id=tr2.term_taxonomy_id
            JOIN wp_term_taxonomy tt1 ON tr1.term_taxonomy_id=tt1.term_taxonomy_id
            WHERE tt1.taxonomy='post_translations'
              AND tr1.object_id=%s
              AND tr2.object_id!=%s
        """, (original_id, original_id))
        candidates = [row['translated_id'] for row in c.fetchall()]

        # Keep the candidate whose 'language' term slug is the requested one.
        for candidate_id in candidates:
            c.execute("""
                SELECT t.slug
                FROM wp_terms t
                JOIN wp_term_taxonomy tt ON t.term_id=tt.term_id
                JOIN wp_term_relationships tr ON tt.term_taxonomy_id=tr.term_taxonomy_id
                WHERE tt.taxonomy='language' AND tr.object_id=%s
            """, (candidate_id,))
            row = c.fetchone()
            if row and row['slug'] == lang_code:
                return candidate_id
    return None


# ── Post creation through WP-CLI in Docker ────────────────────────────────────

def build_post_php(article, translated_title, translated_content, lang_code, original_id):
    """Build the PHP snippet that creates and links one translated post.

    All variable data travels as a base64-encoded JSON payload that the PHP
    side decodes. Embedding the strings as double-quoted PHP literals would
    let PHP interpolate any ``$name`` / ``{$expr}`` present in the translated
    text; base64 contains neither quotes nor ``$``.
    """
    post_date = article['post_date']
    if hasattr(post_date, 'strftime'):
        post_date = post_date.strftime('%Y-%m-%d %H:%M:%S')
    else:
        post_date = str(post_date)

    data = {
        "title": translated_title,
        "content": translated_content + AI_FOOTER,
        "author": int(article['post_author']),
        "date": post_date,
        "lang": lang_code,
        "original_id": int(original_id),
    }
    blob = base64.b64encode(json.dumps(data).encode("utf-8")).decode("ascii")

    return f"""
$data = json_decode(base64_decode('{blob}'), true);
if (!is_array($data)) {{
    echo 'ERROR: payload could not be decoded';
    exit;
}}
$original_id = (int) $data['original_id'];

// wp_insert_post() expects slashed data; the second argument makes it return
// a WP_Error on failure instead of 0.
$post_id = wp_insert_post(wp_slash([
    'post_title'   => $data['title'],
    'post_content' => $data['content'],
    'post_author'  => (int) $data['author'],
    'post_status'  => 'publish',
    'post_type'    => 'post',
    'post_date'    => $data['date'],
]), true);

if (is_wp_error($post_id)) {{
    echo 'ERROR: ' . $post_id->get_error_message();
    exit;
}}

// Assign the Polylang language
if (function_exists('pll_set_post_language')) {{
    pll_set_post_language($post_id, $data['lang']);
}}

// Link the translations
if (function_exists('pll_save_post_translations')) {{
    $translations = pll_get_post_translations($original_id);
    $translations[$data['lang']] = $post_id;
    $translations['es'] = $original_id;
    pll_save_post_translations($translations);
}}

// Copy the categories of the original
$cats = wp_get_post_categories($original_id, ['fields' => 'ids']);
if (!empty($cats)) wp_set_post_categories($post_id, $cats);

echo 'CREATED:' . $post_id;
"""


def create_wp_post(article, translated_title, translated_content, lang_code, original_id, dry_run=False):
    """Create the translated post with ``wp eval`` inside the WordPress container.

    Args:
        article: Row of the original post; ``post_author`` and ``post_date``
            are copied to the new post.
        translated_title: Title of the new post.
        translated_content: Content of the new post; AI_FOOTER is appended.
        lang_code: Polylang language code of the translation.
        original_id: WP ID of the Spanish original.
        dry_run: When true, only print what would be created.

    Returns:
        The ID of the new post, or 0 on a dry run.

    Raises:
        RuntimeError: When WP-CLI does not report a created post.
    """
    if dry_run:
        print(f" [DRY] Crearía post '{translated_title[:60]}' en {lang_code}")
        return 0

    php = build_post_php(article, translated_title, translated_content, lang_code, original_id)
    cmd = ["docker", "exec", WP_CONTAINER, "wp", "eval", php, "--allow-root"]
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Search for the marker instead of splitting on it, so notices printed
    # before or after it do not break the parse.
    created = re.search(r"CREATED:(\d+)", result.stdout)
    if created is None:
        raise RuntimeError(f"Error creando post: {result.stdout} {result.stderr}")
    return int(created.group(1))


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Parse the command line and translate the selected articles."""
    global JAN_MODEL

    parser = argparse.ArgumentParser()
    parser.add_argument("--check-api", action="store_true", help="Verificar conexión a Jan")
    parser.add_argument("--dry-run", action="store_true", help="Simular sin crear posts")
    parser.add_argument("--lang", choices=sorted(TARGET_LANGS),
                        help="Solo traducir a este idioma (en/fr/it/pt)")
    parser.add_argument("--id", type=int, help="Solo traducir este WP ID")
    args = parser.parse_args()

    JAN_MODEL = get_jan_model()
    print(f"Jan API OK — modelo: {JAN_MODEL}")

    if args.check_api:
        print("Probando traducción...")
        # NOTE(review): this sample text was broken across lines in the
        # source; it may have carried HTML markup — confirm.
        t, c = translate("Prueba", "Hola mundo\n", "en", "English")
        print(f" Título: {t}")
        print(f" Contenido: {c}")
        return

    langs = {args.lang: TARGET_LANGS[args.lang]} if args.lang else TARGET_LANGS
    ids = [args.id] if args.id else SPANISH_IDS

    total = len(ids) * len(langs)
    done = 0
    skipped = 0
    errors = 0

    print(f"\nArtículos: {len(ids)} | Idiomas: {list(langs.keys())} | Total: {total} traducciones\n")

    db = get_db()
    try:
        for wp_id in ids:
            article = get_article(db, wp_id)
            if not article:
                print(f" ⚠ ID {wp_id} no encontrado, saltando")
                continue

            title = article['post_title']
            content = article['post_content']
            print(f"\n[{wp_id}] {title[:70]}")

            if len(content) > MAX_CONTENT_LEN:
                print(f" ⚠ Contenido demasiado largo ({len(content)} chars), saltando")
                skipped += 1
                continue

            for lang_code, lang_name in langs.items():
                existing = get_existing_translation(db, wp_id, lang_code)
                if existing:
                    print(f" → {lang_code.upper()}: ya existe (ID {existing}), saltando")
                    skipped += 1
                    continue

                try:
                    if args.dry_run:
                        print(f" → {lang_code.upper()}: [DRY] se traduciría y crearía post")
                        done += 1
                        continue

                    print(f" → {lang_code.upper()}: traduciendo... ", end="", flush=True)
                    t0 = time.time()
                    trans_title, trans_content = translate(title, content, lang_code, lang_name)
                    elapsed = time.time() - t0
                    print(f"{elapsed:.0f}s")
                    print(f" Título: {trans_title[:60]}")

                    new_id = create_wp_post(article, trans_title, trans_content, lang_code, wp_id, False)
                    print(f" Post creado: ID {new_id}")
                    done += 1
                except Exception as e:  # keep going with the remaining translations
                    print(f" ERROR: {e}")
                    errors += 1

                # Short pause between consecutive translation requests
                time.sleep(2)
    finally:
        # Release the connection even when the loop aborts
        db.close()

    print(f"\n{'='*50}")
    print(f"Completado: {done} creados, {skipped} saltados, {errors} errores")
    if errors:
        print("Puedes volver a ejecutar — los ya creados se saltarán automáticamente.")


if __name__ == "__main__":
    main()