Añadir mu-plugins y scripts de feadulta
This commit is contained in:
@@ -0,0 +1,401 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
translate_cartas.py

Translates the Spanish articles of the weekly letters listed in SPANISH_IDS
using Jan (Gemma 12B).
Creates the translated posts in the local WordPress (Docker) and links them
with Polylang.

Usage:
  1. Start Jan with Gemma 12B
  2. python3 translate_cartas.py --check-api   # verify the connection to Jan
  3. python3 translate_cartas.py --dry-run     # show what would be translated
  4. python3 translate_cartas.py               # translate everything
  5. python3 translate_cartas.py --lang en     # a single language only
  6. python3 translate_cartas.py --id 42579    # a single article only
"""
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import pymysql
|
||||
|
||||
# ── Configuration ─────────────────────────────────────────────────────────────
# Chat-completions endpoint of the Jan server; get_jan_model() derives the
# /models endpoint from it.
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
# Default model id; main() overwrites it with the id returned by get_jan_model().
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"

# MySQL connection settings consumed by get_db().
DB_HOST = "172.18.0.2"
DB_PORT = 3306
DB_NAME = "wordpress_db"
DB_USER = "wordpress_user"
DB_PASS = "wordpress_pass"

# Container name passed to `docker exec` when running WP-CLI.
WP_CONTAINER = "wordpress-web"

# Target language code -> English language name used in the translation prompts.
TARGET_LANGS = {
    "en": "English",
    "fr": "French",
    "it": "Italian",
    "pt": "Portuguese",
}
|
||||
|
||||
# IDs of the Spanish articles from all the 2026 letters.
# 26899 (~42k chars per the inline note) stays in the list; main() skips any
# article whose content is longer than MAX_CONTENT_LEN.
SPANISH_IDS = [
    # Letter 2026-03-05 (Agua Viva) — the last 2 are already translated and will be skipped automatically
    42732, 42731, 42730, 42729, 42728, 42727, 42726, 42590,
    42579, 42578, 42577, 42576, 42575, 42574, 42573, 42572, 42571,
    42570, 42569, 42568, 42567, 42566, 42565, 42564, 42563, 42562,
    42561, 42560, 42559, 42558, 42557, 42556,
    # Letter 2026-02-26 (¿Creemos en el evangelio?)
    42594, 42555, 42554, 42553, 42552, 42551, 42550, 42549, 42548, 42547,
    42546, 42545, 42544, 42543, 42542, 42541, 42540, 42539, 42538,
    42537, 42536, 42535, 42534, 42533, 42532, 42531, 42530, 42529,
    42528, 42527, 42526, 42525, 42524, 42523,
    # Letter 2026-02-19 (Seres limitados)
    42589, 42517, 42516, 42515, 42514, 42513, 42512, 42511,
    42510, 42509, 42508, 42507, 42506, 42518, 42505, 42504, 42503,
    42502, 42501,
    # Letter 2026-02-12 (Más allá de la ley)
    42588, 42500, 42499, 42498, 42497, 42496, 42495, 42490,
    42489, 42488, 42487, 42486, 42485, 42484, 42587, 42478,
    # Letter 2026-02-05 (Ser sal, ser luz)
    42477, 42476, 42475, 42474, 42473, 42472, 42471, 42470,
    42469, 42468, 42467, 42466, 42465, 42464, 42586, 42479,
    # Letter 2026-01-29 (Bienaventurados)
    42459, 42458, 42457, 42456, 42455, 42454, 42453, 42452,
    42451, 42585, 42450, 42463, 42462, 42461, 42460, 42445, 42444,
    # Letter 2026-01-22 (Nuevos caminos)
    42584, 42443, 42442, 42441, 42440, 42439, 42438, 42437,
    42436, 42431, 42430, 42429, 42428, 42427, 42426, 42425, 42424,
    # Letter 2026-01-15 (La ley del Oeste)
    26899,  # 42k chars — will be skipped because of its size
    26898, 26897, 26896, 26895, 26894, 26893, 26892,
    26714, 26713, 26712, 26711, 26710, 26717, 26887, 26716, 26886, 26715,
    # Letter 2026-01-08 (Hakuna / Avivando ilusiones)
    26885, 26884, 26883, 26882, 26881, 26880, 26875, 26708,
    26707, 26706, 26705, 26704, 26703, 26702, 26874, 26873,
    26872, 26871, 26870, 26869, 26868, 26867, 26866, 26865,
    # Letter 2026-01-01
    26864, 26863, 26862, 26861, 26860, 26859, 26858, 26857,
    26856, 26855, 26709,
]
|
||||
|
||||
# Maximum content size (in characters) accepted for automatic translation;
# main() skips longer articles.
MAX_CONTENT_LEN = 35000

# HTML note appended to every translated post body by create_wp_post().
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
|
||||
|
||||
# ── Detectar modelo Jan ───────────────────────────────────────────────────────
|
||||
def get_jan_model():
    """Ask the Jan server which model it exposes and return that model's id.

    The ``/models`` endpoint is derived from ``JAN_URL``. The id of the first
    listed model is returned; if the server reports no models the fallback
    ``"gemma"`` is returned. On any failure a diagnostic is printed and the
    process exits with status 1.
    """
    import urllib.request

    auth_headers = {"Authorization": "Bearer dummy"}
    try:
        models_url = JAN_URL.replace("/chat/completions", "/models")
        request = urllib.request.Request(models_url, headers=auth_headers)
        with urllib.request.urlopen(request, timeout=5) as response:
            listing = json.loads(response.read())
        available = listing.get("data", [])
        if available:
            first_model = available[0]
            return first_model["id"]
    except Exception as e:
        # Any failure here (network, bad JSON, unexpected shape) is fatal.
        for message in (
            f"ERROR: No se puede conectar a Jan en {JAN_URL}",
            f" {e}",
            " Asegúrate de que Jan está corriendo con Gemma 12B cargado.",
        ):
            print(message)
        sys.exit(1)
    return "gemma"
|
||||
|
||||
# ── Traducción via Jan ────────────────────────────────────────────────────────
|
||||
def translate(title, content, lang_code, lang_name):
    """Translate one article from Spanish with Jan and return ``(title, content)``.

    The model is asked to answer with a first line ``Title: ...`` followed by
    the translated body. Two targeted retries follow: one for the title when it
    came back identical to the original, and one for the body when it came back
    empty or very short although the source body is not.

    Args:
        title: Spanish post title.
        content: Spanish post body (HTML).
        lang_code: Target language code (e.g. ``"en"``); selects the few-shot
            title examples and labels them in the prompt.
        lang_name: English name of the target language, used in the prompts.

    Returns:
        Tuple ``(translated_title, translated_content)``.

    Raises:
        RuntimeError: If a request to Jan fails at the network level
            (connection error, HTTP error or timeout).
    """
    try:
        full = _jan_chat(
            [
                {"role": "system", "content": _build_system_prompt(lang_code, lang_name)},
                {"role": "user", "content": f"Title: {title}\n\n{content}"},
            ],
            temperature=0.3,
            max_tokens=4096,
            timeout=300,
        )
        translated_title, translated_content = _split_title_and_body(full)

        # The title came back unchanged (not translated): retry the title alone.
        if translated_title.strip().upper() == title.strip().upper():
            translated_title = _jan_chat(
                [
                    {"role": "user", "content": f"Translate this title from Spanish to {lang_name}. Return ONLY the translated title in ALL CAPS, nothing else: {title}"}
                ],
                temperature=0.2,
                max_tokens=50,
                timeout=30,
            ).strip('"')

        # The body came back empty or very short: retry with a more direct prompt.
        if len(translated_content.strip()) < 50 and len(content.strip()) > 50:
            translated_content = _jan_chat(
                [
                    {"role": "system", "content": f"You are a professional translator. Translate the following text from Spanish to {lang_name}. Preserve all HTML tags. Return ONLY the translated text, no preamble."},
                    {"role": "user", "content": content},
                ],
                temperature=0.3,
                max_tokens=4096,
                timeout=300,
            )

        return translated_title, translated_content
    except OSError as e:
        # URLError/HTTPError are OSError subclasses; catching OSError also wraps
        # socket timeouts raised while waiting for or reading the response.
        raise RuntimeError(f"Error llamando a Jan: {e}") from e


def _build_system_prompt(lang_code, lang_name):
    """Build the system prompt, including few-shot title examples when available."""
    # Few-shot examples from existing human translations (Pagola) to guide style
    few_shot = {
        "en": [
            ("NO SABEMOS SABOREAR LA FE", "WE DON'T KNOW HOW TO SAVOR FAITH"),
            ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "LISTENING TO JESUS IN TODAY'S SOCIETY"),
            ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FAITHFUL TO JESUS IN TEMPTATIONS"),
        ],
        "fr": [
            ("NO SABEMOS SABOREAR LA FE", "NOUS NE SAVONS PAS APPRÉCIER LA FOI"),
            ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ÉCOUTER JÉSUS DANS LA SOCIÉTÉ ACTUELLE"),
            ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIDÈLES À JÉSUS AU MILIEU DES TENTATIONS"),
        ],
        "it": [
            ("NO SABEMOS SABOREAR LA FE", "NON SAPPIAMO ASSAPORARE LA FEDE"),
            ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ASCOLTARE GESÙ NELLA SOCIETÀ ATTUALE"),
            ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FEDELI A GESÙ NELLE TENTAZIONI"),
        ],
        "pt": [
            ("NO SABEMOS SABOREAR LA FE", "NÃO SABEMOS SABOREAR A FÉ"),
            ("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "OUVIR JESUS NA SOCIEDADE ATUAL"),
            ("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIÉIS A JESUS NO MEIO DAS TENTAÇÕES"),
        ],
    }
    example_lines = "\n".join(
        f" ES: {e}\n {lang_code.upper()}: {t}"
        for e, t in few_shot.get(lang_code, [])
    )
    example_block = f"\n\nTitle translation examples (be exactly this literal):\n{example_lines}" if example_lines else ""

    return "\n".join([
        "You are a professional translator specializing in theological and religious texts.",
        f"Translate from Spanish to {lang_name}.",
        "Rules:",
        "- Preserve all HTML tags exactly as they appear",
        "- Translate the title LITERALLY — never paraphrase or summarize it",
        "- Keep the full title including everything after colons and quoted subtitles",
        "- Titles must be in ALL CAPS",
        "- Maintain formal theological register",
        "- Standard religious proper nouns: translate them (e.g. \"Jesús\" → \"Jesus\" in English)",
        "- Other proper nouns (person names, place names): keep as-is",
        f"- Return ONLY the translation, starting with 'Title:'{example_block}",
    ])


def _jan_chat(messages, temperature, max_tokens, timeout):
    """POST one chat-completion request to Jan and return the stripped reply text."""
    import urllib.request

    body = json.dumps({
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }).encode("utf-8")
    request = urllib.request.Request(
        JAN_URL,
        data=body,
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        reply = json.loads(response.read())
    return reply["choices"][0]["message"]["content"].strip()


def _split_title_and_body(full):
    """Split a model reply into ``(title, body)`` using its first line as the title."""
    prefix = "Title:"
    first_line, newline, rest = full.partition("\n")
    if first_line.startswith(prefix):
        # Drop only the leading marker; a later "Title:" inside the title stays.
        return first_line[len(prefix):].strip(), rest.strip()
    # No marker: treat the first line as the title. A single-line reply is also
    # kept as the body so the short-body retry in translate() can decide.
    return first_line.strip(), (rest.strip() if newline else full)
|
||||
|
||||
# ── Base de datos WordPress ───────────────────────────────────────────────────
|
||||
def get_db():
    """Open a new connection to the WordPress MySQL database.

    Connection parameters come from the module-level ``DB_*`` settings. The
    connection uses the ``utf8mb4`` charset and ``DictCursor`` as its cursor
    class.
    """
    settings = {
        "host": DB_HOST,
        "port": DB_PORT,
        "user": DB_USER,
        "password": DB_PASS,
        "database": DB_NAME,
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
|
||||
|
||||
def get_article(db, wp_id):
    """Fetch one post together with the ids of its ``category`` terms.

    Args:
        db: Open database connection (as returned by ``get_db``).
        wp_id: ``wp_posts.ID`` of the post to load.

    Returns:
        The row produced by the query (post columns plus ``term_ids``, a
        ``GROUP_CONCAT`` of category term ids), or whatever ``fetchone()``
        yields when no post matches.
    """
    query = """
        SELECT p.ID, p.post_title, p.post_content, p.post_author,
               p.post_date, p.post_name,
               GROUP_CONCAT(t.term_id) as term_ids
        FROM wp_posts p
        LEFT JOIN wp_term_relationships tr ON p.ID=tr.object_id
        LEFT JOIN wp_term_taxonomy tt ON tr.term_taxonomy_id=tt.term_taxonomy_id
            AND tt.taxonomy='category'
        LEFT JOIN wp_terms t ON tt.term_id=t.term_id
        WHERE p.ID=%s
        GROUP BY p.ID
    """
    params = (wp_id,)
    with db.cursor() as cursor:
        cursor.execute(query, params)
        row = cursor.fetchone()
    return row
|
||||
|
||||
def get_existing_translation(db, original_id, lang_code):
    """Return the WP ID of the ``lang_code`` translation of a post, if one exists.

    Args:
        db: Open database connection (as returned by ``get_db``).
        original_id: ID of the source post.
        lang_code: Language slug to look for (e.g. ``"en"``).

    Returns:
        The ID of the first linked post whose language slug equals
        ``lang_code``, or ``None`` when there is none.
    """
    # Posts sharing a 'post_translations' term with the original
    # (per the original author's note, this is where Polylang keeps the links).
    siblings_sql = """
        SELECT tr2.object_id as translated_id
        FROM wp_term_relationships tr1
        JOIN wp_term_relationships tr2 ON tr1.term_taxonomy_id=tr2.term_taxonomy_id
        JOIN wp_term_taxonomy tt1 ON tr1.term_taxonomy_id=tt1.term_taxonomy_id
        WHERE tt1.taxonomy='post_translations'
          AND tr1.object_id=%s AND tr2.object_id!=%s
    """
    # Slug of the 'language' term attached to one post.
    language_sql = """
        SELECT t.slug FROM wp_terms t
        JOIN wp_term_taxonomy tt ON t.term_id=tt.term_id
        JOIN wp_term_relationships tr ON tt.term_taxonomy_id=tr.term_taxonomy_id
        WHERE tt.taxonomy='language' AND tr.object_id=%s
    """
    with db.cursor() as cursor:
        cursor.execute(siblings_sql, (original_id, original_id))
        sibling_ids = [found['translated_id'] for found in cursor.fetchall()]
        for sibling_id in sibling_ids:
            cursor.execute(language_sql, (sibling_id,))
            language = cursor.fetchone()
            if not language:
                continue
            if language['slug'] != lang_code:
                continue
            return sibling_id
    return None
|
||||
|
||||
# ── Crear post vía WP-CLI en Docker ──────────────────────────────────────────
|
||||
def create_wp_post(article, translated_title, translated_content, lang_code, original_id, dry_run=False):
    """Create the translated post through WP-CLI inside the WordPress container.

    The post is published with the original's author and date, gets
    ``AI_FOOTER`` appended to its body, is assigned ``lang_code`` and linked to
    the original via the Polylang API functions when they exist, and receives
    the original's categories.

    Args:
        article: Row of the original post; ``post_author`` and ``post_date``
            are read from it.
        translated_title: Title of the new post.
        translated_content: HTML body of the new post (without the footer).
        lang_code: Language code of the new post.
        original_id: ID of the original (Spanish) post.
        dry_run: When true, only print what would be created.

    Returns:
        The ID of the created post, or ``0`` in dry-run mode.

    Raises:
        RuntimeError: If WP-CLI does not report a created post.
    """
    if dry_run:
        print(f" [DRY] Crearía post '{translated_title[:60]}' en {lang_code}")
        return 0

    php = _build_create_post_php(article, translated_title, translated_content, lang_code, original_id)
    cmd = ["docker", "exec", WP_CONTAINER, "wp", "eval", php, "--allow-root"]
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Pick out the id explicitly so extra output around the marker cannot
    # break the parse; a non-positive id is not a created post.
    created = re.search(r"CREATED:(\d+)", result.stdout)
    if created and int(created.group(1)) > 0:
        return int(created.group(1))
    raise RuntimeError(f"Error creando post: {result.stdout} {result.stderr}")


def _build_create_post_php(article, translated_title, translated_content, lang_code, original_id):
    """Return the PHP snippet for ``wp eval`` with all data passed as base64-encoded JSON."""
    import base64

    raw_date = article['post_date']
    post_date = raw_date.strftime('%Y-%m-%d %H:%M:%S') if hasattr(raw_date, 'strftime') else str(raw_date)

    payload = {
        "title": translated_title,
        "content": translated_content + AI_FOOTER,
        "author": int(article['post_author']),
        "date": post_date,
        "lang": lang_code,
        "original_id": int(original_id),
    }
    # Model output must never be spliced into PHP source as a string literal:
    # PHP expands "$name" and "{$expr}" inside double quotes. The base64
    # alphabet contains no quote or backslash, so the single-quoted literal
    # below cannot be broken out of.
    encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")

    template = """
$d = json_decode(base64_decode('__PAYLOAD__'), true);
if (!is_array($d)) { echo 'ERROR: payload could not be decoded'; exit; }

// wp_insert_post() unslashes its input, hence wp_slash(); the second argument
// makes it return a WP_Error instead of 0 on failure.
$post_id = wp_insert_post([
    'post_title'   => wp_slash($d['title']),
    'post_content' => wp_slash($d['content']),
    'post_author'  => (int) $d['author'],
    'post_status'  => 'publish',
    'post_type'    => 'post',
    'post_date'    => $d['date'],
], true);
if (is_wp_error($post_id)) { echo 'ERROR: ' . $post_id->get_error_message(); exit; }

// Assign the Polylang language
if (function_exists('pll_set_post_language')) {
    pll_set_post_language($post_id, $d['lang']);
}

// Link the translations
if (function_exists('pll_save_post_translations')) {
    $translations = pll_get_post_translations((int) $d['original_id']);
    $translations[$d['lang']] = $post_id;
    $translations['es'] = (int) $d['original_id'];
    pll_save_post_translations($translations);
}

// Copy the original's categories
$cats = wp_get_post_categories((int) $d['original_id'], ['fields' => 'ids']);
if (!empty($cats)) wp_set_post_categories($post_id, $cats);

echo 'CREATED:' . $post_id;
"""
    return template.replace("__PAYLOAD__", encoded)
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
def main():
    """Command-line entry point.

    Detects the model served by Jan, then either runs a one-off test
    translation (``--check-api``) or walks the selected article IDs and target
    languages, skipping over-long articles and translations that already
    exist, and creating one WordPress post per remaining pair. A summary of
    created / skipped / failed items is printed at the end.
    """
    global JAN_MODEL

    parser = argparse.ArgumentParser()
    parser.add_argument("--check-api", action="store_true", help="Verificar conexión a Jan")
    parser.add_argument("--dry-run", action="store_true", help="Simular sin crear posts")
    # choices= turns an unsupported code into a usage error instead of a KeyError.
    parser.add_argument("--lang", choices=list(TARGET_LANGS), help="Solo traducir a este idioma (en/fr/it/pt)")
    parser.add_argument("--id", type=int, help="Solo traducir este WP ID")
    args = parser.parse_args()

    JAN_MODEL = get_jan_model()
    print(f"Jan API OK — modelo: {JAN_MODEL}")

    if args.check_api:
        print("Probando traducción...")
        t, c = translate("Prueba", "<p>Hola mundo</p>", "en", "English")
        print(f" Título: {t}")
        print(f" Contenido: {c}")
        return

    langs = {args.lang: TARGET_LANGS[args.lang]} if args.lang else TARGET_LANGS
    ids = [args.id] if args.id is not None else SPANISH_IDS

    total = len(ids) * len(langs)
    done = 0
    skipped = 0
    errors = 0

    db = get_db()
    try:
        print(f"\nArtículos: {len(ids)} | Idiomas: {list(langs.keys())} | Total: {total} traducciones\n")

        for wp_id in ids:
            article = get_article(db, wp_id)
            if not article:
                print(f" ⚠ ID {wp_id} no encontrado, saltando")
                continue

            title = article['post_title']
            content = article['post_content']
            print(f"\n[{wp_id}] {title[:70]}")

            if len(content) > MAX_CONTENT_LEN:
                print(f" ⚠ Contenido demasiado largo ({len(content)} chars), saltando")
                skipped += 1
                continue

            for lang_code, lang_name in langs.items():
                existing = get_existing_translation(db, wp_id, lang_code)
                if existing:
                    print(f" → {lang_code.upper()}: ya existe (ID {existing}), saltando")
                    skipped += 1
                    continue

                try:
                    if args.dry_run:
                        print(f" → {lang_code.upper()}: [DRY] se traduciría y crearía post")
                        done += 1
                        continue

                    print(f" → {lang_code.upper()}: traduciendo... ", end="", flush=True)
                    t0 = time.time()
                    trans_title, trans_content = translate(title, content, lang_code, lang_name)
                    elapsed = time.time() - t0
                    print(f"{elapsed:.0f}s")
                    print(f" Título: {trans_title[:60]}")

                    new_id = create_wp_post(article, trans_title, trans_content, lang_code, wp_id, False)
                    print(f" Post creado: ID {new_id}")
                    done += 1

                except Exception as e:
                    # One failed pair must not abort the whole batch.
                    print(f" ERROR: {e}")
                    errors += 1
                    # NOTE(review): the original indentation was lost; this
                    # pause is assumed to belong to the error path — confirm.
                    time.sleep(2)
    finally:
        # Release the connection even when the run is interrupted.
        db.close()

    print(f"\n{'='*50}")
    print(f"Completado: {done} creados, {skipped} saltados, {errors} errores")
    if errors:
        print("Puedes volver a ejecutar — los ya creados se saltarán automáticamente.")
|
||||
|
||||
# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user