Files
feadulta/scripts/sync_translations_to_prod.py

326 lines
13 KiB
Python

#!/usr/bin/env python3
"""
sync_translations_to_prod.py — Sincroniza contenido local a PROD reutilizando el
texto ya verificado en local.
Tiene dos modos:
1. Legado: sincroniza traducciones automáticas (`traduccion_origen`) suponiendo que
el post ES origen ya existe en prod con el mismo ID.
2. IDs preservados: clona posts locales a prod con ID explícito, copiando contenido,
slug, metas y categorías, y después reconstruye los grupos Polylang exactos.
El modo 2 es el que usa el handoff de la carta 46956 para evitar romper la
coincidencia local↔prod cuando prod va por detrás.
"""
from __future__ import annotations

import argparse
import json
import os
import shlex
import subprocess
import time
from pathlib import Path
# ── Config ───────────────────────────────────────────────────────────────────
# Settings read through os.environ.get() can be overridden with the FEA_*
# environment variable named in the call; the second argument is the fallback.
WP_CONTAINER = os.environ.get("FEA_WP_CONTAINER", "wordpress-web")
DB_CONTAINER = os.environ.get("FEA_DB_CONTAINER", "wordpress-mysql")
DB_NAME = os.environ.get("FEA_DB_NAME", "wordpress_db")
DB_USER = os.environ.get("FEA_DB_USER", "wordpress_user")
DB_PASS = os.environ.get("FEA_DB_PASS", "wordpress_pass")
PROD_HOST = os.environ.get("FEA_PROD_HOST", "feadulta@134.0.10.170")
# NOTE(review): the fallback below looks like a real production credential
# committed to source control — consider rotating it and requiring
# FEA_PROD_PASS to be set explicitly.
PROD_PASS = os.environ.get("FEA_PROD_PASS", "C6c2A!mAl3Wj.BQF")
PROD_WPLOAD = os.environ.get("FEA_PROD_WPLOAD", "/web/wp-nuevo/wp-load.php")
# Destination path of the PHP helper on the prod host.
PROD_HELPER = "/tmp/fea_translate_helper.php"
# The PHP helper is expected to live next to this script.
HELPER_SRC = Path(__file__).resolve().parent / "fea_translate_helper.php"
# Destination path of the PHP helper inside the local WordPress container.
LOCAL_HELPER_DST = "/tmp/fea_translate_helper.php"
STATE_FILE = Path(os.environ.get("FEA_SYNC_STATE", "/tmp/feadulta-sync-state.json"))
LOG_FILE = Path(os.environ.get("FEA_SYNC_LOG", "/tmp/feadulta-sync.log"))
# Status argument handed to the helper's create/clone subcommands.
STATUS = os.environ.get("FEA_SYNC_STATUS", "draft")
# Absolute URLs of the local environment that must NOT reach prod (issue #91):
# the local post_content carries the Tailscale host with the /fea prefix, while
# in prod the installation hangs off the root. They are rewritten on deploy so
# that no broken links are left behind (Tailscale is unreachable for visitors).
LOCAL_BASE = os.environ.get("FEA_LOCAL_BASE", "https://farmer.taild3aaf6.ts.net/fea")
PROD_BASE = os.environ.get("FEA_PROD_BASE", "https://wp-nuevo.feadulta.com")
def log(msg: str) -> None:
    """Print a timestamped message and append it to the log file.

    The line is always echoed to stdout and flushed immediately. Appending to
    ``LOG_FILE`` is best-effort: an ``OSError`` while opening or writing the
    file is ignored so that logging can never abort a sync run.

    Args:
        msg: Text to log; a ``[YYYY-MM-DD HH:MM:SS]`` prefix is added.
    """
    line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(line, flush=True)
    try:
        # The context manager closes the handle deterministically instead of
        # leaving an open file object for the garbage collector.
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError:
        pass
def sh(cmd: list[str], *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *cmd* without a shell and return its captured stdout as text.

    Args:
        cmd: Argument vector passed straight to ``subprocess.run``.
        stdin: Optional text fed to the child's standard input.
        timeout: Seconds before ``subprocess.run`` gives up.

    Returns:
        The child's standard output.

    Raises:
        RuntimeError: The child exited with a non-zero status. The message
            carries the exit code, the first three items of *cmd* and up to
            400 characters of stripped stderr.
    """
    proc = subprocess.run(
        cmd,
        input=stdin,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if proc.returncode == 0:
        return proc.stdout
    head = " ".join(cmd[:3])
    detail = proc.stderr.strip()[:400]
    raise RuntimeError(f"cmd falló ({proc.returncode}): {head}\n{detail}")
def parse_csv_ints(raw: str) -> list[int]:
    """Parse a comma-separated list of non-negative integers.

    Each item is stripped of surrounding whitespace. Items that are not made
    up purely of decimal digits (empty items, signed numbers, words, ...) are
    silently dropped.

    Args:
        raw: Text such as ``"12, 34,56"``.

    Returns:
        The valid integers, in input order.
    """
    # str.isdecimal() rather than str.isdigit(): the latter is also true for
    # characters such as superscript digits, which int() rejects with
    # ValueError. Everything isdecimal() accepts is parseable by int().
    return [
        int(item)
        for item in (part.strip() for part in raw.split(","))
        if item.isdecimal()
    ]
def localize_urls(text: str | None) -> tuple[str, int]:
    """Rewrite absolute local URLs to their prod form before uploading content.

    Every literal occurrence of ``LOCAL_BASE`` in *text* is replaced with
    ``PROD_BASE``, so the content already reaches prod with correct links
    (issue #91).

    Args:
        text: Content to rewrite; ``None`` is treated as an empty string.

    Returns:
        A ``(text, count)`` pair where *count* is the number of replacements.
        When *text* is empty or ``LOCAL_BASE`` is unset, the text is returned
        unchanged with a count of 0.
    """
    if not (text and LOCAL_BASE):
        return text or "", 0
    hits = text.count(LOCAL_BASE)
    if hits == 0:
        return text, 0
    return text.replace(LOCAL_BASE, PROD_BASE), hits
# ── Local ────────────────────────────────────────────────────────────────────
# True once the PHP helper has been copied into the local WordPress container.
_local_ready = False


def local_helper(subcmd: str, *args: str) -> str:
    """Run the PHP helper inside the local WordPress container.

    On the first call the helper script is copied into the container with
    ``docker cp``; later calls reuse that copy.

    Args:
        subcmd: Helper subcommand name.
        *args: Extra positional arguments forwarded to the helper.

    Returns:
        The helper's standard output.
    """
    global _local_ready
    if not _local_ready:
        copy_target = f"{WP_CONTAINER}:{LOCAL_HELPER_DST}"
        sh(["docker", "cp", str(HELPER_SRC), copy_target])
        _local_ready = True
    exec_cmd = ["docker", "exec", "-i", WP_CONTAINER, "php", LOCAL_HELPER_DST, subcmd]
    exec_cmd.extend(args)
    return sh(exec_cmd, timeout=180)
def local_read(post_id: int) -> dict:
    """Return the decoded JSON output of the local helper's ``read`` subcommand."""
    raw = local_helper("read", str(post_id))
    return json.loads(raw)
def local_read_full(post_id: int) -> dict:
    """Return the decoded JSON output of the local helper's ``read_full`` subcommand."""
    raw = local_helper("read_full", str(post_id))
    return json.loads(raw)
def local_translation_pairs() -> list[tuple[int, int]]:
    """List ``(post_id, meta_value)`` pairs for ``traduccion_origen`` metas.

    The query runs through the ``mysql`` client inside the DB container and
    orders rows by numeric ``meta_value``, then ``post_id``. Rows that do not
    consist of exactly two all-digit, tab-separated columns are skipped.
    """
    query = (
        "SELECT post_id, meta_value FROM wp_postmeta "
        "WHERE meta_key='traduccion_origen' ORDER BY CAST(meta_value AS UNSIGNED), post_id;"
    )
    mysql_cmd = [
        "docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
        DB_NAME, "-N", "-e", query,
    ]
    rows = [line.split("\t") for line in sh(mysql_cmd).splitlines()]
    return [
        (int(cols[0]), int(cols[1]))
        for cols in rows
        if len(cols) == 2 and cols[0].isdigit() and cols[1].isdigit()
    ]
def carta_article_ids(carta_id: int) -> list[int]:
    """Return the IDs of local posts whose ``_carta_id`` meta equals *carta_id*.

    The IDs come back ordered by ``post_id``; any whitespace-separated token
    in the client output that is not all digits is ignored.
    """
    query = (
        "SELECT post_id FROM wp_postmeta "
        f"WHERE meta_key='_carta_id' AND meta_value='{carta_id}' ORDER BY post_id;"
    )
    mysql_cmd = [
        "docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
        DB_NAME, "-N", "-e", query,
    ]
    tokens = sh(mysql_cmd).split()
    return [int(tok) for tok in tokens if tok.isdigit()]
def collect_related_posts(seed_ids: list[int]) -> tuple[dict[int, dict], list[dict[str, int]]]:
    """Read the seed posts plus every post referenced by their translation groups.

    For each seed, the ``translations`` mapping reported by ``read_full`` is
    reduced to its all-digit IDs. A seed with no usable mapping becomes a
    single-member group keyed by its own ``lang`` (``"es"`` when missing).
    Identical groups reached from different seeds are kept only once.

    Args:
        seed_ids: Local post IDs to start from.

    Returns:
        ``(posts, groups)``: *posts* maps each collected post ID to its
        ``read_full`` data; *groups* is the list of distinct ``{lang: post_id}``
        translation groups.
    """
    posts: dict[int, dict] = {}
    groups: dict[tuple[tuple[str, int], ...], dict[str, int]] = {}
    for seed in seed_ids:
        info = local_read_full(seed)
        posts[seed] = info
        group: dict[str, int] = {}
        for lang, pid in (info.get("translations") or {}).items():
            if str(pid).isdigit():
                group[lang] = int(pid)
        if not group:
            group = {info.get("lang") or "es": seed}
        # The sorted item tuple is a hashable signature used to de-duplicate.
        groups[tuple(sorted(group.items()))] = group

    referenced = {pid for members in groups.values() for pid in members.values()}
    for pid in sorted(referenced):
        if pid not in posts:
            posts[pid] = local_read_full(pid)
    return posts, list(groups.values())
# ── Prod ─────────────────────────────────────────────────────────────────────
# True once the PHP helper has been uploaded to the prod host (see prod_helper).
_prod_ready = False


def _ssh(remote_cmd: str, *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *remote_cmd* on the prod host over password-authenticated SSH.

    The password is handed to ``sshpass`` through the ``SSHPASS`` environment
    variable (``-e``) rather than ``-p`` on the command line. With ``-p`` the
    secret is visible in the process list and is echoed by ``sh`` in its error
    message (which includes the first three argv items), so it would end up in
    the log and state files.

    Args:
        remote_cmd: Shell command line executed by the remote shell.
        stdin: Optional text piped to the remote command.
        timeout: Seconds allowed for the whole SSH invocation.

    Returns:
        The remote command's standard output.

    Raises:
        RuntimeError: Propagated from ``sh`` when the command exits non-zero.
    """
    cmd = ["sshpass", "-e", "ssh", "-o", "StrictHostKeyChecking=accept-new",
           "-o", "ConnectTimeout=20", PROD_HOST, remote_cmd]
    # Expose the password only for the duration of this call, then restore
    # whatever SSHPASS value (if any) the process had before.
    previous = os.environ.get("SSHPASS")
    os.environ["SSHPASS"] = PROD_PASS
    try:
        return sh(cmd, stdin=stdin, timeout=timeout)
    finally:
        if previous is None:
            os.environ.pop("SSHPASS", None)
        else:
            os.environ["SSHPASS"] = previous
def prod_helper(subcmd: str, *args: str, stdin: str | None = None) -> str:
    """Run the PHP helper on the prod host.

    On the first call the helper source is uploaded to ``PROD_HELPER`` by
    piping it into ``cat`` over SSH; later calls reuse that copy.

    Args:
        subcmd: Helper subcommand name.
        *args: Extra positional arguments forwarded to the helper.
        stdin: Optional text piped to the helper's standard input.

    Returns:
        The helper's standard output.
    """
    global _prod_ready
    if not _prod_ready:
        _ssh(f"cat > {PROD_HELPER}", stdin=HELPER_SRC.read_text(encoding="utf-8"))
        _prod_ready = True
    # The string is interpreted by the remote shell, so every word is quoted:
    # an empty argument stays in its position instead of vanishing, and
    # spaces or shell metacharacters in a value cannot alter the command.
    argv = " ".join(shlex.quote(word) for word in (PROD_HELPER, subcmd, *args))
    inner = f"FEA_WP_LOAD={shlex.quote(PROD_WPLOAD)} php {argv}"
    return _ssh(inner, stdin=stdin, timeout=180)
def prod_create(origin: int, lang: str, title: str, content: str) -> int:
    """Invoke the prod helper's ``create`` subcommand for one translation.

    Local absolute URLs in *content* are rewritten to prod first; the number
    of rewrites is logged when non-zero.

    Args:
        origin: ID passed to the helper as the origin post.
        lang: Language code of the translation.
        title: Post title.
        content: Post content.

    Returns:
        The integer printed by the helper (presumably the ID of the post
        created in prod).
    """
    localized, hits = localize_urls(content)
    if hits:
        log(f" localize origin={origin} [{lang}]: {hits} URL(s) Tailscale→prod")
    body = {"title": title, "content": localized, "model": "google/gemma-4-e4b (sync)"}
    raw = prod_helper("create", str(origin), lang, STATUS, stdin=json.dumps(body))
    return int(raw.strip())
def prod_clone(post: dict) -> int:
    """Invoke the prod helper's ``clone`` subcommand for one local post.

    Content and excerpt have their local absolute URLs rewritten to prod; the
    combined number of rewrites is logged when non-zero. The remaining fields
    are copied from *post*, using a fallback when a key is absent.

    Args:
        post: Post data as returned by ``local_read_full``; must contain
            ``id``, ``title`` and ``lang``.

    Returns:
        The integer printed by the helper (presumably the resulting prod ID).
    """
    content, content_hits = localize_urls(post.get("content", ""))
    excerpt, excerpt_hits = localize_urls(post.get("excerpt", ""))
    total_hits = content_hits + excerpt_hits
    if total_hits:
        log(f" localize #{post['id']} [{post.get('lang','?')}]: {total_hits} URL(s) Tailscale→prod")

    payload = {"title": post["title"], "content": content, "excerpt": excerpt}
    # Field name → fallback used when the local post lacks that key.
    fallbacks = {
        "slug": "",
        "type": "post",
        "author": 1,
        "date": None,
        "date_gmt": None,
        "status": None,
        "cats": [],
        "cat_slugs": [],
        "meta": {},
    }
    for field, fallback in fallbacks.items():
        payload[field] = post.get(field, fallback)

    raw = prod_helper("clone", str(post["id"]), post["lang"], STATUS, stdin=json.dumps(payload))
    return int(raw.strip())
def prod_save_group(group: dict[str, int]) -> dict[str, int]:
    """Send one ``{lang: post_id}`` group to the prod helper's ``save_translations``.

    Returns:
        The helper's JSON output, decoded.
    """
    request = json.dumps({"translations": group})
    reply = prod_helper("save_translations", stdin=request)
    return json.loads(reply.strip())
# ── Estado ───────────────────────────────────────────────────────────────────
def load_state() -> dict:
    """Load the sync state from ``STATE_FILE``.

    Returns:
        A dict that always has ``"done"`` and ``"errors"`` dict entries. A
        fresh empty state is returned when the file is missing, or when it
        cannot be decoded (a warning is logged in that case, because starting
        over means already-synced items will be sent again).
    """
    fresh = {"done": {}, "errors": {}}
    if not STATE_FILE.exists():
        return fresh
    try:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except ValueError as exc:
        log(f"AVISO: estado ilegible en {STATE_FILE} ({exc}); se empieza de cero.")
        return fresh
    if not isinstance(state, dict):
        log(f"AVISO: estado con formato inesperado en {STATE_FILE}; se empieza de cero.")
        return fresh
    # Callers index state["done"] / state["errors"] directly.
    for section in ("done", "errors"):
        if not isinstance(state.get(section), dict):
            state[section] = {}
    return state


def save_state(state: dict) -> None:
    """Persist *state* to ``STATE_FILE`` as pretty-printed UTF-8 JSON.

    The data is written to a sibling temporary file and then moved over the
    real one, so an interruption mid-write cannot leave a truncated state
    file behind.
    """
    tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    # ensure_ascii=False emits non-ASCII characters verbatim, so the encoding
    # is pinned explicitly instead of depending on the locale.
    tmp_path.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
    tmp_path.replace(STATE_FILE)
# ── Modo IDs preservados ─────────────────────────────────────────────────────
def deploy_fixed_ids(seed_ids: list[int], *, keep_existing: set[int], dry_run: bool) -> int:
    """Clone a cluster of local posts to prod and relink its translation groups.

    The cluster is whatever ``collect_related_posts`` gathers from *seed_ids*.
    Posts listed in *keep_existing* are not cloned. Spanish (``"es"``) posts
    are cloned before the rest, each set in ascending ID order. After cloning,
    every translation group is saved in prod.

    Args:
        seed_ids: Local post IDs that define the cluster.
        keep_existing: IDs assumed to exist in prod already.
        dry_run: When true, only log the plan and make no prod calls.

    Returns:
        Always 0.
    """
    posts, groups = collect_related_posts(seed_ids)
    # False sorts before True, so "es" posts come first; ties break on ID.
    clone_ids = sorted(
        (pid for pid in posts if pid not in keep_existing),
        key=lambda pid: (posts[pid].get("lang") != "es", pid),
    )
    log(f"Plan IDs preservados: seeds={seed_ids} clone={len(clone_ids)} grupos={len(groups)} status={STATUS}")
    if keep_existing:
        log(f"IDs marcados como ya existentes en prod: {sorted(keep_existing)}")

    if not dry_run:
        for pid in clone_ids:
            post = posts[pid]
            new_id = prod_clone(post)
            log(f" clone #{pid} [{post.get('lang','?')}] → prod #{new_id} «{post['title'][:45]}»")
        for group in groups:
            saved = prod_save_group(group)
            log(f" group enlazado {saved}")
        log("FIN sync IDs preservados.")
        return 0

    # Dry run: describe what would be cloned and linked.
    for pid in clone_ids:
        post = posts[pid]
        log(f" CLONE #{pid} [{post.get('lang','?')}] slug={post.get('slug','')} cats={len(post.get('cat_slugs', []))}")
    for group in groups:
        log(f" GROUP {group}")
    return 0
# ── Main legado ──────────────────────────────────────────────────────────────
def legacy_sync(limit: int, origin: int) -> int:
    """Sync local automatic translations to prod (legacy mode).

    Iterates the local ``traduccion_origen`` pairs, reads each translation and
    creates it in prod unless the ``"<origin>:<lang>"`` key is already recorded
    as done in the state file. The state is saved after every attempt so an
    interrupted run can resume.

    Args:
        limit: Stop once this many translations were attempted (created or
            failed); 0 means no limit.
        origin: When non-zero, only translations of this origin post.

    Returns:
        Always 0; per-item failures are logged and stored in the state.
    """
    state = load_state()
    pairs = local_translation_pairs()
    if origin:
        pairs = [p for p in pairs if p[1] == origin]
    log(f"Traducciones locales a sincronizar: {len(pairs)} (status={STATUS})")
    n_ok = n_skip = n_err = 0
    for tid, src_origin in pairs:
        if limit and (n_ok + n_err) >= limit:
            break
        try:
            t = local_read(tid)
        except Exception as exc:  # noqa: BLE001
            log(f" local read #{tid} ERROR: {exc}")
            n_err += 1
            continue
        # `or ""` also covers a JSON null, which would otherwise yield an
        # "origin:None" key and a create call that cannot succeed.
        lang = t.get("lang") or ""
        if lang in ("", "es"):
            continue
        key = f"{src_origin}:{lang}"
        if key in state["done"]:
            n_skip += 1
            continue
        try:
            new_id = prod_create(src_origin, lang, t["title"], t["content"])
            state["done"][key] = new_id
            # A successful retry supersedes any error stored by an earlier run.
            state["errors"].pop(key, None)
            save_state(state)
            n_ok += 1
            log(f" {key} → prod #{new_id} «{t['title'][:45]}»")
        except Exception as exc:  # noqa: BLE001
            state["errors"][key] = str(exc)[:300]
            save_state(state)
            n_err += 1
            log(f" {key} ERROR: {exc}")
    save_state(state)
    log(f"FIN sync legado. nuevos={n_ok} saltados={n_skip} errores={n_err}. Estado: {STATE_FILE}")
    log("Recuerda en prod: ejecutar remap_translation_cats.php si alguna quedó sin categoría traducida.")
    return 0
def main() -> int:
    """Parse the command line and run the matching sync mode.

    ``--carta`` or ``--ids`` select the preserved-IDs mode; otherwise the
    legacy translation sync runs.

    Returns:
        The exit status of the selected mode. Invalid option combinations
        terminate through ``argparse``'s error exit.
    """
    ap = argparse.ArgumentParser(description="Sincroniza contenido local→prod reutilizando el texto local.")
    ap.add_argument("--limit", type=int, default=0, help="Modo legado: máximo de traducciones a sincronizar.")
    ap.add_argument("--origin", type=int, default=0, help="Modo legado: solo traducciones de este ES.")
    ap.add_argument("--carta", type=int, default=0, help="Modo IDs preservados: carta ES y todo su cluster.")
    ap.add_argument("--ids", default="", help="Modo IDs preservados: lista CSV de posts semilla a clonar/enlazar.")
    ap.add_argument("--keep-existing", default="", help="IDs que ya existen en prod y no deben clonarse.")
    ap.add_argument("--dry-run", action="store_true", help="Solo muestra el plan; no toca prod.")
    args = ap.parse_args()

    if args.carta or args.ids:
        if args.carta:
            seed_ids = [args.carta, *carta_article_ids(args.carta)]
        else:
            seed_ids = parse_csv_ints(args.ids)
        if not seed_ids:
            # Without this guard a mistyped --ids silently fell through to a
            # full legacy sync against prod.
            ap.error("--ids no contiene ningún ID numérico válido.")
        keep_existing = set(parse_csv_ints(args.keep_existing))
        return deploy_fixed_ids(seed_ids, keep_existing=keep_existing, dry_run=args.dry_run)

    if args.dry_run:
        # The legacy mode has no dry-run path; refusing is safer than writing
        # to prod despite the flag.
        ap.error("--dry-run solo está soportado en el modo IDs preservados (--carta / --ids).")
    return legacy_sync(args.limit, args.origin)


if __name__ == "__main__":
    raise SystemExit(main())