#!/usr/bin/env python3
"""
sync_translations_to_prod.py — Sync local content to PROD, reusing the text
that has already been verified locally.

It has two modes:

1. Legacy: syncs automatic translations (`traduccion_origen`), assuming the
   source ES post already exists in prod with the same ID.
2. Preserved IDs: clones local posts to prod with an explicit ID, copying
   content, slug, metas and categories, and then rebuilds the exact Polylang
   groups.

Mode 2 is the one used by the handoff of carta 46956, to avoid breaking the
local↔prod correspondence when prod lags behind.
"""

from __future__ import annotations

import argparse
import json
import os
import shlex
import subprocess
import time
from pathlib import Path

# ── Config ───────────────────────────────────────────────────────────────────
# SECURITY(review): the defaults below embed real-looking credentials and a
# host address in source control. They are kept unchanged so existing
# invocations keep working, but they should be rotated and supplied only via
# the FEA_* environment variables.
WP_CONTAINER = os.environ.get("FEA_WP_CONTAINER", "wordpress-web")
DB_CONTAINER = os.environ.get("FEA_DB_CONTAINER", "wordpress-mysql")
DB_NAME = os.environ.get("FEA_DB_NAME", "wordpress_db")
DB_USER = os.environ.get("FEA_DB_USER", "wordpress_user")
DB_PASS = os.environ.get("FEA_DB_PASS", "wordpress_pass")

PROD_HOST = os.environ.get("FEA_PROD_HOST", "feadulta@134.0.10.170")
PROD_PASS = os.environ.get("FEA_PROD_PASS", "C6c2A!mAl3Wj.BQF")
PROD_WPLOAD = os.environ.get("FEA_PROD_WPLOAD", "/web/wp-nuevo/wp-load.php")
PROD_HELPER = "/tmp/fea_translate_helper.php"

HELPER_SRC = Path(__file__).resolve().parent / "fea_translate_helper.php"
LOCAL_HELPER_DST = "/tmp/fea_translate_helper.php"

STATE_FILE = Path(os.environ.get("FEA_SYNC_STATE", "/tmp/feadulta-sync-state.json"))
LOG_FILE = Path(os.environ.get("FEA_SYNC_LOG", "/tmp/feadulta-sync.log"))
STATUS = os.environ.get("FEA_SYNC_STATUS", "draft")

# Absolute URLs of the local environment that must NOT reach prod (issue #91):
# the local post_content carries the Tailscale host with the /fea prefix; in
# prod the install hangs off the root. They are rewritten on deploy so that no
# broken links are left behind (Tailscale is unreachable for visitors).
LOCAL_BASE = os.environ.get("FEA_LOCAL_BASE", "https://farmer.taild3aaf6.ts.net/fea")
PROD_BASE = os.environ.get("FEA_PROD_BASE", "https://wp-nuevo.feadulta.com")


def log(msg: str) -> None:
    """Print a timestamped line to stdout and append it to LOG_FILE.

    Writing to the log file is best-effort: an OSError is ignored so that an
    unwritable log never aborts a sync.
    """
    line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(line, flush=True)
    try:
        # Context manager so the handle is closed deterministically.
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError:
        pass


def sh(
    cmd: list[str],
    *,
    stdin: str | None = None,
    timeout: int = 120,
    env: dict[str, str] | None = None,
) -> str:
    """Run *cmd* (no shell) and return its stdout.

    Args:
        cmd: argv list passed to ``subprocess.run``.
        stdin: optional text fed to the process' standard input.
        timeout: seconds before ``subprocess.TimeoutExpired`` is raised.
        env: optional full environment for the child; ``None`` inherits ours.

    Raises:
        RuntimeError: when the process exits with a non-zero status. The
            message carries the first three argv words and up to 400
            characters of stderr.
    """
    r = subprocess.run(
        cmd, input=stdin, capture_output=True, text=True, timeout=timeout, env=env
    )
    if r.returncode != 0:
        raise RuntimeError(
            f"cmd falló ({r.returncode}): {' '.join(cmd[:3])}…\n{r.stderr.strip()[:400]}"
        )
    return r.stdout


def parse_csv_ints(raw: str) -> list[int]:
    """Parse a comma-separated string into ints, silently dropping bad tokens.

    ``isdecimal`` is used (rather than ``isdigit``) because it matches exactly
    the characters ``int()`` can convert; e.g. "²" is a digit but not an int.
    """
    tokens = (part.strip() for part in raw.split(","))
    return [int(tok) for tok in tokens if tok.isdecimal()]


def localize_urls(text: str | None) -> tuple[str, int]:
    """Rewrite absolute local→prod URLs in content before uploading it.

    Equivalent to the search-replace of LOCAL_BASE by PROD_BASE, but applied
    at the source so the content already arrives correct in prod (issue #91).

    Returns:
        ``(text, number_of_replacements)``; ``None``/empty input yields
        ``("", 0)`` or the unchanged text with a count of 0.
    """
    if not text or not LOCAL_BASE:
        return text or "", 0
    n = text.count(LOCAL_BASE)
    if not n:
        return text, 0
    return text.replace(LOCAL_BASE, PROD_BASE), n


# ── Local ────────────────────────────────────────────────────────────────────
_local_ready = False


def local_helper(subcmd: str, *args: str) -> str:
    """Run the PHP helper inside the local WordPress container.

    The helper script is copied into the container once per process, on the
    first call.
    """
    global _local_ready
    if not _local_ready:
        sh(["docker", "cp", str(HELPER_SRC), f"{WP_CONTAINER}:{LOCAL_HELPER_DST}"])
        _local_ready = True
    return sh(
        ["docker", "exec", "-i", WP_CONTAINER, "php", LOCAL_HELPER_DST, subcmd, *args],
        timeout=180,
    )


def local_read(post_id: int) -> dict:
    """Return the helper's ``read`` JSON for a local post."""
    return json.loads(local_helper("read", str(post_id)))


def local_read_full(post_id: int) -> dict:
    """Return the helper's ``read_full`` JSON for a local post."""
    return json.loads(local_helper("read_full", str(post_id)))


def _local_mysql(query: str) -> str:
    """Run *query* with the mysql client in the DB container (-N: no headers)."""
    return sh(
        ["docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
         DB_NAME, "-N", "-e", query]
    )


def local_translation_pairs() -> list[tuple[int, int]]:
    """Return ``(translation_post_id, origin_post_id)`` pairs from postmeta.

    Rows come from the ``traduccion_origen`` meta key; rows whose two columns
    are not both numeric are skipped.
    """
    q = ("SELECT post_id, meta_value FROM wp_postmeta "
         "WHERE meta_key='traduccion_origen' ORDER BY CAST(meta_value AS UNSIGNED), post_id;")
    pairs: list[tuple[int, int]] = []
    for line in _local_mysql(q).splitlines():
        parts = line.split("\t")
        if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
            pairs.append((int(parts[0]), int(parts[1])))
    return pairs


def carta_article_ids(carta_id: int) -> list[int]:
    """Return the IDs of posts whose ``_carta_id`` meta equals *carta_id*."""
    # int() guarantees only digits are interpolated into the SQL text.
    q = ("SELECT post_id FROM wp_postmeta "
         f"WHERE meta_key='_carta_id' AND meta_value='{int(carta_id)}' ORDER BY post_id;")
    return [int(x) for x in _local_mysql(q).split() if x.isdigit()]


def collect_related_posts(seed_ids: list[int]) -> tuple[dict[int, dict], list[dict[str, int]]]:
    """Read every seed post plus all members of its translation group.

    Returns:
        ``(posts, groups)`` where *posts* maps post ID → ``read_full`` data and
        *groups* is the de-duplicated list of ``{lang: post_id}`` groups. A
        seed without translations forms a single-member group under its own
        language (``"es"`` when the helper reports none).
    """
    posts: dict[int, dict] = {}
    groups: dict[tuple[tuple[str, int], ...], dict[str, int]] = {}
    for seed in seed_ids:
        if seed in posts:
            # Repeated seed: already read and its group already registered.
            continue
        info = local_read_full(seed)
        posts[seed] = info
        # `or {}` also covers an empty JSON array coming from the helper.
        raw_group = info.get("translations") or {}
        group = {
            lang: int(pid)
            for lang, pid in raw_group.items()
            if str(pid).isdigit()
        }
        if not group:
            lang = info.get("lang") or "es"
            group = {lang: seed}
        # The sorted items act as a signature so the same group is kept once.
        sig = tuple(sorted(group.items()))
        groups[sig] = group

    all_ids = sorted({pid for group in groups.values() for pid in group.values()})
    for pid in all_ids:
        if pid not in posts:
            posts[pid] = local_read_full(pid)
    return posts, list(groups.values())


# ── Prod ─────────────────────────────────────────────────────────────────────
_prod_ready = False


def _ssh(remote_cmd: str, *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *remote_cmd* on PROD_HOST through sshpass + ssh and return stdout.

    The password is handed to sshpass through the SSHPASS environment variable
    (``-e``) instead of ``-p``: that keeps it out of the process list and out
    of the argv excerpt that ``sh`` embeds in its error messages (which end up
    in the log and state files).
    """
    cmd = ["sshpass", "-e", "ssh", "-o", "StrictHostKeyChecking=accept-new",
           "-o", "ConnectTimeout=20", PROD_HOST, remote_cmd]
    return sh(cmd, stdin=stdin, timeout=timeout, env={**os.environ, "SSHPASS": PROD_PASS})


def _remote_helper_command(subcmd: str, args: tuple[str, ...]) -> str:
    """Build the shell-quoted remote command line that runs the PHP helper."""
    words = ["php", PROD_HELPER, subcmd, *args]
    quoted = " ".join(shlex.quote(word) for word in words)
    return f"FEA_WP_LOAD={shlex.quote(PROD_WPLOAD)} {quoted}"


def prod_helper(subcmd: str, *args: str, stdin: str | None = None) -> str:
    """Run the PHP helper on prod over ssh.

    The helper source is uploaded once per process, on the first call. Every
    word is shell-quoted because ssh hands the command string to the remote
    shell.
    """
    global _prod_ready
    if not _prod_ready:
        _ssh(f"cat > {shlex.quote(PROD_HELPER)}", stdin=HELPER_SRC.read_text(encoding="utf-8"))
        _prod_ready = True
    return _ssh(_remote_helper_command(subcmd, args), stdin=stdin, timeout=180)


def prod_create(origin: int, lang: str, title: str, content: str) -> int:
    """Create a translation of *origin* on prod and return the new post ID.

    Local URLs in *content* are rewritten to the prod base first.
    """
    content, n = localize_urls(content)
    if n:
        log(f" localize origin={origin} [{lang}]: {n} URL(s) Tailscale→prod")
    payload = json.dumps({"title": title, "content": content,
                          "model": "google/gemma-4-e4b (sync)"})
    out = prod_helper("create", str(origin), lang, STATUS, stdin=payload).strip()
    return int(out)


def prod_clone(post: dict) -> int:
    """Clone a local post (``read_full`` data) to prod under its own ID.

    Content and excerpt get their local URLs rewritten; the remaining fields
    are forwarded to the helper's ``clone`` subcommand as JSON on stdin.

    Returns:
        The post ID printed by the helper.
    """
    content, n1 = localize_urls(post.get("content", ""))
    excerpt, n2 = localize_urls(post.get("excerpt", ""))
    if n1 or n2:
        log(f" localize #{post['id']} [{post.get('lang','?')}]: {n1 + n2} URL(s) Tailscale→prod")
    payload = {
        "title": post["title"],
        "content": content,
        "excerpt": excerpt,
        "slug": post.get("slug", ""),
        "type": post.get("type", "post"),
        "author": post.get("author", 1),
        "date": post.get("date"),
        "date_gmt": post.get("date_gmt"),
        "status": post.get("status"),
        "cats": post.get("cats", []),
        "cat_slugs": post.get("cat_slugs", []),
        "meta": post.get("meta", {}),
    }
    out = prod_helper("clone", str(post["id"]), post["lang"], STATUS,
                      stdin=json.dumps(payload)).strip()
    return int(out)


def prod_save_group(group: dict[str, int]) -> dict[str, int]:
    """Send a ``{lang: post_id}`` group to the helper's ``save_translations``.

    Returns the JSON object the helper prints back.
    """
    out = prod_helper("save_translations", stdin=json.dumps({"translations": group})).strip()
    return json.loads(out)


# ── State ────────────────────────────────────────────────────────────────────
def load_state() -> dict:
    """Load the legacy-mode state, always shaped ``{"done": {}, "errors": {}}``.

    A missing, undecodable or malformed state file yields a fresh state, and
    missing or non-dict ``done``/``errors`` entries are replaced by empty
    dicts so callers can index them safely.
    """
    loaded: object = None
    if STATE_FILE.exists():
        try:
            loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
        except ValueError:
            # Covers both json.JSONDecodeError and UnicodeDecodeError.
            loaded = None
    state: dict = loaded if isinstance(loaded, dict) else {}
    for key in ("done", "errors"):
        if not isinstance(state.get(key), dict):
            state[key] = {}
    return state


def save_state(state: dict) -> None:
    """Persist *state* to STATE_FILE as UTF-8 JSON.

    The data is written to a sibling temp file and then moved over the target,
    so an interrupted run cannot leave a truncated state file behind.
    """
    tmp = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
    tmp.replace(STATE_FILE)


# ── Preserved-IDs mode ───────────────────────────────────────────────────────
def deploy_fixed_ids(seed_ids: list[int], *, keep_existing: set[int], dry_run: bool) -> int:
    """Clone the seeds' cluster to prod with the same IDs and relink groups.

    Args:
        seed_ids: local post IDs whose translation groups are deployed.
        keep_existing: IDs assumed to exist in prod already; they are not
            cloned but still take part in the groups.
        dry_run: when true, only log the plan and do not touch prod.

    Returns:
        0 (process exit status). Failures propagate as exceptions.
    """
    posts, groups = collect_related_posts(seed_ids)
    clone_ids = [pid for pid in posts if pid not in keep_existing]
    # ES posts go first, then the rest; ties are ordered by ID.
    clone_ids.sort(key=lambda pid: (0 if posts[pid].get("lang") == "es" else 1, pid))

    log(f"Plan IDs preservados: seeds={seed_ids} clone={len(clone_ids)} grupos={len(groups)} status={STATUS}")
    if keep_existing:
        log(f"IDs marcados como ya existentes en prod: {sorted(keep_existing)}")

    if dry_run:
        for pid in clone_ids:
            p = posts[pid]
            log(f" CLONE #{pid} [{p.get('lang','?')}] slug={p.get('slug','')} cats={len(p.get('cat_slugs', []))}")
        for group in groups:
            log(f" GROUP {group}")
        return 0

    for pid in clone_ids:
        p = posts[pid]
        new_id = prod_clone(p)
        log(f" clone #{pid} [{p.get('lang','?')}] → prod #{new_id} «{p['title'][:45]}»")

    for group in groups:
        saved = prod_save_group(group)
        log(f" group enlazado {saved}")

    log("FIN sync IDs preservados.")
    return 0


# ── Legacy main ──────────────────────────────────────────────────────────────
def legacy_sync(limit: int, origin: int, *, dry_run: bool = False) -> int:
    """Sync local automatic translations to prod (legacy mode).

    Args:
        limit: maximum number of attempts (successes + errors); 0 = no limit.
        origin: when non-zero, only translations of this ES post are synced.
        dry_run: when true, log what would be created without calling prod or
            modifying the state file.

    Returns:
        0 (process exit status), also when individual items failed; failures
        are recorded in the state file and the log.
    """
    state = load_state()
    pairs = local_translation_pairs()
    if origin:
        pairs = [p for p in pairs if p[1] == origin]
    log(f"Traducciones locales a sincronizar: {len(pairs)} (status={STATUS})")

    n_ok = n_skip = n_err = 0
    for tid, src_origin in pairs:
        if limit and (n_ok + n_err) >= limit:
            break
        try:
            t = local_read(tid)
        except Exception as exc:  # noqa: BLE001
            log(f" local read #{tid} ERROR: {exc}")
            n_err += 1
            continue
        lang = t.get("lang", "")
        if lang in ("", "es"):
            # Not a translation into another language: nothing to sync.
            continue
        key = f"{src_origin}:{lang}"
        if key in state["done"]:
            n_skip += 1
            continue
        if dry_run:
            n_ok += 1
            log(f" DRY-RUN {key} ← local #{tid} «{str(t.get('title', ''))[:45]}»")
            continue
        try:
            new_id = prod_create(src_origin, lang, t["title"], t["content"])
            state["done"][key] = new_id
            # Saved after every item so an interrupted run can resume.
            save_state(state)
            n_ok += 1
            log(f" {key} → prod #{new_id} «{t['title'][:45]}»")
        except Exception as exc:  # noqa: BLE001
            state["errors"][key] = str(exc)[:300]
            save_state(state)
            n_err += 1
            log(f" {key} ERROR: {exc}")

    if dry_run:
        log(f"FIN dry-run legado. pendientes={n_ok} saltados={n_skip} errores={n_err}.")
        return 0

    save_state(state)
    log(f"FIN sync legado. nuevos={n_ok} saltados={n_skip} errores={n_err}. Estado: {STATE_FILE}")
    log("Recuerda en prod: ejecutar remap_translation_cats.php si alguna quedó sin categoría traducida.")
    return 0


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit status.

    Args:
        argv: argument list to parse; ``None`` makes argparse use ``sys.argv``.

    ``--carta`` or ``--ids`` select the preserved-IDs mode; otherwise the
    legacy mode runs. An ``--ids`` value without any numeric ID is rejected
    instead of silently falling back to a full legacy sync.
    """
    ap = argparse.ArgumentParser(description="Sincroniza contenido local→prod reutilizando el texto local.")
    ap.add_argument("--limit", type=int, default=0, help="Modo legado: máximo de traducciones a sincronizar.")
    ap.add_argument("--origin", type=int, default=0, help="Modo legado: solo traducciones de este ES.")
    ap.add_argument("--carta", type=int, default=0, help="Modo IDs preservados: carta ES y todo su cluster.")
    ap.add_argument("--ids", default="", help="Modo IDs preservados: lista CSV de posts semilla a clonar/enlazar.")
    ap.add_argument("--keep-existing", default="", help="IDs que ya existen en prod y no deben clonarse.")
    ap.add_argument("--dry-run", action="store_true", help="Solo muestra el plan; no toca prod.")
    args = ap.parse_args(argv)

    seed_ids: list[int] = []
    if args.carta:
        seed_ids = [args.carta, *carta_article_ids(args.carta)]
    elif args.ids:
        seed_ids = parse_csv_ints(args.ids)
        if not seed_ids:
            # Without this guard a typo in --ids would trigger the legacy
            # mode and sync every translation to prod.
            ap.error("--ids no contiene ningún ID numérico válido")

    if seed_ids:
        keep_existing = set(parse_csv_ints(args.keep_existing))
        return deploy_fixed_ids(seed_ids, keep_existing=keep_existing, dry_run=args.dry_run)
    return legacy_sync(args.limit, args.origin, dry_run=args.dry_run)


if __name__ == "__main__":
    raise SystemExit(main())