326 lines
13 KiB
Python
326 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
sync_translations_to_prod.py — Sincroniza contenido local a PROD reutilizando el
|
|
texto ya verificado en local.
|
|
|
|
Tiene dos modos:
|
|
1. Legado: sincroniza traducciones automáticas (`traduccion_origen`) suponiendo que
|
|
el post ES origen ya existe en prod con el mismo ID.
|
|
2. IDs preservados: clona posts locales a prod con ID explícito, copiando contenido,
|
|
slug, metas y categorías, y después reconstruye los grupos Polylang exactos.
|
|
|
|
El modo 2 es el que usa el handoff de la carta 46956 para evitar romper la
|
|
coincidencia local↔prod cuando prod va por detrás.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# ── Config ───────────────────────────────────────────────────────────────────
# Each FEA_* environment variable overrides the literal fallback next to it.

# Local containers and database credentials (used through `docker exec`).
WP_CONTAINER = os.getenv("FEA_WP_CONTAINER", "wordpress-web")
DB_CONTAINER = os.getenv("FEA_DB_CONTAINER", "wordpress-mysql")
DB_NAME = os.getenv("FEA_DB_NAME", "wordpress_db")
DB_USER = os.getenv("FEA_DB_USER", "wordpress_user")
DB_PASS = os.getenv("FEA_DB_PASS", "wordpress_pass")

# Production host reached over SSH.
PROD_HOST = os.getenv("FEA_PROD_HOST", "feadulta@134.0.10.170")
# NOTE(review): SECURITY — the fallback below looks like a real credential
# committed to source. It is kept only so existing invocations keep working;
# rotate it and supply the value exclusively through FEA_PROD_PASS.
PROD_PASS = os.getenv("FEA_PROD_PASS", "C6c2A!mAl3Wj.BQF")
PROD_WPLOAD = os.getenv("FEA_PROD_WPLOAD", "/web/wp-nuevo/wp-load.php")
PROD_HELPER = "/tmp/fea_translate_helper.php"

# The PHP helper is expected next to this script; it is copied to the same
# /tmp path inside the local container.
HELPER_SRC = Path(__file__).resolve().parent / "fea_translate_helper.php"
LOCAL_HELPER_DST = "/tmp/fea_translate_helper.php"

# Resume state, log file and the status argument passed to the helper.
STATE_FILE = Path(os.getenv("FEA_SYNC_STATE", "/tmp/feadulta-sync-state.json"))
LOG_FILE = Path(os.getenv("FEA_SYNC_LOG", "/tmp/feadulta-sync.log"))
STATUS = os.getenv("FEA_SYNC_STATUS", "draft")

# Absolute URLs of the local environment that must NOT reach prod (issue #91):
# the local post_content carries the Tailscale host with the /fea prefix, while
# in prod the installation hangs off the root. They are rewritten at deploy
# time so no broken links are left behind (Tailscale is unreachable for
# visitors).
LOCAL_BASE = os.getenv("FEA_LOCAL_BASE", "https://farmer.taild3aaf6.ts.net/fea")
PROD_BASE = os.getenv("FEA_PROD_BASE", "https://wp-nuevo.feadulta.com")
|
|
|
|
|
|
def log(msg: str) -> None:
    """Print a timestamped message and append it to ``LOG_FILE``.

    The line is always written to stdout (flushed immediately). Appending to
    the log file is best effort: an ``OSError`` while opening or writing it is
    ignored so that logging can never abort a sync run.

    Args:
        msg: Text to log; a ``[YYYY-mm-dd HH:MM:SS]`` prefix is added.
    """
    line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(line, flush=True)
    try:
        # Context manager so the handle is flushed and closed right away
        # instead of waiting for garbage collection.
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError:
        # Deliberate best effort: the message already reached stdout.
        pass
|
|
|
|
|
|
def sh(cmd: list[str], *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *cmd* without a shell and return its captured stdout as text.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        stdin: Optional text fed to the process' standard input.
        timeout: Seconds before ``subprocess.run`` gives up.

    Returns:
        The process' standard output.

    Raises:
        RuntimeError: If the process exits with a non-zero status. The message
            carries the exit code, the first three words of the command and up
            to 400 characters of stderr.
    """
    r = subprocess.run(
        cmd,
        input=stdin,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=timeout,
    )
    if r.returncode == 0:
        return r.stdout
    raise RuntimeError(f"cmd falló ({r.returncode}): {' '.join(cmd[:3])}…\n{r.stderr.strip()[:400]}")
|
|
|
|
|
|
def parse_csv_ints(raw: str) -> list[int]:
    """Parse a comma-separated string into a list of non-negative integers.

    Surrounding whitespace is stripped from each item. Items that are not made
    up exclusively of decimal digits (empty items, words, signed numbers, ...)
    are skipped rather than reported.

    Args:
        raw: Text such as ``"12, 34,56"``.

    Returns:
        The integers found, in input order.
    """
    tokens = (part.strip() for part in raw.split(","))
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
    # as superscript digits, which int() rejects with ValueError.
    return [int(tok) for tok in tokens if tok.isdecimal()]
|
|
|
|
|
|
def localize_urls(text: str | None) -> tuple[str, int]:
    """Rewrite absolute local URLs to their prod form before uploading content.

    Every occurrence of ``LOCAL_BASE`` in *text* is replaced with ``PROD_BASE``
    (issue #91), so the content reaches prod already pointing at the public
    host.

    Args:
        text: Content to rewrite; ``None`` or an empty string yields ``""``.

    Returns:
        ``(text, n)`` where *n* is the number of replacements performed.
    """
    if not text:
        return "", 0
    if not LOCAL_BASE:
        # Nothing to search for: hand the text back untouched.
        return text, 0
    n = text.count(LOCAL_BASE)
    if n == 0:
        return text, 0
    return text.replace(LOCAL_BASE, PROD_BASE), n
|
|
|
|
|
|
# ── Local ────────────────────────────────────────────────────────────────────
|
|
# Set once the PHP helper has been copied into the local WordPress container.
_local_ready = False


def local_helper(subcmd: str, *args: str) -> str:
    """Run the PHP helper inside the local WordPress container.

    On the first call the helper script is copied into the container with
    ``docker cp``; later calls reuse that copy.

    Args:
        subcmd: Helper sub-command (e.g. ``"read"``).
        *args: Extra positional arguments passed to the helper.

    Returns:
        The helper's stdout.
    """
    global _local_ready
    if not _local_ready:
        destination = f"{WP_CONTAINER}:{LOCAL_HELPER_DST}"
        sh(["docker", "cp", str(HELPER_SRC), destination])
        _local_ready = True
    command = ["docker", "exec", "-i", WP_CONTAINER, "php", LOCAL_HELPER_DST, subcmd]
    command.extend(args)
    return sh(command, timeout=180)
|
|
|
|
|
|
def local_read(post_id: int) -> dict:
    """Return the decoded JSON the local helper's ``read`` prints for *post_id*."""
    raw = local_helper("read", str(post_id))
    return json.loads(raw)
|
|
|
|
|
|
def local_read_full(post_id: int) -> dict:
    """Return the decoded JSON the local helper's ``read_full`` prints for *post_id*."""
    raw = local_helper("read_full", str(post_id))
    return json.loads(raw)
|
|
|
|
|
|
def local_translation_pairs() -> list[tuple[int, int]]:
    """List ``(post_id, origin_id)`` pairs recorded in the local database.

    Reads every ``traduccion_origen`` row of ``wp_postmeta`` through the
    ``mysql`` client inside the DB container, ordered by origin and then by
    post id. Rows that are not two tab-separated numeric columns are ignored.

    Returns:
        Pairs of (post carrying the meta, value of the meta) as integers.
    """
    q = ("SELECT post_id, meta_value FROM wp_postmeta "
         "WHERE meta_key='traduccion_origen' ORDER BY CAST(meta_value AS UNSIGNED), post_id;")
    mysql_cmd = [
        "docker", "exec", DB_CONTAINER,
        "mysql", f"-u{DB_USER}", f"-p{DB_PASS}", DB_NAME, "-N", "-e", q,
    ]
    rows = (line.split("\t") for line in sh(mysql_cmd).splitlines())
    return [
        (int(cols[0]), int(cols[1]))
        for cols in rows
        if len(cols) == 2 and cols[0].isdigit() and cols[1].isdigit()
    ]
|
|
|
|
|
|
def carta_article_ids(carta_id: int) -> list[int]:
    """Return the ids of the local posts whose ``_carta_id`` meta equals *carta_id*.

    Args:
        carta_id: Id of the carta; coerced with ``int()`` before use.

    Returns:
        Matching ``post_id`` values in ascending order.

    Raises:
        ValueError: If *carta_id* cannot be converted to an integer.
    """
    # The value is interpolated into the SQL text, so force it to be a plain
    # integer: anything else fails here instead of reaching the database.
    carta_id = int(carta_id)
    q = ("SELECT post_id FROM wp_postmeta "
         f"WHERE meta_key='_carta_id' AND meta_value='{carta_id}' ORDER BY post_id;")
    out = sh(["docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
              DB_NAME, "-N", "-e", q])
    return [int(x) for x in out.split() if x.isdigit()]
|
|
|
|
|
|
def collect_related_posts(seed_ids: list[int]) -> tuple[dict[int, dict], list[dict[str, int]]]:
    """Load the seed posts plus every post listed in their translation groups.

    Each seed is read with ``local_read_full``. Its ``translations`` mapping
    (language → post id) becomes a group; entries whose id is not numeric are
    dropped. A seed without usable translations forms a one-member group keyed
    by its own ``lang`` (``"es"`` when missing). Identical groups are stored
    once.

    Args:
        seed_ids: Post ids to start from.

    Returns:
        ``(posts, groups)``: *posts* maps post id → helper data for every seed
        and every group member; *groups* is the list of distinct groups.
    """
    posts: dict[int, dict] = {}
    groups: dict[tuple[tuple[str, int], ...], dict[str, int]] = {}

    for seed in seed_ids:
        info = local_read_full(seed)
        posts[seed] = info

        group: dict[str, int] = {}
        for lang, pid in (info.get("translations") or {}).items():
            if str(pid).isdigit():
                group[lang] = int(pid)
        if not group:
            group = {info.get("lang") or "es": seed}

        # The sorted item tuple is a hashable signature used to de-duplicate.
        groups[tuple(sorted(group.items()))] = group

    member_ids: set[int] = set()
    for group in groups.values():
        member_ids.update(group.values())

    # Read the group members that were not seeds themselves.
    for pid in sorted(member_ids):
        if pid not in posts:
            posts[pid] = local_read_full(pid)

    return posts, list(groups.values())
|
|
|
|
|
|
# ── Prod ─────────────────────────────────────────────────────────────────────
|
|
# Set once the PHP helper has been uploaded to the prod host.
_prod_ready = False


def _ssh(remote_cmd: str, *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *remote_cmd* on ``PROD_HOST`` through ``sshpass`` + ``ssh``.

    Args:
        remote_cmd: Command line executed by the remote shell.
        stdin: Optional text piped to the remote command.
        timeout: Seconds before the local call gives up.

    Returns:
        The remote command's stdout.

    Raises:
        RuntimeError: Propagated from ``sh`` on a non-zero exit status.
    """
    # Pass the password via $SSHPASS (sshpass -e) rather than `-p <password>`:
    # the argv form exposes it in the process list and in sh()'s error
    # messages, which quote the first three words of the command.
    # Child processes inherit os.environ, so setting it here is enough.
    os.environ["SSHPASS"] = PROD_PASS
    cmd = ["sshpass", "-e", "ssh", "-o", "StrictHostKeyChecking=accept-new",
           "-o", "ConnectTimeout=20", PROD_HOST, remote_cmd]
    return sh(cmd, stdin=stdin, timeout=timeout)


def prod_helper(subcmd: str, *args: str, stdin: str | None = None) -> str:
    """Run the PHP helper on the prod host.

    On the first call the helper source is uploaded to ``PROD_HELPER`` over
    SSH; later calls reuse it. The helper runs with ``FEA_WP_LOAD`` set to
    ``PROD_WPLOAD``.

    Args:
        subcmd: Helper sub-command (e.g. ``"create"``, ``"clone"``).
        *args: Extra positional arguments for the helper.
        stdin: Optional text (JSON payload) piped to the helper.

    Returns:
        The helper's stdout.
    """
    global _prod_ready
    import shlex  # local import: only this function needs it

    if not _prod_ready:
        _ssh(f"cat > {PROD_HELPER}", stdin=HELPER_SRC.read_text(encoding="utf-8"))
        _prod_ready = True
    # The string below is parsed by the remote shell, so quote each data
    # argument: an empty value would otherwise vanish and shift the following
    # arguments, and spaces or metacharacters would split or be interpreted.
    words = [f"FEA_WP_LOAD={PROD_WPLOAD}", "php", PROD_HELPER, shlex.quote(subcmd)]
    words.extend(shlex.quote(arg) for arg in args)
    return _ssh(" ".join(words), stdin=stdin, timeout=180)
|
|
|
|
|
|
def prod_create(origin: int, lang: str, title: str, content: str) -> int:
    """Create a translation in prod through the helper's ``create`` sub-command.

    Local URLs in *content* are rewritten with ``localize_urls`` first, and the
    number of rewrites is logged when non-zero.

    Args:
        origin: Id passed to the helper as the origin post.
        lang: Language code of the translation.
        title: Title for the new post.
        content: Body for the new post.

    Returns:
        The integer printed by the helper (logged by callers as the prod id).
    """
    content, n = localize_urls(content)
    if n:
        log(f" localize origin={origin} [{lang}]: {n} URL(s) Tailscale→prod")
    body = {"title": title, "content": content, "model": "google/gemma-4-e4b (sync)"}
    reply = prod_helper("create", str(origin), lang, STATUS, stdin=json.dumps(body))
    return int(reply.strip())
|
|
|
|
|
|
def prod_clone(post: dict) -> int:
    """Clone a local post to prod through the helper's ``clone`` sub-command.

    Content and excerpt go through ``localize_urls``; the remaining fields are
    copied from *post*, with defaults for the optional ones.

    Args:
        post: Post data; must contain ``"id"``, ``"title"`` and ``"lang"``.

    Returns:
        The integer printed by the helper.
    """
    content, n1 = localize_urls(post.get("content", ""))
    excerpt, n2 = localize_urls(post.get("excerpt", ""))
    if n1 or n2:
        log(f" localize #{post['id']} [{post.get('lang','?')}]: {n1 + n2} URL(s) Tailscale→prod")

    payload = {"title": post["title"], "content": content, "excerpt": excerpt}
    # Optional fields with the fallback used when the post lacks them.
    optional_fields = (
        ("slug", ""),
        ("type", "post"),
        ("author", 1),
        ("date", None),
        ("date_gmt", None),
        ("status", None),
        ("cats", []),
        ("cat_slugs", []),
        ("meta", {}),
    )
    for field, fallback in optional_fields:
        payload[field] = post.get(field, fallback)

    reply = prod_helper("clone", str(post["id"]), post["lang"], STATUS, stdin=json.dumps(payload))
    return int(reply.strip())
|
|
|
|
|
|
def prod_save_group(group: dict[str, int]) -> dict[str, int]:
    """Send a translation group to the prod helper's ``save_translations``.

    Args:
        group: Mapping of language code → post id.

    Returns:
        The JSON object printed by the helper, decoded.
    """
    request = json.dumps({"translations": group})
    reply = prod_helper("save_translations", stdin=request)
    return json.loads(reply.strip())
|
|
|
|
|
|
# ── Estado ───────────────────────────────────────────────────────────────────
|
|
def load_state() -> dict:
    """Load the resume state from ``STATE_FILE``.

    Returns:
        A dict that always has ``"done"`` and ``"errors"`` dict entries. A
        missing file yields a fresh state. An unreadable file (invalid JSON,
        invalid UTF-8, or a non-object top level) also yields a fresh state,
        and a message is logged.
    """
    fresh: dict = {"done": {}, "errors": {}}
    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return fresh
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError. Starting over is
        # the existing behaviour; say so instead of doing it silently.
        log(f"Estado ilegible en {STATE_FILE}; se empieza de cero.")
        return fresh
    if not isinstance(state, dict):
        log(f"Estado ilegible en {STATE_FILE}; se empieza de cero.")
        return fresh
    # Callers index state["done"] / state["errors"] directly.
    for key in ("done", "errors"):
        if not isinstance(state.get(key), dict):
            state[key] = {}
    return state


def save_state(state: dict) -> None:
    """Write *state* to ``STATE_FILE`` as indented UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned rather than left to the locale (load_state reads UTF-8 too).
    STATE_FILE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
|
|
|
|
# ── Modo IDs preservados ─────────────────────────────────────────────────────
|
|
def deploy_fixed_ids(seed_ids: list[int], *, keep_existing: set[int], dry_run: bool) -> int:
    """Clone a cluster of local posts to prod and relink its translation groups.

    The cluster comes from ``collect_related_posts``. Posts listed in
    *keep_existing* are not cloned, but they remain in the groups that get
    saved. Posts whose ``lang`` is ``"es"`` are cloned first, then the rest,
    each set in ascending id order.

    Args:
        seed_ids: Post ids to start from.
        keep_existing: Ids to skip when cloning.
        dry_run: When true, only log the plan and make no prod helper call.

    Returns:
        Always ``0``.
    """
    posts, groups = collect_related_posts(seed_ids)

    def clone_order(pid: int) -> tuple[bool, int]:
        # False sorts before True, so "es" posts come first.
        return (posts[pid].get("lang") != "es", pid)

    clone_ids = sorted((pid for pid in posts if pid not in keep_existing), key=clone_order)

    log(f"Plan IDs preservados: seeds={seed_ids} clone={len(clone_ids)} grupos={len(groups)} status={STATUS}")
    if keep_existing:
        log(f"IDs marcados como ya existentes en prod: {sorted(keep_existing)}")

    if not dry_run:
        for pid in clone_ids:
            p = posts[pid]
            new_id = prod_clone(p)
            log(f" clone #{pid} [{p.get('lang','?')}] → prod #{new_id} «{p['title'][:45]}»")

        for group in groups:
            saved = prod_save_group(group)
            log(f" group enlazado {saved}")

        log("FIN sync IDs preservados.")
        return 0

    # Dry run: describe what would be cloned and linked.
    for pid in clone_ids:
        p = posts[pid]
        log(f" CLONE #{pid} [{p.get('lang','?')}] slug={p.get('slug','')} cats={len(p.get('cat_slugs', []))}")
    for group in groups:
        log(f" GROUP {group}")
    return 0
|
|
|
|
|
|
# ── Main legado ──────────────────────────────────────────────────────────────
|
|
def legacy_sync(limit: int, origin: int) -> int:
    """Legacy mode: create in prod the translations recorded locally.

    For every ``(post, origin)`` pair from ``local_translation_pairs`` the local
    post is read and, unless its language is empty or ``"es"`` or its
    ``"<origin>:<lang>"`` key is already in the state's ``done`` map, created
    in prod with ``prod_create``. The state file is saved after every attempt
    so an interrupted run can resume.

    Args:
        limit: Stop once this many attempts (successes + errors) were made;
            ``0`` means no limit.
        origin: When non-zero, only handle pairs with this origin id.

    Returns:
        Always ``0``; failures are logged and stored in the state's ``errors``.
    """
    state = load_state()
    # Tolerate a state dict lacking either map.
    done = state.setdefault("done", {})
    errors = state.setdefault("errors", {})

    pairs = local_translation_pairs()
    if origin:
        pairs = [p for p in pairs if p[1] == origin]
    log(f"Traducciones locales a sincronizar: {len(pairs)} (status={STATUS})")

    n_ok = n_skip = n_err = 0
    for tid, src_origin in pairs:
        if limit and (n_ok + n_err) >= limit:
            break
        try:
            t = local_read(tid)
        except Exception as exc:  # noqa: BLE001
            log(f" local read #{tid} ERROR: {exc}")
            n_err += 1
            continue
        lang = t.get("lang", "")
        if lang in ("", "es"):
            continue
        key = f"{src_origin}:{lang}"
        if key in done:
            n_skip += 1
            continue
        try:
            new_id = prod_create(src_origin, lang, t["title"], t["content"])
            done[key] = new_id
            # A failure recorded by an earlier run is stale once this succeeds.
            errors.pop(key, None)
            save_state(state)
            n_ok += 1
            log(f" {key} → prod #{new_id} «{t['title'][:45]}»")
        except Exception as exc:  # noqa: BLE001
            errors[key] = str(exc)[:300]
            save_state(state)
            n_err += 1
            log(f" {key} ERROR: {exc}")

    save_state(state)
    log(f"FIN sync legado. nuevos={n_ok} saltados={n_skip} errores={n_err}. Estado: {STATE_FILE}")
    log("Recuerda en prod: ejecutar remap_translation_cats.php si alguna quedó sin categoría traducida.")
    return 0
|
|
|
|
|
|
def main() -> int:
    """Parse the command line and run the selected sync mode.

    ``--carta`` or ``--ids`` select the preserved-IDs mode
    (``deploy_fixed_ids``); ``--carta`` wins when both are given. Without
    either, the legacy mode (``legacy_sync``) runs.

    Returns:
        The exit status of the mode that ran.

    Raises:
        SystemExit: Via ``argparse`` (status 2) when ``--ids`` contains no
            numeric id, or when ``--dry-run`` is requested in legacy mode.
    """
    ap = argparse.ArgumentParser(description="Sincroniza contenido local→prod reutilizando el texto local.")
    ap.add_argument("--limit", type=int, default=0, help="Modo legado: máximo de traducciones a sincronizar.")
    ap.add_argument("--origin", type=int, default=0, help="Modo legado: solo traducciones de este ES.")
    ap.add_argument("--carta", type=int, default=0, help="Modo IDs preservados: carta ES y todo su cluster.")
    ap.add_argument("--ids", default="", help="Modo IDs preservados: lista CSV de posts semilla a clonar/enlazar.")
    ap.add_argument("--keep-existing", default="", help="IDs que ya existen en prod y no deben clonarse.")
    ap.add_argument("--dry-run", action="store_true", help="Solo muestra el plan; no toca prod.")
    args = ap.parse_args()

    seed_ids: list[int] = []
    if args.carta:
        seed_ids = [args.carta, *carta_article_ids(args.carta)]
    elif args.ids:
        seed_ids = parse_csv_ints(args.ids)
        if not seed_ids:
            # A mistyped --ids used to fall through to a full legacy sync
            # against prod; refuse instead.
            ap.error("--ids no contiene ningún ID numérico válido")

    if seed_ids:
        keep_existing = set(parse_csv_ints(args.keep_existing))
        return deploy_fixed_ids(seed_ids, keep_existing=keep_existing, dry_run=args.dry_run)

    if args.dry_run:
        # The legacy mode has no dry run: running it would modify prod even
        # though the flag promises not to.
        ap.error("--dry-run solo está soportado en el modo IDs preservados (--carta / --ids)")

    return legacy_sync(args.limit, args.origin)
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|