Files
feadulta/scripts/import_new_k2_items.py
T

404 lines
15 KiB
Python

#!/usr/bin/env python3
"""
import_new_k2_items.py
Importa los K2 items nuevos de Joomla prod (id > 17873) al WordPress local (Docker).
Conexión a Joomla: SSH + MySQL en feadulta@134.0.10.170
Conexión a WP: Docker exec wordpress-mysql
Categorías WP asignadas según extra_fields:
- ES + tiene "libro de la biblia" (id 9) → Comentarios al evangelio (1647) + Feadulta (71)
- ES + no id9 + título "DOMINGO/SEMANA SANTA/etc." → Eucaristía (1648) + Feadulta (71)
- ES + no id9 + otro → Artículos (1650) + Feadulta (71)
- No ES → Artículos (1650) + Feadulta (71)
Idioma Polylang asignado según extra_field id 16:
1=es, 2=en, 3=fr, 4=it, 5=pt
"""
import json
import subprocess
import sys
from datetime import datetime
# ── Configuración ──────────────────────────────────────────────────────────────
JOOMLA_SSH_HOST = "134.0.10.170"
JOOMLA_SSH_USER = "feadulta"
JOOMLA_SSH_PASS = "C6c2A!mAl3Wj.BQF"
JOOMLA_DB_HOST = "127.0.0.1"
JOOMLA_DB_USER = "fejoomla3"
JOOMLA_DB_PASS = "5FF-}5^[>7^pK4W9"
JOOMLA_DB_NAME = "fejoomla3"
WP_DOCKER = "wordpress-mysql"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = "wordpress_pass"
WP_DB_NAME = "wordpress_db"
WP_DB_HOST = "wordpress-mysql" # dentro del container
LAST_K2_ID = None # se calcula dinámicamente en main(): MAX(_fgj2wp_old_k2_id) en WP
# WP term_taxonomy_ids (obtenidos con SELECT tt.term_taxonomy_id FROM wp_term_taxonomy tt WHERE tt.term_id=N)
# Precalculados:
CAT_FEADULTA = 71 # term_id (se convertirá a term_taxonomy_id abajo)
CAT_ARTICULOS = 1650
CAT_EVANGELIO = 1647
CAT_EUCARISTIA = 1648
LANG_MAP = {1: 'es', 2: 'en', 3: 'fr', 4: 'it', 5: 'pt'}
DOMINGO_RE = r'DOMINGO|SEMANA SANTA|SEMANA DE PASCUA|PENTECOST|NAVIDAD|EPIFAN'
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def ssh_mysql(query: str) -> list[dict]:
"""Ejecuta una query en el MySQL de Joomla prod vía sshpass."""
cmd = [
'sshpass', '-p', JOOMLA_SSH_PASS,
'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
f'mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} '
f'-p{repr(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} '
f'--default-character-set=utf8mb4 -B -e "{query}"'
]
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
if result.returncode != 0:
print(f"[ERROR SSH] {result.stderr[:300]}", file=sys.stderr)
return []
lines = result.stdout.strip().split('\n')
if len(lines) < 2:
return []
headers = lines[0].split('\t')
rows = []
for line in lines[1:]:
if line:
vals = line.split('\t')
rows.append(dict(zip(headers, vals)))
return rows
def wp_mysql(query: str) -> list[dict]:
"""Ejecuta una query en el MySQL del WP local vía Docker exec."""
cmd = [
'docker', 'exec', WP_DOCKER,
'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
'--default-character-set=utf8mb4', '-B', '-e', query
]
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
if result.returncode != 0:
print(f"[ERROR WP] {result.stderr[:300]}", file=sys.stderr)
return []
lines = result.stdout.strip().split('\n')
if len(lines) < 2:
return []
headers = lines[0].split('\t')
rows = []
for line in lines[1:]:
if line:
vals = line.split('\t')
rows.append(dict(zip(headers, vals)))
return rows
def wp_execute(sql: str):
"""Ejecuta un INSERT/UPDATE en WP MySQL."""
if DRY_RUN:
print(f" [DRY] {sql[:120]}")
return
cmd = [
'docker', 'exec', WP_DOCKER,
'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
'--default-character-set=utf8mb4', '-e', sql
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
print(f"[ERROR INSERT] {result.stderr[:300]}", file=sys.stderr)
def esc(s: str) -> str:
"""Escapa una string para SQL."""
return s.replace('\\', '\\\\').replace("'", "\\'")
# ── Cargar datos auxiliares ────────────────────────────────────────────────────
def load_user_map() -> dict:
"""Devuelve {joomla_user_id: wp_user_id}."""
rows = wp_mysql(
"SELECT um.meta_value jid, u.ID wid FROM wp_users u "
"JOIN wp_usermeta um ON um.user_id=u.ID "
"WHERE um.meta_key='_fgj2wp_old_user_id'"
)
m = {}
for r in rows:
try:
m[int(r['jid'])] = int(r['wid'])
except ValueError:
pass
return m
def load_term_taxonomy_ids() -> dict:
"""Devuelve {term_id: term_taxonomy_id} para las categorías relevantes."""
term_ids = [CAT_FEADULTA, CAT_ARTICULOS, CAT_EVANGELIO, CAT_EUCARISTIA]
ids_str = ','.join(str(x) for x in term_ids)
rows = wp_mysql(
f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
f"WHERE term_id IN ({ids_str}) AND taxonomy='category'"
)
return {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}
def load_polylang_term_ids() -> dict:
"""Devuelve {'es': tt_id, 'en': tt_id, ...} para los términos de idioma de Polylang."""
rows = wp_mysql(
"SELECT t.slug, tt.term_taxonomy_id FROM wp_terms t "
"JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
"WHERE tt.taxonomy='language' AND t.slug IN ('es','en','fr','it','pt')"
)
return {r['slug']: int(r['term_taxonomy_id']) for r in rows}
# ── Parsear extra_fields ───────────────────────────────────────────────────────
def parse_extra_fields(ef_json: str) -> dict:
"""Devuelve dict con claves: lang_val, has_libro, cita_biblica."""
result = {'lang_val': None, 'has_libro': False, 'cita_biblica': None}
if not ef_json or ef_json == 'NULL':
return result
try:
fields = json.loads(ef_json)
except json.JSONDecodeError:
return result
for f in fields:
fid = str(f.get('id', ''))
val = f.get('value')
if fid == '16' and val is not None:
try:
result['lang_val'] = int(val)
except (ValueError, TypeError):
pass
elif fid == '9':
result['has_libro'] = True
elif fid == '14':
if isinstance(val, list):
result['cita_biblica'] = ','.join(str(v) for v in val)
else:
result['cita_biblica'] = str(val) if val else None
return result
def determine_categories(ef: dict, title: str) -> list[int]:
"""Devuelve lista de term_ids de categoría para el post."""
import re
lang = ef.get('lang_val')
es = (lang == 1 or lang is None)
cats = [CAT_FEADULTA]
if es and ef.get('has_libro'):
cats.append(CAT_EVANGELIO)
elif es and re.search(DOMINGO_RE, title, re.IGNORECASE):
cats.append(CAT_EUCARISTIA)
else:
cats.append(CAT_ARTICULOS)
return cats
# ── Import principal ───────────────────────────────────────────────────────────
def main():
global LAST_K2_ID
# Detección dinámica del último K2 importado (evita hardcodear y re-importar deltas previos)
r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
"WHERE meta_key='_fgj2wp_old_k2_id'")
LAST_K2_ID = int(r[0]['m']) if r and r[0].get('m') and r[0]['m'] != 'NULL' else 17873
print(f"=== Import K2 items > {LAST_K2_ID} → WP local {'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")
user_map = load_user_map()
print(f"Usuarios mapeados: {len(user_map)}")
tt_ids = load_term_taxonomy_ids()
print(f"Categorías TT IDs: {tt_ids}")
pl_ids = load_polylang_term_ids()
print(f"Polylang idiomas: {pl_ids}")
# Verificar que los K2 IDs ya en WP no se reimportan
existing = wp_mysql(
f"SELECT meta_value FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
f"AND meta_value+0 > {LAST_K2_ID}"
)
existing_ids = {int(r['meta_value']) for r in existing}
print(f"K2 IDs > {LAST_K2_ID} ya en WP: {len(existing_ids)}")
# Obtener items de Joomla vía SSH+MySQL (query por stdin para evitar escape de shell)
print("\nObteniendo K2 items de Joomla prod...")
# HEX encoding para campos de texto (evita que el HTML con saltos de línea
# rompa el parsing TSV)
query = (
f"SELECT id, HEX(title) title, HEX(alias) alias, "
f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
f"created, created_by, HEX(extra_fields) extra_fields, publish_up "
f"FROM ew4r_k2_items "
f"WHERE published=1 AND id > {LAST_K2_ID} ORDER BY id;"
)
mysql_cmd = (
f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
f"-p'{JOOMLA_DB_PASS}' {JOOMLA_DB_NAME} "
f"--default-character-set=utf8mb4 -B"
)
cmd = [
'sshpass', '-p', JOOMLA_SSH_PASS,
'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
mysql_cmd
]
result = subprocess.run(cmd, input=query, capture_output=True, text=True, encoding='utf-8')
if result.returncode != 0:
print(f"ERROR: {result.stderr[:500]}")
sys.exit(1)
lines = result.stdout.strip().split('\n')
if len(lines) < 2:
print("No se encontraron items nuevos.")
return
headers = lines[0].split('\t')
items = []
for line in lines[1:]:
if line:
vals = line.split('\t')
items.append(dict(zip(headers, vals)))
print(f"Items a importar: {len(items)}")
stats = {'ok': 0, 'skip': 0, 'err': 0}
for item in items:
k2_id = int(item['id'])
if k2_id in existing_ids:
print(f" [SKIP] K2 id={k2_id} ya existe en WP")
stats['skip'] += 1
continue
def unhex(val: str) -> str:
if not val or val == 'NULL':
return ''
try:
return bytes.fromhex(val).decode('utf-8', errors='replace')
except Exception:
return val
title = unhex(item.get('title', ''))
alias = unhex(item.get('alias', ''))
intro = unhex(item.get('introtext', ''))
full = unhex(item.get('fulltext_col', ''))
ef_json = unhex(item.get('extra_fields', '')) or '[]'
created = item.get('created', '') or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
if not created or created == 'NULL':
created = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
created_by_raw = item.get('created_by', '0')
created_by = int(created_by_raw) if created_by_raw and created_by_raw != 'NULL' else 0
# Contenido combinado
if full and full.strip():
content = intro + '\n<!--more-->\n' + full
else:
content = intro
# Autor WP
wp_author = user_map.get(created_by, 1) # fallback: admin
if created_by and created_by not in user_map:
# Autor Joomla sin usuario WP equivalente: queda atribuido a «Fe Adulta».
# NO se pierde el dato: corregir tras el delta con scripts/fix_k2_authors.php,
# que crea el usuario (nombre real de ew4r_users) y reasigna post_author (#143).
print(f" ⚠ autor K2 {created_by} sin user WP → queda en 'Fe Adulta' "
f"(corregir con fix_k2_authors.php)")
# Extra fields
ef = parse_extra_fields(ef_json)
lang_code = LANG_MAP.get(ef.get('lang_val'), 'es')
cats = determine_categories(ef, title)
print(f" [{k2_id}] {title[:50]} | lang={lang_code} | cats={cats}")
# INSERT post
post_slug = esc(alias[:200]) if alias else ''
post_title = esc(title)
post_content = esc(content)
post_date = created
post_date_gmt = created # simplificado (no ajuste TZ)
insert_post = (
f"INSERT INTO wp_posts "
f"(post_author, post_date, post_date_gmt, post_content, post_title, "
f"post_excerpt, post_status, comment_status, ping_status, post_name, "
f"post_type, post_modified, post_modified_gmt, comment_count, "
f"to_ping, pinged, post_content_filtered) VALUES ("
f"{wp_author}, '{post_date}', '{post_date_gmt}', '{post_content}', "
f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
f"'post', '{post_date}', '{post_date_gmt}', 0, '', '', '')"
)
wp_execute(insert_post)
if DRY_RUN:
stats['ok'] += 1
continue
# Obtener el ID del post recién insertado. NO usar LAST_INSERT_ID(): cada
# docker exec abre una conexión nueva y devolvería 0. MAX(ID) es fiable
# en uso secuencial (sin inserciones concurrentes).
new_id_rows = wp_mysql("SELECT MAX(ID) as new_id FROM wp_posts")
if not new_id_rows:
print(f" [ERROR] No se pudo obtener el ID del post para k2_id={k2_id}")
stats['err'] += 1
continue
new_wp_id = int(new_id_rows[0]['new_id'])
print(f" → WP post ID={new_wp_id}")
# INSERT metas
metas = [
('_fgj2wp_old_k2_id', str(k2_id)),
('Idioma', str(ef.get('lang_val') or 1)),
]
for meta_key, meta_val in metas:
wp_execute(
f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
f"VALUES ({new_wp_id}, '{esc(meta_key)}', '{esc(meta_val)}')"
)
# Categorías
for term_id in cats:
tt_id = tt_ids.get(term_id)
if tt_id:
wp_execute(
f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
f"VALUES ({new_wp_id}, {tt_id})"
)
# Polylang language
pl_tt = pl_ids.get(lang_code)
if pl_tt:
wp_execute(
f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
f"VALUES ({new_wp_id}, {pl_tt})"
)
stats['ok'] += 1
# Actualizar counts de categorías
if not DRY_RUN and stats['ok'] > 0:
print("\nActualizando counts de categorías...")
tt_ids_list = ','.join(str(v) for v in tt_ids.values())
wp_execute(
f"UPDATE wp_term_taxonomy tt SET count = ("
f"SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
f") WHERE tt.term_taxonomy_id IN ({tt_ids_list})"
)
print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip, {stats['err']} err ===")
if __name__ == '__main__':
main()