404 lines
15 KiB
Python
404 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
import_new_k2_items.py
|
|
|
|
Importa los K2 items nuevos de Joomla prod (id > 17873) al WordPress local (Docker).
|
|
Conexión a Joomla: SSH + MySQL en feadulta@134.0.10.170
|
|
Conexión a WP: Docker exec wordpress-mysql
|
|
|
|
Categorías WP asignadas según extra_fields:
|
|
- ES + tiene "libro de la biblia" (id 9) → Comentarios al evangelio (1647) + Feadulta (71)
|
|
- ES + no id9 + título "DOMINGO/SEMANA SANTA/etc." → Eucaristía (1648) + Feadulta (71)
|
|
- ES + no id9 + otro → Artículos (1650) + Feadulta (71)
|
|
- No ES → Artículos (1650) + Feadulta (71)
|
|
|
|
Idioma Polylang asignado según extra_field id 16:
|
|
1=es, 2=en, 3=fr, 4=it, 5=pt
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
# ── Configuración ──────────────────────────────────────────────────────────────
|
|
|
|
JOOMLA_SSH_HOST = "134.0.10.170"
|
|
JOOMLA_SSH_USER = "feadulta"
|
|
JOOMLA_SSH_PASS = "C6c2A!mAl3Wj.BQF"
|
|
JOOMLA_DB_HOST = "127.0.0.1"
|
|
JOOMLA_DB_USER = "fejoomla3"
|
|
JOOMLA_DB_PASS = "5FF-}5^[>7^pK4W9"
|
|
JOOMLA_DB_NAME = "fejoomla3"
|
|
|
|
WP_DOCKER = "wordpress-mysql"
|
|
WP_DB_USER = "wordpress_user"
|
|
WP_DB_PASS = "wordpress_pass"
|
|
WP_DB_NAME = "wordpress_db"
|
|
WP_DB_HOST = "wordpress-mysql" # dentro del container
|
|
|
|
LAST_K2_ID = None # se calcula dinámicamente en main(): MAX(_fgj2wp_old_k2_id) en WP
|
|
|
|
# WP term_taxonomy_ids (obtenidos con SELECT tt.term_taxonomy_id FROM wp_term_taxonomy tt WHERE tt.term_id=N)
|
|
# Precalculados:
|
|
CAT_FEADULTA = 71 # term_id (se convertirá a term_taxonomy_id abajo)
|
|
CAT_ARTICULOS = 1650
|
|
CAT_EVANGELIO = 1647
|
|
CAT_EUCARISTIA = 1648
|
|
|
|
LANG_MAP = {1: 'es', 2: 'en', 3: 'fr', 4: 'it', 5: 'pt'}
|
|
DOMINGO_RE = r'DOMINGO|SEMANA SANTA|SEMANA DE PASCUA|PENTECOST|NAVIDAD|EPIFAN'
|
|
|
|
DRY_RUN = '--dry-run' in sys.argv
|
|
|
|
# ── Helpers ────────────────────────────────────────────────────────────────────
|
|
|
|
def ssh_mysql(query: str) -> list[dict]:
|
|
"""Ejecuta una query en el MySQL de Joomla prod vía sshpass."""
|
|
cmd = [
|
|
'sshpass', '-p', JOOMLA_SSH_PASS,
|
|
'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
|
|
f'mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} '
|
|
f'-p{repr(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} '
|
|
f'--default-character-set=utf8mb4 -B -e "{query}"'
|
|
]
|
|
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
|
|
if result.returncode != 0:
|
|
print(f"[ERROR SSH] {result.stderr[:300]}", file=sys.stderr)
|
|
return []
|
|
lines = result.stdout.strip().split('\n')
|
|
if len(lines) < 2:
|
|
return []
|
|
headers = lines[0].split('\t')
|
|
rows = []
|
|
for line in lines[1:]:
|
|
if line:
|
|
vals = line.split('\t')
|
|
rows.append(dict(zip(headers, vals)))
|
|
return rows
|
|
|
|
|
|
def wp_mysql(query: str) -> list[dict]:
|
|
"""Ejecuta una query en el MySQL del WP local vía Docker exec."""
|
|
cmd = [
|
|
'docker', 'exec', WP_DOCKER,
|
|
'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
|
|
'--default-character-set=utf8mb4', '-B', '-e', query
|
|
]
|
|
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
|
|
if result.returncode != 0:
|
|
print(f"[ERROR WP] {result.stderr[:300]}", file=sys.stderr)
|
|
return []
|
|
lines = result.stdout.strip().split('\n')
|
|
if len(lines) < 2:
|
|
return []
|
|
headers = lines[0].split('\t')
|
|
rows = []
|
|
for line in lines[1:]:
|
|
if line:
|
|
vals = line.split('\t')
|
|
rows.append(dict(zip(headers, vals)))
|
|
return rows
|
|
|
|
|
|
def wp_execute(sql: str):
|
|
"""Ejecuta un INSERT/UPDATE en WP MySQL."""
|
|
if DRY_RUN:
|
|
print(f" [DRY] {sql[:120]}")
|
|
return
|
|
cmd = [
|
|
'docker', 'exec', WP_DOCKER,
|
|
'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
|
|
'--default-character-set=utf8mb4', '-e', sql
|
|
]
|
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
if result.returncode != 0:
|
|
print(f"[ERROR INSERT] {result.stderr[:300]}", file=sys.stderr)
|
|
|
|
|
|
def esc(s: str) -> str:
|
|
"""Escapa una string para SQL."""
|
|
return s.replace('\\', '\\\\').replace("'", "\\'")
|
|
|
|
|
|
# ── Cargar datos auxiliares ────────────────────────────────────────────────────
|
|
|
|
def load_user_map() -> dict:
|
|
"""Devuelve {joomla_user_id: wp_user_id}."""
|
|
rows = wp_mysql(
|
|
"SELECT um.meta_value jid, u.ID wid FROM wp_users u "
|
|
"JOIN wp_usermeta um ON um.user_id=u.ID "
|
|
"WHERE um.meta_key='_fgj2wp_old_user_id'"
|
|
)
|
|
m = {}
|
|
for r in rows:
|
|
try:
|
|
m[int(r['jid'])] = int(r['wid'])
|
|
except ValueError:
|
|
pass
|
|
return m
|
|
|
|
|
|
def load_term_taxonomy_ids() -> dict:
|
|
"""Devuelve {term_id: term_taxonomy_id} para las categorías relevantes."""
|
|
term_ids = [CAT_FEADULTA, CAT_ARTICULOS, CAT_EVANGELIO, CAT_EUCARISTIA]
|
|
ids_str = ','.join(str(x) for x in term_ids)
|
|
rows = wp_mysql(
|
|
f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
|
|
f"WHERE term_id IN ({ids_str}) AND taxonomy='category'"
|
|
)
|
|
return {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}
|
|
|
|
|
|
def load_polylang_term_ids() -> dict:
|
|
"""Devuelve {'es': tt_id, 'en': tt_id, ...} para los términos de idioma de Polylang."""
|
|
rows = wp_mysql(
|
|
"SELECT t.slug, tt.term_taxonomy_id FROM wp_terms t "
|
|
"JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
|
|
"WHERE tt.taxonomy='language' AND t.slug IN ('es','en','fr','it','pt')"
|
|
)
|
|
return {r['slug']: int(r['term_taxonomy_id']) for r in rows}
|
|
|
|
|
|
# ── Parsear extra_fields ───────────────────────────────────────────────────────
|
|
|
|
def parse_extra_fields(ef_json: str) -> dict:
|
|
"""Devuelve dict con claves: lang_val, has_libro, cita_biblica."""
|
|
result = {'lang_val': None, 'has_libro': False, 'cita_biblica': None}
|
|
if not ef_json or ef_json == 'NULL':
|
|
return result
|
|
try:
|
|
fields = json.loads(ef_json)
|
|
except json.JSONDecodeError:
|
|
return result
|
|
for f in fields:
|
|
fid = str(f.get('id', ''))
|
|
val = f.get('value')
|
|
if fid == '16' and val is not None:
|
|
try:
|
|
result['lang_val'] = int(val)
|
|
except (ValueError, TypeError):
|
|
pass
|
|
elif fid == '9':
|
|
result['has_libro'] = True
|
|
elif fid == '14':
|
|
if isinstance(val, list):
|
|
result['cita_biblica'] = ','.join(str(v) for v in val)
|
|
else:
|
|
result['cita_biblica'] = str(val) if val else None
|
|
return result
|
|
|
|
|
|
def determine_categories(ef: dict, title: str) -> list[int]:
|
|
"""Devuelve lista de term_ids de categoría para el post."""
|
|
import re
|
|
lang = ef.get('lang_val')
|
|
es = (lang == 1 or lang is None)
|
|
cats = [CAT_FEADULTA]
|
|
if es and ef.get('has_libro'):
|
|
cats.append(CAT_EVANGELIO)
|
|
elif es and re.search(DOMINGO_RE, title, re.IGNORECASE):
|
|
cats.append(CAT_EUCARISTIA)
|
|
else:
|
|
cats.append(CAT_ARTICULOS)
|
|
return cats
|
|
|
|
|
|
# ── Import principal ───────────────────────────────────────────────────────────
|
|
|
|
def main():
|
|
global LAST_K2_ID
|
|
# Detección dinámica del último K2 importado (evita hardcodear y re-importar deltas previos)
|
|
r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
|
|
"WHERE meta_key='_fgj2wp_old_k2_id'")
|
|
LAST_K2_ID = int(r[0]['m']) if r and r[0].get('m') and r[0]['m'] != 'NULL' else 17873
|
|
print(f"=== Import K2 items > {LAST_K2_ID} → WP local {'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")
|
|
|
|
user_map = load_user_map()
|
|
print(f"Usuarios mapeados: {len(user_map)}")
|
|
|
|
tt_ids = load_term_taxonomy_ids()
|
|
print(f"Categorías TT IDs: {tt_ids}")
|
|
|
|
pl_ids = load_polylang_term_ids()
|
|
print(f"Polylang idiomas: {pl_ids}")
|
|
|
|
# Verificar que los K2 IDs ya en WP no se reimportan
|
|
existing = wp_mysql(
|
|
f"SELECT meta_value FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
|
|
f"AND meta_value+0 > {LAST_K2_ID}"
|
|
)
|
|
existing_ids = {int(r['meta_value']) for r in existing}
|
|
print(f"K2 IDs > {LAST_K2_ID} ya en WP: {len(existing_ids)}")
|
|
|
|
# Obtener items de Joomla vía SSH+MySQL (query por stdin para evitar escape de shell)
|
|
print("\nObteniendo K2 items de Joomla prod...")
|
|
# HEX encoding para campos de texto (evita que el HTML con saltos de línea
|
|
# rompa el parsing TSV)
|
|
query = (
|
|
f"SELECT id, HEX(title) title, HEX(alias) alias, "
|
|
f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
|
|
f"created, created_by, HEX(extra_fields) extra_fields, publish_up "
|
|
f"FROM ew4r_k2_items "
|
|
f"WHERE published=1 AND id > {LAST_K2_ID} ORDER BY id;"
|
|
)
|
|
mysql_cmd = (
|
|
f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
|
|
f"-p'{JOOMLA_DB_PASS}' {JOOMLA_DB_NAME} "
|
|
f"--default-character-set=utf8mb4 -B"
|
|
)
|
|
cmd = [
|
|
'sshpass', '-p', JOOMLA_SSH_PASS,
|
|
'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
|
|
mysql_cmd
|
|
]
|
|
result = subprocess.run(cmd, input=query, capture_output=True, text=True, encoding='utf-8')
|
|
if result.returncode != 0:
|
|
print(f"ERROR: {result.stderr[:500]}")
|
|
sys.exit(1)
|
|
|
|
lines = result.stdout.strip().split('\n')
|
|
if len(lines) < 2:
|
|
print("No se encontraron items nuevos.")
|
|
return
|
|
|
|
headers = lines[0].split('\t')
|
|
items = []
|
|
for line in lines[1:]:
|
|
if line:
|
|
vals = line.split('\t')
|
|
items.append(dict(zip(headers, vals)))
|
|
|
|
print(f"Items a importar: {len(items)}")
|
|
|
|
stats = {'ok': 0, 'skip': 0, 'err': 0}
|
|
|
|
for item in items:
|
|
k2_id = int(item['id'])
|
|
|
|
if k2_id in existing_ids:
|
|
print(f" [SKIP] K2 id={k2_id} ya existe en WP")
|
|
stats['skip'] += 1
|
|
continue
|
|
|
|
def unhex(val: str) -> str:
|
|
if not val or val == 'NULL':
|
|
return ''
|
|
try:
|
|
return bytes.fromhex(val).decode('utf-8', errors='replace')
|
|
except Exception:
|
|
return val
|
|
|
|
title = unhex(item.get('title', ''))
|
|
alias = unhex(item.get('alias', ''))
|
|
intro = unhex(item.get('introtext', ''))
|
|
full = unhex(item.get('fulltext_col', ''))
|
|
ef_json = unhex(item.get('extra_fields', '')) or '[]'
|
|
created = item.get('created', '') or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
if not created or created == 'NULL':
|
|
created = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
created_by_raw = item.get('created_by', '0')
|
|
created_by = int(created_by_raw) if created_by_raw and created_by_raw != 'NULL' else 0
|
|
|
|
# Contenido combinado
|
|
if full and full.strip():
|
|
content = intro + '\n<!--more-->\n' + full
|
|
else:
|
|
content = intro
|
|
|
|
# Autor WP
|
|
wp_author = user_map.get(created_by, 1) # fallback: admin
|
|
if created_by and created_by not in user_map:
|
|
# Autor Joomla sin usuario WP equivalente: queda atribuido a «Fe Adulta».
|
|
# NO se pierde el dato: corregir tras el delta con scripts/fix_k2_authors.php,
|
|
# que crea el usuario (nombre real de ew4r_users) y reasigna post_author (#143).
|
|
print(f" ⚠ autor K2 {created_by} sin user WP → queda en 'Fe Adulta' "
|
|
f"(corregir con fix_k2_authors.php)")
|
|
|
|
# Extra fields
|
|
ef = parse_extra_fields(ef_json)
|
|
lang_code = LANG_MAP.get(ef.get('lang_val'), 'es')
|
|
cats = determine_categories(ef, title)
|
|
|
|
print(f" [{k2_id}] {title[:50]} | lang={lang_code} | cats={cats}")
|
|
|
|
# INSERT post
|
|
post_slug = esc(alias[:200]) if alias else ''
|
|
post_title = esc(title)
|
|
post_content = esc(content)
|
|
post_date = created
|
|
post_date_gmt = created # simplificado (no ajuste TZ)
|
|
|
|
insert_post = (
|
|
f"INSERT INTO wp_posts "
|
|
f"(post_author, post_date, post_date_gmt, post_content, post_title, "
|
|
f"post_excerpt, post_status, comment_status, ping_status, post_name, "
|
|
f"post_type, post_modified, post_modified_gmt, comment_count, "
|
|
f"to_ping, pinged, post_content_filtered) VALUES ("
|
|
f"{wp_author}, '{post_date}', '{post_date_gmt}', '{post_content}', "
|
|
f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
|
|
f"'post', '{post_date}', '{post_date_gmt}', 0, '', '', '')"
|
|
)
|
|
wp_execute(insert_post)
|
|
|
|
if DRY_RUN:
|
|
stats['ok'] += 1
|
|
continue
|
|
|
|
# Obtener el ID del post recién insertado. NO usar LAST_INSERT_ID(): cada
|
|
# docker exec abre una conexión nueva y devolvería 0. MAX(ID) es fiable
|
|
# en uso secuencial (sin inserciones concurrentes).
|
|
new_id_rows = wp_mysql("SELECT MAX(ID) as new_id FROM wp_posts")
|
|
if not new_id_rows:
|
|
print(f" [ERROR] No se pudo obtener el ID del post para k2_id={k2_id}")
|
|
stats['err'] += 1
|
|
continue
|
|
new_wp_id = int(new_id_rows[0]['new_id'])
|
|
print(f" → WP post ID={new_wp_id}")
|
|
|
|
# INSERT metas
|
|
metas = [
|
|
('_fgj2wp_old_k2_id', str(k2_id)),
|
|
('Idioma', str(ef.get('lang_val') or 1)),
|
|
]
|
|
for meta_key, meta_val in metas:
|
|
wp_execute(
|
|
f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
|
|
f"VALUES ({new_wp_id}, '{esc(meta_key)}', '{esc(meta_val)}')"
|
|
)
|
|
|
|
# Categorías
|
|
for term_id in cats:
|
|
tt_id = tt_ids.get(term_id)
|
|
if tt_id:
|
|
wp_execute(
|
|
f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
|
|
f"VALUES ({new_wp_id}, {tt_id})"
|
|
)
|
|
|
|
# Polylang language
|
|
pl_tt = pl_ids.get(lang_code)
|
|
if pl_tt:
|
|
wp_execute(
|
|
f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
|
|
f"VALUES ({new_wp_id}, {pl_tt})"
|
|
)
|
|
|
|
stats['ok'] += 1
|
|
|
|
# Actualizar counts de categorías
|
|
if not DRY_RUN and stats['ok'] > 0:
|
|
print("\nActualizando counts de categorías...")
|
|
tt_ids_list = ','.join(str(v) for v in tt_ids.values())
|
|
wp_execute(
|
|
f"UPDATE wp_term_taxonomy tt SET count = ("
|
|
f"SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
|
|
f") WHERE tt.term_taxonomy_id IN ({tt_ids_list})"
|
|
)
|
|
|
|
print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip, {stats['err']} err ===")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|