Files
feadulta/scripts/import_new_cartas.py

321 lines
12 KiB
Python

#!/usr/bin/env python3
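"""
import_new_cartas.py

Import the new weekly letters ("cartas de la semana") from ew4r_content
(production Joomla) into the local WordPress (Docker), then assign _carta_id
to the matching K2 articles according to their date (extra_field id 15).

Only ew4r_content rows with an id greater than the last one already imported
are fetched. That threshold is MAX(_fgj2wp_old_content_id) in WordPress and
falls back to 9043 when no such meta exists.

Pass --dry-run to print the SQL that would be executed without modifying
WordPress; the _carta_id assignment step is skipped in that mode.

WP categories by Joomla catid:
    catid 27 (Carta de la semana)  → WP: 6 + 21 + 71
    catid 40 (Cartas de otras sem) → WP: 21 + 71
    catid 41 (Carta semana pasada) → WP: 21 + 22 + 71
"""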
import json
import os
import re
import shlex
import subprocess
import sys
from datetime import datetime
# ── Connection settings ───────────────────────────────────────────────────────
# SECURITY: these credentials used to be hard-coded only. Each setting can now
# be overridden with an environment variable of the same name; the literals are
# kept purely as backward-compatible defaults and should be rotated and removed
# from the repository once the environment is configured.
JOOMLA_SSH_HOST = os.environ.get("JOOMLA_SSH_HOST", "134.0.10.170")
JOOMLA_SSH_USER = os.environ.get("JOOMLA_SSH_USER", "feadulta")
JOOMLA_SSH_PASS = os.environ.get("JOOMLA_SSH_PASS", "C6c2A!mAl3Wj.BQF")
JOOMLA_DB_HOST = os.environ.get("JOOMLA_DB_HOST", "127.0.0.1")
JOOMLA_DB_USER = os.environ.get("JOOMLA_DB_USER", "fejoomla3")
JOOMLA_DB_PASS = os.environ.get("JOOMLA_DB_PASS", "5FF-}5^[>7^pK4W9")
JOOMLA_DB_NAME = os.environ.get("JOOMLA_DB_NAME", "fejoomla3")

# Name of the Docker container that runs the WordPress MySQL server.
WP_DOCKER = os.environ.get("WP_DOCKER", "wordpress-mysql")
WP_DB_USER = os.environ.get("WP_DB_USER", "wordpress_user")
WP_DB_PASS = os.environ.get("WP_DB_PASS", "wordpress_pass")
WP_DB_NAME = os.environ.get("WP_DB_NAME", "wordpress_db")

# Computed in main(): MAX(_fgj2wp_old_content_id) found in WordPress.
LAST_CONTENT_ID = None

# WP category term_ids; their term_taxonomy_ids are looked up at runtime.
CAT_FEADULTA = 71
CAT_CARTA_SEMANA = 6
CAT_CARTAS_OTRAS = 21
CAT_CARTA_PASADA = 22

# Joomla catid → WP category term_ids assigned to the imported post.
CATID_TO_WP = {
    27: [CAT_CARTA_SEMANA, CAT_CARTAS_OTRAS, CAT_FEADULTA],
    40: [CAT_CARTAS_OTRAS, CAT_FEADULTA],
    41: [CAT_CARTAS_OTRAS, CAT_CARTA_PASADA, CAT_FEADULTA],
}

# With --dry-run, write statements are printed instead of executed.
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def joomla_query(query: str) -> list[dict]:
    """Run *query* against the Joomla database over SSH and return its rows.

    The remote ``mysql`` client is started through ``sshpass``/``ssh`` in
    batch mode (``-B``) and receives the SQL on stdin. Its tab-separated
    output is parsed into one dict per row, keyed by the column names of the
    header line; every value is returned as a string.

    Returns an empty list when the command exits non-zero (the first 300
    characters of stderr are printed to stderr) or when there are no rows.
    """
    # The command travels to the remote side as one string, so every argument
    # is shell-quoted; hand-written '...' quoting broke as soon as the password
    # contained a single quote.
    mysql_cmd = shlex.join([
        'mysql', '--skip-ssl', '-h', JOOMLA_DB_HOST, '-u', JOOMLA_DB_USER,
        f'-p{JOOMLA_DB_PASS}', JOOMLA_DB_NAME,
        '--default-character-set=utf8mb4', '-B',
    ])
    cmd = ['sshpass', '-p', JOOMLA_SSH_PASS,
           'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}', mysql_cmd]
    result = subprocess.run(cmd, input=query, capture_output=True,
                            text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERR SSH] {result.stderr[:300]}", file=sys.stderr)
        return []

    # Trim only newlines: a blanket strip() would also eat the trailing tab of
    # a last row whose final column is empty, silently dropping that key.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_mysql(query: str) -> list[dict]:
    """Run a read query on the WordPress database and return its rows.

    The query is executed with ``mysql -B -e`` inside the ``WP_DOCKER``
    container via ``docker exec``. The tab-separated output is parsed into one
    dict per row, keyed by the column names of the header line; every value
    is returned as a string (SQL NULL arrives as the text ``NULL``).

    Returns an empty list when the command exits non-zero or when there are no
    rows. Failures are reported on stderr so that an unreachable database is
    not mistaken for an empty result.
    """
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-B', '-e', query]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        # Drop the client's routine password warning; show only real errors.
        err = result.stderr.replace(
            'mysql: [Warning] Using a password on the command line interface can be insecure.\n', '')
        if err.strip():
            print(f" [ERR] {err.strip()[:200]}", file=sys.stderr)
        return []

    # Trim only newlines: a blanket strip() would also eat the trailing tab of
    # a last row whose final column is empty, silently dropping that key.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_execute(sql: str):
    """Execute a write statement on the WordPress database.

    In dry-run mode the first 110 characters of *sql* are printed and nothing
    is executed. Otherwise the statement is run with ``mysql -e`` inside the
    ``WP_DOCKER`` container via ``docker exec``; on a non-zero exit the error
    text (minus the client's routine password warning) is printed to stderr.

    Returns:
        None in dry-run mode, True when mysql exited with status 0, False
        otherwise. Callers that ignore the return value keep working as before.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:110]}")
        return None

    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-e', sql]
    # Decode explicitly as UTF-8 (like the query helpers) so error output does
    # not depend on the locale; undecodable bytes must not abort the import.
    result = subprocess.run(cmd, capture_output=True, text=True,
                            encoding='utf-8', errors='replace')
    if result.returncode == 0:
        return True

    err = result.stderr.replace(
        'mysql: [Warning] Using a password on the command line interface can be insecure.\n', '')
    if err.strip():
        print(f" [ERR] {err.strip()[:200]}", file=sys.stderr)
    return False
def esc(s: str) -> str:
    """Escape *s* for embedding in a single-quoted SQL string literal.

    Each backslash is doubled and each single quote is prefixed with a
    backslash; every other character is passed through unchanged.
    """
    replacements = str.maketrans({'\\': '\\\\', "'": "\\'"})
    return s.translate(replacements)
def unhex(val: str) -> str:
    """Decode a hexadecimal string (as produced by SQL ``HEX()``) to text.

    Returns ``''`` for an empty/None value or the literal text ``NULL``.
    The decoded bytes are interpreted as UTF-8, with invalid sequences
    replaced by U+FFFD. A value that is not valid hexadecimal is returned
    unchanged.
    """
    if not val or val == 'NULL':
        return ''
    try:
        raw = bytes.fromhex(val)
    except (ValueError, TypeError):
        # Not hexadecimal (or not a str): hand the value back as it came.
        return val
    return raw.decode('utf-8', errors='replace')
# ── Main ───────────────────────────────────────────────────────────────────────
def _max_wp_post_id():
    """Return MAX(ID) of wp_posts: an int, 0 for an empty table, None on query failure."""
    rows = wp_mysql("SELECT MAX(ID) new_id FROM wp_posts")
    if not rows:
        return None
    value = rows[0].get('new_id')
    if not value or value == 'NULL':
        return 0
    return int(value)


def _k2_fecha(ef_raw):
    """Return the first 10 characters of K2 extra field id 15 in *ef_raw* (JSON), or None."""
    try:
        for field in json.loads(ef_raw):
            if str(field.get('id', '')) == '15':
                # Only the first field with id 15 is considered.
                return str(field.get('value', ''))[:10] or None
    except (ValueError, TypeError, AttributeError):
        # Best effort: malformed or unexpectedly shaped JSON means "no date".
        return None
    return None


def main() -> None:
    """Import the new Joomla letters into WordPress and link K2 articles to them.

    Steps:
      1. Set the global LAST_CONTENT_ID to MAX(_fgj2wp_old_content_id) in
         WordPress (9043 when that is unavailable).
      2. Load the term_taxonomy_ids of the target categories and of the
         Polylang language term with slug 'es'.
      3. Fetch published ew4r_content rows with id > LAST_CONTENT_ID in catids
         27/40/41 and insert each one as a WP post with its metas, categories
         and language relationship.
      4. Unless in dry-run mode (or nothing was imported), give each imported
         K2 article whose extra field 15 matches a letter's date a _carta_id
         meta pointing at that letter, then refresh the category counts.
    """
    global LAST_CONTENT_ID

    # Dynamic detection of the last ew4r_content (letter) already imported.
    r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
                 "WHERE meta_key='_fgj2wp_old_content_id'")
    LAST_CONTENT_ID = int(r[0]['m']) if r and r[0].get('m') and r[0]['m'] != 'NULL' else 9043
    print(f"=== Import nuevas cartas (ew4r_content id > {LAST_CONTENT_ID}) "
          f"{'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    # Load term_taxonomy_ids for the categories we assign.
    all_term_ids = [CAT_FEADULTA, CAT_CARTA_SEMANA, CAT_CARTAS_OTRAS, CAT_CARTA_PASADA]
    rows = wp_mysql(
        f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE term_id IN ({','.join(map(str,all_term_ids))}) AND taxonomy='category'"
    )
    tt_ids = {int(row['term_id']): int(row['term_taxonomy_id']) for row in rows}
    print(f"TT IDs: {tt_ids}")

    # Load the Polylang 'es' language term_taxonomy_id.
    pl_rows = wp_mysql(
        "SELECT tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug='es' LIMIT 1"
    )
    pl_es_tt = int(pl_rows[0]['term_taxonomy_id']) if pl_rows else None
    print(f"Polylang ES tt_id: {pl_es_tt}")

    # Fetch the new letters from Joomla (text columns travel HEX-encoded so
    # tabs/newlines in the content cannot corrupt the tab-separated output).
    print("\nObteniendo cartas nuevas de Joomla...")
    query = (
        f"SELECT id, HEX(title) title, HEX(alias) alias, "
        f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
        f"catid, created, created_by "
        f"FROM ew4r_content "
        f"WHERE state=1 AND id > {LAST_CONTENT_ID} AND catid IN (27,40,41) "
        f"ORDER BY id;"
    )
    items = joomla_query(query)
    print(f"Cartas a importar: {len(items)}")

    # The weekly letter is ALWAYS signed by Inma Calvo (WP user 1048
    # icalvotorre), even when the webmaster (José Chicharro / josek 1049)
    # created it in Joomla.
    CARTA_AUTHOR = 1048

    # Map letter date (YYYY-MM-DD) → wp_id, used to assign _carta_id to K2 articles.
    fecha_a_wp_carta = {}
    for item in items:
        joomla_id = int(item['id'])
        catid = int(item['catid'])
        title = unhex(item.get('title', ''))
        alias = unhex(item.get('alias', ''))
        intro = unhex(item.get('introtext', ''))
        full = unhex(item.get('fulltext_col', ''))
        created = item.get('created', '') or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        content = intro + ('\n<!--more-->\n' + full if full.strip() else '')
        wp_cats = CATID_TO_WP.get(catid, [CAT_CARTAS_OTRAS, CAT_FEADULTA])
        fecha_carta = created[:10]  # YYYY-MM-DD
        print(f"\n [{joomla_id}] {title[:55]} | catid={catid} | fecha={fecha_carta}")
        print(f" → WP cats: {wp_cats}")

        # Every externally sourced value is escaped before it enters the SQL.
        post_slug = esc(alias[:200])
        post_title = esc(title)
        post_content = esc(content)
        post_date = esc(created)
        insert_sql = (
            f"INSERT INTO wp_posts "
            f"(post_author, post_date, post_date_gmt, post_content, post_title, "
            f"post_excerpt, post_status, comment_status, ping_status, post_name, "
            f"post_type, post_modified, post_modified_gmt, comment_count, "
            f"to_ping, pinged, post_content_filtered) VALUES ("
            f"{CARTA_AUTHOR}, '{post_date}', '{post_date}', '{post_content}', "
            f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
            f"'post', '{post_date}', '{post_date}', 0, '', '', '')"
        )

        if DRY_RUN:
            wp_execute(insert_sql)  # only prints the statement
            fecha_a_wp_carta[fecha_carta] = f"DRY_WP_ID_for_{joomla_id}"
            continue

        # The INSERT runs in its own mysql session, so LAST_INSERT_ID() is not
        # available. Compare MAX(ID) before and after instead: if it did not
        # grow, the INSERT failed and MAX(ID) belongs to an unrelated post that
        # must not receive this letter's metas and categories.
        max_id_before = _max_wp_post_id()
        if max_id_before is None:
            print(" [ERR] No se pudo obtener ID del post", file=sys.stderr)
            continue
        wp_execute(insert_sql)
        new_wp_id = _max_wp_post_id()
        if new_wp_id is None:
            print(" [ERR] No se pudo obtener ID del post", file=sys.stderr)
            continue
        if new_wp_id <= max_id_before:
            print(f" [ERR] El INSERT del post falló (Joomla id {joomla_id}); se omite",
                  file=sys.stderr)
            continue
        print(f" → WP post ID={new_wp_id}")
        fecha_a_wp_carta[fecha_carta] = new_wp_id

        # Metas
        wp_execute(f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES ({new_wp_id}, '_fgj2wp_old_content_id', '{joomla_id}')")
        wp_execute(f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES ({new_wp_id}, 'Idioma', '1')")

        # Categories (term_ids whose taxonomy row was not found are skipped).
        for term_id in wp_cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({new_wp_id}, {tt_id})"
                )

        # Polylang ES
        if pl_es_tt:
            wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({new_wp_id}, {pl_es_tt})"
            )

    print(f"\nFecha→WP carta map: {fecha_a_wp_carta}")

    # ── Assign _carta_id to the imported K2 articles ─────────────────────────
    if DRY_RUN or not fecha_a_wp_carta:
        print("\n[SKIP] Asignación _carta_id (dry-run o sin cartas importadas)")
        return

    print("\n=== Asignando _carta_id a artículos K2 ===")
    # Fetch K2 articles with their extra fields, bounded by the oldest date of
    # the letters imported in this run (avoids scanning the whole table).
    min_fecha = esc(min(fecha_a_wp_carta))
    k2_query = (
        f"SELECT id, HEX(extra_fields) ef "
        f"FROM ew4r_k2_items WHERE published=1 AND created >= '{min_fecha} 00:00:00' "
        f"ORDER BY id;"
    )
    k2_items = joomla_query(k2_query)
    print(f"Artículos K2 a procesar (desde {min_fecha}): {len(k2_items)}")

    assigned = 0
    for k2item in k2_items:
        k2_id = int(k2item['id'])

        # Cheap in-memory filter first: only articles whose date matches an
        # imported letter are worth a round trip to the WordPress database.
        fecha_art = _k2_fecha(unhex(k2item.get('ef', '')))
        if not fecha_art:
            continue
        carta_wp_id = fecha_a_wp_carta.get(fecha_art)
        if not carta_wp_id:
            continue

        # REAL wp_id via meta (NOT a fixed offset, which overwrote metas in
        # successive delta imports).
        wp_rows = wp_mysql(
            f"SELECT post_id FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
            f"AND meta_value='{k2_id}' LIMIT 1"
        )
        if not wp_rows:
            continue
        wp_id = int(wp_rows[0]['post_id'])

        # Do not add a second _carta_id if the post already has one.
        existing = wp_mysql(
            f"SELECT meta_id FROM wp_postmeta WHERE post_id={wp_id} AND meta_key='_carta_id' LIMIT 1"
        )
        if existing:
            continue

        wp_execute(
            f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
            f"VALUES ({wp_id}, '_carta_id', '{carta_wp_id}')"
        )
        print(f" K2 {k2_id} (WP {wp_id}) → _carta_id={carta_wp_id} [{fecha_art}]")
        assigned += 1
    print(f"\n_carta_id asignado a {assigned} artículos.")

    # Refresh category counts.
    # NOTE(review): this counts every relationship row regardless of post
    # status — confirm that matches how WordPress is expected to count here.
    print("\nActualizando counts de categorías...")
    if tt_ids:
        tt_str = ','.join(str(v) for v in tt_ids.values())
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr "
            f"WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_str})"
        )
    else:
        # An empty list would produce the invalid SQL "IN ()".
        print(" [ERR] Sin term_taxonomy_ids cargados; no se actualizan counts",
              file=sys.stderr)
    print("\nListo.")
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()