#!/usr/bin/env python3
"""
import_new_k2_items.py

Import the new K2 items from the production Joomla site into the local
(Docker) WordPress database.

Only published K2 items whose id is greater than the highest
``_fgj2wp_old_k2_id`` already stored in WordPress are imported (17873 is used
when WordPress has no such meta yet).

Joomla connection: SSH + MySQL at feadulta@134.0.10.170
WP connection:     docker exec wordpress-mysql

WP categories assigned according to extra_fields:
  - ES + has "libro de la biblia" (id 9)          → Comentarios al evangelio (1647) + Feadulta (71)
  - ES + no id 9 + title "DOMINGO/SEMANA SANTA/…" → Eucaristía (1648) + Feadulta (71)
  - ES + no id 9 + anything else                  → Artículos (1650) + Feadulta (71)
  - Not ES                                        → Artículos (1650) + Feadulta (71)

Polylang language assigned according to extra_field id 16:
  1=es, 2=en, 3=fr, 4=it, 5=pt

Pass ``--dry-run`` to print the write statements instead of executing them.
"""

import json
import os
import re
import shlex
import subprocess
import sys
from datetime import datetime

# ── Configuration ──────────────────────────────────────────────────────────────

# NOTE(security): these credentials are hard-coded in the source. The literal
# values are kept as defaults so existing invocations keep working, but each
# password can (and should) be supplied through the environment variable of the
# same name; the literals should then be rotated and removed from this file.
JOOMLA_SSH_HOST = "134.0.10.170"
JOOMLA_SSH_USER = "feadulta"
JOOMLA_SSH_PASS = os.environ.get("JOOMLA_SSH_PASS", "C6c2A!mAl3Wj.BQF")
JOOMLA_DB_HOST = "127.0.0.1"
JOOMLA_DB_USER = "fejoomla3"
JOOMLA_DB_PASS = os.environ.get("JOOMLA_DB_PASS", "5FF-}5^[>7^pK4W9")
JOOMLA_DB_NAME = "fejoomla3"

WP_DOCKER = "wordpress-mysql"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = os.environ.get("WP_DB_PASS", "wordpress_pass")
WP_DB_NAME = "wordpress_db"
WP_DB_HOST = "wordpress-mysql"  # inside the container

# Computed dynamically in main(): MAX(_fgj2wp_old_k2_id) found in WP.
LAST_K2_ID = None
# Value used for LAST_K2_ID when WP holds no _fgj2wp_old_k2_id meta at all.
DEFAULT_LAST_K2_ID = 17873

# WP category term_ids. They are translated to term_taxonomy_ids at run time by
# load_term_taxonomy_ids().
CAT_FEADULTA = 71
CAT_ARTICULOS = 1650
CAT_EVANGELIO = 1647
CAT_EUCARISTIA = 1648

LANG_MAP = {1: 'es', 2: 'en', 3: 'fr', 4: 'it', 5: 'pt'}

DOMINGO_RE = r'DOMINGO|SEMANA SANTA|SEMANA DE PASCUA|PENTECOST|NAVIDAD|EPIFAN'
# Compiled once instead of on every determine_categories() call.
_DOMINGO_PATTERN = re.compile(DOMINGO_RE, re.IGNORECASE)

DRY_RUN = '--dry-run' in sys.argv


# ── Helpers ────────────────────────────────────────────────────────────────────

def _parse_tsv(output: str) -> list[dict]:
    """Turn ``mysql -B`` output (header line + tab-separated rows) into dicts.

    Only surrounding newlines are stripped: stripping all whitespace would eat
    a trailing tab and silently drop an empty last column of the last row.
    Returns an empty list when there is no data row.
    """
    lines = output.strip('\r\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]


def _run_joomla_mysql(query: str) -> subprocess.CompletedProcess:
    """Run *query* on the production Joomla MySQL through SSH.

    The query travels on stdin so neither the local nor the remote shell ever
    parses it (backticks, quotes and ``$`` are safe). The SSH password is handed
    to sshpass through the SSHPASS environment variable (``sshpass -e``) so it
    does not show up in the local process list.
    """
    remote_cmd = (
        f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
        f"-p{shlex.quote(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} "
        f"--default-character-set=utf8mb4 -B"
    )
    cmd = [
        'sshpass', '-e',
        'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
        remote_cmd,
    ]
    return subprocess.run(
        cmd, input=query, capture_output=True, text=True, encoding='utf-8',
        env={**os.environ, 'SSHPASS': JOOMLA_SSH_PASS},
    )


def ssh_mysql(query: str) -> list[dict]:
    """Run a query on the production Joomla MySQL and return its rows.

    Returns one dict per row keyed by column name (all values are strings), or
    an empty list when the command fails (the error goes to stderr) or the
    query yields no rows.
    """
    result = _run_joomla_mysql(query)
    if result.returncode != 0:
        print(f"[ERROR SSH] {result.stderr[:300]}", file=sys.stderr)
        return []
    return _parse_tsv(result.stdout)


def wp_mysql(query: str) -> list[dict]:
    """Run a query on the local WP MySQL through ``docker exec``.

    Returns one dict per row keyed by column name (all values are strings), or
    an empty list when the command fails (the error goes to stderr) or the
    query yields no rows.
    """
    cmd = [
        'docker', 'exec', WP_DOCKER,
        'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
        '--default-character-set=utf8mb4', '-B', '-e', query
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERROR WP] {result.stderr[:300]}", file=sys.stderr)
        return []
    return _parse_tsv(result.stdout)


def wp_execute(sql: str) -> bool:
    """Run an INSERT/UPDATE statement on the WP MySQL.

    In dry-run mode the statement is only printed (truncated) and treated as
    successful. Returns True on success and False when mysql exits with an
    error, so callers can avoid building on a write that never happened.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:120]}")
        return True
    cmd = [
        'docker', 'exec', WP_DOCKER,
        'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
        '--default-character-set=utf8mb4', '-e', sql
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERROR INSERT] {result.stderr[:300]}", file=sys.stderr)
        return False
    return True


def esc(s: str) -> str:
    """Escape a string for use inside a single-quoted MySQL string literal.

    Backslashes and single quotes are backslash-escaped. NUL characters are
    emitted as the MySQL escape ``\\0``: a raw NUL cannot be passed in a
    process argument and would make ``subprocess.run`` raise.
    """
    return s.replace('\\', '\\\\').replace("'", "\\'").replace('\x00', '\\0')


def _unhex(val: str) -> str:
    """Decode a MySQL ``HEX()`` column back to text.

    Empty values and the literal ``NULL`` become ''. A value that is not valid
    hexadecimal is returned unchanged.
    """
    if not val or val == 'NULL':
        return ''
    try:
        return bytes.fromhex(val).decode('utf-8', errors='replace')
    except ValueError:
        return val


# ── Load auxiliary data ────────────────────────────────────────────────────────

def load_user_map() -> dict:
    """Return ``{joomla_user_id: wp_user_id}`` built from ``_fgj2wp_old_user_id``."""
    rows = wp_mysql(
        "SELECT um.meta_value jid, u.ID wid FROM wp_users u "
        "JOIN wp_usermeta um ON um.user_id=u.ID "
        "WHERE um.meta_key='_fgj2wp_old_user_id'"
    )
    m = {}
    for r in rows:
        try:
            m[int(r['jid'])] = int(r['wid'])
        except ValueError:
            # Non-numeric meta value: this user cannot be mapped, skip it.
            pass
    return m


def load_term_taxonomy_ids() -> dict:
    """Return ``{term_id: term_taxonomy_id}`` for the categories used here."""
    term_ids = [CAT_FEADULTA, CAT_ARTICULOS, CAT_EVANGELIO, CAT_EUCARISTIA]
    ids_str = ','.join(str(x) for x in term_ids)
    rows = wp_mysql(
        f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE term_id IN ({ids_str}) AND taxonomy='category'"
    )
    return {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}


def load_polylang_term_ids() -> dict:
    """Return ``{'es': tt_id, 'en': tt_id, ...}`` for the Polylang language terms."""
    rows = wp_mysql(
        "SELECT t.slug, tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug IN ('es','en','fr','it','pt')"
    )
    return {r['slug']: int(r['term_taxonomy_id']) for r in rows}


# ── Parse extra_fields ─────────────────────────────────────────────────────────

def parse_extra_fields(ef_json: str) -> dict:
    """Extract the fields this script uses from a K2 ``extra_fields`` JSON string.

    Returns a dict with the keys:
      - ``lang_val``: int value of field id 16, or None if absent/not an int.
      - ``has_libro``: True when a field with id 9 is present.
      - ``cita_biblica``: value of field id 14 (list values joined with ','),
        or None.

    Empty input, the literal ``NULL``, invalid JSON, or JSON that is not a list
    yield the defaults; list entries that are not objects are ignored.
    """
    result = {'lang_val': None, 'has_libro': False, 'cita_biblica': None}
    if not ef_json or ef_json == 'NULL':
        return result
    try:
        fields = json.loads(ef_json)
    except json.JSONDecodeError:
        return result
    if not isinstance(fields, list):
        return result
    for f in fields:
        if not isinstance(f, dict):
            continue
        fid = str(f.get('id', ''))
        val = f.get('value')
        if fid == '16' and val is not None:
            try:
                result['lang_val'] = int(val)
            except (ValueError, TypeError):
                pass
        elif fid == '9':
            result['has_libro'] = True
        elif fid == '14':
            if isinstance(val, list):
                result['cita_biblica'] = ','.join(str(v) for v in val)
            else:
                result['cita_biblica'] = str(val) if val else None
    return result


def determine_categories(ef: dict, title: str) -> list[int]:
    """Return the list of category term_ids for a post.

    *ef* is the dict produced by parse_extra_fields(). A missing language is
    treated as Spanish. Feadulta is always included, plus exactly one of
    Evangelio / Eucaristía / Artículos.
    """
    lang = ef.get('lang_val')
    es = (lang == 1 or lang is None)
    cats = [CAT_FEADULTA]
    if es and ef.get('has_libro'):
        cats.append(CAT_EVANGELIO)
    elif es and _DOMINGO_PATTERN.search(title):
        cats.append(CAT_EUCARISTIA)
    else:
        cats.append(CAT_ARTICULOS)
    return cats


# ── Main import ────────────────────────────────────────────────────────────────

def main():
    """Import every published K2 item newer than the last one present in WP."""
    global LAST_K2_ID
    # Detect the last imported K2 id dynamically (avoids hard-coding it and
    # re-importing previous deltas).
    r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
                 "WHERE meta_key='_fgj2wp_old_k2_id'")
    has_max = bool(r) and bool(r[0].get('m')) and r[0]['m'] != 'NULL'
    LAST_K2_ID = int(r[0]['m']) if has_max else DEFAULT_LAST_K2_ID
    print(f"=== Import K2 items > {LAST_K2_ID} → WP local {'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    user_map = load_user_map()
    print(f"Usuarios mapeados: {len(user_map)}")

    tt_ids = load_term_taxonomy_ids()
    print(f"Categorías TT IDs: {tt_ids}")

    pl_ids = load_polylang_term_ids()
    print(f"Polylang idiomas: {pl_ids}")

    # Make sure K2 ids already present in WP are not imported again.
    existing = wp_mysql(
        f"SELECT meta_value FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
        f"AND meta_value+0 > {LAST_K2_ID}"
    )
    existing_ids = {int(r['meta_value']) for r in existing}
    print(f"K2 IDs > {LAST_K2_ID} ya en WP: {len(existing_ids)}")

    # Fetch the Joomla items through SSH + MySQL (query sent on stdin to avoid
    # shell escaping).
    print("\nObteniendo K2 items de Joomla prod...")
    # Text columns are HEX-encoded so HTML containing line breaks cannot break
    # the TSV parsing.
    query = (
        f"SELECT id, HEX(title) title, HEX(alias) alias, "
        f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
        f"created, created_by, HEX(extra_fields) extra_fields, publish_up "
        f"FROM ew4r_k2_items "
        f"WHERE published=1 AND id > {LAST_K2_ID} ORDER BY id;"
    )
    result = _run_joomla_mysql(query)
    if result.returncode != 0:
        print(f"ERROR: {result.stderr[:500]}")
        sys.exit(1)

    items = _parse_tsv(result.stdout)
    if not items:
        print("No se encontraron items nuevos.")
        return

    print(f"Items a importar: {len(items)}")

    stats = {'ok': 0, 'skip': 0, 'err': 0}

    for item in items:
        k2_id = int(item['id'])

        if k2_id in existing_ids:
            print(f" [SKIP] K2 id={k2_id} ya existe en WP")
            stats['skip'] += 1
            continue

        title = _unhex(item.get('title', ''))
        alias = _unhex(item.get('alias', ''))
        intro = _unhex(item.get('introtext', ''))
        full = _unhex(item.get('fulltext_col', ''))
        ef_json = _unhex(item.get('extra_fields', '')) or '[]'
        created = item.get('created', '')
        if not created or created == 'NULL':
            created = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        created_by_raw = item.get('created_by', '0')
        created_by = int(created_by_raw) if created_by_raw and created_by_raw != 'NULL' else 0

        # Combined content: intro text, then the full text when there is one.
        if full and full.strip():
            content = intro + '\n\n' + full
        else:
            content = intro

        # WP author
        wp_author = user_map.get(created_by, 1)  # fallback: admin
        if created_by and created_by not in user_map:
            # Joomla author without an equivalent WP user: the post stays
            # attributed to «Fe Adulta». The data is NOT lost: fix it after the
            # delta with scripts/fix_k2_authors.php, which creates the user
            # (real name from ew4r_users) and reassigns post_author (#143).
            print(f" ⚠ autor K2 {created_by} sin user WP → queda en 'Fe Adulta' "
                  f"(corregir con fix_k2_authors.php)")

        # Extra fields
        ef = parse_extra_fields(ef_json)
        lang_code = LANG_MAP.get(ef.get('lang_val'), 'es')
        cats = determine_categories(ef, title)

        print(f" [{k2_id}] {title[:50]} | lang={lang_code} | cats={cats}")

        # INSERT post
        post_slug = esc(alias[:200]) if alias else ''
        post_title = esc(title)
        post_content = esc(content)
        post_date = created
        post_date_gmt = created  # simplified (no time-zone adjustment)

        insert_post = (
            f"INSERT INTO wp_posts "
            f"(post_author, post_date, post_date_gmt, post_content, post_title, "
            f"post_excerpt, post_status, comment_status, ping_status, post_name, "
            f"post_type, post_modified, post_modified_gmt, comment_count, "
            f"to_ping, pinged, post_content_filtered) VALUES ("
            f"{wp_author}, '{post_date}', '{post_date_gmt}', '{post_content}', "
            f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
            f"'post', '{post_date}', '{post_date_gmt}', 0, '', '', '')"
        )

        if DRY_RUN:
            wp_execute(insert_post)  # only prints the statement
            stats['ok'] += 1
            continue

        # Run the INSERT and read LAST_INSERT_ID() in the SAME mysql invocation:
        # every docker exec opens a new connection, so a separate query would
        # see 0, and SELECT MAX(ID) would return an unrelated older post if the
        # INSERT had failed. When the INSERT fails mysql aborts, wp_mysql()
        # returns no rows and the item is counted as an error.
        new_id_rows = wp_mysql(insert_post + "; SELECT LAST_INSERT_ID() AS new_id")
        try:
            new_wp_id = int(new_id_rows[0]['new_id']) if new_id_rows else 0
        except (KeyError, ValueError):
            new_wp_id = 0
        if new_wp_id <= 0:
            print(f" [ERROR] No se pudo obtener el ID del post para k2_id={k2_id}")
            stats['err'] += 1
            continue
        print(f" → WP post ID={new_wp_id}")

        writes_ok = True

        # INSERT metas
        metas = [
            ('_fgj2wp_old_k2_id', str(k2_id)),
            ('Idioma', str(ef.get('lang_val') or 1)),
        ]
        for meta_key, meta_val in metas:
            if not wp_execute(
                f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
                f"VALUES ({new_wp_id}, '{esc(meta_key)}', '{esc(meta_val)}')"
            ):
                writes_ok = False

        # Categories (term_ids without a known term_taxonomy_id are skipped)
        for term_id in cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                if not wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({new_wp_id}, {tt_id})"
                ):
                    writes_ok = False

        # Polylang language
        pl_tt = pl_ids.get(lang_code)
        if pl_tt:
            if not wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({new_wp_id}, {pl_tt})"
            ):
                writes_ok = False

        if writes_ok:
            stats['ok'] += 1
        else:
            # The post exists but some meta/term write failed; report it rather
            # than counting it as a clean import.
            print(f" [ERROR] Post {new_wp_id} creado pero con metadatos/categorías "
                  f"incompletos (k2_id={k2_id})")
            stats['err'] += 1

    # Refresh the category counts
    if not DRY_RUN and stats['ok'] > 0:
        print("\nActualizando counts de categorías...")
        tt_ids_list = ','.join(str(v) for v in tt_ids.values())
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_ids_list})"
        )

    print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip, {stats['err']} err ===")


if __name__ == '__main__':
    main()