Añadir mu-plugins y scripts de feadulta

This commit is contained in:
2026-06-28 15:10:46 -04:00
parent bce7e42f44
commit b6116b066d
106 changed files with 17600 additions and 2 deletions
+180
View File
@@ -0,0 +1,180 @@
#!/usr/bin/env python3
"""
Aplica `clasificacion_articulos_regen.csv` a wp_term_relationships.
MODO CONSERVADOR (--mode=add): solo AÑADE las cats nuevas que el CSV indique
y que no estén ya. NO borra cats existentes. Maximiza seguridad — no perdemos
atribuciones legítimas que el CSV viejo o asignaciones manuales pusieran.
MODO ESTRICTO (--mode=replace): para los posts presentes en el CSV, sustituye
el conjunto de cats {1645,1646,1647,1648,1649,1650} por exactamente las que
el CSV indique. Borra las que sobren. Posts NO presentes en CSV no se tocan.
Recalcula `wp_term_taxonomy.count` al final.
Issue: rafa/feadulta#42
Uso: python3 aplicar_clasificacion_a_bd.py [--csv FILE] [--mode add|replace] [--dry-run]
"""
import argparse, csv, subprocess, sys
from collections import defaultdict
try:
import pymysql
except ImportError:
sys.exit('requiere pymysql')
# Maps the category names accepted in the CSV column `categoria_propuesta`
# to the term_id values this script manages.
CAT_NAME_TO_TERM = {
'lectura': 1645,
'comentario_editorial':1646,
'comentario': 1647,
'eucaristia': 1648,
'multimedia': 1649,
'articulo': 1650,
# 'noticia': 1651, # not implemented
# 'otro': 1652, # not implemented
# 'effa': ?, # not implemented
}
# Set of term_ids this script is allowed to add or remove.
MANAGED_TERMS = set(CAT_NAME_TO_TERM.values())
def get_conn():
    """Open a pymysql connection to the `wordpress-mysql` Docker container.

    The container address is read with `docker inspect`; the connection is
    opened with autocommit disabled, so the caller must commit explicitly.

    Raises:
        subprocess.CalledProcessError: if `docker inspect` exits non-zero.
    """
    inspect_cmd = [
        'docker', 'inspect', 'wordpress-mysql', '--format',
        '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
    ]
    inspected = subprocess.run(inspect_cmd, capture_output=True, text=True, check=True)
    container_ip = inspected.stdout.strip()
    return pymysql.connect(
        host=container_ip,
        user='wordpress_user',
        password='wordpress_pass',
        database='wordpress_db',
        charset='utf8mb4',
        autocommit=False,
    )
def get_term_taxonomy_ids(conn, term_ids):
    """Return a dict term_id -> term_taxonomy_id for taxonomy='category'.

    Args:
        conn: open database connection exposing `cursor()`.
        term_ids: iterable of term ids; they are interpolated into the SQL
            `IN (...)` list via `str()`.
    """
    id_list = ','.join(map(str, term_ids))
    query = f"""
        SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy
        WHERE taxonomy='category' AND term_id IN ({id_list})
    """
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    return dict(rows)
def load_csv(path):
    """Read the classification CSV and return a dict post_id -> set(cat_name).

    Only rows whose `post_id` is non-empty and whose `categoria_propuesta`
    is a key of CAT_NAME_TO_TERM are kept; every other row is ignored.

    Args:
        path: path to a CSV file with `post_id` and `categoria_propuesta`
            columns.

    Returns:
        defaultdict(set) keyed by integer post id.

    Raises:
        ValueError: if a kept row has a non-numeric `post_id`.
    """
    out = defaultdict(set)
    # utf-8-sig drops a leading BOM (otherwise the first header becomes
    # '\ufeffpost_id' and every row is silently skipped); newline='' is what
    # the csv module requires so quoted embedded newlines are parsed correctly.
    with open(path, encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            pid = (row.get('post_id') or '').strip()
            cat = (row.get('categoria_propuesta') or '').strip()
            if pid and cat in CAT_NAME_TO_TERM:
                out[int(pid)].add(cat)
    return out
def current_cats(conn, post_ids, tt_ids):
    """Return, per post, the set of term_ids it currently has among `tt_ids`.

    Args:
        conn: open database connection exposing `cursor()`.
        post_ids: post ids to look up; an empty value short-circuits to {}.
        tt_ids: term_taxonomy_ids that restrict which relationships count.

    Returns:
        Mapping object_id -> set(term_id); posts without any match are absent.
    """
    if not post_ids:
        return {}
    ttid_list = ','.join(map(str, tt_ids))
    pid_list = ','.join(map(str, post_ids))
    query = f"""
        SELECT tr.object_id, tt.term_id
        FROM wp_term_relationships tr
        JOIN wp_term_taxonomy tt ON tt.term_taxonomy_id=tr.term_taxonomy_id
        WHERE tr.object_id IN ({pid_list}) AND tt.term_taxonomy_id IN ({ttid_list})
    """
    found = defaultdict(set)
    with conn.cursor() as cur:
        cur.execute(query)
        for object_id, term_id in cur.fetchall():
            found[object_id].add(term_id)
    return found
def main():
    """Apply the CSV classification to wp_term_relationships.

    Steps: parse the CLI options, load the CSV, resolve the managed term ids
    to term_taxonomy_ids, diff desired vs. current categories per post, and
    (unless --dry-run) insert/delete the relationships, recompute
    wp_term_taxonomy.count for the managed terms and commit.

    In `add` mode nothing is ever deleted; in `replace` mode managed
    categories not listed in the CSV are removed from the posts it mentions.
    The connection is always closed, and a failed write is rolled back.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--csv', default='/tmp/clasif_new.csv')
    ap.add_argument('--mode', choices=['add', 'replace'], default='add')
    ap.add_argument('--dry-run', action='store_true')
    args = ap.parse_args()

    print(f'CSV: {args.csv}', file=sys.stderr)
    print(f'Mode: {args.mode}{" (DRY)" if args.dry_run else ""}', file=sys.stderr)

    desired_by_pid = load_csv(args.csv)
    print(f'Posts en CSV: {len(desired_by_pid)}', file=sys.stderr)

    conn = get_conn()
    try:
        term_to_tt = get_term_taxonomy_ids(conn, MANAGED_TERMS)
        print(f'Term taxonomy ids: {term_to_tt}', file=sys.stderr)
        if len(term_to_tt) != len(CAT_NAME_TO_TERM):
            sys.exit(f'No encuentro todos los term_ids: {set(MANAGED_TERMS) - set(term_to_tt)}')
        in_ttids = ','.join(str(t) for t in term_to_tt.values())

        # Current managed categories of the posts named in the CSV, read in
        # batches to keep each IN (...) list bounded.
        pids = list(desired_by_pid)
        BATCH = 5000
        current_by_pid = {}
        for i in range(0, len(pids), BATCH):
            chunk = pids[i:i + BATCH]
            current_by_pid.update(current_cats(conn, chunk, term_to_tt.values()))

        # Work out additions / removals as (object_id, term_taxonomy_id).
        to_add = []
        to_del = []
        for pid, desired_names in desired_by_pid.items():
            desired_tids = {CAT_NAME_TO_TERM[n] for n in desired_names}
            current_tids = current_by_pid.get(pid, set())
            for tid in desired_tids - current_tids:
                to_add.append((pid, term_to_tt[tid]))
            # Only replace mode removes managed categories absent from the CSV.
            if args.mode == 'replace':
                for tid in current_tids - desired_tids:
                    to_del.append((pid, term_to_tt[tid]))

        print(f'A añadir: {len(to_add)}', file=sys.stderr)
        print(f'A quitar: {len(to_del)}', file=sys.stderr)

        if args.dry_run:
            print('\n--- 5 ejemplos añadir ---', file=sys.stderr)
            for x in to_add[:5]:
                print(' ', x, file=sys.stderr)
            print('\n--- 5 ejemplos quitar ---', file=sys.stderr)
            for x in to_del[:5]:
                print(' ', x, file=sys.stderr)
            return

        try:
            with conn.cursor() as c:
                # Bulk insert; INSERT IGNORE skips pairs that already exist.
                for i in range(0, len(to_add), 1000):
                    chunk = to_add[i:i + 1000]
                    vals = ','.join(f'({p},{t})' for p, t in chunk)
                    c.execute(f'INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) VALUES {vals}')
                for i in range(0, len(to_del), 1000):
                    chunk = to_del[i:i + 1000]
                    conds = ' OR '.join(f'(object_id={p} AND term_taxonomy_id={t})' for p, t in chunk)
                    c.execute(f'DELETE FROM wp_term_relationships WHERE {conds}')
                # Recompute the stored counts of the managed terms.
                c.execute(f"""
                    UPDATE wp_term_taxonomy tt
                    SET tt.count = (SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id)
                    WHERE tt.term_taxonomy_id IN ({in_ttids})
                """)
            conn.commit()
        except Exception:
            # Leave the database untouched if any statement failed.
            conn.rollback()
            raise
        print('Commit OK.', file=sys.stderr)

        # Final counts, for the operator to eyeball.
        with conn.cursor() as c:
            c.execute(f"""
                SELECT t.term_id, t.slug, tt.count FROM wp_term_taxonomy tt
                JOIN wp_terms t USING(term_id)
                WHERE tt.term_taxonomy_id IN ({in_ttids}) ORDER BY t.term_id
            """)
            print('\nCats finales:', file=sys.stderr)
            for row in c.fetchall():
                print(f' {row[0]:5d} {row[1]:30s} {row[2]}', file=sys.stderr)
    finally:
        # Runs on success, on sys.exit and on errors alike.
        conn.close()


if __name__ == '__main__':
    main()
+41
View File
@@ -0,0 +1,41 @@
<?php
/**
 * apply_lecturas_wp.php — Creates the translations of the bible readings that
 * were matched by reference (lecturas_apply.py) and links them in Polylang.
 * Existing translations are reused, so the script can be re-run.
 *
 * Run inside the container:
 *   docker cp /tmp/lecturas_creadas.json wordpress-web:/tmp/
 *   docker exec wordpress-web wp eval-file /tmp/apply_lecturas_wp.php [publish]
 *
 * Input: /tmp/lecturas_creadas.json, a list of rows with `es_id` (the Spanish
 * post) and `langs` (lang code => translated HTML content).
 */

// `wp eval-file` hands positional arguments to the file in $args; $argv is
// kept as a fallback for any other way of running the script.
$mode = '';
if (isset($args) && is_array($args) && isset($args[0])) {
    $mode = $args[0];
} elseif (isset($argv[1])) {
    $mode = $argv[1];
}
$status = ($mode === 'publish') ? 'publish' : 'draft';

// Every Polylang call below is required; stop before touching the database
// instead of failing half-way through, after a post has been inserted.
foreach (['pll_get_post_translations', 'pll_set_post_language', 'pll_get_term', 'pll_save_post_translations'] as $fn) {
    if (!function_exists($fn)) {
        echo "ERROR: Polylang no está activo (falta $fn).\n";
        exit(1);
    }
}

$json_path = '/tmp/lecturas_creadas.json';
$raw = is_readable($json_path) ? file_get_contents($json_path) : false;
$data = ($raw === false) ? null : json_decode($raw, true);
if (!is_array($data)) {
    echo "ERROR: no se pudo leer o decodificar $json_path\n";
    exit(1);
}

$created = 0;
$posts_done = 0;
$skipped = 0;

foreach ($data as $row) {
    $es_id = (is_array($row) && isset($row['es_id'])) ? (int) $row['es_id'] : 0;
    $es = $es_id ? get_post($es_id) : null;
    if (!$es) {
        $skipped++;
        continue;
    }

    // Start from the translation group Polylang already knows about.
    $existing = pll_get_post_translations($es_id);
    $group = $existing;
    $es_cats = wp_get_post_categories($es_id);

    foreach (['en', 'fr', 'it', 'pt'] as $lang) {
        // Already translated: keep the existing post.
        if (!empty($existing[$lang])) {
            $group[$lang] = $existing[$lang];
            continue;
        }
        if (empty($row['langs'][$lang])) {
            continue;
        }

        $id = wp_insert_post([
            'post_title'     => $es->post_title, // bible reference (same in every language)
            'post_content'   => $row['langs'][$lang],
            'post_status'    => $status,
            'post_type'      => 'post',
            'comment_status' => 'closed',
        ], true);
        if (is_wp_error($id)) {
            continue;
        }
        pll_set_post_language($id, $lang);

        // Mirror the Spanish post's categories using their translated terms.
        $cats = [];
        foreach ($es_cats as $cat_id) {
            $translated_cat = pll_get_term($cat_id, $lang);
            if ($translated_cat) {
                $cats[] = $translated_cat;
            }
        }
        if ($cats) {
            wp_set_post_categories($id, $cats);
        }

        $group[$lang] = $id;
        $created++;
    }

    pll_save_post_translations($group);
    $posts_done++;
}

echo "posts ES procesados: $posts_done | traducciones creadas: $created | status=$status | skip=$skipped\n";
+93
View File
@@ -0,0 +1,93 @@
<?php
/**
 * assign_author_photos.php
 *
 * Assigns the avatar images under uploads/quienes_somos/avatars/ to WordPress
 * users by storing the image URL in the 'fea_foto_url' user meta key.
 *
 * Usage: php assign_author_photos.php [--dry-run]
 */
$dry_run = in_array('--dry-run', $argv ?? []);

$pdo = new PDO(
    "mysql:host=wordpress-mysql;dbname=wordpress_db;charset=utf8mb4",
    'wordpress_user', 'wordpress_pass',
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);

$base_url = 'https://farmer.taild3aaf6.ts.net/fea/wp-content/uploads/quienes_somos/avatars/';
$base_dir = '/var/www/html/wp-content/uploads/quienes_somos/avatars/';

// user_id => image file name inside $base_dir.
// NOTE: only the file listed here is tried; no fallback file is looked up.
$mapping = [
    382  => 'col_fraymarcos.png',   // Fray Marcos
    383  => 'col_pagola.png',       // José Antonio Pagola
    384  => 'col_enrique.png',      // Enrique Martínez Lozano
    385  => 'col_galarreta.png',    // José Enrique Galarreta
    386  => 'col_arregi.png',       // José Arregi
    387  => 'col_eloy.png',         // Eloy Roy
    388  => 'col_aleixandre.png',   // Dolores Aleixandre
    389  => 'col_vicente.png',      // Vicente Martínez
    390  => 'col_sandra.png',       // Sandra Hojman
    391  => 'col_mellado.png',      // Julián Mellado
    392  => 'col_gastalver.png',    // Matilde Gastalver
    393  => 'col_koldo.png',        // Koldo Aldai
    394  => 'marta_1.png',          // Marta Salazar
    395  => 'col_florentino.png',   // Florentino Ulibarri
    396  => 'col_rafael.png',       // Rafael Calvo Beca
    405  => 'col_faustino.png',     // Faustino Vilabrille
    407  => 'col_victor.png',       // Víctor Daniel Blanco
    423  => 'col_patxi.png',        // Mari Patxi Ayerra
    468  => 'col_luque.png',        // José Sánchez Luque
    746  => 'col_viki.png',         // Vicky Irigaray
    774  => 'col_sicre.png',        // José Luis Sicre
    842  => 'col_yolanchavez.png',  // Yolanda Chávez
    948  => 'col_inma_calvo.png',   // Inma Calvo Torrejón
    1048 => 'col_inma_calvo.png',   // Inma Calvo (icalvotorre)
    1010 => 'col_inigo-garcia.png', // Íñigo García Blanco
];

echo "=== Asignar fotos de autor ===\n";
echo $dry_run ? "[DRY RUN]\n\n" : "[LIVE RUN]\n\n";

$ok = 0;
$skip = 0;
$missing = 0;

// Prepared once, reused for every user.
$find_user = $pdo->prepare("SELECT display_name FROM wp_users WHERE ID = ?");

// wp_usermeta has no UNIQUE key on (user_id, meta_key) — its only unique key
// is the auto-increment umeta_id — so INSERT ... ON DUPLICATE KEY UPDATE never
// updates and every run would add another row. Replace the value explicitly:
// delete whatever is stored for this key, then insert the new URL.
$delete_meta = $pdo->prepare("
    DELETE FROM wp_usermeta
    WHERE user_id = ? AND meta_key = 'fea_foto_url'
");
$insert_meta = $pdo->prepare("
    INSERT INTO wp_usermeta (user_id, meta_key, meta_value)
    VALUES (?, 'fea_foto_url', ?)
");

foreach ($mapping as $user_id => $foto) {
    $file = $base_dir . $foto;
    $url = $base_url . $foto;

    // The image must exist on disk.
    if (!file_exists($file)) {
        echo " [MISSING] user $user_id → $foto (archivo no encontrado)\n";
        $missing++;
        continue;
    }

    // The user must exist; the display name is only used for the log line.
    $find_user->execute([$user_id]);
    $name = $find_user->fetchColumn();
    if (!$name) {
        echo " [SKIP] user_id $user_id no existe en la BD\n";
        $skip++;
        continue;
    }

    echo " [OK] $name → $foto\n";
    if (!$dry_run) {
        // Delete + insert as one unit so a failure cannot leave the user
        // without any photo URL.
        $pdo->beginTransaction();
        try {
            $delete_meta->execute([$user_id]);
            $insert_meta->execute([$user_id, $url]);
            $pdo->commit();
        } catch (Exception $e) {
            $pdo->rollBack();
            throw $e;
        }
    }
    $ok++;
}

echo "\n=== Resultado ===\n";
echo "Asignadas: $ok\n";
echo "Archivos no encontrados: $missing\n";
echo "Usuarios no encontrados: $skip\n";
echo "\nDone.\n";
+182
View File
@@ -0,0 +1,182 @@
<?php
/**
 * assign_polylang_languages.php
 *
 * Assigns a Polylang language to every WordPress post, based on the "Idioma"
 * extra field (extra_field id=16) of the K2 Joomla item it was imported from.
 * Posts and K2 items are matched through the _fgj2wp_old_k2_id post meta.
 *
 * K2 → Polylang map:
 *   1 = Spanish    → es
 *   2 = English    → en
 *   3 = French     → fr
 *   4 = Italian    → it
 *   5 = Portuguese → pt
 *
 * Requirements:
 *   - Polylang installed and active
 *   - The 5 languages created in Polylang (es, en, fr, it, pt)
 *   - Joomla DB reachable (adjust the credentials below if needed)
 *
 * Usage:
 *   wp eval-file assign_polylang_languages.php
 *   or request this file directly in a browser with ?run_assign_lang=1
 *   (it then bootstraps WordPress through ../../wp-load.php).
 *
 * If WordPress itself loads this file during an ordinary request (for example
 * because it sits in mu-plugins) it does nothing: it only runs under WP-CLI
 * or when requested directly.
 */
$fea_is_cli = defined('WP_CLI') && WP_CLI;

if ( ! defined('ABSPATH') ) {
    // Direct execution through the browser: load WordPress ourselves.
    define('RUN_VIA_BROWSER', true);
    $_SERVER['HTTP_HOST'] = 'localhost';
    require_once dirname(__FILE__) . '/../../wp-load.php';
} elseif ( ! $fea_is_cli ) {
    // WordPress is including this file as part of a normal request. Running
    // the migration (or exit()-ing) here would break every page load, and
    // regular plugins such as Polylang are not loaded yet at mu-plugin time.
    return;
}

if ( defined('RUN_VIA_BROWSER') && ! isset($_GET['run_assign_lang']) ) {
    echo 'Añade ?run_assign_lang=1 a la URL para ejecutar.';
    exit;
}

if ( ! function_exists('pll_set_post_language') ) {
    echo "ERROR: Polylang no está activo.\n";
    exit(1);
}

// ── Joomla DB configuration ──────────────────────────────────────────────────
$joomla_host = defined('RUN_VIA_BROWSER') ? '127.0.0.1' : 'joomla-mysql';
$joomla_db   = 'joomla_db';
$joomla_user = 'joomla_user';
$joomla_pass = 'joomla_pass';

$jdb = new mysqli($joomla_host, $joomla_user, $joomla_pass, $joomla_db);
if ( $jdb->connect_error ) {
    echo "ERROR conectando a Joomla DB: " . $jdb->connect_error . "\n";
    exit(1);
}
$jdb->set_charset('utf8mb4');

// ── K2 language value → Polylang code ────────────────────────────────────────
$lang_map = [
    '1' => 'es',
    '2' => 'en',
    '3' => 'fr',
    '4' => 'it',
    '5' => 'pt',
];

// ── Languages available in Polylang ──────────────────────────────────────────
$pll_languages = pll_languages_list(['fields' => 'slug']);
echo "Idiomas disponibles en Polylang: " . implode(', ', $pll_languages) . "\n";

$missing_langs = array_diff(array_values($lang_map), $pll_languages);
if ( ! empty($missing_langs) ) {
    echo "AVISO: Faltan idiomas en Polylang: " . implode(', ', $missing_langs) . "\n";
    echo "Créalos en Ajustes → Languages antes de continuar.\n";
    exit(1);
}

// ── Read the language of every published K2 item ─────────────────────────────
// The extra_fields column is matched textually; items without a recognised
// value for field 16 fall back to '1' (Spanish).
$result = $jdb->query("
    SELECT id as k2_id,
    CASE
        WHEN extra_fields LIKE '%\"id\":\"16\",\"value\":\"1\"%' THEN '1'
        WHEN extra_fields LIKE '%\"id\":\"16\",\"value\":\"2\"%' THEN '2'
        WHEN extra_fields LIKE '%\"id\":\"16\",\"value\":\"3\"%' THEN '3'
        WHEN extra_fields LIKE '%\"id\":\"16\",\"value\":\"4\"%' THEN '4'
        WHEN extra_fields LIKE '%\"id\":\"16\",\"value\":\"5\"%' THEN '5'
        ELSE '1'
    END as lang_value
    FROM ew4r_k2_items
    WHERE published = 1
");
if ( ! $result ) {
    // query() returns false on failure; fetch_assoc() on it would be fatal.
    echo "ERROR leyendo idiomas de K2: " . $jdb->error . "\n";
    $jdb->close();
    exit(1);
}

$k2_langs = [];
while ( $row = $result->fetch_assoc() ) {
    $k2_langs[(int)$row['k2_id']] = $lang_map[$row['lang_value']] ?? 'es';
}
$result->free();
$jdb->close();
echo "K2 items con idioma: " . count($k2_langs) . "\n";

// ── Assign the language in WordPress ─────────────────────────────────────────
global $wpdb;

$counts = array_fill_keys(array_values($lang_map), 0);
$counts['sin_k2_id'] = 0;
$counts['ya_asignado'] = 0;
$processed = 0;

// Fetch every post together with its K2 id in a single query.
$rows = $wpdb->get_results("
    SELECT p.ID as wp_id, pm.meta_value as k2_id
    FROM {$wpdb->posts} p
    JOIN {$wpdb->postmeta} pm ON p.ID = pm.post_id
    WHERE pm.meta_key = '_fgj2wp_old_k2_id'
    AND p.post_type = 'post'
    AND p.post_status IN ('publish', 'draft', 'private')
");
$total = count($rows);
echo "Posts WP con _fgj2wp_old_k2_id: {$total}\n";
echo "Procesando...\n";

foreach ( $rows as $row ) {
    $wp_id = (int) $row->wp_id;
    $k2_id = (int) $row->k2_id;

    if ( ! isset($k2_langs[$k2_id]) ) {
        $counts['sin_k2_id']++;
        // No K2 data for this id → assume Spanish.
        pll_set_post_language($wp_id, 'es');
        continue;
    }

    $lang = $k2_langs[$k2_id];
    pll_set_post_language($wp_id, $lang);
    $counts[$lang]++;
    $processed++;

    if ( $processed % 500 === 0 ) {
        echo " {$processed}/{$total}...\n";
        if (ob_get_level()) ob_flush();
        flush();
    }
}

// ── Posts without any K2 id get Spanish ──────────────────────────────────────
$posts_sin_k2 = $wpdb->get_col("
    SELECT p.ID FROM {$wpdb->posts} p
    LEFT JOIN {$wpdb->postmeta} pm ON p.ID = pm.post_id AND pm.meta_key = '_fgj2wp_old_k2_id'
    WHERE pm.post_id IS NULL
    AND p.post_type = 'post'
    AND p.post_status IN ('publish', 'draft', 'private')
");
echo "Posts sin _fgj2wp_old_k2_id (cartas EFFA etc): " . count($posts_sin_k2) . "\n";
foreach ( $posts_sin_k2 as $wp_id ) {
    pll_set_post_language((int)$wp_id, 'es');
}

// ── Remove the bogus "English" tag ───────────────────────────────────────────
$english_tag = get_term_by('slug', 'english', 'post_tag');
if ( $english_tag ) {
    $english_tt_id = (int) $english_tag->term_taxonomy_id;
    $tag_posts = get_posts(['tag_id' => $english_tag->term_id, 'numberposts' => 1]);
    $assoc_count = (int) $wpdb->get_var($wpdb->prepare(
        "SELECT COUNT(*) FROM {$wpdb->term_relationships} WHERE term_taxonomy_id=%d",
        $english_tt_id
    ));

    if ( empty($tag_posts) || $assoc_count < 100 ) {
        wp_delete_term($english_tag->term_id, 'post_tag');
        echo "Tag 'English' falso eliminado.\n";
    } else {
        // Detach in bulk before deleting the term.
        $deleted = (int) $wpdb->delete($wpdb->term_relationships, [
            'term_taxonomy_id' => $english_tt_id
        ]);
        // wp_update_term_count() takes term_taxonomy_ids, not term_ids.
        wp_update_term_count($english_tt_id, 'post_tag');
        wp_delete_term($english_tag->term_id, 'post_tag');
        // Report the rows actually removed rather than a fixed number.
        echo "Tag 'English' falso eliminado ({$deleted} asociaciones borradas).\n";
    }
}

// ── Summary ──────────────────────────────────────────────────────────────────
$names = ['1'=>'Español','2'=>'Inglés','3'=>'Francés','4'=>'Italiano','5'=>'Portugués'];
echo "\n=== RESULTADO ===\n";
foreach ( $lang_map as $val => $slug ) {
    echo " {$names[$val]} ({$slug}): " . ($counts[$slug] ?? 0) . " posts\n";
}
echo " Sin k2_id (→es): " . $counts['sin_k2_id'] . "\n";
echo " Posts sin k2_id (cartas/EFFA): " . count($posts_sin_k2) . "\n";
echo "\nListo.\n";
File diff suppressed because one or more lines are too long
+151
View File
@@ -0,0 +1,151 @@
#!/usr/bin/env python3
"""
audit_translations.py
Audits all new translated posts (ID > 42760) to check:
- Assigned Polylang language
- Detected language of the title
- Detected language of the content
Flags mismatches.
"""
import pymysql
import re
import html
from langdetect import detect, LangDetectException
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
# Map langdetect codes to our Polylang slugs
LANG_MAP = {'es': 'es', 'pt': 'pt', 'fr': 'fr', 'en': 'en', 'it': 'it',
'ca': 'es', # Catalan often confused with Spanish
}
def strip_html(text):
    """Return `text` as plain text: tags removed, entities decoded, whitespace collapsed.

    Falsy input (None, '') yields ''.
    """
    if not text:
        return ''
    # Tags become spaces so words separated only by markup do not fuse.
    without_tags = re.sub(r'<[^>]+>', ' ', text)
    decoded = html.unescape(without_tags)
    collapsed = re.sub(r'\s+', ' ', decoded)
    return collapsed.strip()
def detect_lang(text, min_len=50):
    """Return the language code detected for `text`, or None.

    None is returned when the stripped text is shorter than `min_len`
    characters or when the detector raises LangDetectException.
    """
    candidate = text.strip()
    if len(candidate) >= min_len:
        try:
            return detect(candidate)
        except LangDetectException:
            pass
    return None
def main():
    """Audit published non-Spanish posts with ID > 42760 and report mismatches.

    For each post the assigned Polylang language is compared with the language
    detected in the content and in the title, and the title is compared with
    the title of the Spanish post in the same translation group. Findings are
    printed grouped by assigned language and written to
    /tmp/translation_audit.csv.
    """
    import csv  # local import: only needed to write the report

    db = pymysql.connect(**DB)
    try:
        with db.cursor() as c:
            c.execute("""
                SELECT p.ID, p.post_title, p.post_content,
                t_lang.slug as assigned_lang,
                (
                SELECT p2.post_title FROM wp_posts p2
                JOIN wp_term_relationships trl2 ON p2.ID=trl2.object_id
                JOIN wp_term_taxonomy ttl2 ON trl2.term_taxonomy_id=ttl2.term_taxonomy_id AND ttl2.taxonomy='language'
                JOIN wp_terms tl2 ON ttl2.term_id=tl2.term_id AND tl2.slug='es'
                JOIN wp_term_relationships trg2 ON p2.ID=trg2.object_id
                JOIN wp_term_taxonomy ttg2 ON trg2.term_taxonomy_id=ttg2.term_taxonomy_id AND ttg2.taxonomy='post_translations'
                WHERE ttg2.term_taxonomy_id = (
                SELECT ttg3.term_taxonomy_id FROM wp_term_relationships trg3
                JOIN wp_term_taxonomy ttg3 ON trg3.term_taxonomy_id=ttg3.term_taxonomy_id AND ttg3.taxonomy='post_translations'
                WHERE trg3.object_id=p.ID LIMIT 1
                )
                LIMIT 1
                ) as es_title
                FROM wp_posts p
                JOIN wp_term_relationships trl ON p.ID=trl.object_id
                JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
                JOIN wp_terms t_lang ON ttl.term_id=t_lang.term_id
                WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
                AND t_lang.slug != 'es'
                ORDER BY t_lang.slug, p.ID
            """)
            posts = c.fetchall()
    finally:
        # Close the connection even if the query fails.
        db.close()

    print(f"Auditing {len(posts)} translated posts...\n")

    issues = []
    for p in posts:
        post_id = p['ID']
        assigned = p['assigned_lang']
        title = p['post_title'] or ''
        es_title = p['es_title'] or ''
        content = strip_html(p['post_content'] or '')[:600]  # first 600 chars for detection

        content_lang = detect_lang(content, min_len=100)
        content_lang_norm = LANG_MAP.get(content_lang, content_lang)

        # Title identical (case-insensitively) to a non-empty Spanish title.
        title_is_spanish = bool(es_title.strip()) and title.strip().lower() == es_title.strip().lower()

        title_lang = detect_lang(title, min_len=30)
        title_lang_norm = LANG_MAP.get(title_lang, title_lang)

        problems = []

        if content_lang_norm and content_lang_norm != assigned:
            # es/pt confusion is tolerated only for very short content.
            es_pt_pair = content_lang_norm in ('es', 'pt') and assigned in ('es', 'pt')
            if not (es_pt_pair and len(content) < 200):
                # Explicit separator: "content=es≠fr" rather than "content=esfr".
                problems.append(f"content={content_lang_norm}≠{assigned}")

        if title_is_spanish:
            problems.append("title=ES_ORIGINAL")
        elif title_lang_norm and title_lang_norm != assigned and len(title) > 20:
            # es/pt confusion is always tolerated for titles.
            if not (title_lang_norm in ('es', 'pt') and assigned in ('es', 'pt')):
                problems.append(f"title_lang={title_lang_norm}≠{assigned}")

        if problems:
            issues.append({
                'id': post_id,
                'assigned': assigned,
                'problems': problems,
                'title': title[:70],
                'content_start': content[:80],
            })

    # Summary by language
    print(f"{'='*70}")
    print(f"ISSUES FOUND: {len(issues)} out of {len(posts)} posts")
    print(f"{'='*70}\n")

    by_lang = {}
    for issue in issues:
        by_lang.setdefault(issue['assigned'], []).append(issue)

    for lang in sorted(by_lang):
        lang_issues = by_lang[lang]
        print(f"--- {lang.upper()} ({len(lang_issues)} issues) ---")
        for i in sorted(lang_issues, key=lambda x: x['problems'][0]):
            print(f" [{i['id']}] {', '.join(i['problems'])}")
            print(f" Title: {i['title']}")
            print(f" Content: {i['content_start']}")
            print()

    # Write CSV for easier review. csv.writer handles quoting of commas,
    # quotes and embedded newlines; encoding is fixed so the non-ASCII text
    # does not depend on the locale.
    with open('/tmp/translation_audit.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['id', 'assigned_lang', 'problems', 'title', 'content_start'])
        for i in issues:
            writer.writerow([
                i['id'], i['assigned'], ','.join(i['problems']),
                i['title'], i['content_start'],
            ])
    print("CSV saved to /tmp/translation_audit.csv")


if __name__ == '__main__':
    main()
+50
View File
@@ -0,0 +1,50 @@
<?php
/**
 * Creates one WordPress author per language for the two bible "authors"
 * (user 408 and user 409, see $DEF) and reassigns the translated posts to them.
 *
 * Pass 1 follows the Polylang translation group of every Spanish post of
 * those authors; pass 2 catches any non-Spanish post still owned by them.
 */
if (!function_exists('pll_get_post_translations')) {
    // Stop before creating users if the translation lookup cannot work.
    echo "ERROR: Polylang no está activo.\n";
    exit(1);
}

// Spanish author id => [language => display name of the per-language author].
$DEF = [
    408 => ['en'=>'New Testament','fr'=>'Nouveau Testament','it'=>'Nuovo Testamento','pt'=>'Novo Testamento'],
    409 => ['en'=>'Old Testament','fr'=>'Ancien Testament','it'=>'Antico Testamento','pt'=>'Antigo Testamento'],
];
// Login prefix per Spanish author; the language code is appended.
$LOGIN = [408=>'nt', 409=>'at'];
$LANGS = ['en', 'fr', 'it', 'pt'];

// 1) create / ensure the per-language users
$uid_map = []; // [es_author][lang] => uid
foreach ($DEF as $es_author => $names) {
    foreach ($names as $L => $dn) {
        $login = $LOGIN[$es_author].'-'.$L;
        $u = get_user_by('login', $login);
        if (!$u) {
            $id = wp_insert_user([
                'user_login'   => $login,
                'user_pass'    => wp_generate_password(20),
                'user_email'   => $login.'@feadulta.local',
                'display_name' => $dn,
                'role'         => 'author',
                'first_name'   => $dn,
            ]);
            if (is_wp_error($id)) {
                echo "ERROR user $login: ".$id->get_error_message()."\n";
                continue;
            }
            echo "creado usuario $login (#$id) = $dn\n";
        } else {
            $id = $u->ID;
            wp_update_user(['ID'=>$id,'display_name'=>$dn]);
            echo "existe $login (#$id)\n";
        }
        $uid_map[$es_author][$L] = $id;
    }
}

// 2) reassign the author of each translation according to the Spanish original
$reasig = 0;
foreach ([408,409] as $es_author) {
    $posts = get_posts(['author'=>$es_author,'post_type'=>'post','post_status'=>['publish','draft'],
        'posts_per_page'=>-1,'fields'=>'ids','lang'=>'es','no_found_rows'=>true]);
    foreach ($posts as $es_id) {
        $tr = pll_get_post_translations($es_id);
        foreach ($LANGS as $L) {
            if (empty($tr[$L]) || empty($uid_map[$es_author][$L])) continue;
            $p = get_post($tr[$L]);
            if ($p && (int)$p->post_author !== (int)$uid_map[$es_author][$L]) {
                // wp_update_post() returns 0 on failure: count only real updates.
                if (wp_update_post(['ID'=>$tr[$L],'post_author'=>$uid_map[$es_author][$L]])) {
                    $reasig++;
                }
            }
        }
    }
    echo "ES autor $es_author: ".count($posts)." posts ES procesados\n";
}

// Second pass: any non-Spanish post still owned by the Spanish bible author
// is moved to that language's author.
$reasig2 = 0;
foreach ([408,409] as $es_author) {
    foreach ($LANGS as $L) {
        if (empty($uid_map[$es_author][$L])) continue;
        $ids = get_posts(['author'=>$es_author,'lang'=>$L,'post_type'=>'post',
            'post_status'=>['publish','draft'],'fields'=>'ids','posts_per_page'=>-1,'no_found_rows'=>true]);
        foreach ($ids as $id) {
            if (wp_update_post(['ID'=>$id,'post_author'=>$uid_map[$es_author][$L]])) {
                $reasig2++;
            }
        }
    }
}
echo "traducciones reasignadas de autor: $reasig (1ª) + $reasig2 (2ª directa)\n";
+126
View File
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
build_lectionary_index.py — Descarga el leccionario de evangelizo.ws para un rango de
fechas (un ciclo litúrgico completo cubre todas las lecturas) en es/en/fr/it/pt y
construye un índice POR REFERENCIA bíblica, para casar lecturas sin depender de fechas.
Salida: /tmp/lectionary_index.json { "LIBRO|cap|vers": {es,en,fr,it,pt: html} }
Cache por día/idioma en /tmp/evangelizo_cache (resumible).
Uso: python3 build_lectionary_index.py 2023-01-01 2025-12-31
"""
import sys, os, re, json, time, html, unicodedata, urllib.request
from datetime import date, timedelta
LANGS = {"SP": "es", "AM": "en", "FR": "fr", "IT": "it", "PT": "pt"}
CACHE = "/tmp/evangelizo_cache"
os.makedirs(CACHE, exist_ok=True)
INDEX = "/tmp/lectionary_index.json"
def norm_book(full_title):
    """Reduce a book title to its last word, upper-cased and without accents.

    "Libro de Jeremías" -> "JEREMIAS"; "Carta de san Pablo a los Romanos" ->
    "ROMANOS". Returns "" when no ASCII letters or digits remain.
    """
    decomposed = unicodedata.normalize("NFKD", full_title)
    # Dropping non-ASCII bytes removes the combining accents left by NFKD.
    ascii_upper = decomposed.encode("ascii", "ignore").decode().upper()
    words = re.sub(r"[^A-Z0-9 ]", " ", ascii_upper).split()
    if not words:
        return ""
    return words[-1]
def clean(raw):
    """Turn a raw reading text into HTML paragraphs.

    Entities are decoded, `[[...]]` markers are removed, and every non-blank
    line becomes one `<p>...</p>` followed by a newline. None yields "".
    """
    text = html.unescape(raw or "")
    text = re.sub(r"\[\[[^\]]+\]\]", "", text)
    pieces = []
    for line in text.split("\n"):
        line = line.strip()
        if line:
            pieces.append(f"<p>{line}</p>\n")
    return "".join(pieces)
def fetch(date_s, lang_code):
    """Return the readings of one day in one language, using the disk cache.

    Args:
        date_s: ISO date string used in the URL and the cache file name.
        lang_code: language code used in the URL (a key of LANGS).

    Returns:
        A list of dicts with keys "code", "ref", "book" and "text"; an empty
        list when the day could not be obtained.

    A successful download is cached under CACHE. A failure after three
    attempts returns [] WITHOUT caching it, so a later run retries instead of
    treating a transient network error as "no readings". Only an HTTP 404 is
    cached as empty.
    """
    import urllib.error  # local import: the file imports only urllib.request

    fp = os.path.join(CACHE, f"{date_s}_{lang_code}.json")
    if os.path.exists(fp):
        try:
            with open(fp, encoding="utf-8") as fh:
                return json.load(fh)
        except (OSError, ValueError):
            # Unreadable or corrupt cache entry: fall through and re-download.
            pass

    url = f"https://publication.evangelizo.ws/{lang_code}/days/{date_s}"
    last_error = None
    for attempt in range(3):
        try:
            req = urllib.request.Request(url, headers={"User-Agent": "fea-lect/1.0"})
            with urllib.request.urlopen(req, timeout=30) as r:
                data = json.load(r)
            out = [
                {
                    "code": rd.get("reading_code", ""),
                    "ref": (rd.get("reference_displayed") or "").strip().rstrip("."),
                    "book": (rd.get("book") or {}).get("full_title", ""),
                    "text": clean(rd.get("text", "")),
                }
                for rd in (data.get("data") or {}).get("readings") or []
            ]
        except Exception as exc:  # best-effort download: retry, never crash
            if isinstance(exc, urllib.error.HTTPError) and exc.code == 404:
                # NOTE(review): assumes the API answers 404 for a day it does
                # not publish — confirm; such a day is cached as empty.
                out = []
            else:
                last_error = exc
                if attempt < 2:
                    time.sleep(1.0)
                continue

        try:
            with open(fp, "w", encoding="utf-8") as fh:
                json.dump(out, fh, ensure_ascii=False)
        except OSError:
            # The cache is an optimisation; failing to write it is not fatal.
            pass
        time.sleep(0.15)  # be gentle with the remote server
        return out

    print(f"  aviso: sin datos para {date_s}/{lang_code}: {last_error}", file=sys.stderr, flush=True)
    return []
def key_from(book_full, ref):
    """Build the "BOOK|chapter|verse" key from a book title and a reference.

    `ref` must start with "<chapter>,<verse>" (1-3 digits each, optional
    spaces around the comma); otherwise None is returned.
    """
    parsed = re.match(r"(\d{1,3})\s*,\s*(\d{1,3})", ref)
    if parsed is None:
        return None
    chapter, verse = (int(g) for g in parsed.groups())
    return f"{norm_book(book_full)}|{chapter}|{verse}"
def main():
    """Download the lectionary for a date range and write the reference index.

    Command line: two ISO dates (start, end; inclusive). For every day and
    language the readings are fetched, texts are accumulated per reading code,
    and the index written to INDEX maps "BOOK|chapter|verse" to the texts in
    es/en/fr/it/pt. A reference is indexed only when all five are present.
    """
    if len(sys.argv) < 3:
        sys.exit("Uso: python3 build_lectionary_index.py AAAA-MM-DD AAAA-MM-DD")
    try:
        d0 = date.fromisoformat(sys.argv[1])
        d1 = date.fromisoformat(sys.argv[2])
    except ValueError as exc:
        sys.exit(f"Fecha inválida: {exc}")
    days = (d1 - d0).days + 1

    # Transferred feasts fall on different dates per country/language, so
    # readings cannot be matched within the same day. They are indexed by
    # reading_code instead, collecting each language's text from ANY day
    # where that code shows up.
    code_text = {wl: {} for wl in LANGS.values()}  # lang -> {code: text}
    code_book = {}  # code -> "BOOK|chapter|verse" (from the Spanish data)

    cur, n = d0, 0
    while cur <= d1:
        ds = cur.isoformat()
        for lc, wl in LANGS.items():
            for rd in fetch(ds, lc):
                code = rd["code"]
                if not code:
                    continue
                # The first non-empty text seen for a code wins.
                if rd["text"] and code not in code_text[wl]:
                    code_text[wl][code] = rd["text"]
                if wl == "es" and code not in code_book:
                    nb = norm_book(rd["book"])
                    m = re.search(r"(\d{1,3})\s*,\s*(\d{1,3})", code)
                    if nb and m:
                        code_book[code] = f"{nb}|{int(m.group(1))}|{int(m.group(2))}"
        n += 1
        if n % 60 == 0:
            print(f" {n}/{days} días codes_es={len(code_text['es'])}", flush=True)
        cur += timedelta(days=1)

    # Combine: keep a reference only when every language has a text for its
    # code; the first code that yields a complete entry for a key wins.
    wanted = ("es", "en", "fr", "it", "pt")
    index = {}
    for code, key in code_book.items():
        if key in index:
            continue
        entry = {wl: code_text[wl].get(code) for wl in wanted}
        if all(entry.values()):
            index[key] = entry

    with open(INDEX, "w", encoding="utf-8") as fh:
        json.dump(index, fh, ensure_ascii=False)
    print(f"FIN. {n} días. codes_es={len(code_book)} → índice {len(index)} referencias en {INDEX}")


if __name__ == "__main__":
    main()
+75
View File
@@ -0,0 +1,75 @@
<?php
/**
* Plugin Name: Fe Adulta — Carta de la Semana
* Description: Redirige las URLs de carta al archivo de categoría correspondiente.
* Version: 1.8
*/
// Redirect the custom "carta" pages to their category archives.
// wp_safe_redirect() is used, as in the category hook in this plugin: the
// targets are built with home_url(), so they always stay on this site.
add_action("template_redirect", function() {
    $targets = [
        "carta-de-la-semana" => "/category/cartasemana/",
        "la-semana-pasada"   => "/category/carta-semana-pasada/",
    ];
    foreach ($targets as $page_slug => $category_path) {
        if (is_page($page_slug)) {
            wp_safe_redirect(home_url($category_path), 302);
            exit;
        }
    }
});
// The current/previous letter categories always lead to the translated post
// that corresponds to the canonical Spanish category. Neither the term count
// nor the translated relationships are relied on, since both can be stale
// while an import is running.
add_action("template_redirect", function() {
    if (!is_category()) {
        return;
    }
    $term = get_queried_object();
    if (!$term || empty($term->term_id)) {
        return;
    }

    // Resolve the queried category to its Spanish counterpart when possible.
    $canonical_cat_id = (int) $term->term_id;
    if (function_exists('pll_get_term')) {
        $es_cat_id = (int) pll_get_term($canonical_cat_id, 'es');
        if ($es_cat_id) {
            $canonical_cat_id = $es_cat_id;
        }
    }
    // Only the two letter categories (term ids 6 and 22) are handled.
    if ($canonical_cat_id !== 6 && $canonical_cat_id !== 22) {
        return;
    }

    // Newest published post of the canonical category.
    global $wpdb;
    $latest_sql = $wpdb->prepare(
        "SELECT p.ID
        FROM {$wpdb->posts} p
        INNER JOIN {$wpdb->term_relationships} tr ON tr.object_id = p.ID
        INNER JOIN {$wpdb->term_taxonomy} tt ON tt.term_taxonomy_id = tr.term_taxonomy_id
        WHERE tt.taxonomy = 'category' AND tt.term_id = %d
        AND p.post_type = 'post' AND p.post_status = 'publish'
        ORDER BY p.post_date DESC, p.ID DESC
        LIMIT 1",
        $canonical_cat_id
    );
    $latest_id = (int) $wpdb->get_var($latest_sql);
    if (!$latest_id) {
        return;
    }

    // Prefer the translation in the current language, if there is one.
    $target_id = $latest_id;
    if (function_exists('pll_current_language') && function_exists('pll_get_post')) {
        $current_lang = pll_current_language();
        $translated_id = 0;
        if ($current_lang) {
            $translated_id = (int) pll_get_post($latest_id, $current_lang);
        }
        if ($translated_id) {
            $target_id = $translated_id;
        }
    }

    $permalink = get_permalink($target_id);
    if (!$permalink) {
        return;
    }
    wp_safe_redirect($permalink, 302);
    exit;
}, 9);
// Show 50 posts per page on the letter category archives.
add_action("pre_get_posts", function($query) {
    // Front-end main query only.
    if (is_admin() || !$query->is_main_query()) {
        return;
    }
    $letter_archives = [
        "cartasemana", "carta-semana-pasada", "cartas-de-otras-semanas",
        "letter-of-the-week", "lettre-de-la-semaine", "lettera-della-settimana", "carta-da-semana",
        "carta-semana-pasada-en", "carta-semana-pasada-fr",
        "carta-semana-pasada-it", "carta-semana-pasada-pt",
        "letters-from-other-weeks", "lettres-des-autres-semaines",
        "lettere-delle-altre-settimane", "cartas-de-outras-semanas",
    ];
    if ($query->is_category($letter_archives)) {
        $query->set("posts_per_page", 50);
    }
});
+109
View File
@@ -0,0 +1,109 @@
<?php
/**
* create_buscar_page.php (#8) — Crea/repara la página dedicada /buscar.
* La página sirve como destino del enlace «Búsqueda avanzada» y muestra el formulario
* avanzado aunque no haya consulta activa.
*
* Idempotente de verdad:
* - Si /buscar NO existe → la crea (es) + traducciones (en/fr/it/pt) y las vincula.
* - Si /buscar YA existe → la deja, pero REPARA traducciones Polylang faltantes o
* no publicadas (crea las que falten, publica las que estén en borrador, revincula).
*
* Uso:
* wp eval-file scripts/create_buscar_page.php # DRY-RUN
* APPLY=1 wp eval-file scripts/create_buscar_page.php # aplica
*/
// Changes are applied only when the APPLY environment variable is exactly '1';
// anything else is a dry run.
$apply = getenv('APPLY') === '1';
// Page title per language code.
$titles = [
'es' => 'Búsqueda avanzada',
'en' => 'Advanced Search',
'fr' => 'Recherche avancée',
'it' => 'Ricerca avanzata',
'pt' => 'Pesquisa avançada',
];
// Minimal content (no Gutenberg blocks). The form is injected via the_content.
$content = '<p>Utiliza el formulario de búsqueda avanzada para encontrar reflexiones, artículos y más.</p>';
// True when the Polylang functions needed to set languages and link translations exist.
$has_pll = function_exists('pll_set_post_language') && function_exists('pll_save_post_translations');
// Language slugs configured in Polylang; only 'es' when Polylang is unavailable.
$pll_langs = (function_exists('pll_languages_list'))
? pll_languages_list(['fields' => 'slug'])
: ['es'];
/**
 * Ensure a published page with the given slug exists and return its ID.
 *
 * An existing page is reused: when $apply is true it is published if needed
 * and its Polylang language is corrected; in dry-run mode its real state is
 * only reported. A missing page is created (published, author 1) when $apply
 * is true.
 *
 * @param string $slug    Page slug (post_name) to look up or create.
 * @param string $title   Title used when the page is created.
 * @param string $content Content used when the page is created.
 * @param string $lang    Polylang language slug to assign.
 * @param bool   $apply   False = dry run: report only, change nothing.
 * @param bool   $has_pll Whether the Polylang functions are available.
 * @return int Page ID, or 0 when nothing exists/was created (dry run or error).
 */
function fea_buscar_ensure_page(string $slug, string $title, string $content, string $lang, bool $apply, bool $has_pll) {
    $existing = get_page_by_path($slug, OBJECT, 'page');
    if ($existing) {
        // Make sure it is published — and report its real state in dry-run
        // instead of claiming it is already published.
        if ($existing->post_status === 'publish') {
            echo " · ({$lang}) página '{$slug}' ya existe y publicada (ID {$existing->ID})\n";
        } elseif ($apply) {
            $updated = wp_update_post(['ID' => $existing->ID, 'post_status' => 'publish'], true);
            if (is_wp_error($updated)) {
                echo " · ({$lang}) ERROR publicando página '{$slug}' (ID {$existing->ID}): " . $updated->get_error_message() . "\n";
            } else {
                echo " · ({$lang}) página '{$slug}' existía en estado {$existing->post_status} → publicada (ID {$existing->ID})\n";
            }
        } else {
            echo " · ({$lang}) [dry] página '{$slug}' existe en estado {$existing->post_status}; se publicaría (ID {$existing->ID})\n";
        }

        // Make sure the language is assigned.
        if ($apply && $has_pll && function_exists('pll_get_post_language')) {
            $cur = pll_get_post_language($existing->ID);
            if ($cur !== $lang) {
                pll_set_post_language($existing->ID, $lang);
                echo " idioma → {$lang}\n";
            }
        }
        return (int) $existing->ID;
    }

    echo ($apply ? " · ({$lang}) creando" : " · ({$lang}) [dry] crearía") . " página '{$slug}'\n";
    if (!$apply) {
        return 0;
    }

    $id = wp_insert_post([
        'post_type'    => 'page',
        'post_status'  => 'publish',
        'post_name'    => $slug,
        'post_title'   => $title,
        'post_content' => $content,
        'post_author'  => 1,
    ], true);
    if (is_wp_error($id)) {
        echo " ERROR: " . $id->get_error_message() . "\n";
        return 0;
    }
    if ($has_pll) {
        pll_set_post_language($id, $lang);
    }
    echo " creada (ID {$id})\n";
    return (int) $id;
}
echo ($apply ? "APLICANDO" : "DRY-RUN") . " — crear/reparar /buscar\n";

// 1) Spanish page (slug 'buscar').
$translations = [];
$es_id = fea_buscar_ensure_page('buscar', $titles['es'], $content, 'es', $apply, $has_pll);
if ($es_id) {
    $translations['es'] = $es_id;
}

// 2) Translations (en/fr/it/pt), only when Polylang is active and the language is configured.
// NOTE(review): every translation is created with the same Spanish $content — confirm this is intended.
if ($has_pll) {
    foreach (['en', 'fr', 'it', 'pt'] as $lang) {
        if (!in_array($lang, $pll_langs, true)) {
            echo "  · ({$lang}) idioma no configurado en Polylang, omitido\n";
            continue;
        }

        // Reuse a translation that is already linked to the Spanish page.
        $linked = ($es_id && function_exists('pll_get_post')) ? (int) pll_get_post($es_id, $lang) : 0;
        if ($linked) {
            $p = get_post($linked);
            $needs_publish = $p && $p->post_status !== 'publish';
            if ($needs_publish && $apply) {
                wp_update_post(['ID' => $linked, 'post_status' => 'publish']);
                echo "  · ({$lang}) traducción vinculada (ID {$linked}) estaba {$p->post_status} → publicada\n";
            } elseif ($needs_publish) {
                // Dry-run: report the pending repair instead of "ya vinculada".
                echo "  · ({$lang}) [dry] traducción vinculada (ID {$linked}) está {$p->post_status} → se publicaría\n";
            } else {
                echo "  · ({$lang}) traducción ya vinculada (ID {$linked})\n";
            }
            $translations[$lang] = $linked;
            continue;
        }

        // Otherwise create or repair the page by slug.
        $tl_id = fea_buscar_ensure_page('buscar-' . $lang, $titles[$lang], $content, $lang, $apply, $has_pll);
        if ($tl_id) {
            $translations[$lang] = $tl_id;
        }
    }

    // 3) Re-link all translations; a group needs at least two members.
    if ($apply && count($translations) > 1) {
        pll_save_post_translations($translations);
        echo "  · traducciones revinculadas: " . implode(', ', array_map(
            fn($l, $id) => "{$l}={$id}", array_keys($translations), $translations)) . "\n";
    }
}

if (function_exists('wp_cache_flush')) {
    wp_cache_flush();
}
echo ($apply ? "APLICADO" : "DRY-RUN") . "\n";
+43
View File
@@ -0,0 +1,43 @@
<?php
// Creates (or overwrites, when TARGET_IDS is given) the en/fr/it/pt translations of one
// Spanish reading post, maps its categories through Polylang and links the translation group.
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';

$ES  = (int)(getenv('ES_ID') ?: 2682); // MATEO 10, 26-33 (es)
$src = get_post($ES);
if (!$src) {
    // Without the source post there is no author/date to copy; stop before writing anything.
    fwrite(STDERR, "ERROR: no existe el post ES #$ES\n");
    exit(1);
}

// Translated HTML per language, read as $tr_html[<lang>].
$json_path = getenv('LECTURAS_JSON') ?: '/tmp/lecturas_mateo.json';
$json_raw  = is_readable($json_path) ? file_get_contents($json_path) : false;
$tr_html   = ($json_raw !== false) ? json_decode($json_raw, true) : null;
if (!is_array($tr_html)) {
    fwrite(STDERR, "ERROR: no se pudo leer JSON válido de $json_path\n");
    exit(1);
}

$BOOK = ['en'=>'MATTHEW','fr'=>'MATTHIEU','it'=>'MATTEO','pt'=>'MATEUS'];
$VER  = ['en'=>'Douay-Rheims Bible','fr'=>'Bible du Semeur 2015','it'=>'Nuova Riveduta 2006','pt'=>'Bíblia CNBB 2002'];
$REST = getenv('LECTURA_TITULO_ES') ?: 'MATEO 10, 26-33'; // Spanish title
// NOTE(review): only a leading "MATEO" is stripped and $BOOK only knows Matthew,
// so titles of other books are not handled here.
$tail = preg_replace('~^MATEO~','',$REST);                // " 10, 26-33"
$es_cats = wp_get_post_categories($ES);

// Optional fixed post IDs, positionally matched to en,fr,it,pt.
$target_ids = array_values(array_filter(array_map('intval', explode(',', getenv('TARGET_IDS') ?: ''))));
$target_map = [];
foreach (['en','fr','it','pt'] as $idx => $lang0) {
    if (!empty($target_ids[$idx])) $target_map[$lang0] = (int)$target_ids[$idx];
}

$grp = pll_get_post_translations($ES);
if (!$grp) $grp = ['es' => $ES];

foreach (['en','fr','it','pt'] as $lang) {
    $exist = (int)pll_get_post($ES, $lang);
    if ($exist && get_post($exist)) {
        echo "$lang ya existe #$exist — saltado\n";
        $grp[$lang] = $exist;
        continue;
    }
    if (empty($tr_html[$lang]) || !is_string($tr_html[$lang])) {
        // Never publish an empty translation just because the JSON lacks this language.
        echo "$lang ERROR sin contenido traducido en JSON — saltado\n";
        continue;
    }

    $title = $BOOK[$lang].$tail;
    $postarr = [
        'post_title'=>$title,'post_content'=>$tr_html[$lang],'post_status'=>'publish',
        'post_type'=>'post','post_author'=>(int)$src->post_author,'post_date'=>$src->post_date,
    ];
    $target = (int)($target_map[$lang] ?? 0);
    if ($target && get_post($target)) {
        $postarr['ID'] = $target;          // overwrite the existing post
    } elseif ($target) {
        $postarr['import_id'] = $target;   // request this ID for the new post
    }

    $id = wp_insert_post($postarr, true);
    if (is_wp_error($id)) { echo "$lang ERROR ".$id->get_error_message()."\n"; continue; }
    if ($target && (int)$id !== $target) { echo "$lang ERROR id esperado $target creado $id\n"; continue; }

    pll_set_post_language($id, $lang);

    // Use the translated category when Polylang has one, otherwise keep the Spanish term.
    $mapped = [];
    foreach ($es_cats as $c) {
        $tc = (int)pll_get_term($c, $lang);
        $mapped[] = $tc ?: $c;
    }
    wp_set_post_categories($id, array_values(array_unique($mapped)));

    $grp[$lang] = $id;
    pll_save_post_translations($grp);
    update_post_meta($id, 'lectura_fuente', $VER[$lang]);
    update_post_meta($id, 'traduccion_origen', $ES);
    echo "$lang creado #$id «$title» [".$VER[$lang]."]\n";
}
echo "Grupo final: ".json_encode(pll_get_post_translations($ES))."\n";
+69
View File
@@ -0,0 +1,69 @@
#!/bin/bash
# =============================================================================
# Script de cutover DNS: feadulta.org → feadulta.com
# Ejecutar DESPUÉS de apuntar el DNS de feadulta.com al servidor de producción
# =============================================================================
#
# Este script reemplaza todas las URLs internas de feadulta.org por feadulta.com
# en la base de datos WordPress de producción.
#
# Servidor: 185.42.105.48
# DB: 278025353wordpress20260112013937 / myfeadultaa5 / KjyGU29h
# =============================================================================
set -e

# NOTE(review): database credentials are hard-coded here and the password is passed on the
# mysql command line — confirm this is acceptable for this one-off script.
DB_HOST="127.0.0.1"
DB_NAME="278025353wordpress20260112013937"
DB_USER="myfeadultaa5"
DB_PASS="KjyGU29h"
OLD_URL="http://feadulta.org"
NEW_URL="https://feadulta.com"
MYSQL="mysql -h $DB_HOST -u $DB_USER -p$DB_PASS $DB_NAME"

# Values that start like PHP-serialized data (s:N:, a:N:, O:N:). OLD_URL and NEW_URL differ
# in length (19 vs 20 characters), so a plain SQL REPLACE inside such values would leave the
# s:N: length prefixes wrong and make them unreadable by unserialize(). Those rows are
# skipped below and counted in the verification step.
SERIALIZED_RE="^(a|s|O):[0-9]+:"

echo "=== Cutover feadulta.org → feadulta.com ==="
echo "OLD: $OLD_URL"
echo "NEW: $NEW_URL"
echo ""
echo "Ejecutando en 5 segundos... (Ctrl+C para cancelar)"
sleep 5

echo "[1/6] Actualizando siteurl y home..."
$MYSQL -e "
UPDATE wp_options SET option_value = '$NEW_URL' WHERE option_name = 'siteurl';
UPDATE wp_options SET option_value = '$NEW_URL' WHERE option_name = 'home';
"

echo "[2/6] Reemplazando en post_content..."
$MYSQL -e "UPDATE wp_posts SET post_content = REPLACE(post_content, '$OLD_URL', '$NEW_URL');"

echo "[3/6] Reemplazando en guid..."
$MYSQL -e "UPDATE wp_posts SET guid = REPLACE(guid, '$OLD_URL', '$NEW_URL');"

echo "[4/6] Reemplazando en postmeta (no serializados)..."
$MYSQL -e "
UPDATE wp_postmeta SET meta_value = REPLACE(meta_value, '$OLD_URL', '$NEW_URL')
WHERE meta_value LIKE '%$OLD_URL%'
AND meta_value NOT REGEXP '$SERIALIZED_RE';
"

echo "[5/6] Reemplazando en wp_options (no serializados)..."
$MYSQL -e "
UPDATE wp_options SET option_value = REPLACE(option_value, '$OLD_URL', '$NEW_URL')
WHERE option_name NOT IN ('wpseo', 'fgj2wp_save_posts', 'bsr_data')
AND option_value LIKE '%feadulta.org%'
AND option_value NOT REGEXP '$SERIALIZED_RE';
"

echo "[6/6] Actualizando wp-config.php..."
ssh feadultada@185.42.105.48 "
sed -i \"s|define('WP_SITEURL','http://feadulta.org')|define('WP_SITEURL','https://feadulta.com')|\" /web/wp-config.php
sed -i \"s|define('WP_HOME','http://feadulta.org')|define('WP_HOME','https://feadulta.com')|\" /web/wp-config.php
"

echo ""
echo "=== Verificación ==="
$MYSQL -e "SELECT option_name, option_value FROM wp_options WHERE option_name IN ('siteurl','home');"
$MYSQL -e "SELECT COUNT(*) as pendientes_feadulta_org FROM wp_posts WHERE post_content LIKE '%feadulta.org%';"
# Rows still holding OLD_URL after the updates are the ones skipped above.
$MYSQL -e "SELECT COUNT(*) as pendientes_postmeta FROM wp_postmeta WHERE meta_value LIKE '%$OLD_URL%';"
$MYSQL -e "SELECT COUNT(*) as pendientes_options FROM wp_options WHERE option_value LIKE '%$OLD_URL%';"

echo ""
echo "=== Cutover completado ==="
echo "IMPORTANTE: Los valores serializados NO se han modificado; migrarlos con: wp search-replace '$OLD_URL' '$NEW_URL'"
echo "IMPORTANTE: Limpiar caché de WordPress y Cloudflare después de este paso."
echo "IMPORTANTE: Activar plugins: AdSense, Wordfence, TTS."
echo "IMPORTANTE: Verificar redirects de feadulta.com/images/Musica/ (ya no hacen falta si los MP3 están en el mismo servidor)."
+69
View File
@@ -0,0 +1,69 @@
<?php
/**
* Ciclo carta nueva — sincroniza "esta semana", "semana pasada" y "otras semanas"
* en TODOS los idiomas (ES + EN/FR/IT/PT).
*
* Deriva los términos por Polylang desde los términos ES base:
* cartasemana = term 6 | otras semanas = term 21 | semana pasada = term 22
*
* Uso: CARTA=<es_id> php demote_old_cartasemana.php (dry-run)
* APPLY=1 CARTA=<es_id> php demote_old_cartasemana.php
*/
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';

$APPLY = (getenv('APPLY') === '1');
$CARTA = (int) (getenv('CARTA') ?: 0);
if ($CARTA === 0) {
    fwrite(STDERR, "Falta CARTA=<es_id>\n");
    exit(1);
}

// Per-language term IDs, derived through Polylang from the Spanish base terms.
$cs_terms    = pll_get_term_translations(6);   // cartasemana
$otras_terms = pll_get_term_translations(21);  // letters from other weeks
$last_terms  = pll_get_term_translations(22);  // last week's letter

// Translations of the new letter; these posts stay in cartasemana.
$carta_tr = pll_get_post_translations($CARTA);

// Newest published post in category 22; its translations stay in "last week".
$last_es_posts = get_posts([
    'post_type'        => 'post',
    'numberposts'      => 1,
    'post_status'      => 'publish',
    'fields'           => 'ids',
    'cat'              => 22,
    'orderby'          => 'date',
    'order'            => 'DESC',
    'suppress_filters' => true,
]);
$last_es = (int) ($last_es_posts[0] ?? 0);
$last_tr = $last_es ? pll_get_post_translations($last_es) : [];

// IDs of all posts (any status) assigned to one category term.
$ids_in_category = function ($term_id) {
    return get_posts([
        'post_type'   => 'post',
        'numberposts' => -1,
        'post_status' => 'any',
        'fields'      => 'ids',
        'tax_query'   => [['taxonomy' => 'category', 'field' => 'term_id', 'terms' => $term_id]],
    ]);
};

foreach ($cs_terms as $lang => $cs) {
    $keep      = $carta_tr[$lang] ?? 0;
    $otras     = $otras_terms[$lang] ?? 0;
    $last      = $last_terms[$lang] ?? 0;
    $keep_last = $last_tr[$lang] ?? 0;

    // Move every post except the new letter from cartasemana to "other weeks".
    $moved = 0;
    foreach ($ids_in_category($cs) as $pid) {
        if ($pid == $keep) {
            continue;
        }
        if ($APPLY) {
            if ($otras) {
                wp_set_object_terms($pid, [(int) $otras], 'category', true); // append
            }
            wp_remove_object_terms($pid, [(int) $cs], 'category');
        }
        $moved++;
    }

    // Remove "last week" from every post except the one being kept.
    $last_removed = 0;
    $last_posts = $last ? $ids_in_category($last) : [];
    foreach ($last_posts as $pid) {
        if ($pid == $keep_last) {
            continue;
        }
        if ($APPLY) {
            wp_remove_object_terms($pid, [(int) $last], 'category');
        }
        $last_removed++;
    }

    if ($APPLY && $last && $keep_last) {
        wp_set_object_terms($keep_last, [(int) $last], 'category', true);
    }
    if ($APPLY) {
        clean_term_cache(array_filter([$cs, $otras, $last]), 'category');
    }

    $t = get_term($cs);
    printf(
        "%s: %s %d de actual | anterior=#%d (limpia %d) | '%s' keep=#%d\n",
        strtoupper($lang),
        $APPLY ? "movidas" : "se moverían",
        $moved,
        $keep_last,
        $last_removed,
        $t->slug,
        $keep
    );
}

echo $APPLY ? "APLICADO\n" : "DRY-RUN (APPLY=1 para aplicar)\n";
+282
View File
@@ -0,0 +1,282 @@
#!/usr/bin/env bash
# Paso 1 del upgrade PHP 8.3 en feadulta.com (issue #46):
# subir 5 ficheros parcheados + limpieza malware, manteniendo PHP 7.4.
#
# Modos:
# --dry-run imprime lo que haría sin tocar nada (default)
# --apply ejecuta el despliegue real
# --rollback BACKUP_DIR restaura desde un backup pre-step1 concreto
#
# Pre-flight estricto: aborta si los 5 ficheros remotos no coinciden con el
# hash esperado (el que verificamos hoy 2026-05-25). Si en prod cambió algo
# entre planning y ejecución, hay que rehacer la planificación.
# Abort on any command failure, on use of an unset variable, and on a failure anywhere in a pipeline.
set -euo pipefail
# ─── Config ────────────────────────────────────────────────────────────────
# NOTE(review): the SSH password is stored in plaintext in this script — confirm this is
# acceptable and rotate it if the file is shared or committed.
SSH_USER="feadulta"
SSH_HOST="134.0.10.170"
SSH_PASS='6Rm2qOF@eundwpda'
REMOTE_WEB_ROOT="/web"
LOCAL_SRC_ROOT="/home/rafa/joomla-migration/joomla-php83"
BACKUP_ROOT="/home/rafa/joomla-migration/backup/prod-20260525-php83-compat"
# Map: relative_path | expected_local_hash | expected_current_remote_hash
# (verified 2026-05-25 with local md5sum + ssh + tar -xzO of the backup)
declare -a FILES=(
"modules/mod_featcats/helper.php|01ae5ad40e13abdcd5852897786d3733|744922888ae533b090eb34effe3bb469"
"templates/fe_adulta_1/functions.php|06b2c26a618dbceefa5d8d5f0293c2cf|6318edec84cdf7a6b84a1d07f063c6c0"
"templates/fe_adulta_1/index.php|dc318909b9ae5976e5c4f01c06c35f66|9793dfa3c880eba37ad5ad35e6988705"
"modules/mod_k2_filter/helper.php|0d9767a0d8d67aa420baefe38382a87c|8530a4e70043973fd2d625c6f8de6ce9"
"modules/mod_k2_filter/tmpl/Default/template.php|a62a39dacafa0748528268b9f04aa844|008465147acdeb442eca6f64311fb23d"
)
# Smoke-test URLs (must return HTTP 200 before and after the change)
declare -a SMOKE_URLS=(
"https://www.feadulta.com/"
"https://www.feadulta.com/es/"
"https://www.feadulta.com/es/ayuda.html"
"https://www.feadulta.com/es/quienessomos/colaboradores.html"
"https://www.feadulta.com/es/buscadoravanzado/itemlist/"
"https://www.feadulta.com/es/buscadoravanzado/itemlist/user/43-fraymarcos.html"
"https://www.feadulta.com/es/comentcol2.html"
)
# Spam patterns that must give 0 hits in the responses after step 1
SPAM_REGEX="(apuestadeportiva|vavada\.mobi|inkabet|betsafe|betcris|botbotbot)"
# Origin IP used to bypass Cloudflare (the site sits behind a CF managed challenge, so
# plain curl requests get 403). --resolve takes the request straight to the origin.
ORIGIN_IP="134.0.10.170"
SMOKE_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"
# ─── Utilities ─────────────────────────────────────────────────────────────
# Logs go to stderr so stdout stays free for "data" (e.g. the backup path captured
# with $(...) without contamination). Bug found in review #46.

# log LEVEL MESSAGE — print a timestamped line to stderr.
log() {
    local level="$1" message="$2"
    printf '%s [%s] %s\n' "$(date +'%H:%M:%S')" "$level" "$message" >&2
}

# Level-specific wrappers around log.
info() {
    log "INFO" "$1"
}

warn() {
    log "WARN" "$1"
}

err() {
    log "ERR " "$1"
}

ok() {
    log "OK " "$1"
}
# ssh_run CMD... — run a command on the production host, authenticating with the
# password from SSH_PASS via sshpass (-e reads it from the SSHPASS environment variable).
ssh_run() {
    local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=15)
    SSHPASS="$SSH_PASS" sshpass -e ssh "${ssh_opts[@]}" "$SSH_USER@$SSH_HOST" "$@"
}
# The cPanel jail rejects scp and sftp ("Connection closed"). We use cat-over-ssh, which
# does work (cat is at /usr/bin/cat). Verified 2026-05-25: downloading a known file
# reproduces the same MD5; uploading bytes and re-reading them shows no loss.

# scp_get REMOTE LOCAL — download a remote file by streaming `cat` over ssh.
scp_get() {
    local remote="$1"
    local local_target="$2"
    ssh_run "cat '$remote'" > "$local_target"
}
# scp_put LOCAL REMOTE — upload a local file by streaming it into a remote `cat >`.
scp_put() {
    local local_src="$1"
    local remote_target="$2"
    ssh_run "cat > '$remote_target'" < "$local_src"
}
# remote_md5 PATH — print the MD5 of a remote file; prints nothing when md5sum fails
# (its stderr is discarded).
remote_md5() {
    local remote_file="$1"
    ssh_run "md5sum '$remote_file' 2>/dev/null | cut -d' ' -f1"
}
# ─── Pre-flight checks ─────────────────────────────────────────────────────
# For every entry in FILES, check that the local file has the expected hash and that the
# remote file still has the hash recorded at planning time. All problems are reported
# before exiting with status 2.
preflight() {
    info "Pre-flight: verificar hashes locales y remotos"
    local failed=0
    local entry rel hl hr_expected src dst local_hash remote_hash
    for entry in "${FILES[@]}"; do
        IFS='|' read -r rel hl hr_expected <<<"$entry"
        src="$LOCAL_SRC_ROOT/$rel"
        dst="$REMOTE_WEB_ROOT/$rel"

        # A missing local file skips the remaining checks for this entry.
        if [[ ! -f "$src" ]]; then
            err "Local no existe: $src"
            failed=1
            continue
        fi

        local_hash=$(md5sum "$src" | cut -d' ' -f1)
        if [[ "$local_hash" == "$hl" ]]; then
            ok "Local $rel coincide ($hl)"
        else
            err "Local $rel: hash $local_hash ≠ esperado $hl"
            failed=1
        fi

        remote_hash=$(remote_md5 "$dst")
        if [[ -z "$remote_hash" ]]; then
            err "Remoto no existe o vacío: $dst"
            failed=1
        elif [[ "$remote_hash" == "$hr_expected" ]]; then
            ok "Remoto $rel coincide ($remote_hash)"
        else
            err "Remoto $rel: hash $remote_hash ≠ esperado $hr_expected (algo cambió en prod, replanificar)"
            failed=1
        fi
    done

    if [[ $failed -eq 1 ]]; then
        err "Pre-flight FAIL → abortando"
        exit 2
    fi
    ok "Pre-flight OK"
}
# ─── Pre-change backup (the 5 remote files) ────────────────────────────────
# Download every remote file in FILES into a timestamped directory under BACKUP_ROOT,
# verify each download against the expected remote hash, then write md5sums.txt and a
# tar.gz of the directory. The backup directory path is the only thing printed on stdout.
# In dry-run mode nothing is created; only the would-be path is printed.
backup_pre_step1() {
    local stamp
    stamp=$(date +'%Y%m%d-%H%M%S')
    local dir="$BACKUP_ROOT/pre-step1-$stamp"

    if [[ "$MODE" == "dry-run" ]]; then
        info "[dry-run] Crearía directorio: $dir"
        info "[dry-run] Descargaría ${#FILES[@]} ficheros remotos a ese directorio + md5sums.txt + tar.gz"
        echo "$dir"   # printed so the caller can capture it
        return
    fi

    info "Creando backup pre-step1 en $dir"
    mkdir -p "$dir"

    local entry rel hr_expected remote_path local_target downloaded_hash
    for entry in "${FILES[@]}"; do
        IFS='|' read -r rel _ hr_expected <<<"$entry"
        remote_path="$REMOTE_WEB_ROOT/$rel"
        local_target="$dir/$rel"
        mkdir -p "$(dirname "$local_target")"

        info " Descargando $remote_path"
        scp_get "$remote_path" "$local_target"

        # An empty download means the transfer did not really work.
        if [[ ! -s "$local_target" ]]; then
            err "Descarga vacía: $local_target — ABORT (revisar acceso SSH)"
            exit 6
        fi

        downloaded_hash=$(md5sum "$local_target" | cut -d' ' -f1)
        if [[ "$downloaded_hash" != "$hr_expected" ]]; then
            err "Backup $rel: md5 $downloaded_hash ≠ esperado $hr_expected — ABORT"
            exit 6
        fi
        ok " Backup $rel verificado ($downloaded_hash)"
    done

    (
        cd "$dir"
        find . -type f -name '*.php' -exec md5sum {} \; > md5sums.txt
    )
    tar -czf "$dir.tar.gz" -C "$BACKUP_ROOT" "pre-step1-$stamp"
    ok "Backup pre-step1 creado: $dir"
    ok "Backup tar.gz: $dir.tar.gz"
    echo "$dir"
}
# ─── Upload of the 5 files ─────────────────────────────────────────────────
# Upload every file in FILES and verify that the remote MD5 afterwards equals the expected
# local hash. Exits with status 3 on the first mismatch. In dry-run mode it only logs the
# transfers it would perform.
upload_files() {
    local entry rel hl local_path remote_path hr_after
    for entry in "${FILES[@]}"; do
        IFS='|' read -r rel hl _ <<<"$entry"
        local_path="$LOCAL_SRC_ROOT/$rel"
        remote_path="$REMOTE_WEB_ROOT/$rel"

        if [[ "$MODE" == "dry-run" ]]; then
            # Separate source and destination; they were previously printed glued together.
            info "[dry-run] scp $local_path → $remote_path"
            continue
        fi

        info "Subiendo $rel"
        scp_put "$local_path" "$remote_path"

        hr_after=$(remote_md5 "$remote_path")
        if [[ "$hr_after" != "$hl" ]]; then
            err "Subida de $rel: hash remoto post-subida $hr_after ≠ local $hl"
            err "ABORT — el fichero remoto no coincide con el local. Considerar rollback."
            exit 3
        fi
        ok "Subido + verificado $rel ($hr_after)"
    done
}
# ─── HTTP smoke test ───────────────────────────────────────────────────────
# smoke_test [allow_spam]
# Requests every URL in SMOKE_URLS directly at the origin (--resolve) and fails with exit
# status 4 when any URL returns a code other than 200/301/302 or, unless allow_spam=1,
# when the body contains lines matching SPAM_REGEX.
# allow_spam=1 → only validate the HTTP code (post-rollback use, where the restored files
# still carry the original spam injections and spam>0 is expected).
smoke_test() {
    local allow_spam="${1:-0}"
    if [[ "$allow_spam" -eq 1 ]]; then
        info "Smoke test HTTP (post-rollback: solo validar HTTP, spam>0 esperado)"
    else
        info "Smoke test HTTP (HTTP 200/3xx + spam=0)"
    fi

    local fail=0
    local url code body_spam tmp
    for url in "${SMOKE_URLS[@]}"; do
        if [[ "$MODE" == "dry-run" ]]; then
            info "[dry-run] curl $url + grep spam"
            continue
        fi

        tmp=$(mktemp)
        # On a curl failure report a plain "ERR" instead of curl's "000" glued to a marker.
        if ! code=$(curl -ksL -A "$SMOKE_UA" --resolve "www.feadulta.com:443:$ORIGIN_IP" \
                -o "$tmp" -w "%{http_code}" "$url"); then
            code="ERR"
        fi
        # grep -c exits 1 when there are no matches; keep the printed 0 and carry on.
        body_spam=$(grep -cE "$SPAM_REGEX" "$tmp" || true)
        rm -f "$tmp"

        if [[ "$code" != "200" && "$code" != "301" && "$code" != "302" ]]; then
            err " $url → HTTP $code"; fail=1
        elif [[ "$allow_spam" -eq 0 && "$body_spam" -gt 0 ]]; then
            err " $url → HTTP $code, $body_spam strings spam"; fail=1
        else
            ok " $url → HTTP $code, $body_spam spam"
        fi
    done

    if [[ $fail -eq 1 ]]; then
        err "Smoke test FAIL → considerar rollback manual con --rollback <backup-dir>"
        exit 4
    fi
    ok "Smoke test OK"
}
# ─── Rollback from backup ──────────────────────────────────────────────────
# rollback BACKUP_DIR
# Re-upload every file in FILES from BACKUP_DIR and verify that the remote MD5 matches the
# backup copy, as upload_files does for a deploy. Files missing from the backup are
# skipped, but the rollback is then reported as incomplete. Exits with status 5 when the
# directory does not exist or when any file could not be restored and verified.
rollback() {
    local dir="$1"
    if [[ ! -d "$dir" ]]; then
        err "Backup dir no existe: $dir"; exit 5
    fi
    info "Rollback desde $dir"

    local incomplete=0
    local entry rel local_src remote_path h_backup h_remote
    for entry in "${FILES[@]}"; do
        IFS='|' read -r rel _ _ <<<"$entry"
        local_src="$dir/$rel"
        remote_path="$REMOTE_WEB_ROOT/$rel"

        if [[ ! -f "$local_src" ]]; then
            err " No hay backup para $rel — saltando"
            incomplete=1
            continue
        fi

        info " Restaurando $rel"
        scp_put "$local_src" "$remote_path"

        # Confirm the restored bytes really landed on the server.
        h_backup=$(md5sum "$local_src" | cut -d' ' -f1)
        h_remote=$(remote_md5 "$remote_path")
        if [[ "$h_remote" != "$h_backup" ]]; then
            err " Restauración de $rel: hash remoto $h_remote ≠ backup $h_backup"
            incomplete=1
        else
            ok " Restaurado + verificado $rel ($h_remote)"
        fi
    done

    if [[ $incomplete -eq 1 ]]; then
        err "Rollback INCOMPLETO — revisar los ficheros indicados arriba"
        exit 5
    fi
    ok "Rollback completado"
}
# ─── Main ──────────────────────────────────────────────────────────────────
# Parse flags (default mode: dry-run), then either roll back from a backup directory or
# run preflight → backup → upload → smoke test.
MODE="dry-run"
ROLLBACK_DIR=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dry-run) MODE="dry-run"; shift ;;
        --apply) MODE="apply"; shift ;;
        --rollback)
            MODE="rollback"
            ROLLBACK_DIR="${2:-}"
            # Shift the flag and, only when present, its value: a bare `shift 2` fails when
            # the value is missing and, under `set -e`, would kill the script before the
            # explicit error message below is reached.
            shift
            if [[ $# -gt 0 ]]; then shift; fi
            ;;
        -h|--help) sed -n '1,12p' "$0"; exit 0 ;;
        *) err "Flag desconocido: $1"; exit 1 ;;
    esac
done

info "Modo: $MODE"

if [[ "$MODE" == "rollback" ]]; then
    if [[ -z "$ROLLBACK_DIR" ]]; then
        err "--rollback requiere ruta del backup"
        exit 1
    fi
    rollback "$ROLLBACK_DIR"
    smoke_test 1 # allow_spam=1: restored files still contain the injections
    exit 0
fi

preflight
backup_dir=$(backup_pre_step1)
upload_files
smoke_test

ok "Paso 1 finalizado en modo: $MODE"
if [[ "$MODE" == "apply" ]]; then
    ok "Backup pre-step1: $backup_dir"
    ok "Para rollback: $0 --rollback $backup_dir"
fi
+77
View File
@@ -0,0 +1,77 @@
<?php
/**
* Issue #80 — detecta traducciones con fragmentos en español sin traducir.
* Señal: una frase sin traducir queda IDÉNTICA a la del ES original.
* Para cada traducción (en/fr/it/pt) compara sus frases contra el set de frases
* del ES enlazado (Polylang) y calcula el % de caracteres que coinciden literal.
*
* Uso (en contenedor): php detect_untranslated.php [umbral] [status]
* umbral: ratio mínimo para marcar (def 0.12)
* status: draft (def) | publish | any
*/
require "/var/www/html/wp-load.php";
global $wpdb;
// CLI arguments: [1] minimum ratio to flag (default 0.12), [2] post status (default 'draft').
$THRESH = (float) ($argv[1] ?? 0.12);
$STATUS = isset($argv[2]) ? $argv[2] : 'draft';
/**
 * Reduce post HTML to plain text.
 *
 * Closing paragraph/heading tags and <br> become newlines, remaining tags and
 * [bracketed] sequences become spaces, then HTML entities are decoded.
 *
 * @param string $html Raw post content.
 * @return string Plain text.
 */
function norm_text($html) {
    // preg_replace applies the pattern/replacement pairs in order.
    $patterns = [
        '~(?i)</p>|<br\s*/?>|</h[1-6]>~',
        '~<[^>]+>~',
        '~\[[^\]]+\]~',
    ];
    $replacements = ["\n", ' ', ' '];
    $text = preg_replace($patterns, $replacements, $html);
    return html_entity_decode($text, ENT_QUOTES);
}
/**
 * Split post HTML into normalized sentences of at least 40 characters
 * (shorter ones give false positives).
 *
 * Each sentence is whitespace-collapsed, trimmed and lowercased.
 *
 * @param string $html Raw post content.
 * @return array Map of sentence => length in characters.
 */
function sentences($html) {
    $pieces = preg_split('~(?<=[.!?…])\s+|\n+~u', norm_text($html));
    $result = [];
    foreach ($pieces as $piece) {
        $clean = mb_strtolower(trim(preg_replace('~\s+~u', ' ', $piece)));
        $length = mb_strlen($clean);
        if ($length < 40) {
            continue;
        }
        $result[$clean] = $length;
    }
    return $result;
}
// Statuses to scan; 'any' expands to draft + publish.
$statuses = $STATUS === 'any' ? ['draft','publish'] : [$STATUS];
// The status comes from the command line: escape it before building the IN (...) list.
$in = "'" . implode("','", esc_sql($statuses)) . "'";

// Posts whose Polylang language is en/fr/it/pt. Table names come from $wpdb so a
// non-default table prefix also works.
$ids = $wpdb->get_col(
    "SELECT p.ID FROM {$wpdb->posts} p
     JOIN {$wpdb->term_relationships} tr ON tr.object_id=p.ID
     JOIN {$wpdb->term_taxonomy} tt ON tt.term_taxonomy_id=tr.term_taxonomy_id AND tt.taxonomy='language'
     JOIN {$wpdb->terms} t ON t.term_id=tt.term_id AND t.slug IN ('en','fr','it','pt')
     WHERE p.post_type='post' AND p.post_status IN ($in)
     GROUP BY p.ID"
);

$by_lang = []; $offenders = [];
foreach ($ids as $id) {
    $lang = pll_get_post_language($id);
    $es = pll_get_post((int)$id, 'es');
    if (!$es) continue;

    // Load each post once; skip the pair if either one cannot be loaded.
    $tr_post = get_post($id);
    $es_post = get_post($es);
    if (!$tr_post || !$es_post) continue;

    $tr_s = sentences($tr_post->post_content);
    if (!$tr_s) continue;
    $es_s = sentences($es_post->post_content);
    if (!$es_s) continue;

    // Ratio = characters in sentences identical to a Spanish sentence / all sentence characters.
    $total = array_sum($tr_s); $match = 0;
    foreach ($tr_s as $s => $len) if (isset($es_s[$s])) $match += $len;
    $ratio = $total ? $match / $total : 0;

    $by_lang[$lang]['n'] = ($by_lang[$lang]['n'] ?? 0) + 1;
    if ($ratio >= $THRESH) {
        $by_lang[$lang]['bad'] = ($by_lang[$lang]['bad'] ?? 0) + 1;
        $offenders[] = [$id, $lang, $es, round($ratio, 2), $tr_post->post_title];
    }
}

// Worst offenders first.
usort($offenders, fn($a, $b) => $b[3] <=> $a[3]);

echo "=== Traducciones con fragmentos ES (ratio >= $THRESH, status=$STATUS) ===\n";
foreach ($offenders as $o)
    echo sprintf("#%d [%s] ratio=%.2f es=%d %s\n", $o[0], $o[1], $o[3], $o[2], mb_substr($o[4], 0, 45));

echo "\n--- resumen por idioma ---\n";
foreach ($by_lang as $l => $d)
    echo sprintf("%s: %d/%d con fragmentos ES\n", $l, $d['bad'] ?? 0, $d['n']);
echo "TOTAL ofensores: " . count($offenders) . "\n";

// Dump the IDs for reprocessing.
file_put_contents('/tmp/untranslated_ids.txt', implode("\n", array_map(fn($o) => $o[0], $offenders)));
+88
View File
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""
download_lecturas.py — Descarga lecturas bíblicas litúrgicas (texto católico oficial)
desde evangelizo.org en es/en/fr/it/pt, para una fecha litúrgica dada.
Fuente: feed.evangelizo.org/v2/reader.php (lecturas del día, leccionario católico).
Códigos de idioma evangelizo: SP=es, AM=en, FR=fr, IT=it, PT=pt.
Uso: python3 download_lecturas.py 2026-06-21 [--books Jeremías,Romanos]
Salida: JSON a stdout con {libro: {lang: {title, html}}}.
"""
import sys, re, html, json, urllib.request
LANGS = {"SP": "es", "AM": "en", "FR": "fr", "IT": "it", "PT": "pt"}
# nombre del libro por idioma (para casar el bloque correcto)
BOOK_ALIASES = {
"Jeremías": ["Jeremías", "Jeremiah", "Jérémie", "Geremia", "Jeremias"],
"Romanos": ["Romanos", "Romans", "Romains", "Romani"],
"Mateo": ["Mateo", "Matthew", "Matthieu", "Matteo", "Mateus"],
"Marcos": ["Marcos", "Mark", "Marc", "Marco", "Marcos"],
"Lucas": ["Lucas", "Luke", "Luc", "Luca", "Lucas"],
"Juan": ["Juan", "John", "Jean", "Giovanni", "João"],
}
REF_RE = re.compile(r"(\d{1,3}\s*,[\d.\-\s]+)\.?$")
def fetch(date, lang_code):
    """Download the readings feed for one date and language as clean text lines.

    ``<br>`` tags become line breaks, every other tag is removed and HTML
    entities are decoded. Returns the non-empty lines, stripped.
    """
    endpoint = (
        f"https://feed.evangelizo.org/v2/reader.php?date={date}&lang={lang_code}&type=all"
    )
    request = urllib.request.Request(endpoint, headers={"User-Agent": "fea-lecturas/1.0"})
    with urllib.request.urlopen(request, timeout=30) as response:
        markup = response.read().decode("utf-8", "replace")

    with_breaks = re.sub(r"<br\s*/?>", "\n", markup)
    without_tags = re.sub(r"<[^>]+>", "", with_breaks)
    text = html.unescape(without_tags)

    lines = []
    for chunk in text.split("\n"):
        stripped = chunk.strip()
        if stripped:
            lines.append(stripped)
    return lines
def is_header(line):
    """Return True when *line* is shorter than 110 characters and REF_RE finds a match in it."""
    if len(line) >= 110:
        return False
    return REF_RE.search(line) is not None
def parse_blocks(lines):
    """Group feed lines into ``[(header, [paragraphs])]``.

    The first line (the day's title) is skipped, and so is any text that
    appears before the first header.
    """
    blocks = []
    header = None
    paragraphs = []
    for line in lines[1:]:
        if not is_header(line):
            # Body text only counts once a header has been seen.
            if header:
                paragraphs.append(line)
            continue
        if header:
            blocks.append((header, paragraphs))
        header = line
        paragraphs = []
    if header:
        blocks.append((header, paragraphs))
    return blocks
def short_title(header):
    """Shorten a reading header to ``BOOK chapter,verses``.

    'Carta de San Pablo a los Romanos 5,12-15.' -> 'ROMANOS 5,12-15'.
    A header with no trailing reference is returned with trailing dots removed.
    """
    found = re.search(r"([A-Za-zÀ-ÿ]+)\s+(\d{1,3}\s*,[\d.\-\s]+)\.?$", header)
    if found is None:
        return header.rstrip(".")
    book, reference = found.groups()
    # Drop all whitespace inside the reference, then any trailing dot it captured.
    compact = re.sub(r"\s+", "", reference).rstrip(".")
    return f"{book.upper()} {compact}"
def main():
    """Download the requested books in every language and print them as JSON.

    Command line: ``DATE [--books Book1,Book2]`` (default books: Jeremías,
    Romanos). Writes ``{book: {lang: {title, html, header}}}`` to stdout.
    A missing date or ``--books`` value produces a usage error instead of a
    bare IndexError.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Descarga lecturas litúrgicas de evangelizo.org en es/en/fr/it/pt."
    )
    parser.add_argument("date", help="fecha litúrgica, p.ej. 2026-06-21")
    parser.add_argument(
        "--books",
        default="Jeremías,Romanos",
        help="libros separados por comas (def: Jeremías,Romanos)",
    )
    args = parser.parse_args()

    # Ignore empty names ("a,,b"): an empty alias is a substring of every header.
    books = [name.strip() for name in args.books.split(",") if name.strip()]

    result = {b: {} for b in books}
    for code, wl in LANGS.items():
        blocks = parse_blocks(fetch(args.date, code))
        for b in books:
            aliases = [a.lower() for a in BOOK_ALIASES.get(b, [b])]
            for header, paras in blocks:
                lowered = header.lower()
                if any(a in lowered for a in aliases):
                    # fetch() decodes entities, so re-escape &, < and > before wrapping in <p>.
                    htmlc = "".join(f"<p>{html.escape(p, quote=False)}</p>\n" for p in paras)
                    result[b][wl] = {"title": short_title(header), "html": htmlc, "header": header}
                    break  # first matching block wins
    json.dump(result, sys.stdout, ensure_ascii=False, indent=2)
if __name__ == "__main__":
main()
+229
View File
@@ -0,0 +1,229 @@
#!/usr/bin/env python3
"""
export_cat_translations.py
Exports Polylang category translation data from local DB and generates SQL
for production. Handles:
1. wp_terms for translated categories
2. wp_term_taxonomy (category + language taxonomy rows)
3. wp_term_taxonomy (term_translations groups)
4. wp_term_relationships (post→translated category assignments)
"""
import pymysql, re
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
          password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
          cursorclass=pymysql.cursors.DictCursor)

# Translated category term_ids on local (ES parent → lang: local_term_id)
TRANS_CATS = {
    6:    {'en': 3077, 'fr': 3083, 'it': 3089, 'pt': 3095},
    21:   {'en': 3080, 'fr': 3086, 'it': 3092, 'pt': 3098},
    1646: {'en': 2982, 'fr': 3032, 'it': 3048, 'pt': 3063},
    1647: {'en': 2986, 'fr': 3035, 'it': 3051, 'pt': 3066},
    1648: {'en': 2971, 'fr': 3029, 'it': 3045, 'pt': 3060},
    1650: {'en': 2964, 'fr': 3023, 'it': 3039, 'pt': 3054},
}

OUTPUT_PATH = '/tmp/cat_translations_prod.sql'
BATCH_SIZE = 500  # rows per multi-value INSERT into wp_term_relationships

# All translated term_ids, and the Spanish parents they belong to.
all_trans_ids = sorted({tid for mapping in TRANS_CATS.values() for tid in mapping.values()})
all_es_ids = sorted(TRANS_CATS)


def _sql_str(value):
    """Return *value* escaped for use inside a single-quoted MySQL string literal.

    Backslashes are doubled as well as quotes, since MySQL treats a backslash
    as an escape character unless NO_BACKSLASH_ESCAPES is set. Empty or NULL
    values become an empty string.
    """
    if not value:
        return ''
    return str(value).replace('\\', '\\\\').replace("'", "''")


def _id_list(ids):
    """Join ids into the comma-separated body of an SQL ``IN (...)`` list."""
    return ','.join(str(i) for i in ids)


def _term_inserts(term_rows):
    """Build one ``INSERT IGNORE INTO wp_terms`` statement per row."""
    return [
        f"INSERT IGNORE INTO wp_terms (term_id, name, slug, term_group) "
        f"VALUES ({r['term_id']}, '{_sql_str(r['name'])}', '{_sql_str(r['slug'])}', {r['term_group']});"
        for r in term_rows
    ]


def _taxonomy_inserts(tt_rows, taxonomy):
    """Build one ``INSERT IGNORE INTO wp_term_taxonomy`` statement per row."""
    return [
        f"INSERT IGNORE INTO wp_term_taxonomy "
        f"(term_taxonomy_id, term_id, taxonomy, description, parent, count) "
        f"VALUES ({r['term_taxonomy_id']}, {r['term_id']}, '{taxonomy}', "
        f"'{_sql_str(r['description'])}', {r['parent']}, {r['count']});"
        for r in tt_rows
    ]


def _relationship_inserts(rel_rows):
    """Build batched ``INSERT IGNORE INTO wp_term_relationships`` statements."""
    head = "INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id, term_order) VALUES "
    values = [f"({r['object_id']},{r['term_taxonomy_id']},{r['term_order']})" for r in rel_rows]
    return [
        head + ','.join(values[start:start + BATCH_SIZE]) + ";"
        for start in range(0, len(values), BATCH_SIZE)
    ]


def _references_tracked_term(description, term_ids):
    """Return True when a term_translations description mentions any of *term_ids*."""
    desc = description or ''
    stripped = desc.strip()
    return any(f'i:{tid};' in desc or f'i:{tid}' == stripped for tid in term_ids)


# ─── Read everything from the local DB ───────────────────────────────────────
ids_str = _id_list(all_trans_ids)
tt_term_rows, rel_rows, lang_rel_rows = [], [], []

db = pymysql.connect(**DB)
try:
    c = db.cursor()

    # 1. wp_terms for the translated categories
    c.execute(f"SELECT term_id, name, slug, term_group FROM wp_terms WHERE term_id IN ({ids_str})")
    rows = c.fetchall()

    # 2. wp_term_taxonomy rows with taxonomy='category'
    c.execute(f"""
        SELECT term_taxonomy_id, term_id, taxonomy, description, parent, count
        FROM wp_term_taxonomy
        WHERE term_id IN ({ids_str}) AND taxonomy='category'
    """)
    cat_rows = c.fetchall()

    # 3. wp_term_taxonomy rows with taxonomy='language'
    c.execute(f"""
        SELECT term_taxonomy_id, term_id, taxonomy, description, parent, count
        FROM wp_term_taxonomy
        WHERE term_id IN ({ids_str}) AND taxonomy='language'
    """)
    lang_rows = c.fetchall()

    # 4. term_translations groups whose description references any of our ES or
    #    translated term_ids
    c.execute("""
        SELECT DISTINCT tt.term_taxonomy_id, tt.term_id, tt.taxonomy,
               tt.description, tt.parent, tt.count
        FROM wp_term_taxonomy tt
        WHERE tt.taxonomy = 'term_translations'
    """)
    tracked_ids = all_es_ids + all_trans_ids
    relevant_tt = [
        r for r in c.fetchall()
        if _references_tracked_term(r['description'], tracked_ids)
    ]

    # 5. wp_terms rows behind those term_translations groups
    tt_term_ids = [r['term_id'] for r in relevant_tt]
    if tt_term_ids:
        c.execute(
            "SELECT term_id, name, slug, term_group FROM wp_terms "
            f"WHERE term_id IN ({_id_list(tt_term_ids)})"
        )
        tt_term_rows = c.fetchall()

    # 6. post → translated category relationships
    cat_tt_ids = [r['term_taxonomy_id'] for r in cat_rows]
    if cat_tt_ids:
        c.execute(f"""
            SELECT object_id, term_taxonomy_id, term_order
            FROM wp_term_relationships
            WHERE term_taxonomy_id IN ({_id_list(cat_tt_ids)})
            ORDER BY term_taxonomy_id, object_id
        """)
        rel_rows = c.fetchall()

    # 7. translated term → language taxonomy relationships
    lang_tt_ids = [r['term_taxonomy_id'] for r in lang_rows]
    if lang_tt_ids:
        c.execute(f"""
            SELECT object_id, term_taxonomy_id, term_order
            FROM wp_term_relationships
            WHERE term_taxonomy_id IN ({_id_list(lang_tt_ids)})
        """)
        lang_rel_rows = c.fetchall()
finally:
    # Close the connection even when a query fails.
    db.close()

# ─── Assemble the SQL in output order ────────────────────────────────────────
sql_lines = [
    "-- Polylang category translations export",
    "-- Generated by export_cat_translations.py",
    "-- Run on production AFTER verifying no term_id conflicts",
    "",
    "SET NAMES utf8mb4;",
    "SET foreign_key_checks = 0;",
    "",
]

sql_lines.append("-- 1. wp_terms (translated category names/slugs)")
sql_lines.extend(_term_inserts(rows))
sql_lines.append("")

# The wp_terms rows of the translation groups go before any wp_term_taxonomy row.
if tt_term_ids:
    sql_lines.append("-- 1b. wp_terms for term_translations taxonomy")
    sql_lines.extend(_term_inserts(tt_term_rows))
    sql_lines.append("")

sql_lines.append("-- 2. wp_term_taxonomy (taxonomy='category' for translated terms)")
sql_lines.extend(_taxonomy_inserts(cat_rows, 'category'))
sql_lines.append("")

sql_lines.append("-- 3. wp_term_taxonomy (taxonomy='language' for translated terms)")
sql_lines.extend(_taxonomy_inserts(lang_rows, 'language'))
sql_lines.append("")

sql_lines.append("-- 4. wp_term_taxonomy (taxonomy='term_translations' groups for our categories)")
for r in relevant_tt:
    # Unlike the other sections, existing groups are overwritten rather than ignored.
    sql_lines.append(
        f"INSERT INTO wp_term_taxonomy "
        f"(term_taxonomy_id, term_id, taxonomy, description, parent, count) "
        f"VALUES ({r['term_taxonomy_id']}, {r['term_id']}, 'term_translations', "
        f"'{_sql_str(r['description'])}', {r['parent']}, {r['count']}) "
        f"ON DUPLICATE KEY UPDATE description=VALUES(description), count=VALUES(count);"
    )
sql_lines.append("")

if cat_tt_ids:
    sql_lines.append("-- 5. wp_term_relationships (posts → translated categories)")
    sql_lines.append(f"-- {len(rel_rows)} relationships")
    sql_lines.extend(_relationship_inserts(rel_rows))
    sql_lines.append("")

if lang_tt_ids:
    sql_lines.append("-- 6. wp_term_relationships (translated category terms → language taxonomy)")
    sql_lines.append(f"-- {len(lang_rel_rows)} relationships")
    sql_lines.extend(_relationship_inserts(lang_rel_rows))
    sql_lines.append("")

sql_lines.append("SET foreign_key_checks = 1;")
sql_lines.append("")
sql_lines.append("-- Done.")

output = '\n'.join(sql_lines)
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    f.write(output)

print(f"Written to {OUTPUT_PATH}")
print(f"  {len(rows)} translated terms (wp_terms)")
print(f"  {len(cat_rows)} category taxonomy rows")
print(f"  {len(lang_rows)} language taxonomy rows")
print(f"  {len(relevant_tt)} term_translations groups")
if cat_tt_ids:
    print(f"  {len(rel_rows)} post→category relationships")
if lang_tt_ids:
    print(f"  {len(lang_rel_rows)} term→language relationships")
+200
View File
@@ -0,0 +1,200 @@
#!/usr/bin/env python3
"""
export_translations.py
Genera SQL para importar todos los posts traducidos (ID > 42760)
de la DB local a producción, con remapeo correcto de language IDs (FR↔PT).
"""
import pymysql
# Connection settings for the local (source) database. DictCursor makes every
# fetched row a dict keyed by column name; the export code below indexes rows
# that way (p['ID'], r['object_id'], ...).
# NOTE(review): the host is a hard-coded IP — presumably the local Docker
# network address of the MySQL container; confirm before running elsewhere.
DB_LOCAL = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
# Local lang term_taxonomy_id → Production term_taxonomy_id
# Local: en=1407, es=1404, fr=1419, it=1415, pt=1411
# Prod: en=1407, es=1404, fr=1411, it=1415, pt=1419
# Only FR and PT differ between the two databases (their ids are swapped).
LANG_MAP = {1407: 1407, 1404: 1404, 1419: 1411, 1415: 1415, 1411: 1419}
def esc(s):
    """Return *s* as a single-quoted MySQL string literal, or ``NULL`` for None.

    Every non-None value is passed through ``str()`` and quoted, so numbers and
    datetimes are emitted as quoted strings too.

    Besides backslash and single quote, NUL and Ctrl-Z are escaped as ``\\0``
    and ``\\Z``: the generated file is meant to be fed to the mysql client,
    which can mis-handle those two bytes when they appear raw in a statement.
    """
    if s is None:
        return 'NULL'
    text = str(s)
    # Backslash must go first so the escapes added afterwards are not doubled.
    for raw, escaped in (('\\', '\\\\'), ("'", "\\'"), ('\0', '\\0'), ('\x1a', '\\Z')):
        text = text.replace(raw, escaped)
    return "'" + text + "'"
db = pymysql.connect(**DB_LOCAL)
c = db.cursor()

# Every generated SQL statement/comment is collected here, one entry per line.
lines = [
    "SET NAMES utf8mb4;",
    "SET foreign_key_checks = 0;",
    "",
]

# ── 1. wp_posts ───────────────────────────────────────────────────────────────
lines += [
    "-- ============================================================",
    "-- 1. POSTS (ID > 42760)",
    "-- ============================================================",
]
c.execute("""
SELECT ID, post_author, post_date, post_date_gmt, post_content, post_title,
post_excerpt, post_status, comment_status, ping_status, post_password,
post_name, to_ping, pinged, post_modified, post_modified_gmt,
post_content_filtered, post_parent, guid, menu_order, post_type,
post_mime_type, comment_count
FROM wp_posts
WHERE ID > 42760 AND post_status='publish' AND post_type='post'
ORDER BY ID
""")
posts = c.fetchall()
lines.append(f"-- {len(posts)} posts")

# Column order of the generated INSERT; mirrors the SELECT list above.
cols = ['ID','post_author','post_date','post_date_gmt','post_content','post_title',
        'post_excerpt','post_status','comment_status','ping_status','post_password',
        'post_name','to_ping','pinged','post_modified','post_modified_gmt',
        'post_content_filtered','post_parent','guid','menu_order','post_type',
        'post_mime_type','comment_count']
column_list = ', '.join(cols)
for p in posts:
    vals = ', '.join(esc(p[col]) for col in cols)
    lines.append(f"INSERT IGNORE INTO wp_posts ({column_list}) VALUES ({vals});")
lines.append("")
# ── 2. wp_term_relationships — language ──────────────────────────────────────
# Emits one INSERT per (post, language) relationship, translating the local
# language term_taxonomy_id to its production value through LANG_MAP.
lines.append("-- ============================================================")
lines.append("-- 2. LANGUAGE ASSIGNMENTS (remapped FR↔PT)")
lines.append("-- ============================================================")
post_ids = [p['ID'] for p in posts]
# `IN ()` is a MySQL syntax error; `IN (NULL)` is valid and matches no row, so
# an export with zero posts still runs (here and in the later sections that
# reuse `fmt`).
fmt = ','.join(str(i) for i in post_ids) or 'NULL'
# Take the language ids from LANG_MAP so the filter and the remap table cannot
# drift apart (every row fetched is guaranteed to have a LANG_MAP entry).
local_lang_ids = ','.join(str(i) for i in sorted(LANG_MAP))
c.execute(f"""
SELECT object_id, term_taxonomy_id
FROM wp_term_relationships
WHERE object_id IN ({fmt})
AND term_taxonomy_id IN ({local_lang_ids})
""")
lang_rels = c.fetchall()
lines.append(f"-- {len(lang_rels)} language assignments")
for r in lang_rels:
    prod_ttid = LANG_MAP[r['term_taxonomy_id']]
    lines.append(
        f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id, term_order) "
        f"VALUES ({r['object_id']}, {prod_ttid}, 0);"
    )
lines.append("")
# ── 3. wp_terms + wp_term_taxonomy — post_translations groups ─────────────────
# Every post_translations term that has at least one exported post as a member
# is emitted as a wp_terms row plus its wp_term_taxonomy row.
lines += [
    "-- ============================================================",
    "-- 3. POST_TRANSLATIONS GROUPS (term_taxonomy_id 2705-3043)",
    "-- ============================================================",
]
c.execute(f"""
SELECT DISTINCT tt.term_taxonomy_id, tt.term_id, tt.taxonomy, tt.description,
tt.parent, tt.count, t.name, t.slug, t.term_group
FROM wp_term_taxonomy tt
JOIN wp_terms t ON tt.term_id=t.term_id
JOIN wp_term_relationships tr ON tt.term_taxonomy_id=tr.term_taxonomy_id
WHERE tt.taxonomy='post_translations'
AND tr.object_id IN ({fmt})
ORDER BY tt.term_taxonomy_id
""")
pt_groups = c.fetchall()
lines.append(f"-- {len(pt_groups)} translation groups")
for g in pt_groups:
    term_stmt = (
        f"INSERT IGNORE INTO wp_terms (term_id, name, slug, term_group) "
        f"VALUES ({g['term_id']}, {esc(g['name'])}, {esc(g['slug'])}, {g['term_group']});"
    )
    taxonomy_stmt = (
        f"INSERT IGNORE INTO wp_term_taxonomy (term_taxonomy_id, term_id, taxonomy, description, parent, count) "
        f"VALUES ({g['term_taxonomy_id']}, {g['term_id']}, 'post_translations', "
        f"{esc(g['description'])}, {g['parent']}, {g['count']});"
    )
    lines.extend((term_stmt, taxonomy_stmt))
lines.append("")
# ── 4. wp_term_relationships — post_translations (ALL members of each group) ──
# The query filters by group id only, so every member of a group is emitted,
# not just the posts selected in section 1.
lines.append("-- ============================================================")
lines.append("-- 4. POST_TRANSLATIONS RELATIONSHIPS (all group members)")
lines.append("-- ============================================================")
pt_ttids = [g['term_taxonomy_id'] for g in pt_groups]
# `IN ()` is a MySQL syntax error; `IN (NULL)` is valid and matches no row, so
# the export does not crash when no translation group was found.
fmt_tt = ','.join(str(i) for i in pt_ttids) or 'NULL'
c.execute(f"""
SELECT object_id, term_taxonomy_id
FROM wp_term_relationships
WHERE term_taxonomy_id IN ({fmt_tt})
ORDER BY term_taxonomy_id, object_id
""")
pt_rels = c.fetchall()
lines.append(f"-- {len(pt_rels)} translation group relationships")
for r in pt_rels:
    lines.append(
        f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id, term_order) "
        f"VALUES ({r['object_id']}, {r['term_taxonomy_id']}, 0);"
    )
lines.append("")
# ── 5. wp_term_relationships — categories ─────────────────────────────────────
# Category links of the exported posts; term_taxonomy_ids are copied unchanged.
lines += [
    "-- ============================================================",
    "-- 5. CATEGORY ASSIGNMENTS",
    "-- ============================================================",
]
c.execute(f"""
SELECT tr.object_id, tr.term_taxonomy_id
FROM wp_term_relationships tr
JOIN wp_term_taxonomy tt ON tr.term_taxonomy_id=tt.term_taxonomy_id
WHERE tr.object_id IN ({fmt})
AND tt.taxonomy='category'
ORDER BY tr.object_id
""")
cat_rels = c.fetchall()
lines.append(f"-- {len(cat_rels)} category assignments")
for r in cat_rels:
    object_id, ttid = r['object_id'], r['term_taxonomy_id']
    lines.append(
        "INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id, term_order) "
        f"VALUES ({object_id}, {ttid}, 0);"
    )
lines.append("")
# ── 6. Recount + footer, then write the file and print a summary ─────────────
# The UPDATE recomputes wp_term_taxonomy.count from the relationships of
# published posts for the three taxonomies touched by this export.
lines += [
    "-- ============================================================",
    "-- 6. UPDATE term_taxonomy counts",
    "-- ============================================================",
    """
UPDATE wp_term_taxonomy tt
SET count = (
SELECT COUNT(*) FROM wp_term_relationships tr
JOIN wp_posts p ON tr.object_id=p.ID
WHERE tr.term_taxonomy_id=tt.term_taxonomy_id
AND p.post_status='publish'
)
WHERE tt.taxonomy IN ('language','post_translations','category');
""",
    "SET foreign_key_checks = 1;",
    f"-- Export complete: {len(posts)} posts, {len(pt_groups)} translation groups",
]
db.close()

output = '\n'.join(lines)
with open('/tmp/translations_export.sql', 'w', encoding='utf-8') as f:
    f.write(output)

print("SQL written to /tmp/translations_export.sql")
summary = (
    (" Posts: ", len(posts)),
    (" Language rels: ", len(lang_rels)),
    (" Translation groups: ", len(pt_groups)),
    (" Group rels: ", len(pt_rels)),
    (" Category rels: ", len(cat_rels)),
)
for label, amount in summary:
    print(f"{label}{amount}")
# Size is measured in characters of the generated text, not encoded bytes.
print(f" File size: {len(output)//1024} KB")
+142
View File
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Recorta avatares cuadrados centrados en la cara, usando Haar cascade de OpenCV.
Uso:
python3 face_crop_avatar.py <src> <dst> [--size 256] [--padding 0.6]
python3 face_crop_avatar.py --batch <src_dir> <dst_dir> [--size 256]
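Strategy:
- Detect frontal faces (haarcascade_frontalface_default); if none is found, try
the profile cascade on the image and then on its horizontal mirror.
- If a face is found: take the largest one, expand it with padding (a factor of
the face side) and crop a square centred on it; the square is shrunk rather
than allowed to leave the image.
- If no face is found: fallback square crop — anchored to the LEFT edge for
landscape images (width > 1.3 × height), otherwise anchored to the TOP edge and
centred horizontally.
- Resize the square to `size x size`.
Keeps the natural aspect ratio — does NOT stretch.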
"""
import argparse, os, sys
import cv2
# Default output side in pixels (overridable with --size).
DEFAULT_SIZE = 256
DEFAULT_PADDING = 0.6 # factor of the face side added around the face
# Haar cascade model files shipped with OpenCV (cv2.data.haarcascades is the
# directory that contains them). Both classifiers are loaded once at import
# time and reused by detect_face().
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
profile_cascade_path = cv2.data.haarcascades + 'haarcascade_profileface.xml'
_face_cascade = cv2.CascadeClassifier(cascade_path)
_profile_cascade = cv2.CascadeClassifier(profile_cascade_path)
def detect_face(gray):
    """Return (x, y, w, h) of the largest detected face, or None.

    Detection is attempted in three passes and stops at the first pass that
    finds anything: frontal cascade, profile cascade, profile cascade on the
    horizontally mirrored image. A hit on the mirrored image is mapped back to
    the coordinates of the unflipped image.
    """
    def _largest(boxes):
        # Rank candidate boxes by area (w * h).
        return max(boxes, key=lambda r: r[2] * r[3])

    frontal = _face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )
    if len(frontal):
        return _largest(frontal)

    profile = _profile_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )
    if len(profile):
        return _largest(profile)

    # Same profile cascade on the mirrored image (flip code 1 = horizontal).
    mirrored = cv2.flip(gray, 1)
    mirrored_hits = _profile_cascade.detectMultiScale(mirrored, 1.1, 5, minSize=(30, 30))
    if not len(mirrored_hits):
        return None
    x, y, w, h = _largest(mirrored_hits)
    return (gray.shape[1] - x - w, y, w, h)
def square_crop_box(face, img_w, img_h, padding):
    """Return (x1, y1, x2, y2) of a square box centred on *face*.

    face:    (x, y, w, h) of the detected face.
    img_w, img_h: image size in pixels.
    padding: margin added on every side, as a factor of the larger face side.

    When the padded square would not fit while keeping the face centred, the
    side is REDUCED instead of letting the box leave the image.

    The side is rounded down to an integer once and both far edges are derived
    from it, so the box is always exactly square. (Truncating the four edges
    independently can give a width and height that differ by one pixel, which
    the later resize would turn into a slight stretch.)
    """
    x, y, w, h = face
    cx, cy = x + w / 2, y + h / 2
    ideal = max(w, h) * (1 + 2 * padding)
    # Largest side that keeps the face centred and the box inside the image.
    max_x = 2 * min(cx, img_w - cx)
    max_y = 2 * min(cy, img_h - cy)
    side = int(min(ideal, max_x, max_y))
    x1 = int(cx - side / 2)
    y1 = int(cy - side / 2)
    return x1, y1, x1 + side, y1 + side
def fallback_box(img_w, img_h):
    """Return the square crop box (x1, y1, x2, y2) used when no face is found.

    - Landscape (img_w > img_h * 1.3): full-height square anchored to the left
      edge.
    - Otherwise: square of side min(img_w, img_h), anchored to the top edge and
      centred horizontally.
    """
    is_landscape = img_w > img_h * 1.3
    if is_landscape:
        return 0, 0, img_h, img_h

    edge = min(img_w, img_h)
    left = max(0, int(img_w / 2 - edge / 2))
    return left, 0, left + edge, edge
def process(src_path, dst_path, size=DEFAULT_SIZE, padding=DEFAULT_PADDING):
    """Crop *src_path* to a square avatar and write it to *dst_path* as JPEG.

    Returns (ok, info): on success ok is True and info is 'face' or 'fallback'
    (which crop strategy was used); on failure ok is False and info is a short
    reason ('imread fail' or 'imwrite fail').
    """
    img = cv2.imread(src_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        return False, 'imread fail'

    # IMREAD_UNCHANGED keeps 16-bit depth; the alpha maths and the Haar
    # cascades below expect 8-bit data, so scale 0..65535 down to 0..255.
    if img.dtype == 'uint16':
        img = (img // 257).astype('uint8')

    if img.ndim == 3 and img.shape[2] == 4:
        # Composite the alpha channel over a white background.
        bgr = img[:, :, :3].copy()
        alpha = img[:, :, 3] / 255.0
        white = (1 - alpha[:, :, None]) * 255
        img = (bgr * alpha[:, :, None] + white).astype('uint8')
    elif img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    face = detect_face(gray)
    if face is not None:
        x1, y1, x2, y2 = square_crop_box(face, w, h, padding)
        used = 'face'
    else:
        x1, y1, x2, y2 = fallback_box(w, h)
        used = 'fallback'

    crop = img[y1:y2, x1:x2]
    # No alpha handling is needed here: any alpha channel was composited above.
    resized = cv2.resize(crop, (size, size), interpolation=cv2.INTER_AREA)

    # dirname is '' for a bare filename, and os.makedirs('') raises.
    out_dir = os.path.dirname(dst_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(dst_path, resized, [cv2.IMWRITE_JPEG_QUALITY, 88]):
        return False, 'imwrite fail'
    return True, used
def main():
    """Command-line entry point.

    Single mode: ``src`` and ``dst`` are files; prints ``OK``/``FAIL`` with the
    strategy or failure reason and exits with status 1 on failure.
    Batch mode (``--batch``): ``src`` and ``dst`` are directories; every
    .png/.jpg/.jpeg/.webp file in ``src`` is written to ``dst`` as ``<stem>.jpg``
    and a face/fallback/fail tally is printed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('src')
    ap.add_argument('dst')
    ap.add_argument('--size', type=int, default=DEFAULT_SIZE)
    ap.add_argument('--padding', type=float, default=DEFAULT_PADDING)
    ap.add_argument('--batch', action='store_true', help='src y dst son directorios')
    args = ap.parse_args()

    if args.batch:
        extensions = ('.png', '.jpg', '.jpeg', '.webp')
        # Sorted so runs are reproducible; os.listdir order is arbitrary.
        files = sorted(f for f in os.listdir(args.src) if f.lower().endswith(extensions))
        stats = {'face': 0, 'fallback': 0, 'fail': 0}
        for fn in files:
            ok, info = process(
                os.path.join(args.src, fn),
                os.path.join(args.dst, os.path.splitext(fn)[0] + '.jpg'),
                size=args.size, padding=args.padding,
            )
            stats['fail' if not ok else info] += 1
        print(f'face: {stats["face"]}, fallback: {stats["fallback"]}, fail: {stats["fail"]}')
        return

    ok, info = process(args.src, args.dst, size=args.size, padding=args.padding)
    # The original message ran `info` and the destination path together.
    print(f'{"OK" if ok else "FAIL"} {info} -> {args.dst}')
    if not ok:
        sys.exit(1)
if __name__ == '__main__':
main()
+142
View File
@@ -0,0 +1,142 @@
<?php
/**
* Fe Adulta — Homepage template
* Cargado via template_include filter desde fea-homepage.php
*/
if (!defined('ABSPATH')) exit;
get_header();
?>
<style>
/* ── Reset dentro de la homepage ── */
.fea-homepage {
max-width: 960px;
margin: 0 auto;
padding: 2rem 1.25rem 4rem;
font-family: inherit;
}
/* ── Hero: Carta de la semana ── */
.fea-hero {
border-bottom: 2px solid #111;
padding-bottom: 2rem;
margin-bottom: 2.5rem;
}
.fea-hero-link {
display: block;
text-decoration: none;
color: inherit;
}
.fea-hero-link:hover .fea-hero-title {
text-decoration: underline;
text-underline-offset: 4px;
}
.fea-section-label {
display: inline-block;
font-size: 0.75rem;
font-weight: 600;
letter-spacing: 0.08em;
text-transform: uppercase;
color: #888;
margin-bottom: 0.75rem;
}
.fea-hero-title {
font-size: clamp(1.6rem, 4vw, 2.4rem);
font-weight: 700;
line-height: 1.2;
margin: 0 0 1rem;
color: #111;
}
.fea-hero-meta {
display: flex;
align-items: center;
gap: 0.6rem;
font-size: 0.875rem;
color: #666;
}
.fea-hero-meta .fea-avatar {
border-radius: 50%;
flex-shrink: 0;
}
/* ── Secciones ── */
.fea-section {
margin-bottom: 3rem;
}
.fea-section-title {
font-size: 0.75rem;
font-weight: 600;
letter-spacing: 0.08em;
text-transform: uppercase;
color: #888;
margin: 0 0 1.25rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid #e5e5e5;
}
/* ── Grid de artículos ── */
.fea-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
gap: 1.5rem;
}
.fea-grid--4 {
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
}
/* ── Tarjeta ── */
.fea-card {
border-bottom: 1px solid #e5e5e5;
padding-bottom: 1.25rem;
}
.fea-card-meta {
display: flex;
align-items: center;
gap: 0.5rem;
margin-bottom: 0.5rem;
}
.fea-avatar {
border-radius: 50%;
flex-shrink: 0;
width: 36px;
height: 36px;
}
.fea-card-author {
font-size: 0.8rem;
font-weight: 600;
color: #444;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.fea-card-title {
font-size: 0.975rem;
font-weight: 600;
line-height: 1.35;
margin: 0;
}
.fea-card-title a {
text-decoration: none;
color: #111;
}
.fea-card-title a:hover {
text-decoration: underline;
text-underline-offset: 3px;
}
/* ── Mobile ── */
@media (max-width: 600px) {
.fea-grid,
.fea-grid--4 {
grid-template-columns: 1fr;
}
}
</style>
<main id="wp--skip-link--target">
<div class="fea-homepage">
<?php /* NOTE(review): fea_homepage_content() is not defined in this template; it must be declared by whatever code loads it — confirm it exists, otherwise this call is a fatal error. */ echo fea_homepage_content(); ?>
</div>
</main>
<?php get_footer(); ?>
+425
View File
@@ -0,0 +1,425 @@
<?php
/**
* Plugin Name: Fe Adulta — Homepage
* Description: Portada con selección editorial via ACF.
* Version: 1.3
*/
// ── Author profile photo via ACF (field attached to the user form) ────────
// Registers a local ACF field group with a single image field named
// `foto_perfil`, shown on all user forms.
add_action('acf/init', function() {
    if (!function_exists('acf_add_local_field_group')) {
        return;
    }

    $photo_field = [
        'key' => 'field_user_foto_perfil',
        'label' => 'Foto',
        'name' => 'foto_perfil',
        'type' => 'image',
        'instructions' => 'Sube una foto cuadrada del autor (mínimo 100×100px).',
        'return_format' => 'url',
        'preview_size' => 'thumbnail',
        'upload_folder' => 'autores',
    ];

    $on_every_user_form = [
        ['param' => 'user_form', 'operator' => '==', 'value' => 'all'],
    ];

    acf_add_local_field_group([
        'key' => 'group_user_foto_perfil',
        'title' => 'Foto de perfil',
        'fields' => [$photo_field],
        'location' => [$on_every_user_form],
    ]);
});
// Use the author's uploaded photo instead of the Gravatar when one exists.
// The user meta `foto_perfil` (the ACF image field) is read as an attachment ID.
//
// $id_or_email may be a user ID, a WP_User, a WP_Post (its author is used),
// a WP_Comment (its registered user, if any) or an e-mail address. Anything
// that cannot be resolved to a user, or a user without a usable photo, keeps
// the URL WordPress computed.
add_filter('get_avatar_url', function($url, $id_or_email, $args) {
    $user_id = 0;
    if (is_numeric($id_or_email)) {
        $user_id = (int) $id_or_email;
    } elseif ($id_or_email instanceof WP_User) {
        $user_id = (int) $id_or_email->ID;
    } elseif ($id_or_email instanceof WP_Post) {
        $user_id = (int) $id_or_email->post_author;
    } elseif ($id_or_email instanceof WP_Comment) {
        $user_id = (int) $id_or_email->user_id;
    } elseif (is_string($id_or_email)) {
        $user = get_user_by('email', $id_or_email);
        if ($user) {
            $user_id = (int) $user->ID;
        }
    }
    if ($user_id <= 0) {
        return $url;
    }

    $attach_id = (int) get_user_meta($user_id, 'foto_perfil', true);
    if ($attach_id <= 0) {
        return $url;
    }

    // Ask for an image close to the requested avatar size (2x for high-density
    // screens) instead of always serving the full-size original; WordPress
    // falls back to the original when no suitable intermediate size exists.
    $px = isset($args['size']) ? 2 * (int) $args['size'] : 0;
    $size = $px > 0 ? [$px, $px] : 'full';

    $foto = wp_get_attachment_image_url($attach_id, $size);
    return $foto ? $foto : $url;
}, 10, 3);
// ── Sort the ACF relationship picker results by date on the front-page fields ─
// Both relationship fields get the same query tweak: newest posts first.
(static function (): void {
    $newest_first = static function ($args) {
        return array_merge($args, ['orderby' => 'date', 'order' => 'DESC']);
    };
    add_filter('acf/fields/relationship/query/key=field_portada_articulos', $newest_first);
    add_filter('acf/fields/relationship/query/key=field_portada_multimedia', $newest_first);
})();
// ── ACF fields for the front page ─────────────────────────────────────────
// Registers the "Contenido de la portada" group on the page configured as the
// static front page (option `page_on_front`). It holds two relationship
// fields: up to 9 articles and up to 4 multimedia posts.
add_action('acf/init', function() {
    if (!function_exists('acf_add_local_field_group')) {
        return;
    }

    $front_page_id = (int) get_option('page_on_front');

    $articles_field = [
        'key' => 'field_portada_articulos',
        'label' => 'Artículos seleccionados',
        'name' => 'portada_articulos',
        'type' => 'relationship',
        'instructions' => 'Elige los artículos que aparecerán en la portada esta semana (máx. 9). Puedes buscar por título.',
        'post_type' => ['post'],
        'post_status' => ['publish', 'draft'],
        'filters' => ['search', 'taxonomy'],
        'elements' => ['featured_image'],
        'min' => 0,
        'max' => 9,
        'return_format' => 'object',
        'query_args' => ['orderby' => 'date', 'order' => 'DESC'],
    ];

    $multimedia_field = [
        'key' => 'field_portada_multimedia',
        'label' => 'Multimedia seleccionado',
        'name' => 'portada_multimedia',
        'type' => 'relationship',
        'instructions' => 'Elige los vídeos o audios para la portada (máx. 4).',
        'post_type' => ['post'],
        'post_status' => ['publish', 'draft'],
        'filters' => ['search'],
        'elements' => ['featured_image'],
        'min' => 0,
        'max' => 4,
        'return_format' => 'object',
        'query_args' => ['orderby' => 'date', 'order' => 'DESC'],
    ];

    // Location rule: only the front page itself.
    $only_front_page = [
        ['param' => 'page', 'operator' => '==', 'value' => (string) $front_page_id],
    ];

    acf_add_local_field_group([
        'key' => 'group_portada_fea',
        'title' => 'Contenido de la portada',
        'fields' => [$articles_field, $multimedia_field],
        'location' => [$only_front_page],
        'position' => 'normal',
        'style' => 'default',
        'label_placement' => 'top',
    ]);
});
// ── Centre the slider+bookshop block (header template, every page) ────────
// Prints an inline <style> in <head> on every request (no conditional): it
// centres the columns block that contains the Smart Slider, hides parts of
// .fea-slider-block on narrow viewports and reduces the header search height.
add_action('wp_head', function() {
?>
<style>
/* El bloque de columnas con el slider usa márgenes negativos del FSE
que lo desplazan. Lo forzamos a centrar con max-width explícito. */
.wp-block-columns:has(.wp-block-nextend-smartslider3) {
max-width: min(var(--wp--style--global--wide-size, 1340px), 100%);
margin-left: auto !important;
margin-right: auto !important;
box-sizing: border-box;
padding-left: var(--wp--preset--spacing--30);
padding-right: var(--wp--preset--spacing--30);
}
/* Ocultar columna librería en tablet */
@media (max-width: 900px) {
.fea-slider-block .wp-block-column:last-child {
display: none !important;
}
}
/* Ocultar slider en móvil (banner superior sigue visible) */
@media (max-width: 600px) {
.fea-slider-block {
display: none !important;
}
}
/* Buscador del header: reducir altura */
.wp-block-search__input {
padding-top: 0.25rem !important;
padding-bottom: 0.25rem !important;
line-height: 1.3 !important;
}
.wp-block-search__button {
padding-top: 0.25rem !important;
padding-bottom: 0.25rem !important;
}
</style>
<?php
});
// ── Front-page styles ─────────────────────────────────────────────────────
// Prints the inline CSS for the .fea-* classes produced by fea_card() and the
// fea_* shortcodes; emitted only when is_front_page() is true.
add_action('wp_head', function() {
if (!is_front_page()) return;
?>
<style>
.fea-hero { border-bottom: 2px solid #111; padding-bottom: 2rem; margin-bottom: 2.5rem; }
.fea-hero-link { display: block; text-decoration: none; color: inherit; }
.fea-hero-link:hover .fea-hero-title { text-decoration: underline; text-underline-offset: 4px; }
.fea-section-label { display: inline-block; font-size: 0.72rem; font-weight: 700; letter-spacing: 0.1em; text-transform: uppercase; color: #888; margin-bottom: 0.6rem; }
.fea-hero-title { font-size: clamp(1.5rem, 4vw, 2.2rem); font-weight: 700; line-height: 1.2; margin: 0 0 0.75rem; color: #111; }
.fea-hero-meta { display: flex; align-items: center; gap: 0.5rem; font-size: 0.875rem; color: #666; }
.fea-section { margin-bottom: 3rem; }
.fea-section-title { font-size: 0.72rem; font-weight: 700; letter-spacing: 0.1em; text-transform: uppercase; color: #888; margin: 0 0 1.25rem; padding-bottom: 0.5rem; border-bottom: 1px solid #e0e0e0; }
.fea-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 1.5rem; }
@media (max-width: 720px) { .fea-grid { grid-template-columns: 1fr 1fr; } }
@media (max-width: 480px) { .fea-grid { grid-template-columns: 1fr; } }
.fea-card { border-bottom: 1px solid #e5e5e5; padding-bottom: 1.1rem; }
.fea-card-meta { display: flex; align-items: center; gap: 0.4rem; margin-bottom: 0.4rem; }
.fea-avatar { border-radius: 50%; width: 28px !important; height: 28px !important; flex-shrink: 0; display: inline-block !important; }
.fea-card-author { font-size: 0.78rem; font-weight: 600; color: #555; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.fea-card-title { font-size: 0.93rem; font-weight: 600; line-height: 1.35; margin: 0; }
.fea-card-title a { text-decoration: none; color: #111; }
.fea-card-title a:hover { text-decoration: underline; text-underline-offset: 3px; }
</style>
<?php
});
// ── Custom byline on single posts ─────────────────────────────────────────
// Hooked on `astra_single_header_bottom`. On single views it prints the
// author's avatar and display name (both linking to the author archive),
// followed by a link to the first category returned by get_the_category().
add_action('astra_single_header_bottom', function() {
    if (!is_single()) {
        return;
    }

    $author_id = (int) get_the_author_meta('ID');
    $author_url = get_author_posts_url($author_id);

    // Optional category link: empty string when the post has no category.
    $category_link = '';
    $categories = get_the_category();
    if ($categories) {
        $first = $categories[0];
        $category_link = sprintf(
            '<a href="%s" class="fea-byline-cat">%s</a>',
            esc_url(get_category_link($first->term_id)),
            esc_html($first->name)
        );
    }

    printf(
        '<div class="fea-byline">'
        . '<a href="%1$s" class="fea-byline-avatar-link">'
        . '<img src="%2$s" alt="" width="48" height="48" class="fea-byline-avatar">'
        . '</a>'
        . '<div class="fea-byline-info">'
        . '<a href="%1$s" class="fea-byline-name">%3$s</a>'
        . '%4$s'
        . '</div>'
        . '</div>',
        esc_url($author_url),
        esc_url(get_avatar_url($author_id, ['size' => 48])),
        esc_html(get_the_author_meta('display_name')),
        $category_link
    );
});
// Inline CSS for the .fea-byline markup; printed in <head> only when
// is_single() is true.
add_action('wp_head', function() {
if (!is_single()) return;
?>
<style>
.fea-byline { display: flex; align-items: center; gap: 0.75rem; margin-top: 0.75rem; }
.fea-byline-avatar-link { flex-shrink: 0; }
.fea-byline-avatar { border-radius: 50%; display: block; }
.fea-byline-info { display: flex; flex-direction: column; gap: 0.2rem; }
.fea-byline-name { font-size: 0.9rem; font-weight: 600; color: #222; text-decoration: none; }
.fea-byline-name:hover { text-decoration: underline; }
.fea-byline-cat { font-size: 0.78rem; color: #888; text-decoration: none; }
.fea-byline-cat:hover { text-decoration: underline; color: #555; }
</style>
<?php
});
// ── Helpers ───────────────────────────────────────────────────────────────
/**
 * Normalise a title to sentence case: everything lower-cased, then the first
 * character upper-cased. Multibyte-safe (UTF-8).
 */
function fea_title(string $title): string {
    $lowered = mb_strtolower($title, 'UTF-8');
    $initial = mb_substr($lowered, 0, 1, 'UTF-8');
    $remainder = mb_substr($lowered, 1, null, 'UTF-8');

    return mb_strtoupper($initial, 'UTF-8') . $remainder;
}
/**
 * Render one post as an <article class="fea-card">: a 28px author avatar and
 * the author's display name, followed by the sentence-cased title linking to
 * the post permalink. All dynamic values are escaped.
 */
function fea_card(object $post): string {
    $author_id = $post->post_author;

    return sprintf(
        '<article class="fea-card">'
        . '<div class="fea-card-meta">'
        . '<img src="%s" alt="" width="28" height="28" class="fea-avatar" loading="lazy">'
        . '<span class="fea-card-author">%s</span>'
        . '</div>'
        . '<h3 class="fea-card-title"><a href="%s">%s</a></h3>'
        . '</article>',
        esc_url(get_avatar_url($author_id, ['size' => 28, 'default' => 'identicon'])),
        esc_html(get_the_author_meta('display_name', $author_id)),
        esc_url(get_permalink($post->ID)),
        esc_html(fea_title($post->post_title))
    );
}
// ── Shortcode: [fea_carta_semana_hero] ────────────────────────────────────
// Renders the newest published post of category 6 as the hero block: label,
// sentence-cased title, author avatar, author name and localized date, all
// wrapped in a link to the post. Returns '' when there is no such post.
add_shortcode('fea_carta_semana_hero', function() {
    $cartas = get_posts([
        'posts_per_page' => 1,
        'category__in' => [6],
        'post_status' => 'publish',
        'orderby' => 'date',
        'order' => 'DESC',
    ]);
    if (!$cartas) {
        return '';
    }

    $c = $cartas[0];
    $url = get_permalink($c->ID);
    $fecha = date_i18n('j \d\e F \d\e Y', strtotime($c->post_date));
    $author_name = get_the_author_meta('display_name', $c->post_author);
    $avatar_url = get_avatar_url($c->post_author, ['size' => 32, 'default' => 'identicon']);

    return '<section class="fea-hero">'
        . '<a href="' . esc_url($url) . '" class="fea-hero-link">'
        . '<span class="fea-section-label">Carta de la semana</span>'
        . '<h2 class="fea-hero-title">' . esc_html(fea_title($c->post_title)) . '</h2>'
        . '<div class="fea-hero-meta">'
        . '<img src="' . esc_url($avatar_url) . '" alt="" width="32" height="32" class="fea-avatar">'
        // The date is escaped too: its month name comes from translation data.
        . '<span>' . esc_html($author_name . ' · ' . $fecha) . '</span>'
        . '</div>'
        . '</a></section>';
});
// ── Section shortcodes ────────────────────────────────────────────────────
// [fea_articulos_semana], [fea_evangelio], [fea_eucaristia] and
// [fea_multimedia] all render the same markup (a titled grid of fea_card()
// items); they differ only in how the list of posts is obtained.

/**
 * Render a titled grid of post cards. Returns '' for an empty list.
 */
function fea_render_section(string $titulo, array $posts): string {
    if (!$posts) {
        return '';
    }
    $html = '<section class="fea-section">'
        . '<h2 class="fea-section-title">' . esc_html($titulo) . '</h2>'
        . '<div class="fea-grid">';
    foreach ($posts as $post) {
        $html .= fea_card($post);
    }
    return $html . '</div></section>';
}

/**
 * Editorial selection stored in an ACF relationship field on the front page.
 *
 * Returns only published posts, in the order the field returns them. Items
 * may be WP_Post objects or post IDs (get_post() accepts both); anything that
 * does not resolve to a post is skipped. Returns [] when ACF is not loaded or
 * the field is empty.
 */
function fea_editorial_selection(string $field_name): array {
    if (!function_exists('get_field')) {
        return [];
    }
    $selection = get_field($field_name, (int) get_option('page_on_front'));
    if (!is_array($selection)) {
        return [];
    }
    $published = [];
    foreach ($selection as $item) {
        $post = get_post($item);
        if ($post instanceof WP_Post && $post->post_status === 'publish') {
            $published[] = $post;
        }
    }
    return $published;
}

// ── Shortcode: [fea_articulos_semana] ─────────────────────────────────────
// Editorial selection first; falls back to the 9 newest posts of category
// 1650 (excluding the listed categories) when nothing is selected.
add_shortcode('fea_articulos_semana', function($atts) {
    $atts = shortcode_atts(['titulo' => 'Artículos de esta semana'], $atts);
    $posts = fea_editorial_selection('portada_articulos');
    if (empty($posts)) {
        $posts = get_posts([
            'posts_per_page' => 9,
            'category__in' => [1650],
            'category__not_in' => [6, 21, 22, 23, 26, 58, 40, 1645, 1646, 1647, 1648, 1649, 1651, 1652],
            'post_status' => 'publish',
            'orderby' => 'date',
            'order' => 'DESC',
        ]);
    }
    return fea_render_section($atts['titulo'], $posts);
});

// ── Shortcode: [fea_evangelio] ────────────────────────────────────────────
// Editorial (cat 1646) first, then commentaries (cat 1647). Max 7 in total.
add_shortcode('fea_evangelio', function($atts) {
    $atts = shortcode_atts(['titulo' => 'Comentarios al evangelio'], $atts);
    $editorial = get_posts([
        'posts_per_page' => 1,
        'category__in' => [1646],
        'post_status' => 'publish',
        'orderby' => 'date',
        'order' => 'DESC',
    ]);
    $comentarios = get_posts([
        'posts_per_page' => 6,
        'category__in' => [1647],
        'category__not_in' => [1646],
        'post_status' => 'publish',
        'orderby' => 'date',
        'order' => 'DESC',
    ]);
    return fea_render_section($atts['titulo'], array_merge($editorial, $comentarios));
});

// ── Shortcode: [fea_eucaristia] ───────────────────────────────────────────
// The 6 newest published posts of category 1648.
add_shortcode('fea_eucaristia', function($atts) {
    $atts = shortcode_atts(['titulo' => 'Para una eucaristía más participativa'], $atts);
    $posts = get_posts([
        'posts_per_page' => 6,
        'category__in' => [1648],
        'post_status' => 'publish',
        'orderby' => 'date',
        'order' => 'DESC',
    ]);
    return fea_render_section($atts['titulo'], $posts);
});

// ── Shortcode: [fea_multimedia] ───────────────────────────────────────────
// Editorial selection first; falls back to the 4 newest posts of categories
// 1649, 26 and 58.
add_shortcode('fea_multimedia', function($atts) {
    $atts = shortcode_atts(['titulo' => 'Multimedia'], $atts);
    $posts = fea_editorial_selection('portada_multimedia');
    if (empty($posts)) {
        $posts = get_posts([
            'posts_per_page' => 4,
            'category__in' => [1649, 26, 58],
            'post_status' => 'publish',
            'orderby' => 'date',
            'order' => 'DESC',
        ]);
    }
    return fea_render_section($atts['titulo'], $posts);
});
// ── Rewrite internal links to the active language (Polylang) ──────────────
// For every <a href> in the content that contains this site's home URL and
// resolves to a post, swap the URL for the permalink of that post's
// translation in the current language. Nothing is rewritten when Polylang is
// missing, when the current language is 'es', or when no translation exists.
add_filter('the_content', function($content) {
    if (!function_exists('pll_current_language') || !function_exists('pll_get_post')) {
        return $content;
    }
    $lang = pll_current_language();
    if (!$lang || $lang === 'es') {
        return $content;
    }

    // Same for every link, so compute it once instead of once per match.
    $home = home_url();

    $rewritten = preg_replace_callback(
        '/<a\s([^>]*\s)?href=["\']([^"\']+)["\']([^>]*)>/i',
        function($m) use ($lang, $home) {
            $href = $m[2];
            if (strpos($href, $home) === false) return $m[0];
            $post_id = url_to_postid($href);
            if (!$post_id) return $m[0];
            $translated_id = pll_get_post($post_id, $lang);
            if (!$translated_id || $translated_id === $post_id) return $m[0];
            $new_url = get_permalink($translated_id);
            if (!$new_url) return $m[0];
            // Keep an in-page anchor (#section) that the permalink lookup drops.
            $hash_at = strpos($href, '#');
            if ($hash_at !== false) {
                $new_url .= substr($href, $hash_at);
            }
            return str_replace($href, $new_url, $m[0]);
        },
        $content
    );

    // preg_replace_callback() returns null on a PCRE error; never let that
    // replace the post content with nothing.
    return $rewritten === null ? $content : $rewritten;
}, 20);
+62
View File
@@ -0,0 +1,62 @@
<?php
/**
* IO mínimo de posts WP para el reprocesador EN.
* get <id> -> escribe /tmp/fea_es.json {title, content, status}
* update <id> <titlef> <bodyf> -> actualiza post_title/post_content desde ficheros
* Carga wp-load; portable (local docker o prod via FEA_WP_LOAD).
*/
// Bootstrap WordPress. FEA_WP_LOAD overrides the default wp-load.php path.
$WP = getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
require $WP;

$action = $argv[1] ?? '';

// Exit with the usage message (status 2) unless at least $n arguments follow
// the action name. Prevents undefined-index reads on $argv further down.
$fea_need_args = static function (int $n) use ($argv): void {
    if (count($argv) < 2 + $n) {
        fwrite(STDERR, "uso: get|update|getmeta|setaudio|setflag\n");
        exit(2);
    }
};

// get <id>: dump title/content/status of the post to /tmp/fea_es.json.
if ($action === 'get') {
    $fea_need_args(1);
    $id = (int)$argv[2];
    $p = get_post($id);
    if (!$p) { fwrite(STDERR, "no existe $id\n"); exit(1); }
    $json = json_encode([
        'id' => $id,
        'title' => $p->post_title,
        'content' => $p->post_content,
        'status' => $p->post_status,
    ], JSON_UNESCAPED_UNICODE);
    if ($json === false || file_put_contents('/tmp/fea_es.json', $json) === false) {
        fwrite(STDERR, "error: no se pudo escribir /tmp/fea_es.json\n");
        exit(1);
    }
    exit(0);
}

// update <id> <titlef> <bodyf>: set post_title/post_content from two files.
if ($action === 'update') {
    $fea_need_args(3);
    $id = (int)$argv[2];
    $title = file_get_contents($argv[3]);
    $body = file_get_contents($argv[4]);
    // An unreadable file yields false; without this check the post would be
    // updated with an empty title or body.
    if ($title === false || $body === false) {
        fwrite(STDERR, "error: no se pudo leer el fichero de titulo o cuerpo\n");
        exit(1);
    }
    $title = rtrim($title, "\r\n");
    if (!get_post($id)) { fwrite(STDERR, "no existe $id\n"); exit(1); }
    // wp_update_post() expects slashed data and unslashes it internally;
    // passing raw text would strip backslashes from the content.
    $r = wp_update_post(wp_slash([
        'ID' => $id,
        'post_title' => $title,
        'post_content' => $body,
    ]), true);
    if (is_wp_error($r)) { fwrite(STDERR, "error: " . $r->get_error_message() . "\n"); exit(1); }
    fwrite(STDOUT, "ok actualizado $id\n");
    exit(0);
}

// getmeta <id> <key>: print a single meta value (non-scalar values as JSON).
if ($action === 'getmeta') {
    $fea_need_args(2);
    $value = get_post_meta((int)$argv[2], $argv[3], true);
    echo is_scalar($value) ? $value : wp_json_encode($value);
    exit(0);
}

if ($action === 'setaudio') { // setaudio <id> <relpath>
    $fea_need_args(2);
    $id = (int)$argv[2];
    update_post_meta($id, 'fea_audio_url', home_url($argv[3]));
    update_post_meta($id, 'fea_audio_voice', 'NicoFeadulta2026');
    update_post_meta($id, 'fea_audio_done', '1');
    delete_post_meta($id, 'fea_audio_error');
    fwrite(STDOUT, "ok " . home_url($argv[3]) . "\n");
    exit(0);
}

if ($action === 'setflag') { // setflag <id> <key> <value>
    $fea_need_args(3);
    // Meta values are unslashed on save, so slash the raw CLI value first.
    update_post_meta((int)$argv[2], $argv[3], wp_slash($argv[4]));
    exit(0);
}

fwrite(STDERR, "uso: get|update|getmeta|setaudio|setflag\n");
exit(2);
+289
View File
@@ -0,0 +1,289 @@
<?php
/**
* Helper PHP para translate_post.py — corre DENTRO del contenedor WP cargando wp-load.php
* (no necesita wp-cli ni proc_open). Centraliza la lógica de WordPress/Polylang.
*
* Uso (vía `docker exec wordpress-web php /tmp/fea_translate_helper.php <subcomando> ...`):
* read <id> → JSON {id,title,content,excerpt,lang,status,author,date,cats}
* read_full <id> → JSON con slug, metas, categorías y grupo Polylang
* exists <es_id> <lang> → imprime el ID de la traducción en <lang> (0 si no hay)
* create <es_id> <lang> <status> (lee {title,content} por stdin)
* → crea el post traducido, lo enlaza con Polylang y mete metas;
* imprime el nuevo ID.
* clone <target_id> <lang> <status> (lee payload JSON por stdin)
* → inserta/actualiza un post con ID explícito, categorías y metas.
* save_translations → guarda un grupo Polylang exacto leído por stdin.
*
* Ver issue rafa/feadulta#75.
*/
// Portable bootstrap. If WordPress is not loaded yet (standalone mode), load wp-load.
// Local (docker): /var/www/html/wp-load.php (the default).
// Prod: export FEA_WP_LOAD=/web/wp-nuevo/wp-load.php
// (When run via `wp eval-file`, ABSPATH is already defined and nothing is reloaded.)
if (!defined('ABSPATH')) {
// Provide defaults for the request variables that are not set when running from the CLI.
$_SERVER['REQUEST_URI'] = $_SERVER['REQUEST_URI'] ?? '/';
$_SERVER['HTTP_HOST'] = $_SERVER['HTTP_HOST'] ?? 'localhost';
require_once (getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php');
}
// Every subcommand needs Polylang; abort with exit status 2 when its API is missing.
if (!function_exists('pll_set_post_language')) {
fwrite(STDERR, "Polylang no disponible\n");
exit(2);
}
// Subcommand name (first CLI argument); empty string when none was given.
$cmd = $argv[1] ?? '';
/** Print $data to stdout as JSON (encoded with wp_json_encode), no trailing newline. */
function out_json($data): void {
    $encoded = wp_json_encode($data);
    echo $encoded;
}
/**
 * Collect every meta entry of a post as `key => [values...]`.
 *
 * The editor bookkeeping keys `_edit_lock` and `_edit_last` are left out, and
 * each stored value is passed through maybe_unserialize().
 *
 * @param int $id Post ID.
 * @return array Map of meta key to list of values.
 */
function meta_payload(int $id): array
{
    $skipped = ['_edit_lock', '_edit_last'];
    $payload = [];
    foreach (get_post_meta($id) as $name => $stored) {
        if (!in_array($name, $skipped, true)) {
            $payload[$name] = array_map('maybe_unserialize', (array) $stored);
        }
    }
    return $payload;
}
/**
 * Normalize the `meta` entry of a decoded payload into `key => [values...]`.
 *
 * Entries whose key is not a non-empty string are dropped; a value that is
 * not already an array is wrapped in a single-element array.
 *
 * @param array $payload Decoded JSON payload.
 * @return array Normalized meta map (empty when `meta` is missing or not an array).
 */
function normalize_meta_input(array $payload): array
{
    $candidate = $payload['meta'] ?? [];
    if (!is_array($candidate)) {
        return [];
    }
    $normalized = [];
    foreach ($candidate as $name => $entry) {
        $usable = is_string($name) && $name !== '';
        if ($usable) {
            $normalized[$name] = is_array($entry) ? $entry : [$entry];
        }
    }
    return $normalized;
}
/**
 * Replace the given meta keys of a post with the supplied value lists.
 *
 * For every key, all existing entries are deleted and each value is added
 * back as its own row.
 *
 * Values are handed to add_post_meta() unserialized: WordPress serializes
 * arrays/objects itself, so serializing here first would store a
 * doubly-serialized string. They are slashed because add_post_meta() unslashes
 * its input, which would otherwise strip backslashes from the stored value.
 *
 * @param int   $id   Post ID.
 * @param array $meta Map of meta key to list of values (see normalize_meta_input()).
 */
function set_meta_payload(int $id, array $meta): void
{
    foreach ($meta as $key => $values) {
        delete_post_meta($id, $key);
        foreach ($values as $value) {
            add_post_meta($id, $key, wp_slash($value));
        }
    }
}
switch ($cmd) {
case 'read': {
    // Basic dump of one post as JSON; exits with code 3 when the post does not exist.
    $id = (int) ($argv[2] ?? 0);
    $p = get_post($id);
    if (!$p) {
        fwrite(STDERR, "post $id no existe\n");
        exit(3);
    }
    // The language is reported as '' when the Polylang getter is unavailable.
    $lang = '';
    if (function_exists('pll_get_post_language')) {
        $lang = pll_get_post_language($id);
    }
    $summary = [
        'id' => $p->ID,
        'title' => $p->post_title,
        'content' => $p->post_content,
        'excerpt' => $p->post_excerpt,
        'lang' => $lang,
        'status' => $p->post_status,
        'author' => (int) $p->post_author,
        'date' => $p->post_date,
        'cats' => wp_get_post_categories($id),
    ];
    out_json($summary);
    break;
}
case 'read_full': {
    // Full dump of one post as JSON: core fields plus slug, type, GMT date,
    // category IDs and slugs, all metas and the Polylang translation group.
    // Exits with code 3 when the post does not exist.
    $id = (int) ($argv[2] ?? 0);
    $p = get_post($id);
    if (!$p) { fwrite(STDERR, "post $id no existe\n"); exit(3); }
    // get_the_terms() returns false when the post has no terms and a WP_Error
    // on failure; only an array is safe to hand to array_map().
    $terms = get_the_terms($id, 'category');
    if (!is_array($terms)) {
        $terms = [];
    }
    out_json([
        'id' => $p->ID,
        'title' => $p->post_title,
        'content' => $p->post_content,
        'excerpt' => $p->post_excerpt,
        'slug' => $p->post_name,
        'lang' => function_exists('pll_get_post_language') ? pll_get_post_language($id) : '',
        'status' => $p->post_status,
        'author' => (int) $p->post_author,
        'date' => $p->post_date,
        'date_gmt' => $p->post_date_gmt,
        'type' => $p->post_type,
        'cats' => wp_get_post_categories($id),
        'cat_slugs' => array_values(array_map(static fn($t) => $t->slug, $terms)),
        'meta' => meta_payload($id),
        'translations' => function_exists('pll_get_post_translations') ? pll_get_post_translations($id) : [],
    ]);
    break;
}
case 'exists': {
    // Prints the ID of the translation of <es_id> in <lang>, or 0 when there is none.
    $es = (int) ($argv[2] ?? 0);
    $lang = (string) ($argv[3] ?? '');
    $t = (int) pll_get_post($es, $lang);
    // A dangling link to a deleted post counts as "no translation".
    $alive = $t !== 0 && get_post($t);
    echo $alive ? $t : 0;
    break;
}
case 'unlink': {
    // Deletes the translation in <lang> and removes it from the group (for --force / cleanup).
    $es = (int) ($argv[2] ?? 0);
    $lang = (string) ($argv[3] ?? '');
    $t = (int) pll_get_post($es, $lang);
    // Force-delete (second argument true) the translated post if it still exists.
    if ($t && get_post($t)) wp_delete_post($t, true);
    // The group is read after the deletion; without the Polylang getter it is assumed to be just the ES post.
    $tr = function_exists('pll_get_post_translations') ? pll_get_post_translations($es) : ['es' => $es];
    unset($tr[$lang]);
    // Save the remaining group only when something is left in it.
    if ($tr) pll_save_post_translations($tr);
    // Prints the translation ID that was looked up (0 when there was none).
    echo $t;
    break;
}
case 'create': {
    // Creates the translated post for <es_id> in <lang>, links it into the
    // Polylang group and stores traceability metas. Reads {title, content,
    // excerpt?, model?} as JSON from stdin and prints the new (or already
    // existing) post ID.
    // Exit codes: 3 source post missing, 4 invalid payload, 5 insert error,
    // 6 missing <lang>.
    $es = (int) ($argv[2] ?? 0);
    $lang = (string) ($argv[3] ?? '');
    $status = (string) ($argv[4] ?? 'draft');
    // Same guard as `clone`: without a language code the post would be created
    // with no language and stored in the group under an empty key.
    if ($lang === '') {
        fwrite(STDERR, "uso: create <es_id> <lang> <status>\n"); exit(6);
    }
    $src = get_post($es);
    if (!$src) { fwrite(STDERR, "post fuente $es no existe\n"); exit(3); }
    $payload = json_decode(file_get_contents('php://stdin'), true);
    if (!is_array($payload) || empty($payload['title'])) {
        fwrite(STDERR, "payload inválido por stdin\n"); exit(4);
    }
    // Hard idempotency: if a live translation already exists, return it untouched.
    $existing = (int) pll_get_post($es, $lang);
    if ($existing && !get_post($existing)) $existing = 0;
    if ($existing) { echo $existing; break; }
    // wp_insert_post() expects slashed data, hence wp_slash() on the text fields.
    $new_id = wp_insert_post([
        'post_title' => wp_slash($payload['title']),
        'post_content' => wp_slash($payload['content'] ?? ''),
        'post_excerpt' => wp_slash($payload['excerpt'] ?? ''),
        'post_name' => sanitize_title($payload['title']),
        'post_status' => $status,
        'post_type' => 'post',
        'post_author' => (int) $src->post_author,
        'post_date' => $src->post_date,
        'to_ping' => '',
        'pinged' => '',
    ], true);
    if (is_wp_error($new_id)) { fwrite(STDERR, $new_id->get_error_message() . "\n"); exit(5); }
    // Language first, so the translated categories match the post language.
    pll_set_post_language($new_id, $lang);
    // Categories: map each ES category to its translation in the target language
    // (the letter categories are already translated: cartasemana 6→en 3077, fr 3083…).
    $cats = wp_get_post_categories($es);
    $mapped = [];
    foreach ($cats as $c) {
        $tc = function_exists('pll_get_term') ? (int) pll_get_term($c, $lang) : 0;
        $mapped[] = $tc ?: $c; // translated one if it exists; otherwise the ES one (fallback)
    }
    if ($mapped) wp_set_post_categories($new_id, array_values(array_unique($mapped)));
    // Translation link (preserving the existing group).
    $tr = function_exists('pll_get_post_translations') ? pll_get_post_translations($es) : ['es' => $es];
    if (!$tr) $tr = ['es' => $es];
    $tr[$lang] = $new_id;
    pll_save_post_translations($tr);
    // Traceability metas.
    update_post_meta($new_id, 'traduccion_automatica', '1');
    update_post_meta($new_id, 'traduccion_origen', $es);
    update_post_meta($new_id, 'traduccion_modelo', $payload['model'] ?? '');
    update_post_meta($new_id, 'traduccion_fecha', gmdate('c'));
    echo $new_id;
    break;
}
case 'clone': {
    // Inserts or updates a post under an explicit ID (<target_id>) from a JSON
    // payload read on stdin, then sets its language, categories and metas.
    // Exit codes: 6 bad arguments, 4 invalid payload, 5 insert/update error,
    // 7 resulting ID differs from <target_id>.
    $target = (int) ($argv[2] ?? 0);
    $lang = (string) ($argv[3] ?? '');
    $status = (string) ($argv[4] ?? 'draft');
    if ($target <= 0 || $lang === '') {
        fwrite(STDERR, "uso: clone <target_id> <lang> <status>\n"); exit(6);
    }
    $payload = json_decode(file_get_contents('php://stdin'), true);
    if (!is_array($payload) || empty($payload['title'])) {
        fwrite(STDERR, "payload inválido por stdin\n"); exit(4);
    }
    // The payload status is used only when the CLI status argument is an empty string.
    $postarr = [
        'post_title' => wp_slash($payload['title']),
        'post_content' => wp_slash($payload['content'] ?? ''),
        'post_excerpt' => wp_slash($payload['excerpt'] ?? ''),
        'post_status' => $status ?: ($payload['status'] ?? 'draft'),
        'post_type' => $payload['type'] ?? 'post',
        'post_author' => (int) ($payload['author'] ?? 1),
        'post_date' => $payload['date'] ?? current_time('mysql'),
        'post_date_gmt'=> $payload['date_gmt'] ?? current_time('mysql', true),
        'post_name' => $payload['slug'] ?? '',
        'to_ping' => '',
        'pinged' => '',
    ];
    // Update in place when the target ID already exists; otherwise request that
    // exact ID for the new post through `import_id`.
    $existing = get_post($target);
    if ($existing) {
        $postarr['ID'] = $target;
        $new_id = wp_update_post($postarr, true);
    } else {
        $postarr['import_id'] = $target;
        $new_id = wp_insert_post($postarr, true);
    }
    if (is_wp_error($new_id)) { fwrite(STDERR, $new_id->get_error_message() . "\n"); exit(5); }
    // Abort if the post did not end up under the requested ID.
    if ((int) $new_id !== $target) {
        fwrite(STDERR, "ID preservado falló: esperado $target, creado $new_id\n"); exit(7);
    }
    pll_set_post_language($new_id, $lang);
    // Categories: resolve by slug first; fall back to the raw IDs in the payload
    // only when no slug resolved.
    $cats = [];
    foreach ((array) ($payload['cat_slugs'] ?? []) as $slug) {
        $term = get_term_by('slug', (string) $slug, 'category');
        if ($term && !is_wp_error($term)) {
            $cats[] = (int) $term->term_id;
        }
    }
    if (!$cats) {
        $cats = array_values(array_unique(array_map('intval', (array) ($payload['cats'] ?? []))));
    }
    wp_set_post_categories($new_id, $cats);
    // Replace the metas listed in the payload, then drop the cached copy of the post.
    set_meta_payload($new_id, normalize_meta_input($payload));
    clean_post_cache($new_id);
    echo $new_id;
    break;
}
case 'save_translations': {
    // Saves an exact Polylang group read from stdin as {"translations": {lang: id}}.
    // Exit codes: 4 invalid payload, 8 fewer than two valid members.
    $payload = json_decode(file_get_contents('php://stdin'), true);
    $valid = is_array($payload)
        && !empty($payload['translations'])
        && is_array($payload['translations']);
    if (!$valid) {
        fwrite(STDERR, "payload inválido por stdin\n");
        exit(4);
    }
    // Keep only entries with a non-empty string language and an existing post.
    $tr = [];
    foreach ($payload['translations'] as $lang => $raw_id) {
        $member = (int) $raw_id;
        $keep = is_string($lang) && $lang !== '' && $member > 0 && get_post($member);
        if ($keep) {
            $tr[$lang] = $member;
        }
    }
    if (count($tr) < 2) {
        fwrite(STDERR, "grupo insuficiente\n");
        exit(8);
    }
    pll_save_post_translations($tr);
    out_json($tr);
    break;
}
default:
    // Unknown or missing subcommand: report it on stderr and exit with code 1.
    fwrite(STDERR, "subcomando desconocido: '$cmd'\n");
    exit(1);
}
+21
View File
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""Download a Bible passage in EN/FR/IT/PT from bolls.life (issue #88).

Do NOT translate the Bible with an LLM: download official versions instead.

Usage: pass ``BOOK CH V0 V1`` as positional arguments (each one optional, in
that order); missing ones fall back to the module-level defaults below. Edit
``TR`` to choose the translation used for each language.

Output: a JSON object ``{lang: html}`` written to /tmp/lecturas_fetched.json.

Translations used (letter 46956, Matthew 10:26-33): EN=DRB (Catholic),
PT=CNBB (Catholic), IT=NR06 (the only modern IT one), FR=BDS.
bolls book numbers: Matthew=40. See the site's languages.json.
"""
import json
import re
import sys
import urllib.request

# Language code -> bolls.life translation code.
TR = {"en": "DRB", "fr": "BDS", "it": "NR06", "pt": "CNBB"}

# Default passage, used for any positional argument that is not supplied.
BOOK = 40
CH = 10
V0 = 26
V1 = 33

OUT_PATH = "/tmp/lecturas_fetched.json"


def clean(t):
    """Replace HTML tags with spaces, collapse whitespace runs and trim."""
    return re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", t)).strip()


def _int_arg(position, default):
    """Return ``sys.argv[position]`` as an int, or *default* when absent."""
    return int(sys.argv[position]) if len(sys.argv) > position else default


def main():
    """Fetch the chapter for every language in TR and write the verse range as HTML."""
    book = _int_arg(1, BOOK)
    ch = _int_arg(2, CH)
    v0 = _int_arg(3, V0)
    v1 = _int_arg(4, V1)

    out = {}
    for lang, code in TR.items():
        url = f"https://bolls.life/get-text/{code}/{book}/{ch}/"
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url, timeout=20) as resp:
            data = json.load(resp)
        v = {x["verse"]: clean(x["text"]) for x in data if v0 <= x["verse"] <= v1}
        out[lang] = "\n".join(
            f'<p><sup><strong>{n}</strong></sup> {v[n]}</p>'
            for n in range(v0, v1 + 1)
            if n in v
        )

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
    # opened as UTF-8 explicitly rather than with the locale's default encoding.
    with open(OUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(out, fh, ensure_ascii=False)
    print("OK ->/tmp/lecturas_fetched.json", {k: len(x) for k, x in out.items()})


if __name__ == "__main__":
    main()
+65
View File
@@ -0,0 +1,65 @@
<?php
/**
* Arregla los enlaces internos de la carta que apuntan a contenido com_content
* de Joomla (multimedia, vídeos, cantoral...) con forma legacy
* es/<seccion>/<joomla_content_id>-<slug>.html
* que fix_carta_joomla_links.php NO mapea (solo trata /item/<k2>-...). Resuelve
* el número (id de contenido Joomla) por meta `_fgj2wp_old_content_id` y, en su
* defecto, `_fgj2wp_old_id` (contenido migrado en el bulk original) → permalink
* WP en el idioma de cada carta (degrada a ES si no hay traducción).
*
* Deja intactos los enlaces absolutos a feadulta.com (navegación externa) y los
* índices de sección (tablon-de-anuncios.html, noticias-de-alcance.html, etc.).
*
* Uso: CARTA=<es_id> php fix_carta_content_links.php (dry-run)
* APPLY=1 CARTA=<es_id> php fix_carta_content_links.php
*/
// Load WordPress; FEA_WP_LOAD overrides the default Docker path.
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
global $wpdb;
// Changes are written only when APPLY=1; anything else is a dry run.
$APPLY = getenv('APPLY') === '1';
// ID of the Spanish letter whose translation group is processed (mandatory).
$CARTA = (int)(getenv('CARTA') ?: 0);
if (!$CARTA) { fwrite(STDERR, "Falta CARTA=<es_id>\n"); exit(1); }
// Directory for backups of the original post_content; created only when applying.
$BAK = "/tmp/fix_carta_content_bak"; if ($APPLY) @mkdir($BAK, 0777, true);
/**
 * Find the WP post migrated from a given Joomla content ID.
 *
 * Looks the ID up in postmeta, first under `_fgj2wp_old_content_id` and then
 * under `_fgj2wp_old_id`, and returns the first match.
 *
 * @param int|string $jid Joomla content ID.
 * @return int WP post ID, or 0 when neither meta key has that value.
 */
function content_es_post($jid) {
    global $wpdb;
    $sql = "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key=%s AND meta_value=%s LIMIT 1";
    $needle = (string)$jid;
    foreach (['_fgj2wp_old_content_id', '_fgj2wp_old_id'] as $meta_key) {
        $found = $wpdb->get_var($wpdb->prepare($sql, $meta_key, $needle));
        if ($found) {
            return (int)$found;
        }
    }
    return 0;
}
// Total number of rewritten links across the whole translation group.
$tot = 0;
foreach (pll_get_post_translations($CARTA) as $lang => $pid) {
    $post = get_post($pid);
    if (!$post) continue;
    $chg = 0;
    $miss = [];
    $new = preg_replace_callback('~href="([^"]+)"~i', function($m) use ($lang, &$chg, &$miss) {
        $href = html_entity_decode(trim($m[1]));
        if (stripos($href, '.html') === false) return $m[0]; // legacy .html links only
        if (stripos($href, 'feadulta.com') !== false) return $m[0]; // absolute external link → leave as is
        if (stripos($href, '/item/') !== false) return $m[0]; // K2 items are handled by another script
        if (!preg_match('~/(\d+)-[^/"]+\.html$~i', $href, $mm)) return $m[0]; // needs <id>-slug.html
        $es = content_es_post((int)$mm[1]);
        if (!$es) { $miss[] = $href; return $m[0]; }
        // Prefer the translation in this letter's language; fall back to the matched post.
        $t = function_exists('pll_get_post') ? (pll_get_post($es, $lang) ?: $es) : $es;
        $url = get_permalink($t);
        // Skip when there is no permalink or only the plain `?p=` form is available.
        if (!$url || strpos($url, '?p=') !== false) return $m[0];
        $chg++;
        return 'href="' . esc_url($url) . '"';
    }, $post->post_content);
    // preg_replace_callback() returns null on a PCRE error; never write that back.
    if ($new === null) {
        fwrite(STDERR, "#$pid [$lang] error de regex (código " . preg_last_error() . "), se omite\n");
        continue;
    }
    printf("#%d [%s] «%s» — %d enlaces de contenido%s\n", $pid, $lang, mb_substr($post->post_title,0,26), $chg,
        $miss ? (" | sin mapear: " . implode(", ", array_slice($miss,0,3))) : "");
    $tot += $chg;
    if ($APPLY && $chg) {
        // Keep a copy of the original content before overwriting it.
        file_put_contents("$BAK/$pid.html", $post->post_content);
        // wp_update_post() expects slashed data; raw content would lose its backslashes.
        $res = wp_update_post(['ID'=>$pid, 'post_content'=>wp_slash($new)], true);
        if (is_wp_error($res)) {
            fwrite(STDERR, "#$pid [$lang] no se pudo actualizar: " . $res->get_error_message() . "\n");
            continue;
        }
        clean_post_cache($pid);
    }
}
if ($APPLY) {
    // Drop the `fea_carta_sections_*` transients (values and timeouts) so they are rebuilt.
    $wpdb->query("DELETE FROM {$wpdb->options} WHERE option_name LIKE '_transient_fea_carta_sections_%'");
    $wpdb->query("DELETE FROM {$wpdb->options} WHERE option_name LIKE '_transient_timeout_fea_carta_sections_%'");
}
echo ($APPLY ? "APLICADO" : "DRY-RUN") . ": $tot enlaces.\n";
+42
View File
@@ -0,0 +1,42 @@
<?php
/**
 * Rewrites legacy Joomla `.html` links inside a letter and its translations.
 *
 * For every `href="..."` containing `.html` (and not `feadulta.com`), the target
 * post is resolved by K2 item ID (`/item/<id>-...`, meta `_fgj2wp_old_k2_id`) or,
 * failing that, by the file name used as a post slug. The link is replaced with
 * the permalink of that post's translation in the letter's language, falling
 * back to the resolved post itself.
 *
 * Usage: CARTA=<es_id> php <this script>           (dry run)
 *        APPLY=1 CARTA=<es_id> php <this script>   (writes; backups in /tmp/fix_carta_links_bak)
 */
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
global $wpdb;
$APPLY = getenv("APPLY") === "1";
$BAK = "/tmp/fix_carta_links_bak";
if ($APPLY) @mkdir($BAK, 0777, true);
$CARTA = (int)(getenv("CARTA") ?: 46956);
// Process the whole translation group, or just the letter when it has none.
$ids = array_values(pll_get_post_translations($CARTA)) ?: [$CARTA];

/**
 * k2_id -> post ID (0 when not found).
 * Uses $wpdb->postmeta so a non-default table prefix keeps working.
 */
function es_post_by_k2($k2) {
    global $wpdb;
    return (int)$wpdb->get_var($wpdb->prepare(
        "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key='_fgj2wp_old_k2_id' AND meta_value=%s LIMIT 1", $k2));
}

/**
 * slug -> post ID (0 when not found); only posts in publish/draft/future status.
 * Uses $wpdb->posts so a non-default table prefix keeps working.
 */
function es_post_by_slug($slug) {
    global $wpdb;
    return (int)$wpdb->get_var($wpdb->prepare(
        "SELECT ID FROM {$wpdb->posts} WHERE post_name=%s AND post_type='post' AND post_status IN('publish','draft','future') LIMIT 1", $slug));
}

$tot = 0;
foreach ($ids as $pid) {
    $post = get_post($pid);
    if (!$post) continue;
    $lang = pll_get_post_language($pid) ?: 'es';
    $chg = 0;
    $miss = [];
    $new = preg_replace_callback('~href="([^"]+)"~i', function($m) use ($lang, &$chg, &$miss) {
        $href = html_entity_decode(trim($m[1]));
        if (stripos($href, '.html') === false) return $m[0]; // legacy Joomla .html only
        if (stripos($href, 'feadulta.com') !== false) return $m[0]; // absolute old domain -> leave
        $es = 0;
        if (preg_match('~/item/(\d+)-~', $href, $mm)) $es = es_post_by_k2($mm[1]);
        if (!$es && preg_match('~/?([a-z0-9-]+)\.html$~i', $href, $mm)) $es = es_post_by_slug($mm[1]);
        if (!$es) { $miss[] = $href; return $m[0]; }
        $t = pll_get_post($es, $lang) ?: $es;
        $url = get_permalink($t);
        if (!$url) return $m[0];
        $chg++;
        return 'href="' . esc_url($url) . '"';
    }, $post->post_content);
    // preg_replace_callback() returns null on a PCRE error; never write that back.
    if ($new === null) {
        fwrite(STDERR, "#$pid [$lang] error de regex (código " . preg_last_error() . "), se omite\n");
        continue;
    }
    echo sprintf("#%d [%s] «%s» — %d reescritos%s\n", $pid, $lang, mb_substr($post->post_title,0,30), $chg,
        $miss ? (" | sin mapear: " . implode(", ", array_slice($miss,0,4))) : "");
    $tot += $chg;
    if ($APPLY && $chg) {
        // Back up the original content, then write the new one straight to the posts table.
        file_put_contents("$BAK/$pid.html", $post->post_content);
        $wpdb->update($wpdb->posts, ['post_content'=>$new], ['ID'=>$pid]);
        clean_post_cache($pid);
    }
}
echo "\n" . ($APPLY ? "APLICADO" : "DRY-RUN") . ": $tot enlaces.\n";
+104
View File
@@ -0,0 +1,104 @@
<?php
/**
* Issue #75 — normaliza los enlaces internos de las cartas (ES + traducciones).
*
* Bugs que arregla:
* - Cartas ES con enlaces a http://localhost:8081/<slug>/ (rompen en prod).
* - Traducciones con enlaces relativos /<slug>/ (rompen en local, falta /fea).
* - Enlaces que apuntan a un artículo en otro idioma (los re-apunta a la
* traducción del MISMO idioma de la carta si existe).
*
* Para CADA <a href>: si el slug resuelve a un post del sitio, se reescribe al
* permalink absoluto del post en el idioma de la página (fallback: el que haya).
* Si el slug NO resuelve a ningún post (legacy .html, externos), se deja intacto.
*
* Uso (dentro del contenedor, con WP cargado):
* php fix_carta_links.php -> DRY-RUN (no escribe nada)
* APPLY=1 php fix_carta_links.php -> aplica y guarda backup en /tmp/fix_links_bak/
*/
// Load WordPress unless it is already loaded; FEA_WP_LOAD overrides the default path.
if (!defined('ABSPATH')) {
    require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
}
global $wpdb;
// Changes are written only when APPLY=1; anything else is a dry run.
$APPLY = getenv("APPLY") === "1";
$BAKDIR = "/tmp/fix_links_bak";
if ($APPLY) @mkdir($BAKDIR, 0777, true);
// Working set: posts whose content mentions localhost:8081, plus all their translations.
// $wpdb->posts is used instead of a literal table name so a custom prefix keeps working.
$es_ids = $wpdb->get_col(
    "SELECT ID FROM {$wpdb->posts} WHERE post_type='post'
       AND post_status IN ('publish','draft')
       AND post_content LIKE '%localhost:8081%'"
);
// Collected as array keys so each post ID appears only once.
$targets = [];
foreach ($es_ids as $id) {
    $targets[$id] = true;
    foreach (pll_get_post_translations($id) as $tid) $targets[$tid] = true;
}
$targets = array_keys($targets);
/**
 * Extract the candidate post slug from an href, or null if it does not look internal.
 *
 * Returns null for empty values, fragments, mailto:/tel:/javascript: links,
 * legacy `.html` links, anything mentioning feadulta.com, hrefs containing `%`,
 * non-rooted paths, category and `/wp-*` paths, and paths that are not exactly
 * one segment once the optional `/fea` prefix and language prefix are removed.
 *
 * @param string $href Raw href attribute value (HTML entities allowed).
 * @return string|null The single path segment, or null.
 */
function slug_from_href($href) {
    $href = html_entity_decode(trim($href));
    if ($href === '' || $href[0] === '#') return null;
    if (preg_match('~^(mailto:|tel:|javascript:)~i', $href)) return null;
    if (stripos($href, '.html') !== false) return null; // legacy Joomla
    if (stripos($href, 'feadulta.com') !== false) return null; // old domain
    if (strpos($href, '%') !== false) return null; // placeholders such as [unsubscribe]
    // Drop query string / fragment
    $href = preg_replace('~[?#].*$~', '', $href);
    // Drop scheme+host if present
    $path = preg_replace('~^https?://[^/]+~i', '', $href);
    if ($path === '') return null;
    if ($path[0] !== '/') return null; // odd relative link -> do not touch
    if (stripos($path, '/category/') !== false) return null; // categories, not posts
    if (stripos($path, '/wp-') === 0) return null;
    // Drop the /fea prefix and the language prefix. The lookahead makes /fea match
    // only as a whole segment, so a slug such as /feast-day/ is not truncated.
    $path = preg_replace('~^/fea(?=/|$)~', '', $path);
    $path = preg_replace('~^/(en|fr|it|pt|es)(/|$)~', '/', $path);
    $segs = array_values(array_filter(explode('/', $path), 'strlen'));
    if (count($segs) !== 1) return null; // only single-level /<slug>/
    return $segs[0];
}
// Counters for the final summary.
$total_posts = 0; $total_links = 0;
foreach ($targets as $pid) {
    $post = get_post($pid);
    if (!$post) continue;
    $lang = pll_get_post_language($pid) ?: 'es';
    $content = $post->post_content;
    $changes = 0;
    $new = preg_replace_callback('~href="([^"]*)"~i', function($m) use ($lang, &$changes, $wpdb) {
        $href = $m[1];
        $slug = slug_from_href($href);
        if ($slug === null) return $m[0];
        // $wpdb->posts instead of a literal table name so a custom prefix keeps working.
        $found = $wpdb->get_var($wpdb->prepare(
            "SELECT ID FROM {$wpdb->posts} WHERE post_name=%s AND post_type='post'
               AND post_status='publish' LIMIT 1", $slug));
        if (!$found) return $m[0]; // not a post -> leave untouched
        // Resolve to the translation in the page's language
        $target = pll_get_post((int)$found, $lang);
        if (!$target) $target = (int)$found;
        $url = get_permalink($target);
        if (!$url || $url === $href) return $m[0];
        $changes++;
        return 'href="' . esc_url($url) . '"';
    }, $content);
    // preg_replace_callback() returns null on a PCRE error; never write that back.
    if ($new === null) {
        fwrite(STDERR, "#$pid [$lang] error de regex (código " . preg_last_error() . "), se omite\n");
        continue;
    }
    if ($changes > 0) {
        $total_posts++; $total_links += $changes;
        echo sprintf("#%d [%s] «%s» — %d enlace(s) reescrito(s)\n",
            $pid, $lang, mb_substr($post->post_title, 0, 40), $changes);
        if ($APPLY) {
            // Back up the original content, then write the new one straight to the posts table.
            file_put_contents("$BAKDIR/$pid.html", $content);
            $wpdb->update($wpdb->posts, ['post_content' => $new], ['ID' => $pid]);
            clean_post_cache($pid);
        }
    }
}
echo "\n";
echo ($APPLY ? "APLICADO" : "DRY-RUN") . ": $total_links enlaces en $total_posts posts.\n";
if (!$APPLY) echo "Para aplicar: APPLY=1 php fix_carta_links.php (backup en $BAKDIR)\n";
+26
View File
@@ -0,0 +1,26 @@
<?php
// Expected category names per language, keyed by the ES category term ID.
// NOTE(review): this script loads nothing itself, so it presumably runs with
// WordPress and Polylang already loaded (e.g. `wp eval-file`) — confirm.
$D = [
    410 => ['en'=>'New Testament','fr'=>'Nouveau Testament','it'=>'Nuovo Testamento','pt'=>'Novo Testamento'],
    411 => ['en'=>'Old Testament','fr'=>'Ancien Testament','it'=>'Antico Testamento','pt'=>'Antigo Testamento'],
    49 => ['en'=>'Advent and Christmas','fr'=>'Avent et Noël','it'=>'Avvento e Natale','pt'=>'Advento e Natal'],
    12 => ['en'=>'In Memoriam','fr'=>'In Memoriam','it'=>'In Memoriam','pt'=>'In Memoriam'],
    1651 => ['en'=>'News','fr'=>'Actualités','it'=>'Notizie','pt'=>'Notícias'],
    61 => ['en'=>'Christian Communities','fr'=>'Communautés chrétiennes','it'=>'Comunità cristiane','pt'=>'Comunidades cristãs'],
    23 => ['en'=>'Letters We Receive','fr'=>'Lettres reçues','it'=>'Lettere che riceviamo','pt'=>'Cartas que recebemos'],
    39 => ['en'=>'Topics','fr'=>'Thèmes','it'=>'Temi','pt'=>'Temas'],
    27 => ['en'=>'Chronological Index','fr'=>'Index chronologique','it'=>'Indice cronologico','pt'=>'Índice cronológico'],
    63 => ['en'=>'EFFA','fr'=>'EFFA','it'=>'EFFA','pt'=>'EFFA'],
];
$fixed = 0;
foreach ($D as $es => $names) {
    foreach ($names as $L => $correct) {
        // Translated term of the ES category in language $L (falsy when there is none).
        $t = pll_get_term($es, $L);
        if (!$t) { echo " $es/$L sin término\n"; continue; }
        // get_term() may return null or a WP_Error; reading ->name from those would be fatal.
        $term = get_term($t);
        if (!$term || is_wp_error($term)) { echo " $es/$L término #$t no encontrado\n"; continue; }
        $cur = $term->name;
        if ($cur !== $correct) {
            // Count the rename only when WordPress accepted it.
            $res = wp_update_term($t, 'category', ['name'=>$correct]);
            if (is_wp_error($res)) {
                echo " #$t [$L] error: " . $res->get_error_message() . "\n";
                continue;
            }
            echo " #$t [$L] \"$cur\" \"$correct\"\n"; $fixed++;
        }
    }
}
echo "nombres corregidos: $fixed\n";
+137
View File
@@ -0,0 +1,137 @@
<?php
/**
* fix_image_paths.php
*
* Reescribe rutas relativas Joomla `images/...` en wp_posts.post_content
* a rutas absolutas del site `/fea/wp-content/uploads/...`, pero solo cuando
* el fichero correspondiente existe en /var/www/html/wp-content/uploads/.
*
* Cubre src= y href= con comillas dobles o simples.
* URL-decodifica antes de comprobar el filesystem (mp3 con espacios/tildes).
*
* Issue: rafa/feadulta#34
*
* Usage:
* docker exec wordpress-web php /tmp/fix_image_paths.php --dry-run
* docker exec wordpress-web php /tmp/fix_image_paths.php # live
*/
// With --dry-run nothing is written to the database.
$dry_run = in_array('--dry-run', $argv ?? []);
// Database connection settings.
$db_host = 'wordpress-mysql';
$db_name = 'wordpress_db';
$db_user = 'wordpress_user';
$db_pass = 'wordpress_pass';
// Filesystem directory checked for each file, and the URL prefix written into the content.
$uploads_fs = '/var/www/html/wp-content/uploads';
$uploads_url = '/fea/wp-content/uploads';
// PDO is configured to throw exceptions on errors.
$pdo = new PDO("mysql:host=$db_host;dbname=$db_name;charset=utf8mb4", $db_user, $db_pass, [
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
]);
echo "=== Fix image paths (Joomla `images/...` → WP uploads) ===\n";
echo $dry_run ? "[DRY RUN]\n\n" : "[LIVE RUN]\n\n";
// Candidate posts: published/draft posts and pages whose content has a src= or
// href= starting with `images/`, in double or single quotes.
$stmt = $pdo->query("
SELECT ID, post_title, post_content
FROM wp_posts
WHERE post_status IN ('publish','draft')
AND post_type IN ('post','page')
AND (
post_content LIKE '%src=\"images/%'
OR post_content LIKE \"%src='images/%\"
OR post_content LIKE '%href=\"images/%'
OR post_content LIKE \"%href='images/%\"
)
");
$posts = $stmt->fetchAll(PDO::FETCH_ASSOC);
echo "Posts candidatos: " . count($posts) . "\n\n";
// Counters reported in the final summary.
$stats = [
    'posts_changed' => 0,
    'posts_unchanged' => 0,
    'refs_rewritten' => 0,
    'refs_missing_file' => 0,
];
$missing = []; // path => count
$missing_per_post = []; // ID => [path,...]
// (src|href)= ( " | ' ) images/... ( " | ' )
// The \2 backreference requires the closing quote to match the opening one.
$pattern = '/\b(src|href)=("|\')images\/([^"\']+)\2/i';
$update = $pdo->prepare("UPDATE wp_posts SET post_content = ? WHERE ID = ?");
// Rewrite every `images/...` reference whose file exists under the uploads directory.
foreach ($posts as $post) {
    $original = $post['post_content'];
    $pid = (int)$post['ID'];
    $content = preg_replace_callback(
        $pattern,
        function ($m) use ($uploads_fs, $uploads_url, &$stats, &$missing, &$missing_per_post, $pid) {
            $attr = $m[1];
            $quote = $m[2];
            $rel_enc = $m[3]; // as it appears in the HTML (may be URL-encoded)
            // Decode for the filesystem lookup. rawurldecode() is the right decoder for
            // a URL path: urldecode() would also turn a literal "+" into a space and
            // report files with "+" in their name as missing.
            $rel_dec = rawurldecode($rel_enc);
            $fs_path = $uploads_fs . '/' . $rel_dec;
            if (is_file($fs_path)) {
                $stats['refs_rewritten']++;
                // Keep the reference exactly as encoded in the HTML; only the prefix changes.
                return $attr . '=' . $quote . $uploads_url . '/' . $rel_enc . $quote;
            }
            $stats['refs_missing_file']++;
            $missing[$rel_dec] = ($missing[$rel_dec] ?? 0) + 1;
            $missing_per_post[$pid][] = $rel_dec;
            return $m[0]; // leave untouched
        },
        $original
    );
    if ($content !== $original) {
        $stats['posts_changed']++;
        if (!$dry_run) {
            $update->execute([$content, $pid]);
        }
    } else {
        $stats['posts_unchanged']++;
    }
}
// Final summary of the run.
echo "=== Resumen ===\n";
printf("Posts modificados: %d\n", $stats['posts_changed']);
printf("Posts sin cambios: %d\n", $stats['posts_unchanged']);
printf("Referencias reescritas: %d\n", $stats['refs_rewritten']);
printf("Referencias sin fichero: %d\n", $stats['refs_missing_file']);
printf("Rutas faltantes únicas: %d\n", count($missing));
if (!empty($missing)) {
    // Most frequent missing paths first.
    arsort($missing);
    $log_path = '/tmp/fix_image_paths_missing.log';
    $log_body = '';
    foreach ($missing as $path => $n) {
        $log_body .= sprintf("%4d %s", $n, $path) . "\n";
    }
    file_put_contents($log_path, $log_body);
    echo "\nLog rutas faltantes (orden por #ocurrencias): $log_path\n";
    echo "Top 15:\n";
    foreach (array_slice($missing, 0, 15, true) as $path => $n) {
        printf(" %4d %s\n", $n, $path);
    }
    // Breakdown by root folder (the segment right after `images/`).
    $by_root = [];
    foreach ($missing as $path => $n) {
        $parts = explode('/', $path);
        $root = $parts[0] ?? '?';
        $by_root[$root] = ($by_root[$root] ?? 0) + $n;
    }
    arsort($by_root);
    echo "\nFaltantes por carpeta raíz:\n";
    foreach ($by_root as $root => $n) {
        printf(" %4d images/%s/\n", $n, $root);
    }
}
echo "\nHecho.\n";
+225
View File
@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
fix_imported_k2_metas.py
Asigna metas, categorías y Polylang a los posts importados por import_new_k2_items.py.
Los posts WP ya existen (IDs 43914-44082); este script solo añade los metadatos.
Mapping: wp_id = k2_id + 26040
"""
import json
import os
import re
import subprocess
import sys
# ── Config ─────────────────────────────────────────────────────────────────────
# SECURITY: the literal values below are production credentials committed to the
# repository. Every setting can be overridden through an environment variable of
# the same name; the literals remain only as a fallback so existing invocations
# keep working. They should be rotated and removed from the source.
JOOMLA_SSH_HOST = os.environ.get("JOOMLA_SSH_HOST", "134.0.10.170")
JOOMLA_SSH_USER = os.environ.get("JOOMLA_SSH_USER", "feadulta")
JOOMLA_SSH_PASS = os.environ.get("JOOMLA_SSH_PASS", "6Rm2qOF@eundwpda")
JOOMLA_DB_HOST = os.environ.get("JOOMLA_DB_HOST", "127.0.0.1")
JOOMLA_DB_USER = os.environ.get("JOOMLA_DB_USER", "fejoomla3")
JOOMLA_DB_PASS = os.environ.get("JOOMLA_DB_PASS", "5FF-}5^[>7^pK4W9")
JOOMLA_DB_NAME = os.environ.get("JOOMLA_DB_NAME", "fejoomla3")

# WordPress database, reached through `docker exec` on this container.
WP_DOCKER = os.environ.get("WP_DOCKER", "wordpress-mysql")
WP_DB_USER = os.environ.get("WP_DB_USER", "wordpress_user")
WP_DB_PASS = os.environ.get("WP_DB_PASS", "wordpress_pass")
WP_DB_NAME = os.environ.get("WP_DB_NAME", "wordpress_db")

# Only K2 items with an id greater than this one are processed.
LAST_K2_ID = 17873
WP_ID_OFFSET = 26040  # wp_id = k2_id + WP_ID_OFFSET

# WordPress category term IDs assigned by this script.
CAT_FEADULTA = 71
CAT_ARTICULOS = 1650
CAT_EVANGELIO = 1647
CAT_EUCARISTIA = 1648

# Value of K2 extra field 16 -> language slug.
LANG_MAP = {1: 'es', 2: 'en', 3: 'fr', 4: 'it', 5: 'pt'}
# Title keywords that classify a Spanish item under CAT_EUCARISTIA.
DOMINGO_RE = r'DOMINGO|SEMANA SANTA|SEMANA DE PASCUA|PENTECOST|NAVIDAD|EPIFAN'
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def wp_execute(sql: str):
    """Run one SQL statement against the WordPress DB via ``docker exec ... mysql``.

    In dry-run mode the statement is only printed (first 100 characters). On a
    non-zero exit status the stderr text, minus mysql's command-line password
    warning, is printed to stderr (first 200 characters); nothing is raised.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:100]}")
        return

    password_warning = 'mysql: [Warning] Using a password on the command line interface can be insecure.\n'
    argv = [
        'docker', 'exec', WP_DOCKER,
        'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
        '--default-character-set=utf8mb4', '-e', sql,
    ]
    proc = subprocess.run(argv, capture_output=True, text=True)
    if proc.returncode == 0:
        return
    message = proc.stderr.replace(password_warning, '')
    if message.strip():
        print(f" [ERR] {message.strip()[:200]}", file=sys.stderr)
def wp_mysql(query: str) -> list[dict]:
    """Run a SELECT against the WordPress DB and return the rows as dicts.

    The query runs through ``docker exec ... mysql -B`` (tab-separated batch
    output); the first output line supplies the column names. Every value is
    returned as a string.

    Returns an empty list when the query yields no rows or when the command
    fails. A failure is also reported on stderr, so it is not mistaken for an
    empty result.
    """
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-B', '-e', query]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f" [ERR] {result.stderr.strip()[:200]}", file=sys.stderr)
        return []
    # Trim only trailing newlines: a plain strip() would also remove trailing
    # tabs and silently drop an empty last column from the final row.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def esc(s: str) -> str:
    """Backslash-escape backslashes and single quotes for use inside a quoted SQL literal."""
    return s.translate({ord('\\'): '\\\\', ord("'"): "\\'"})
def unhex(val: str) -> str:
    """Decode a hex string (as produced by SQL ``HEX()``) into UTF-8 text.

    Empty values and the literal ``'NULL'`` become ``''``. Invalid UTF-8 bytes
    are replaced; if the value cannot be decoded as hex it is returned unchanged.
    """
    if val and val != 'NULL':
        try:
            raw = bytes.fromhex(val)
            return raw.decode('utf-8', errors='replace')
        except Exception:
            return val
    return ''
def parse_extra_fields(ef_json: str) -> dict:
    """Extract the fields this script needs from a K2 ``extra_fields`` JSON string.

    Returns ``{'lang_val': int | None, 'has_libro': bool}``:

    * ``lang_val`` is the integer value of the field with id 16 (None when it is
      absent, null or not an integer).
    * ``has_libro`` is True when a field with id 9 is present.

    Anything that is not a JSON list of objects (empty input, invalid JSON,
    ``null``, a bare object, non-object entries) yields the default result
    instead of raising.
    """
    result = {'lang_val': None, 'has_libro': False}
    if not ef_json:
        return result
    try:
        fields = json.loads(ef_json)
    except (json.JSONDecodeError, TypeError):
        return result
    # Valid JSON is not necessarily a list of objects; ignore anything else.
    if not isinstance(fields, list):
        return result
    for f in fields:
        if not isinstance(f, dict):
            continue
        fid = str(f.get('id', ''))
        val = f.get('value')
        if fid == '16' and val is not None:
            try:
                result['lang_val'] = int(val)
            except (ValueError, TypeError):
                pass
        elif fid == '9':
            result['has_libro'] = True
    return result
def determine_categories(ef: dict, title: str) -> list[int]:
    """Pick the WordPress category term IDs for an imported item.

    Every item gets CAT_FEADULTA plus exactly one of:

    * CAT_EVANGELIO  - Spanish item (language 1 or unset) with a "libro" field;
    * CAT_EUCARISTIA - Spanish item whose title matches DOMINGO_RE;
    * CAT_ARTICULOS  - everything else.
    """
    lang = ef.get('lang_val')
    is_spanish = lang is None or lang == 1
    if is_spanish and ef.get('has_libro'):
        second = CAT_EVANGELIO
    elif is_spanish and re.search(DOMINGO_RE, title, re.IGNORECASE):
        second = CAT_EUCARISTIA
    else:
        second = CAT_ARTICULOS
    return [CAT_FEADULTA, second]
# ── Main ───────────────────────────────────────────────────────────────────────
def main():
    """Attach metas, categories and the Polylang language to the imported K2 posts.

    Steps:
      1. Look up the term_taxonomy_ids of the managed categories and languages.
      2. Fetch the published K2 items with id > LAST_K2_ID from Joomla over SSH.
      3. For each item whose WP post (k2_id + WP_ID_OFFSET) exists, insert the
         metas and the category/language relationships.
      4. Recalculate the counts of the touched term_taxonomy rows.

    The script can be re-run safely: relationship rows rely on the table's
    primary key via INSERT IGNORE, and metas are inserted only when the post
    does not already have that meta key.
    """
    print(f"=== Fix metas/cats K2 items > {LAST_K2_ID} {'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    # Load the category term_taxonomy_ids.
    term_ids = [CAT_FEADULTA, CAT_ARTICULOS, CAT_EVANGELIO, CAT_EUCARISTIA]
    rows = wp_mysql(f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy WHERE term_id IN ({','.join(map(str,term_ids))}) AND taxonomy='category'")
    tt_ids = {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}
    print(f"TT IDs categorías: {tt_ids}")

    # Load the Polylang language term_taxonomy_ids.
    rows = wp_mysql("SELECT t.slug, tt.term_taxonomy_id FROM wp_terms t JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id WHERE tt.taxonomy='language' AND t.slug IN ('es','en','fr','it','pt')")
    pl_ids = {r['slug']: int(r['term_taxonomy_id']) for r in rows}
    print(f"Polylang TT IDs: {pl_ids}")

    # Check that the WP posts exist (approximate count).
    rows = wp_mysql(f"SELECT COUNT(*) n FROM wp_posts WHERE ID BETWEEN {LAST_K2_ID+WP_ID_OFFSET+1} AND (SELECT MAX(ID) FROM wp_posts)")
    print(f"Posts WP a procesar (aprox): {rows[0]['n'] if rows else '?'}")

    # Fetch the K2 items from Joomla. Text columns are HEX()-encoded so tabs and
    # newlines inside them cannot break the tab-separated output.
    print("\nObteniendo K2 items de Joomla prod...")
    query = (
        f"SELECT id, HEX(title) title, HEX(extra_fields) extra_fields "
        f"FROM ew4r_k2_items WHERE published=1 AND id > {LAST_K2_ID} ORDER BY id;"
    )
    # NOTE(review): both passwords are passed on command lines (sshpass -p, mysql -p).
    mysql_cmd = (
        f"mysql -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
        f"-p'{JOOMLA_DB_PASS}' {JOOMLA_DB_NAME} "
        f"--default-character-set=utf8mb4 -B"
    )
    cmd = ['sshpass', '-p', JOOMLA_SSH_PASS, 'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}', mysql_cmd]
    result = subprocess.run(cmd, input=query, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"ERROR: {result.stderr[:300]}")
        sys.exit(1)

    lines = result.stdout.strip().split('\n')
    headers = lines[0].split('\t')
    items = [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
    print(f"Items obtenidos: {len(items)}")

    stats = {'ok': 0, 'skip': 0}
    for item in items:
        k2_id = int(item['id'])
        wp_id = k2_id + WP_ID_OFFSET
        title = unhex(item.get('title', ''))
        ef_raw = unhex(item.get('extra_fields', ''))
        ef = parse_extra_fields(ef_raw)
        lang = LANG_MAP.get(ef.get('lang_val'), 'es')
        cats = determine_categories(ef, title)

        # Check that the WP post exists.
        existing = wp_mysql(f"SELECT ID FROM wp_posts WHERE ID={wp_id} LIMIT 1")
        if not existing:
            print(f" [SKIP] WP post ID={wp_id} no encontrado (k2={k2_id})")
            stats['skip'] += 1
            continue
        print(f" [{k2_id}{wp_id}] {title[:45]} | lang={lang} | cats={cats}")

        # Metas. wp_postmeta has no unique key on (post_id, meta_key), so
        # INSERT IGNORE would add a duplicate row on every run; insert only when
        # the post does not have the key yet.
        for meta_key, meta_val in [('_fgj2wp_old_k2_id', str(k2_id)), ('Idioma', str(ef.get('lang_val') or 1))]:
            wp_execute(
                f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
                f"SELECT {wp_id}, '{esc(meta_key)}', '{esc(meta_val)}' FROM DUAL "
                f"WHERE NOT EXISTS (SELECT 1 FROM wp_postmeta "
                f"WHERE post_id={wp_id} AND meta_key='{esc(meta_key)}')"
            )

        # Categories.
        for term_id in cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({wp_id}, {tt_id})"
                )

        # Polylang language.
        pl_tt = pl_ids.get(lang)
        if pl_tt:
            wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({wp_id}, {pl_tt})"
            )
        stats['ok'] += 1

    # Update the counts. Skipped when no term_taxonomy_id was resolved, which
    # would otherwise produce an invalid empty `IN ()` clause.
    all_tt = list(tt_ids.values()) + list(pl_ids.values())
    if not DRY_RUN and stats['ok'] > 0 and all_tt:
        print("\nActualizando counts de categorías y Polylang...")
        tt_str = ','.join(str(x) for x in all_tt)
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_str})"
        )

    print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip ===")


if __name__ == '__main__':
    main()
+197
View File
@@ -0,0 +1,197 @@
<?php
/**
* fix_joomla_links.php
*
* Replaces Joomla internal links in WordPress post_content with correct WP URLs.
*
* Handles:
* 1. index.php?option=com_content&view=article&id=NNN → jos_content ID → WP post_name
* 2. es/.../NNN-slug.html (relative) → K2 item ID → WP post_name
* 3. http://feadulta.com/es/.../NNN-slug.html → K2 item ID → WP post_name
* 4. https://farmer.taild3aaf6.ts.net/fea/es/.../NNN-slug.html → K2 ID → WP post_name
*
* Usage: php fix_joomla_links.php [--dry-run]
*/
// With --dry-run the banner announces that no changes will be saved.
$dry_run = in_array('--dry-run', $argv ?? []);
// DB config
$db_host = 'wordpress-mysql';
$db_name = 'wordpress_db';
$db_user = 'wordpress_user';
$db_pass = 'wordpress_pass';
// Base URL that the replacement links are built on.
$wp_site_url = 'https://farmer.taild3aaf6.ts.net/fea';
// PDO is configured to throw exceptions on errors.
$pdo = new PDO("mysql:host=$db_host;dbname=$db_name;charset=utf8mb4", $db_user, $db_pass, [
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
]);
echo "=== Fix Joomla Internal Links ===\n";
echo $dry_run ? "[DRY RUN - no changes will be saved]\n\n" : "[LIVE RUN - changes will be saved]\n\n";
// -------------------------------------------------------------------------
// Step 1: Build lookup maps from wp_postmeta
// -------------------------------------------------------------------------
echo "Building lookup maps from wp_postmeta...\n";

// Loads {old ID (int) => WP post_name} for one migration meta key, limited to
// published/draft posts of type `post` that have a non-empty slug.
$load_slug_map = static function (PDO $pdo, string $meta_key): array {
    $lookup = $pdo->prepare("
        SELECT pm.meta_value AS old_id, p.post_name
        FROM wp_postmeta pm
        JOIN wp_posts p ON pm.post_id = p.ID
        WHERE pm.meta_key = ?
          AND p.post_status IN ('publish', 'draft')
          AND p.post_type = 'post'
          AND p.post_name != ''
    ");
    $lookup->execute([$meta_key]);
    $map = [];
    foreach ($lookup as $row) {
        $map[(int)$row['old_id']] = $row['post_name'];
    }
    return $map;
};

// Map: K2 item ID → WP post_name
$k2_map = $load_slug_map($pdo, '_fgj2wp_old_k2_id');
echo " K2 map: " . count($k2_map) . " entries\n";

// Map: jos_content ID → WP post_name
$joomla_map = $load_slug_map($pdo, '_fgj2wp_old_id');
echo " jos_content map: " . count($joomla_map) . " entries\n\n";
// -------------------------------------------------------------------------
// Step 2: Fetch posts with Joomla links
// -------------------------------------------------------------------------
// The LIKE filters are only a coarse pre-selection of posts; which links are
// actually rewritten is decided by the regex patterns defined further down.
$stmt = $pdo->query("
SELECT ID, post_title, post_content
FROM wp_posts
WHERE post_type = 'post'
AND post_status IN ('publish', 'draft')
AND (
post_content LIKE '%index.php?option=%'
OR post_content LIKE '%\"es/%'
OR post_content LIKE '%/es/%'
OR post_content LIKE '%feadulta.com%'
OR post_content LIKE '%farmer.taild3aaf6%'
)
");
$posts = $stmt->fetchAll(PDO::FETCH_ASSOC);
echo "Posts to process: " . count($posts) . "\n\n";
// -------------------------------------------------------------------------
// Step 3: Process each post
// -------------------------------------------------------------------------
// Run-wide counters.
$stats = [
    'posts_changed' => 0,
    'posts_skipped' => 0,
    'links_replaced' => 0,
    'links_not_found'=> 0,
];
$not_found_log = [];
// Regex patterns (note: href values may be HTML-entity encoded: & → &amp;)
$patterns = [
// Pattern A: index.php?option=com_content&[amp;]view=article&[amp;]id=NNN[;alias]
'joomla_content' => '/href="index\.php\?option=com_content(?:&(?:amp;)?)[^"]*?(?:&(?:amp;)?)id=(\d+)[^"]*"/i',
// Pattern B: K2 item links — only on known Joomla-origin domains/paths
// Matches:
// href="es/[path/]NNN-slug.html" (relative)
// href="http://feadulta.com/[path/]NNN-slug.html" (old domain)
// href="https://farmer.taild3aaf6.ts.net/fea/[path/]NNN-slug.html" (staging domain)
'k2_item' => '/href="(?:(?:https?:\/\/feadulta\.com|https?:\/\/farmer\.taild3aaf6\.ts\.net\/fea)\/)?es\/[^"]*?\/(\d+)-[^"\/]+\.html[^"]*"/i',
];
$update_stmt = $pdo->prepare("UPDATE wp_posts SET post_content = ? WHERE ID = ?");
// Rewrite the Joomla/K2 links of every candidate post.
//
// For each post both patterns are applied in turn; the post is written back
// (unless --dry-run) only when its content actually changed.
//
// preg_replace_callback() returns NULL when PCRE fails (for example when the
// backtrack limit is hit on a very large body). NULL compares as "different"
// from the original text, so without the explicit check below such a post
// would have its content overwritten with NULL.
foreach ($posts as $post) {
    $original = $post['post_content'];

    // Snapshot the counters so a post that fails half-way does not leave
    // partial numbers behind.
    $stats_before = $stats;
    $log_before   = $not_found_log;

    // --- Pattern A: jos_content links ---
    $content = preg_replace_callback(
        $patterns['joomla_content'],
        function ($m) use ($joomla_map, $wp_site_url, &$stats, &$not_found_log, $post) {
            $id = (int)$m[1];
            if (isset($joomla_map[$id])) {
                $stats['links_replaced']++;
                $new_url = $wp_site_url . '/' . $joomla_map[$id] . '/';
                return 'href="' . $new_url . '"';
            }
            $stats['links_not_found']++;
            $not_found_log[] = "jos_content ID=$id not found (post {$post['ID']}: {$post['post_title']})";
            return $m[0]; // keep original
        },
        $original
    );

    // --- Pattern B: K2 item links ---
    if ($content !== null) {
        $content = preg_replace_callback(
            $patterns['k2_item'],
            function ($m) use ($k2_map, $wp_site_url, &$stats, &$not_found_log, $post) {
                $id = (int)$m[1];
                // An ID of 0 can never be a real K2 item.
                if ($id === 0) return $m[0];
                // Skip numbers that look like a year (1990-2100) when the URL also
                // contains a /YYYY/ segment: that is a date-based URL, not a K2 link.
                if ($id >= 1990 && $id <= 2100 && preg_match('/\/\d{4}\//', $m[0])) {
                    return $m[0];
                }
                if (isset($k2_map[$id])) {
                    $stats['links_replaced']++;
                    $new_url = $wp_site_url . '/' . $k2_map[$id] . '/';
                    return 'href="' . $new_url . '"';
                }
                $stats['links_not_found']++;
                $not_found_log[] = "K2 ID=$id not found in map (post {$post['ID']}: {$post['post_title']}) | original: " . substr($m[0], 0, 100);
                return $m[0]; // keep original
            },
            $content
        );
    }

    if ($content === null) {
        // PCRE error: leave the post untouched and undo this post's counters.
        $stats         = $stats_before;
        $not_found_log = $log_before;
        $stats['posts_skipped']++;
        fwrite(STDERR, " [WARN] PCRE error " . preg_last_error()
            . " on post {$post['ID']}: {$post['post_title']} - left unchanged\n");
        continue;
    }

    if ($content !== $original) {
        $stats['posts_changed']++;
        if (!$dry_run) {
            $update_stmt->execute([$content, $post['ID']]);
        } else {
            echo " [DRY] Would update post {$post['ID']}: {$post['post_title']}\n";
        }
    } else {
        $stats['posts_skipped']++;
    }
}
// -------------------------------------------------------------------------
// Step 4: Summary
// -------------------------------------------------------------------------
echo "\n=== Results ===\n";
echo "Posts changed: " . $stats['posts_changed'] . "\n";
echo "Posts unchanged: " . $stats['posts_skipped'] . "\n";
echo "Links replaced: " . $stats['links_replaced'] . "\n";
echo "Links not resolved: " . $stats['links_not_found'] . "\n";

// Persist the complete list of unresolved links and preview the first ten.
if (count($not_found_log) > 0) {
    $log_path = '/tmp/fix_joomla_links_unresolved.log';
    file_put_contents($log_path, implode("\n", $not_found_log) . "\n");
    echo "\nUnresolved links logged to: $log_path\n";
    echo "First 10 unresolved:\n";
    $preview = array_slice($not_found_log, 0, 10);
    foreach ($preview as $line) {
        echo " $line\n";
    }
}
echo "\nDone.\n";
+94
View File
@@ -0,0 +1,94 @@
<?php
/**
* fix_k2_authors.php (#143) — Corrige la autoría de artículos de origen K2 que
* quedaron atribuidos al usuario genérico «Fe Adulta» (post_author 1 / 890)
* porque el importador de delta (import_new_k2_items.py) no encontró un usuario
* WP para su `created_by` de Joomla y cayó al fallback admin.
*
* Qué hace, por cada artículo del TSV de entrada:
* 1. Crea (idempotente) un usuario WP rol 'subscriber' con el nombre real del
* autor (display_name), login = slug del nombre, email = slug@feadulta.com.
* 2. Reasigna post_author del/los post WP de ese K2 (los que sigan en 1/890).
*
* Entrada: un TSV «k2_id<TAB>created_by<TAB>nombre», generado desde Joomla:
* IDS=<lista de k2_id atribuidos a 1/890> # de wp: meta _fgj2wp_old_k2_id
* mysql --skip-ssl ... fejoomla3 -N -e \
* "SELECT i.id, i.created_by, COALESCE(u.name,'') \
* FROM ew4r_k2_items i LEFT JOIN ew4r_users u ON u.id=i.created_by \
* WHERE i.id IN ($IDS);" > /tmp/autores143.tsv
*
* Uso (en el servidor, dentro de /web/wp-nuevo):
* FEA_TSV=/tmp/autores143.tsv wp eval-file scripts/fix_k2_authors.php # dry-run
* APPLY=1 FEA_TSV=/tmp/autores143.tsv wp eval-file scripts/fix_k2_authors.php # aplica
*
* Notas:
* - Los autores con created_by cuyo usuario Joomla ya no existe llegan con
* nombre vacío en el TSV → se SALTAN (no recuperable; firma en el cuerpo).
* - El nombre literal «Fe Adulta» se salta (es legítimo).
* - Los nuevos usuarios quedan sin foto_perfil (avatar genérico). Si se quiere
* avatar propio, generarlo aparte (ver flujo de avatares #62).
*/
// Run mode and input file come from the environment: APPLY=1 writes changes,
// anything else is a dry run; FEA_TSV overrides the default TSV path.
$APPLY = getenv('APPLY') === '1';
$TSV   = getenv('FEA_TSV') ?: '/tmp/autores143.tsv';
if (!is_readable($TSV)) {
    fwrite(STDERR, "No puedo leer TSV: $TSV\n");
    exit(1);
}
global $wpdb;

// Author IDs treated as the generic fallback account, and author names that
// must never be turned into users.
$GENERIC    = [1, 890];
$SKIP_NAMES = ['Fe Adulta'];

// Group the K2 item IDs by author name. Rows with fewer than three columns,
// an empty name, or a skipped name are ignored.
$byname = [];
foreach (file($TSV) as $line) {
    $cols = explode("\t", rtrim($line, "\n"));
    if (count($cols) >= 3) {
        $name = trim($cols[2]);
        if ($name !== '' && !in_array($name, $SKIP_NAMES, true)) {
            $byname[$name][] = (int) $cols[0];
        }
    }
}
// For every author name: make sure a WP user exists (created only in APPLY
// mode), then move that author's K2-origin posts away from the generic
// accounts listed in $GENERIC.
//
// A post is counted as reassigned only when wp_update_post() really
// succeeded; failures are logged as errors instead of being counted.
$created = 0; $reassigned = 0; $log = [];
foreach ($byname as $name => $k2ids) {
    // Login is derived from the display name, so re-runs find the same user.
    $login = sanitize_user(sanitize_title($name), true);
    $u = get_user_by('login', $login);
    if (!$u) {
        // Pick an e-mail address that is not taken yet (slug@, slug2@, ...).
        $email = $login . '@feadulta.com'; $i = 2;
        while (email_exists($email)) { $email = $login . $i . '@feadulta.com'; $i++; }
        if ($APPLY) {
            $uid = wp_insert_user([
                'user_login'   => $login,
                'user_pass'    => wp_generate_password(20),
                'user_email'   => $email,
                'display_name' => $name,
                'nickname'     => $name,
                'role'         => 'subscriber',
            ]);
            if (is_wp_error($uid)) { $log[] = "ERROR crear '$name': " . $uid->get_error_message(); continue; }
            $u = get_userdata($uid); $created++;
            $log[] = "USER creado: '$name' -> id $uid ($login / $email)";
        } else {
            $log[] = "[dry] crearia USER '$name' ($login / $email)"; $created++;
        }
    } else {
        $log[] = "USER ya existe: '$name' -> id {$u->ID} ($login)";
    }
    // 0 means "no user available" (always the case for new users in a dry run).
    $uid = $u ? $u->ID : 0;

    foreach ($k2ids as $k2) {
        $pids = $wpdb->get_col($wpdb->prepare(
            "SELECT DISTINCT post_id FROM {$wpdb->postmeta}
             WHERE meta_key='_fgj2wp_old_k2_id' AND meta_value=%s", (string) $k2));
        foreach ($pids as $pid) {
            $a = (int) get_post_field('post_author', $pid);
            // Only touch posts still attributed to a generic account.
            if (!in_array($a, $GENERIC, true)) continue;

            if (!$APPLY) {
                $reassigned++; $log[] = " [dry] post $pid (k2 $k2) author $a -> '$name'";
                continue;
            }
            if (!$uid) {
                // APPLY mode without a usable user: report it, do not count it.
                $log[] = " ERROR post $pid (k2 $k2): sin usuario para '$name'";
                continue;
            }
            // Second argument true => failures come back as WP_Error.
            $res = wp_update_post(['ID' => (int) $pid, 'post_author' => $uid], true);
            if (is_wp_error($res) || !$res) {
                $msg = is_wp_error($res) ? $res->get_error_message() : 'wp_update_post devolvio 0';
                $log[] = " ERROR post $pid (k2 $k2): $msg";
                continue;
            }
            $reassigned++; $log[] = " post $pid (k2 $k2) author $a -> $uid";
        }
    }
}

echo implode("\n", $log) . "\n";
echo "\nRESUMEN: usuarios " . ($APPLY ? 'creados' : 'a crear') . ": $created ; "
    . "posts " . ($APPLY ? 'reasignados' : 'a reasignar') . ": $reassigned "
    . "(modo " . ($APPLY ? 'APPLY' : 'DRY-RUN') . ")\n";
+241
View File
@@ -0,0 +1,241 @@
<?php
/**
* fix_numeric_categories.php
*
* Renames 100 WordPress categories that have numeric names (K2 Autor field IDs)
* to their proper author names from the Joomla K2 extra field mapping.
*
* When a named category already exists for the same author, merges both
* (moves posts from numeric → named category, then deletes numeric).
*
* Usage: php fix_numeric_categories.php [--dry-run]
*/
// Dry-run flag: true when "--dry-run" appears anywhere on the command line.
$dry_run = in_array('--dry-run', $argv ?? []);

// WordPress database connection settings.
$db_host = 'wordpress-mysql';
$db_name = 'wordpress_db';
$db_user = 'wordpress_user';
$db_pass = 'wordpress_pass';

// Connect with exceptions enabled so any SQL failure aborts the script.
$dsn = sprintf('mysql:host=%s;dbname=%s;charset=utf8mb4', $db_host, $db_name);
$pdo = new PDO($dsn, $db_user, $db_pass, [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]);

echo "=== Fix Numeric Author Categories ===\n";
if ($dry_run) {
    echo "[DRY RUN]\n\n";
} else {
    echo "[LIVE RUN]\n\n";
}
// -------------------------------------------------------------------------
// Mapping: numeric value → author name (from K2 extra field "Autor")
// -------------------------------------------------------------------------
$autor_map = [
1 => "Fray Marcos",
2 => "José Antonio Pagola",
3 => "Enrique Martínez Lozano",
4 => "José Enrique Galarreta",
5 => "José Arregi",
6 => "Eloy Roy",
7 => "Dolores Aleixandre",
9 => "Florentino Ulibarri",
10 => "Rafael Calvo",
11 => "Julián Mellado",
12 => "Vicente Martínez",
13 => "Matilde Gastalver",
14 => "Koldo Aldai",
15 => "Sandra Hojman",
16 => "Leonardo Boff",
17 => "José M. Castillo",
18 => "Luís Alemán",
19 => "Juan José Tamayo",
20 => "José Ignacio González Faus",
22 => "José Manuel Vidal",
23 => "Isabel Gómez-Acebo",
31 => "Faustino Vilabrille",
32 => "Víctor Daniel Blanco",
33 => "Nuevo Testamento",
36 => "Gabriel Mª Otalora",
38 => "Luís García Orso",
40 => "María Teresa Sánchez Carmona",
41 => "Emma Martínez Ocaña",
45 => "Mari Patxi Ayerra",
49 => "Jesús Bastante",
52 => "J. A. Estrada",
53 => "Rafael Díaz Arias",
58 => "Susana Merino",
69 => "Asociación de teólogos y teólogas Juan XXIII",
73 => "José Ignacio Calleja",
75 => "Autor desconocido",
76 => "Gerardo Villar",
79 => "José Sánchez Luque",
83 => "Mari Paz López Santos",
84 => "Patricia Paz",
87 => "Pedro Casaldáliga",
88 => "Foro «Curas de Madrid»",
92 => "Xavier Pikaza",
96 => "Benjamín Forcano",
97 => "Ima Sanchís",
108 => "Pedro M. Lamet",
114 => "Juan G. Bedoya",
115 => "Juan Masiá",
123 => "Frei Betto",
124 => "Juan Cejudo",
125 => "Miguel Ángel Mesa",
126 => "Carlos F. Barberá",
127 => "Mariá Corbí",
129 => "Rafael Fernando Navarro",
149 => "José María Díez Alegría",
174 => "Carmen Soto",
175 => "Hans Küng",
188 => "Fidel Aizpurúa",
194 => "Pepcastelló",
208 => "Juan Yzuel",
234 => "Maite García Romero",
263 => "Gonzalo Haya",
288 => "Redes Cristianas",
303 => "Víctor Codina",
306 => "José María García-Mauriño",
312 => "Patxi Loidi",
321 => "Jesús Gil García",
323 => "John P. Meier",
325 => "Rogelio Cárdenas",
329 => "Pablo Ordaz",
345 => "Papa Francisco",
347 => "Vicky Irigaray",
357 => "Marco Antonio Velásquez Uribe",
362 => "Fernando Bermúdez López",
374 => "Pablo",
375 => "José Luis Sicre",
376 => "Miguel A. Munárriz Casajús",
382 => "Santiago Agrelo",
392 => "Felix Jiménez Tutor",
396 => "José María Alvarez",
399 => "Hechos",
404 => "Bruno Álvarez",
412 => "Luis Miguel Modino",
418 => "Varios autores",
435 => "Voces cristianas de Sevilla",
437 => "Religión Digital",
443 => "Francisco Bautista",
444 => "Yolanda Chávez",
449 => "Atrio",
450 => "Carolina Abarca",
465 => "Magdalena Bennasar",
516 => "Eclesalia",
520 => "Antonio Aradillas",
529 => "Humanismo Sin credos",
540 => "Juan Zapatero",
557 => "Marifé Ramos González",
566 => "Marta García",
570 => "María Dolores López Guzmán",
583 => "Inma Eibe",
615 => "Íñigo García Blanco",
];
// -------------------------------------------------------------------------
// Fetch all numeric categories from WordPress
// -------------------------------------------------------------------------
// A category is "numeric" when its name consists only of digits; rows come
// back ordered by that numeric value.
$numeric_sql = "
SELECT t.term_id, t.name, t.slug, tt.count
FROM wp_terms t
JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
WHERE tt.taxonomy = 'category' AND t.name REGEXP '^[0-9]+$'
ORDER BY CAST(t.name AS UNSIGNED)
";
$numeric_cats = $pdo->query($numeric_sql)->fetchAll(PDO::FETCH_ASSOC);
echo "Numeric categories found: " . count($numeric_cats) . "\n\n";

// Outcome counters for the final report.
$stats = [
    'renamed' => 0,
    'merged'  => 0,
    'skipped' => 0,
    'no_map'  => 0,
];
// Rename or merge every numeric category.
//
// - No entry in $autor_map: the category is reported and left alone.
// - A category with the author's name already exists: MERGE (move the post
//   relationships over, refresh the target count, delete the numeric term).
// - Otherwise: RENAME (new name plus a slug derived from it).
//
// Each merge runs inside one transaction so a failure half-way cannot leave
// relationships moved while the numeric term still exists (or the reverse).

// Statements are prepared once and reused for every category.
$find_existing = $pdo->prepare("
    SELECT t.term_id, tt.count
    FROM wp_terms t
    JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
    WHERE tt.taxonomy = 'category' AND t.name = ?
    AND t.term_id != ?
");
$tt_stmt = $pdo->prepare("SELECT term_taxonomy_id FROM wp_term_taxonomy WHERE term_id = ? AND taxonomy = 'category'");
$move_rels = $pdo->prepare("
    UPDATE IGNORE wp_term_relationships
    SET term_taxonomy_id = ?
    WHERE term_taxonomy_id = ?
");
$drop_rels = $pdo->prepare("DELETE FROM wp_term_relationships WHERE term_taxonomy_id = ?");
$recount = $pdo->prepare("
    UPDATE wp_term_taxonomy SET count = (
        SELECT COUNT(*) FROM wp_term_relationships WHERE term_taxonomy_id = ?
    ) WHERE term_taxonomy_id = ?
");
$drop_taxonomy = $pdo->prepare("DELETE FROM wp_term_taxonomy WHERE term_id = ? AND taxonomy = 'category'");
// The wp_terms row is removed only when no other taxonomy still uses the term.
$drop_term = $pdo->prepare("
    DELETE FROM wp_terms
    WHERE term_id = ?
    AND NOT EXISTS (SELECT 1 FROM wp_term_taxonomy WHERE term_id = ?)
");
$rename = $pdo->prepare("UPDATE wp_terms SET name = ?, slug = ? WHERE term_id = ?");

foreach ($numeric_cats as $cat) {
    $num_val    = (int)$cat['name'];
    $term_id    = (int)$cat['term_id'];
    $post_count = (int)$cat['count'];

    if (!isset($autor_map[$num_val])) {
        echo " [SKIP] No mapping for value $num_val (term_id=$term_id, $post_count posts)\n";
        $stats['no_map']++;
        continue;
    }
    $new_name = $autor_map[$num_val];

    // Check if a category with this name already exists
    $find_existing->execute([$new_name, $term_id]);
    $existing_cat = $find_existing->fetch(PDO::FETCH_ASSOC);
    $find_existing->closeCursor();

    if ($existing_cat) {
        // MERGE: move posts from numeric category to the existing named category
        $target_term_id = (int)$existing_cat['term_id'];
        echo " [MERGE] \"$num_val\" ($post_count posts) \"$new_name\" (term_id=$target_term_id, existing {$existing_cat['count']} posts)\n";
        if (!$dry_run) {
            // Get term_taxonomy_id for both
            $tt_stmt->execute([$term_id]);
            $src_tt_id = (int)$tt_stmt->fetchColumn();
            $tt_stmt->execute([$target_term_id]);
            $dst_tt_id = (int)$tt_stmt->fetchColumn();

            $pdo->beginTransaction();
            try {
                // Move post relationships; IGNORE skips posts already in the target.
                $move_rels->execute([$dst_tt_id, $src_tt_id]);
                // Delete remaining relationships for source (duplicates that weren't moved)
                $drop_rels->execute([$src_tt_id]);
                // Update count on target
                $recount->execute([$dst_tt_id, $dst_tt_id]);
                // Delete numeric category
                $drop_taxonomy->execute([$term_id]);
                $drop_term->execute([$term_id, $term_id]);
                $pdo->commit();
            } catch (Throwable $e) {
                if ($pdo->inTransaction()) {
                    $pdo->rollBack();
                }
                throw $e;
            }
        }
        $stats['merged']++;
    } else {
        // RENAME: update name and slug
        $new_slug = sanitize_slug($new_name);
        echo " [RENAME] \"$num_val\" \"$new_name\" (term_id=$term_id, $post_count posts)\n";
        if (!$dry_run) {
            $rename->execute([$new_name, $new_slug, $term_id]);
        }
        $stats['renamed']++;
    }
}

echo "\n=== Results ===\n";
echo "Renamed: {$stats['renamed']}\n";
echo "Merged: {$stats['merged']}\n";
echo "Skipped (no map): {$stats['no_map']}\n";
echo "\nDone.\n";
// -------------------------------------------------------------------------
/**
 * Build a URL slug from a display name.
 *
 * The name is lower-cased, accented Latin letters are transliterated to
 * ASCII, every remaining character other than a-z, 0-9, whitespace and "-"
 * is removed, whitespace runs become a single "-", and leading/trailing
 * dashes are trimmed.
 *
 * Letters missing from the transliteration table are deleted by the
 * clean-up regex, so the table covers the common Western European set
 * rather than only the Spanish one. Ordinal indicators are intentionally
 * left out of the table, so "Mª" still reduces to "m".
 *
 * @param string $name Human-readable name, UTF-8 encoded.
 * @return string Slug; may be empty when nothing usable remains.
 */
function sanitize_slug(string $name): string {
    static $translit = [
        'á' => 'a', 'à' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
        'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
        'í' => 'i', 'ì' => 'i', 'î' => 'i', 'ï' => 'i',
        'ó' => 'o', 'ò' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
        'ú' => 'u', 'ù' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
        'ñ' => 'n', 'ç' => 'c',
        'æ' => 'ae', 'œ' => 'oe', 'ß' => 'ss',
    ];

    $slug = strtr(mb_strtolower($name, 'UTF-8'), $translit);
    $slug = preg_replace('/[^a-z0-9\s-]/', '', $slug);
    $slug = preg_replace('/[\s]+/', '-', trim($slug));
    $slug = preg_replace('/-+/', '-', $slug);
    return trim($slug, '-');
}
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
fix_remaining_titles.py
Fixes posts where the translated title still equals the Spanish original.
Queries DB dynamically, then translates each title via Jan API.
"""
import pymysql
import json
import urllib.request
import time
# Jan API chat-completions endpoint and the model requested for translation.
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
# Keyword arguments passed to pymysql.connect(); DictCursor makes every row a
# dict keyed by column name.
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
          password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
          cursorclass=pymysql.cursors.DictCursor)
# Language slug -> language name used in the translation prompt.
LANG_NAMES = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
def translate_title(spanish_title, lang_name):
    """Ask the Jan chat endpoint to translate one Spanish title.

    Args:
        spanish_title: Title text in Spanish.
        lang_name: Target language name as it should appear in the prompt.

    Returns:
        The model's reply with surrounding whitespace and quote characters
        stripped.

    Raises:
        Whatever ``urllib.request.urlopen`` or JSON decoding raise, plus
        ``KeyError``/``IndexError`` when the reply has an unexpected shape.
    """
    messages = [
        {"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
        {"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{spanish_title}"},
    ]
    body = json.dumps({
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": 0.1,
        "max_tokens": 120,
    }).encode("utf-8")
    request = urllib.request.Request(
        JAN_URL,
        data=body,
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        reply = json.loads(response.read())
    text = reply["choices"][0]["message"]["content"]
    return text.strip().strip('"').strip("'")
def main():
    """Translate every non-Spanish post title that still equals its Spanish original.

    The query pairs each translated post with its Spanish counterpart and keeps
    the pairs whose titles are identical. Each distinct (title, language) pair
    is translated once; a failed translation is cached as ``None`` so later
    rows with the same pair are skipped without another API call. The database
    connection is closed even when a statement raises.
    """
    db = pymysql.connect(**DB)
    done = 0
    errors = 0
    try:
        with db.cursor() as c:
            # Find all posts where the title = the Spanish original's title (untranslated).
            # No parameters are passed, so the '%' characters in LIKE stay literal.
            c.execute("""
                SELECT p.ID, t.slug as lang, p.post_title as current_title, p2.post_title as sp_title
                FROM wp_posts p
                JOIN wp_term_relationships trl ON p.ID=trl.object_id
                JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
                JOIN wp_terms t ON ttl.term_id=t.term_id
                JOIN wp_term_relationships trg ON p.ID=trg.object_id
                JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
                JOIN wp_posts p2 ON (ttg.description LIKE CONCAT('%i:',p2.ID,';%') OR ttg.description LIKE CONCAT('%i:',p2.ID,'}%'))
                JOIN wp_term_relationships trl2 ON p2.ID=trl2.object_id
                JOIN wp_term_taxonomy ttl2 ON trl2.term_taxonomy_id=ttl2.term_taxonomy_id AND ttl2.taxonomy='language'
                JOIN wp_terms t2 ON ttl2.term_id=t2.term_id AND t2.slug='es'
                WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
                AND t.slug != 'es'
                AND p.post_title = p2.post_title
                ORDER BY t.slug, p.ID
            """)
            rows = c.fetchall()
            print(f"Found {len(rows)} posts with untranslated titles\n")

            cache = {}  # (sp_title, lang) -> translated title, or None on failure
            for row in rows:
                post_id = row['ID']
                lang = row['lang']
                sp_title = row['sp_title']
                lang_name = LANG_NAMES.get(lang, lang)
                key = (sp_title, lang)

                if key not in cache:
                    try:
                        t0 = time.time()
                        translated = translate_title(sp_title, lang_name)
                        elapsed = time.time() - t0
                    except Exception as e:  # best effort: one bad title must not stop the run
                        print(f" [{lang}] ERROR: {e}")
                        errors += 1
                        cache[key] = None
                        continue
                    # Reject if translation = original (model failed)
                    if translated.upper() == sp_title.upper():
                        print(f" [{lang}] FAILED (returned same): {sp_title[:50]}")
                        errors += 1
                        cache[key] = None
                        continue
                    cache[key] = translated
                    print(f" [{lang}] {sp_title[:40]!r} -> {translated[:40]!r} ({elapsed:.0f}s)")

                new_title = cache.get(key)
                if not new_title:
                    continue

                c.execute("UPDATE wp_posts SET post_title=%s WHERE ID=%s", (new_title, post_id))
                db.commit()  # commit per row so finished work survives a later failure
                done += 1
                print(f" Updated {post_id} [{lang}]: {new_title[:60]}")
    finally:
        db.close()
    print(f"\nDone: {done} fixed, {errors} errors/skipped")


if __name__ == "__main__":
    main()
+180
View File
@@ -0,0 +1,180 @@
#!/usr/bin/env python3
"""
fix_titles.py
Fixes wrong/contaminated/untranslated titles for translated WordPress posts.
Translates only the title via Jan (fast, ~5s each).
"""
import pymysql
import json
import urllib.request
import sys
import time
# Jan chat-completions endpoint and the model requested for translation.
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
# WordPress database connection settings (consumed by get_db()).
DB_HOST = "172.18.0.2"
DB_PORT = 3306
DB_NAME = "wordpress_db"
DB_USER = "wordpress_user"
DB_PASS = "wordpress_pass"
# Language code -> language name used in the translation prompt.
TARGET_LANGS = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
# All posts needing title fix: post_id -> (lang, spanish_id, spanish_title)
FIXES = {
43151: ("en", 42523, "LA TENTACIÓN"),
43281: ("fr", 42523, "LA TENTACIÓN"),
43150: ("en", 42524, "CUANDO NOS LEEMOS EN CLAVE DE CARENCIA"),
43280: ("fr", 42524, "CUANDO NOS LEEMOS EN CLAVE DE CARENCIA"),
43278: ("fr", 42525, "SE TRATA DE BUSCAR LO MEJOR PARA MÍ, AUNQUE ME CUESTE"),
43270: ("it", 42526, "PARA SER TENTADO"),
43269: ("pt", 42526, "PARA SER TENTADO"),
43143: ("en", 42531, "LA MAYOR TENTACIÓN HUMANA"),
43263: ("fr", 42531, "LA MAYOR TENTACIÓN HUMANA"),
43261: ("it", 42531, "LA MAYOR TENTACIÓN HUMANA"),
43256: ("fr", 42532, "MIÉRCOLES DE CENIZA"),
43260: ("it", 42532, "MIÉRCOLES DE CENIZA"),
43141: ("en", 42533, "1º DOMINGO DE CUARESMA"),
43259: ("it", 42533, "1º DOMINGO DE CUARESMA"),
43251: ("pt", 42533, "1º DOMINGO DE CUARESMA"),
43137: ("en", 42538, "ADÁN, EVA Y JESÚS FRENTE A LA TENTACIÓN"),
43240: ("fr", 42538, "ADÁN, EVA Y JESÚS FRENTE A LA TENTACIÓN"),
43236: ("pt", 42538, "ADÁN, EVA Y JESÚS FRENTE A LA TENTACIÓN"),
43135: ("en", 42544, "LO PROVISIONAL Y LO DEFINITIVO"),
43234: ("fr", 42544, "LO PROVISIONAL Y LO DEFINITIVO"),
43228: ("pt", 42544, "LO PROVISIONAL Y LO DEFINITIVO"),
43134: ("en", 42545, "2º DOMINGO DE CUARESMA"),
43232: ("fr", 42545, "2º DOMINGO DE CUARESMA"),
43226: ("pt", 42545, "2º DOMINGO DE CUARESMA"),
43225: ("pt", 42546, "POR LA RENUNCIA AL TRIUNFO"),
43132: ("en", 42547, "LO DIVINO ES NUESTRA ESENCIA"),
43233: ("it", 42547, "LO DIVINO ES NUESTRA ESENCIA"),
43131: ("en", 42548, "¡QUÉ BUENO ES QUE ESTEMOS AQUÍ!"),
43223: ("fr", 42548, "¡QUÉ BUENO ES QUE ESTEMOS AQUÍ!"),
43230: ("it", 42548, "¡QUÉ BUENO ES QUE ESTEMOS AQUÍ!"),
43216: ("pt", 42549, "¿A QUÉ TRANSFIGURACIÓN NOS ESTAMOS REFIRIENDO?"),
43129: ("en", 42555, "CUANDO NOS LEEMOS EN CLAVE DE PLENITUD"),
43211: ("fr", 42555, "CUANDO NOS LEEMOS EN CLAVE DE PLENITUD"),
43221: ("it", 42555, "CUANDO NOS LEEMOS EN CLAVE DE PLENITUD"),
43212: ("pt", 42555, "CUANDO NOS LEEMOS EN CLAVE DE PLENITUD"),
43128: ("en", 42556, "CUARESMA: CREER EN EL EVANGELIO"),
43208: ("fr", 42556, "CUARESMA: CREER EN EL EVANGELIO"),
43127: ("en", 42557, "LA CUARESMA COMO PEDAGOGÍA EN EL TIEMPO"),
43206: ("fr", 42557, "LA CUARESMA COMO PEDAGOGÍA EN EL TIEMPO"),
43217: ("it", 42557, "LA CUARESMA COMO PEDAGOGÍA EN EL TIEMPO"),
43205: ("pt", 42557, "LA CUARESMA COMO PEDAGOGÍA EN EL TIEMPO"),
43126: ("en", 42558, "¡NO TENEMOS UN DIOS VENGATIVO!"),
43124: ("en", 42560, 'CARLOS AGUIAR: "LA SINODALIDAD HA VENIDO A LA IGLESIA PARA QUEDARSE"'),
43123: ("en", 42561, "¿HERENCIA CRISTIANA?"),
43196: ("fr", 42561, "¿HERENCIA CRISTIANA?"),
43194: ("pt", 42561, "¿HERENCIA CRISTIANA?"),
43122: ("en", 42562, 'EL PAPA ADVIERTE A LOS CURAS DE LA "PANDEMIA" DEL CLERICALISMO'),
43120: ("en", 42564, "MOISÉS, LA SAMARITANA Y EL BORRACHO"),
43187: ("pt", 42564, "MOISÉS, LA SAMARITANA Y EL BORRACHO"),
43119: ("en", 42565, "EL FINAL DE LA BÚSQUEDA"),
43182: ("pt", 42565, "EL FINAL DE LA BÚSQUEDA"),
43174: ("fr", 42568, "EN EL POZO DE LA DIGNIDAD LIBERADA"),
43183: ("it", 42568, "EN EL POZO DE LA DIGNIDAD LIBERADA"),
43115: ("en", 42569, "PALABRA Y EUCARISTÍA"),
43171: ("fr", 42569, "PALABRA Y EUCARISTÍA"),
43172: ("pt", 42569, "PALABRA Y EUCARISTÍA"),
43167: ("fr", 42570, 'MABEL RUIZ: "LA TRADICIÓN HA UTILIZADO A LAS MUJERES PARA QUE SEAN SILENCIADAS"'),
43169: ("pt", 42570, 'MABEL RUIZ: "LA TRADICIÓN HA UTILIZADO A LAS MUJERES PARA QUE SEAN SILENCIADAS"'),
43113: ("en", 42571, 'LEÓN XIV, ANTE EL ATAQUE DE EEUU E ISRAEL CONTRA IRÁN: "HAY QUE DETENERLO"'),
43166: ("pt", 42571, 'LEÓN XIV, ANTE EL ATAQUE DE EEUU E ISRAEL CONTRA IRÁN: "HAY QUE DETENERLO"'),
43111: ("en", 42573, 'VICARIO GENERAL DE MOSCÚ: "LA GUERRA EN UCRANIA DEBE TERMINAR"'),
43104: ("pt", 42573, 'VICARIO GENERAL DE MOSCÚ: "LA GUERRA EN UCRANIA DEBE TERMINAR"'),
43163: ("pt", 42574, "SERVIR ES UNA FORMA DE LIDERAR"),
43156: ("pt", 42576, 'DIARMAID MACCULLOCH, HISTORIADOR: "NO EXISTE UNA ENSEÑANZA UNIFORME SOBRE SEXUALIDAD"'),
43155: ("pt", 42577, "3º DOMINGO DE CUARESMA"),
}
# Orphaned posts to delete (no Polylang link to any Spanish original)
ORPHANS_TO_DELETE = [42581, 43130, 43235]
def translate_title(spanish_title, lang_code, lang_name):
    """Ask the Jan chat endpoint to translate one Spanish title.

    Args:
        spanish_title: Title text in Spanish.
        lang_code: Target language code; accepted but not used in the request.
        lang_name: Target language name as it should appear in the prompt.

    Returns:
        The model's reply with surrounding whitespace and quote characters
        stripped.
    """
    prompt = f"Translate this title from Spanish to {lang_name}. Return ONLY the translated title in ALL CAPS, nothing else: {spanish_title}"
    body = json.dumps({
        "model": JAN_MODEL,
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.2,
        "max_tokens": 100,
    }).encode("utf-8")
    request = urllib.request.Request(
        JAN_URL,
        data=body,
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        reply = json.loads(response.read())
    text = reply["choices"][0]["message"]["content"]
    return text.strip().strip('"').strip("'")
def get_db():
    """Open a new connection to the WordPress database.

    Rows are returned as dicts (``DictCursor``). The caller owns the
    connection and must close it.
    """
    settings = {
        "host": DB_HOST,
        "port": DB_PORT,
        "user": DB_USER,
        "password": DB_PASS,
        "database": DB_NAME,
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
def main():
    """Delete the orphaned posts, then retitle every post listed in FIXES.

    Orphans are removed through WP-CLI inside the ``wordpress-web`` container;
    a non-zero exit status is reported as a failure instead of a deletion.
    Titles are translated once per (Spanish post, language) pair and written
    straight to ``wp_posts``. The database connection is closed even when a
    statement raises.
    """
    import subprocess  # only this function needs it

    db = get_db()
    done = 0
    errors = 0
    try:
        c = db.cursor()

        # Delete orphans first
        print("Deleting orphaned posts...")
        for orphan_id in ORPHANS_TO_DELETE:
            cmd = ["docker", "exec", "wordpress-web", "wp", "post", "delete",
                   str(orphan_id), "--force", "--allow-root"]
            result = subprocess.run(cmd, capture_output=True, text=True)
            detail = result.stdout.strip() or result.stderr.strip()
            if result.returncode == 0:
                print(f" Deleted {orphan_id}: {detail}")
            else:
                print(f" FAILED to delete {orphan_id} (exit {result.returncode}): {detail}")

        print(f"\nFixing {len(FIXES)} titles...\n")

        # Group by Spanish title to batch translate same title to multiple langs
        by_spanish = {}
        for post_id, (lang, sp_id, sp_title) in FIXES.items():
            by_spanish.setdefault((sp_id, sp_title), []).append((post_id, lang))

        translated_cache = {}  # (sp_id, lang) -> translated_title
        for (sp_id, sp_title), targets in by_spanish.items():
            print(f"ES:{sp_id}{sp_title[:50]}")
            for post_id, lang in targets:
                lang_name = TARGET_LANGS[lang]
                cache_key = (sp_id, lang)
                if cache_key not in translated_cache:
                    try:
                        t0 = time.time()
                        new_title = translate_title(sp_title, lang, lang_name)
                        elapsed = time.time() - t0
                    except Exception as e:  # best effort: keep going with the next title
                        print(f" [{lang}] ERROR translating: {e}")
                        errors += 1
                        continue
                    translated_cache[cache_key] = new_title
                    print(f" [{lang}] {new_title[:60]} ({elapsed:.0f}s)")
                new_title = translated_cache[cache_key]

                # Update the post title
                c.execute("UPDATE wp_posts SET post_title=%s WHERE ID=%s", (new_title, post_id))
                db.commit()
                print(f" [{lang}] Updated {post_id}: {new_title[:60]}")
                done += 1
    finally:
        db.close()
    print(f"\nDone: {done} fixed, {errors} errors")


if __name__ == "__main__":
    main()
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""Issue #90 — círculo 200px RGBA (esquinas transparentes) para 2 autores nuevos.
Recorte cuadrado centrado en la cara + máscara circular supersampleada.
"""
from PIL import Image, ImageDraw
SRC = "/home/rafa/Feadulta"
OUT = "/home/rafa/joomla-migration/wordpress/wp-content/uploads/avatares/autores"
SIZE = 200
SS = 4  # supersampling factor for a smooth circle edge

# (photo file, user id, (left, top, side)): square crop in source-image coordinates
JOBS = [
    ("MP_Lopez.jpeg", 474, (120, 0, 880)),
    ("A_delaCruz.jpeg", 993, (128, 0, 785)),
]

# Circular alpha mask drawn at SS times the target size, then downscaled so
# the edge is anti-aliased.
mask = Image.new("L", (SIZE * SS, SIZE * SS), 0)
ImageDraw.Draw(mask).ellipse((0, 0, SIZE * SS - 1, SIZE * SS - 1), fill=255)
mask = mask.resize((SIZE, SIZE), Image.LANCZOS)

for fname, uid, (left, top, side) in JOBS:
    # Close the source file as soon as the pixels have been converted.
    with Image.open(f"{SRC}/{fname}") as src:
        im = src.convert("RGB")
    W, H = im.size
    # A crop larger than the image would be padded by Pillow, so shrink the
    # square to fit before clamping its origin inside the image.
    side = min(side, W, H)
    left = max(0, min(left, W - side))
    top = max(0, min(top, H - side))
    crop = im.crop((left, top, left + side, top + side)).resize((SIZE, SIZE), Image.LANCZOS)
    out = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
    out.paste(crop, (0, 0))
    out.putalpha(mask)  # pixels outside the circle become transparent
    out.save(f"{OUT}/autor-{uid}.png")
    print(f"OK autor-{uid}.png <- {fname} crop=({left},{top},{side})")
+77
View File
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""Genera avatares de INICIALES (mismo formato que #62) para autores sin foto.
Formato replicado del #62: PNG 200x200 RGBA, círculo de color sólido (paleta
determinista de 10 tonos elegida por hash del nombre) con las iniciales en blanco,
DejaVuSans-Bold, esquinas transparentes (borde circular supersampleado).
Entrada: TSV «uid<TAB>display_name» (env FEA_TSV, por defecto /tmp/users29.tsv).
Salida: uploads/avatares/autores/autor-<uid>.png
Uso:
FEA_TSV=/tmp/users29.tsv python3 scripts/gen_avatars_initials.py
"""
import hashlib, os, unicodedata
from PIL import Image, ImageDraw, ImageFont
# Output directory for the generated PNG files.
OUT = "/home/rafa/joomla-migration/wordpress/wp-content/uploads/avatares/autores"
# Input TSV path; overridable through the FEA_TSV environment variable.
TSV = os.environ.get("FEA_TSV", "/tmp/users29.tsv")
# TrueType font used to draw the initials.
FONT = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
# Final image side in pixels, and the supersampling factor used while drawing.
SIZE, SS = 200, 4
# Palette from #62 (taken from the existing avatars; 10 muted tones)
PALETTE = [
    (91, 110, 80), (100, 110, 60), (160, 110, 70), (176, 122, 57), (150, 90, 110),
    (80, 80, 110), (74, 95, 120), (60, 90, 90), (139, 26, 46), (120, 82, 72),
]
# Lower-cased words that never contribute an initial.
STOP = {"la", "las", "el", "los", "un", "una", "de", "del", "y", "e", "da", "do", "the"}
def strip_accents(s: str) -> str:
    """Return *s* with combining marks removed.

    The text is decomposed (NFD) and every character in Unicode category
    ``Mn`` is dropped, which leaves the base letters.
    """
    decomposed = unicodedata.normalize("NFD", s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(kept)
def initials(name: str) -> str:
    """Return up to two upper-case, accent-free initials for *name*.

    Slashes and quote characters act as word separators. Punctuation is
    removed from each word before it is used, so a word such as ``«Curas``
    contributes ``C`` rather than the guillemet. Words listed in ``STOP`` and
    words with no letters or digits are skipped. Returns ``"?"`` when no
    usable word remains.
    """
    raw = name.replace("/", " ").replace('"', " ").replace("'", " ")
    letters = []
    for tok in raw.split():
        word = "".join(ch for ch in tok if ch.isalnum())
        if not word or word.lower() in STOP:
            continue
        letters.append(word[0])
        if len(letters) == 2:
            break
    if not letters:
        return "?"
    return strip_accents("".join(letters)).upper()
def color_for(name: str):
    """Pick a palette colour for *name* deterministically.

    The MD5 digest of the UTF-8 encoded name, read as an integer, selects the
    index into ``PALETTE``; the same name therefore always gets the same colour.
    """
    digest = hashlib.md5(name.encode("utf-8")).hexdigest()
    index = int(digest, 16) % len(PALETTE)
    return PALETTE[index]
def make(uid: str, name: str):
    """Render the initials avatar for one user and save it as ``autor-<uid>.png``.

    The circle and the text are drawn on a canvas ``SS`` times larger than the
    final image and then downscaled, which smooths the edges.

    Returns:
        The initials that were drawn.
    """
    canvas_side = SIZE * SS
    canvas = Image.new("RGBA", (canvas_side, canvas_side), (0, 0, 0, 0))
    draw = ImageDraw.Draw(canvas)
    draw.ellipse((0, 0, canvas_side - 1, canvas_side - 1), fill=color_for(name) + (255,))

    label = initials(name)
    # Fixed font size (104 px at output scale).
    # NOTE(review): the original comment mentions fitting to a ~77 px cap
    # height, but no fitting step exists here.
    font = ImageFont.truetype(FONT, int(104 * SS))
    left, top, right, bottom = draw.textbbox((0, 0), label, font=font)
    # Centre the text's bounding box, compensating for its offset from the origin.
    x = (canvas_side - (right - left)) / 2 - left
    y = (canvas_side - (bottom - top)) / 2 - top
    draw.text((x, y), label, font=font, fill=(255, 255, 255, 255))

    final = canvas.resize((SIZE, SIZE), Image.LANCZOS)
    final.save(f"{OUT}/autor-{uid}.png")
    return label
# Generate one avatar per "uid<TAB>display_name" row; rows with fewer than two
# columns are ignored. The TSV is opened in a with-block so the file is closed
# even if rendering one avatar raises.
os.makedirs(OUT, exist_ok=True)
n = 0
with open(TSV, encoding="utf-8") as tsv:
    for line in tsv:
        parts = line.rstrip("\n").split("\t")
        if len(parts) < 2:
            continue
        uid, name = parts[0].strip(), parts[1].strip()
        ini = make(uid, name)
        print(f"autor-{uid}.png {ini:3} {name}")
        n += 1
print(f"\n{n} avatares generados en {OUT}")
+197
View File
@@ -0,0 +1,197 @@
<?php
/**
* generate_k2_redirects.php
*
* Populates wp_fg_redirect table with 301 redirect entries for all K2 items
* migrated to WordPress.
*
* Joomla K2 URL pattern: /es/[menu]/NNN-alias.html
* Stored in wp_fg_redirect as: NNN-alias.html
* FG plugin matches via LIKE '%NNN-alias.html' fallback.
*
* Also adds redirects for K2 categories → WP categories.
*
* Usage: php generate_k2_redirects.php [--dry-run]
*/
// Dry-run flag: true when "--dry-run" appears anywhere on the command line.
$dry_run = in_array('--dry-run', $argv ?? []);

// DB config - WordPress
$wp_host = 'wordpress-mysql';
$wp_db   = 'wordpress_db';
$wp_user = 'wordpress_user';
$wp_pass = 'wordpress_pass';

// DB config - Joomla
$jm_host = 'joomla-mysql';
$jm_db   = 'joomla_db';
$jm_user = 'joomla_user';
$jm_pass = 'joomla_pass';

// Both connections raise exceptions on SQL errors.
$pdo_options = [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION];
$wp_pdo = new PDO(
    sprintf('mysql:host=%s;dbname=%s;charset=utf8mb4', $wp_host, $wp_db),
    $wp_user,
    $wp_pass,
    $pdo_options
);
$jm_pdo = new PDO(
    sprintf('mysql:host=%s;dbname=%s;charset=utf8mb4', $jm_host, $jm_db),
    $jm_user,
    $jm_pass,
    $pdo_options
);

echo "=== Generate K2 Redirects → wp_fg_redirect ===\n";
if ($dry_run) {
    echo "[DRY RUN]\n\n";
} else {
    echo "[LIVE RUN]\n\n";
}
// -------------------------------------------------------------------------
// Step 1: Build K2 ID → alias map from Joomla
// -------------------------------------------------------------------------
echo "Loading K2 items from Joomla...\n";
$k2_aliases = [];
$stmt = $jm_pdo->query("SELECT id, alias FROM ew4r_k2_items WHERE alias != '' AND alias IS NOT NULL");
while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
    $k2_id = (int)$row['id'];
    $k2_aliases[$k2_id] = $row['alias'];
}
echo " K2 items loaded: " . count($k2_aliases) . "\n";

// -------------------------------------------------------------------------
// Step 2: Load WP postmeta: K2 ID → WP post ID
// -------------------------------------------------------------------------
echo "Loading WP postmeta (_fgj2wp_old_k2_id)...\n";
$k2_to_wp = [];
$stmt = $wp_pdo->query("
SELECT pm.meta_value AS k2_id, pm.post_id AS wp_id
FROM wp_postmeta pm
JOIN wp_posts p ON pm.post_id = p.ID
WHERE pm.meta_key = '_fgj2wp_old_k2_id'
AND p.post_type = 'post'
");
// When several posts carry the same K2 ID, the last row returned wins.
while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
    $k2_to_wp[(int)$row['k2_id']] = (int)$row['wp_id'];
}
echo " WP posts with K2 ID: " . count($k2_to_wp) . "\n\n";

// -------------------------------------------------------------------------
// Step 3: Check existing redirects to avoid duplicates
// -------------------------------------------------------------------------
echo "Loading existing redirects...\n";
// Used as a set: old_url => true.
$existing = [];
$stmt = $wp_pdo->query("SELECT old_url FROM wp_fg_redirect");
while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
    $existing[$row['old_url']] = true;
}
echo " Existing entries: " . count($existing) . "\n\n";
// -------------------------------------------------------------------------
// Step 4: Build and insert K2 item redirects
// -------------------------------------------------------------------------
// For every WP post that carries a K2 ID, queue "<k2_id>-<alias>.html" → post ID,
// then insert the queue into wp_fg_redirect (committing every 1000 rows).
echo "Building K2 item redirects...\n";
$insert = $wp_pdo->prepare("
INSERT IGNORE INTO wp_fg_redirect (old_url, id, type, activated)
VALUES (?, ?, 'post', 1)
");
$stats = ['inserted' => 0, 'skipped_no_alias' => 0, 'skipped_no_wp' => 0, 'skipped_exists' => 0];
// K2 items that have an alias in Joomla but no imported WP post: there is no
// redirect target for them. (This counter was previously never filled in.)
$stats['skipped_no_wp'] = count(array_diff_key($k2_aliases, $k2_to_wp));

// Collect the rows first so the dry run can report exactly what would be written.
$batch = [];
foreach ($k2_to_wp as $k2_id => $wp_id) {
    if (!isset($k2_aliases[$k2_id])) {
        $stats['skipped_no_alias']++;
        continue;
    }
    $alias = $k2_aliases[$k2_id];
    $old_url = $k2_id . '-' . $alias . '.html';
    if (isset($existing[$old_url])) {
        $stats['skipped_exists']++;
        continue;
    }
    // Mark as taken so later steps do not queue the same URL again.
    $existing[$old_url] = true;
    $batch[] = [$old_url, $wp_id];
}
echo " Redirects to insert: " . count($batch) . "\n";

if (!$dry_run) {
    $wp_pdo->beginTransaction();
    try {
        foreach ($batch as [$old_url, $wp_id]) {
            $insert->execute([$old_url, $wp_id]);
            $stats['inserted']++;
            if ($stats['inserted'] % 1000 === 0) {
                echo " ... {$stats['inserted']} inserted\n";
                // Commit in chunks to keep each transaction small.
                $wp_pdo->commit();
                $wp_pdo->beginTransaction();
            }
        }
        $wp_pdo->commit();
    } catch (Exception $e) {
        // rollBack() throws when no transaction is active (e.g. commit() itself
        // failed), which would hide the original error.
        if ($wp_pdo->inTransaction()) {
            $wp_pdo->rollBack();
        }
        echo "ERROR: " . $e->getMessage() . "\n";
        exit(1);
    }
} else {
    $stats['inserted'] = count($batch);
    // Show first 5 samples
    echo "\n Sample entries:\n";
    foreach (array_slice($batch, 0, 5) as [$old_url, $wp_id]) {
        echo " $old_url → post ID $wp_id\n";
    }
}
// -------------------------------------------------------------------------
// Step 5: K2 category redirects
// -------------------------------------------------------------------------
// Maps "<category-alias>.html" to the WP term that the migration recorded for
// the same K2 category (term meta _fgj2wp_old_k2_category_id).
echo "\nBuilding K2 category redirects...\n";
// Load K2 categories from Joomla
$stmt = $jm_pdo->query("SELECT id, alias FROM ew4r_k2_categories WHERE published=1");
$k2_cats = [];
foreach ($stmt as $row) {
    $k2_cats[(int)$row['id']] = $row['alias'];
}
echo " K2 categories: " . count($k2_cats) . "\n";
// Load the K2 category ID → WP term ID mapping from wp_termmeta
$stmt = $wp_pdo->query("
SELECT tm.term_id, tm.meta_value AS k2_cat_id
FROM wp_termmeta tm
WHERE tm.meta_key = '_fgj2wp_old_k2_category_id'
");
$k2_cat_to_wp = [];
foreach ($stmt as $row) {
    $k2_cat_to_wp[(int)$row['k2_cat_id']] = (int)$row['term_id'];
}
echo " WP categories with K2 ID: " . count($k2_cat_to_wp) . "\n";
$insert_cat = $wp_pdo->prepare("
INSERT IGNORE INTO wp_fg_redirect (old_url, id, type, activated)
VALUES (?, ?, 'category', 1)
");
$cat_inserted = 0;
foreach ($k2_cat_to_wp as $k2_cat_id => $wp_term_id) {
    // isset() is also false for a NULL alias.
    if (!isset($k2_cats[$k2_cat_id])) continue;
    $alias = $k2_cats[$k2_cat_id];
    // An empty alias would produce the meaningless URL ".html".
    if (trim((string)$alias) === '') continue;
    // K2 category URL: /es/[alias] or /es/k2-items/[alias]
    $old_url = $alias . '.html';
    if (isset($existing[$old_url])) continue;
    // Two categories can share an alias; count (and insert) each URL once.
    $existing[$old_url] = true;
    if (!$dry_run) {
        $insert_cat->execute([$old_url, $wp_term_id]);
    }
    $cat_inserted++;
}
echo " Category redirects: $cat_inserted\n";
// -------------------------------------------------------------------------
// Summary
// -------------------------------------------------------------------------
echo "\n=== Results ===\n";
echo "K2 item redirects inserted: {$stats['inserted']}\n";
echo "Skipped (no alias): {$stats['skipped_no_alias']}\n";
echo "Skipped (no WP post): {$stats['skipped_no_wp']}\n";
echo "Skipped (already exists): {$stats['skipped_exists']}\n";
echo "Category redirects: $cat_inserted\n";
// The table total is only meaningful after a live run; in dry-run the heading
// used to be printed with nothing after it.
if (!$dry_run) {
    $count = $wp_pdo->query("SELECT COUNT(*) FROM wp_fg_redirect")->fetchColumn();
    echo "\nTotal in wp_fg_redirect now:\n";
    echo " $count entries\n";
}
echo "\nDone.\n";
+82
View File
@@ -0,0 +1,82 @@
<?php
// Imports the col_*.png avatars from uploads/autores/joomla/ as WP attachments
// and assigns foto_perfil (user meta) to the matching user_id.
//
// Input: /tmp/avatar_assignments.tsv — one header row, then 5 tab-separated
// fields per row. Only field 1 (user ID), field 2 (login, used in the report)
// and field 5 (avatar file name) are read.
//
// Usage: docker exec wordpress-web php /tmp/import_avatars.php [--dry-run]
require '/var/www/html/wp-load.php';
require_once ABSPATH . 'wp-admin/includes/image.php';

$dry = in_array('--dry-run', $argv ?? [], true);
$tsv = '/tmp/avatar_assignments.tsv';
$uploads = wp_upload_dir();
$avatars_dir = $uploads['basedir'] . '/autores/joomla';
$avatars_url = $uploads['baseurl'] . '/autores/joomla';

// file() returns false on failure; bail out instead of passing false to array_shift().
$lines = is_readable($tsv) ? file($tsv, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) : false;
if ($lines === false) {
    fwrite(STDERR, "[import_avatars] cannot read TSV: $tsv\n");
    exit(1);
}
array_shift($lines); // header

$stats = ['ok' => 0, 'reused' => 0, 'missing_file' => 0, 'missing_user' => 0, 'assigned' => 0, 'already' => 0, 'malformed' => 0];
$samples_ok = [];
global $wpdb;
// file name → attachment ID, so an avatar shared by several users is inserted once.
// In dry-run, -1 marks "would be created".
$attach_by_file = [];

foreach ($lines as $line) {
    // Tolerate CRLF input and skip rows that do not have all 5 fields.
    $fields = explode("\t", rtrim($line, "\r"));
    if (count($fields) < 5) { $stats['malformed']++; continue; }
    $aid = (int) $fields[0];
    $login = $fields[1];
    $filename = $fields[4];

    $abs = $avatars_dir . '/' . $filename;
    if (!is_file($abs)) { $stats['missing_file']++; continue; }
    if (!get_userdata($aid)) { $stats['missing_user']++; continue; }
    // Never overwrite an existing foto_perfil (preserves manual assignments).
    if (get_user_meta($aid, 'foto_perfil', true)) { $stats['already']++; continue; }

    // Reuse an attachment that already exists for this file (matched by guid).
    if (!isset($attach_by_file[$filename])) {
        $url = $avatars_url . '/' . $filename;
        $existing = $wpdb->get_var($wpdb->prepare(
            "SELECT ID FROM {$wpdb->posts} WHERE post_type='attachment' AND guid=%s LIMIT 1",
            $url
        ));
        if ($existing) {
            $attach_by_file[$filename] = (int) $existing;
            $stats['reused']++;
        } elseif ($dry) {
            $attach_by_file[$filename] = -1; // dry-run marker
        } else {
            $attach_id = wp_insert_attachment([
                'guid' => $url,
                'post_mime_type' => wp_check_filetype($abs)['type'] ?: 'image/png',
                'post_title' => pathinfo($filename, PATHINFO_FILENAME),
                'post_content' => '',
                'post_status' => 'inherit',
            ], $abs);
            if (is_wp_error($attach_id) || !$attach_id) {
                error_log("[import_avatars] insert FAIL para $filename: " . (is_wp_error($attach_id) ? $attach_id->get_error_message() : 'unknown'));
                // Nothing is cached, so a later row with the same file retries.
                continue;
            }
            $meta = wp_generate_attachment_metadata($attach_id, $abs);
            wp_update_attachment_metadata($attach_id, $meta);
            $attach_by_file[$filename] = $attach_id;
            $stats['ok']++;
        }
    }

    // At this point the cache always holds a real ID (> 0) or the dry-run marker (-1).
    $attach_id = $attach_by_file[$filename];
    if (!$dry && $attach_id > 0) {
        update_user_meta($aid, 'foto_perfil', (string) $attach_id);
    }
    $stats['assigned']++;
    if (count($samples_ok) < 5) {
        $samples_ok[] = "user $aid ($login) → attach " . ($attach_id > 0 ? $attach_id : 'NEW') . " ($filename)";
    }
}

echo ($dry ? '[DRY] ' : '') . "Stats:\n";
foreach ($stats as $k => $v) echo " $k: $v\n";
echo "\nSamples:\n";
foreach ($samples_ok as $s) echo " $s\n";
+56
View File
@@ -0,0 +1,56 @@
<?php
/**
 * import_avatars_143.php (#143) — registers the INITIALS avatars of the new
 * users created while fixing K2 authorship (see fix_k2_authors.php).
 *
 * Creates the attachment for the PNG uploads/avatares/autores/autor-<uid>.png
 * and repoints foto_perfil (user meta). The previous value is backed up in the
 * user meta _foto_perfil_pre143 (written only once). When foto_perfil already
 * points at that PNG, only the attachment metadata is regenerated.
 *
 * Input: TSV "uid<TAB>display_name" (env FEA_TSV, default /tmp/users29.tsv).
 * Lines whose first field is not a positive integer (e.g. a header) are skipped.
 *
 * Usage (on the server, inside /web/wp-nuevo):
 *   FEA_TSV=/tmp/users29.tsv wp eval-file scripts/import_avatars_143.php          # dry-run
 *   APPLY=1 FEA_TSV=/tmp/users29.tsv wp eval-file scripts/import_avatars_143.php  # apply
 */
require_once ABSPATH . 'wp-admin/includes/image.php';

$apply = getenv('APPLY') === '1';
$tsv = getenv('FEA_TSV') ?: '/tmp/users29.tsv';
$updir = wp_get_upload_dir();
if (!is_readable($tsv)) { echo "No puedo leer TSV: $tsv\n"; return; }

$done = $regen = $err = 0;
foreach (file($tsv) as $line) {
    // Strip both LF and CR so CRLF files do not leak "\r" into the fields.
    $p = explode("\t", rtrim($line, "\r\n"));
    if (count($p) < 2) continue;
    $uid = (int) $p[0];
    $name = trim($p[1]);
    // A header row or garbage casts to 0; it is not a user, so it is not an error.
    if ($uid <= 0) continue;
    // Do not write user meta for an ID that has no user.
    if (!get_userdata($uid)) { echo "NOUSER uid=$uid ($name)\n"; $err++; continue; }

    $rel = "avatares/autores/autor-{$uid}.png";
    $abs = $updir['basedir'] . '/' . $rel;
    if (!file_exists($abs)) { echo "MISSING uid=$uid ($name)\n"; $err++; continue; }

    $cur = (int) get_user_meta($uid, 'foto_perfil', true);
    // foto_perfil already points at this file: the PNG was overwritten in place,
    // so only the derived metadata needs regenerating.
    if ($cur && get_post_meta($cur, '_wp_attached_file', true) === $rel) {
        echo "REGEN #$uid $name (attachment $cur ya apunta al PNG)\n";
        if ($apply) wp_update_attachment_metadata($cur, wp_generate_attachment_metadata($cur, $abs));
        $regen++; continue;
    }

    echo "NUEVO #$uid $name (foto_perfil actual: " . ($cur ?: 'ninguna') . ")\n";
    if (!$apply) { $done++; continue; }

    // Back up the previous value only the first time, so re-runs keep the original.
    if (get_user_meta($uid, '_foto_perfil_pre143', true) === '') {
        update_user_meta($uid, '_foto_perfil_pre143', $cur);
    }
    $aid = wp_insert_attachment([
        'post_mime_type' => 'image/png',
        'post_title' => "Avatar {$name}",
        'post_status' => 'inherit',
        'guid' => $updir['baseurl'] . '/' . $rel,
    ], $abs, 0, true);
    if (is_wp_error($aid)) { echo " ERR: " . $aid->get_error_message() . "\n"; $err++; continue; }
    wp_update_attachment_metadata($aid, wp_generate_attachment_metadata($aid, $abs));
    update_user_meta($uid, 'foto_perfil', $aid);
    $done++;
}
echo "\n" . ($apply ? "APLICADO" : "DRY-RUN") . ": nuevos=$done regen=$regen errores=$err\n";
+59
View File
@@ -0,0 +1,59 @@
<?php
/**
 * Issue #81 — new avatars for 6 regular contributors.
 *
 * Repoints foto_perfil (user meta) to the PNG uploads/avatares/autores/autor-<uid>.png.
 * When foto_perfil already points at that file (case #62), only the attachment
 * metadata is regenerated. The previous foto_perfil is backed up in the user
 * meta _foto_perfil_pre75 so the change can be reverted.
 *
 * Usage (inside the container):
 *   php import_avatars_75.php          -> dry run
 *   APPLY=1 php import_avatars_75.php  -> apply
 */
require "/var/www/html/wp-load.php";
require_once ABSPATH . 'wp-admin/includes/image.php';

$is_apply = (getenv('APPLY') === '1');
$upload_info = wp_get_upload_dir();

// WP user ID => display name (the name is only used in output and the attachment title).
$contributors = [
    384 => "Enrique Martínez Lozano",
    583 => "Fidel Aizpurúa",
    1138 => "Guadalupe Labrador",
    383 => "José Antonio Pagola",
    774 => "José Luis Sicre",
    775 => "Miguel A. Munárriz",
];
$backup_key = '_foto_perfil_pre75';

$created = 0;
$regenerated = 0;
$failed = 0;

foreach ($contributors as $uid => $name) {
    $relative = "avatares/autores/autor-{$uid}.png";
    $absolute = $upload_info['basedir'] . '/' . $relative;

    if (!file_exists($absolute)) {
        echo "MISSING uid=$uid ($name)\n";
        $failed++;
        continue;
    }

    $current = (int) get_user_meta($uid, 'foto_perfil', true);
    // True when the user's current attachment is already this very file,
    // i.e. the PNG was simply overwritten on disk.
    $already_linked = $current && get_post_meta($current, '_wp_attached_file', true) === $relative;

    if ($already_linked) {
        echo "REGEN #$uid $name (attachment $current ya apunta al PNG)\n";
        if ($is_apply) {
            $fresh_meta = wp_generate_attachment_metadata($current, $absolute);
            wp_update_attachment_metadata($current, $fresh_meta);
        }
        $regenerated++;
        continue;
    }

    echo "NUEVO #$uid $name (foto_perfil actual: " . ($current ?: 'ninguna') . ")\n";
    if (!$is_apply) {
        $created++;
        continue;
    }

    // Keep the first backup only; later runs must not overwrite it.
    if (get_user_meta($uid, $backup_key, true) === '') {
        update_user_meta($uid, $backup_key, $current);
    }

    $attachment = [
        'post_mime_type' => 'image/png',
        'post_title' => "Avatar {$name}",
        'post_status' => 'inherit',
        'guid' => $upload_info['baseurl'] . '/' . $relative,
    ];
    $attachment_id = wp_insert_attachment($attachment, $absolute, 0, true);
    if (is_wp_error($attachment_id)) {
        echo " ERR: " . $attachment_id->get_error_message() . "\n";
        $failed++;
        continue;
    }

    $new_meta = wp_generate_attachment_metadata($attachment_id, $absolute);
    wp_update_attachment_metadata($attachment_id, $new_meta);
    update_user_meta($uid, 'foto_perfil', $attachment_id);
    $created++;
}

echo "\n" . ($is_apply ? "APLICADO" : "DRY-RUN") . ": nuevos=$created regen=$regenerated errores=$failed\n";
+55
View File
@@ -0,0 +1,55 @@
<?php
/**
 * Issue #90 — new avatars for 2 new contributors (Mari Paz Lopez, Africa de la Cruz).
 *
 * Repoints foto_perfil (user meta) to the PNG uploads/avatares/autores/autor-<uid>.png.
 * When foto_perfil already points at that file (case #62), only the attachment
 * metadata is regenerated. The previous foto_perfil is backed up in the user
 * meta _foto_perfil_pre81 so the change can be reverted.
 *
 * Usage (inside the container), with this script's own file name:
 *   php <this script>          -> dry run
 *   APPLY=1 php <this script>  -> apply
 */
require "/var/www/html/wp-load.php";
require_once ABSPATH . 'wp-admin/includes/image.php';

$is_apply = (getenv('APPLY') === '1');
$upload_info = wp_get_upload_dir();

// WP user ID => display name (the name is only used in output and the attachment title).
$contributors = [
    474 => "Mari Paz López Santos",
    993 => "África de la Cruz Tomé",
];
$backup_key = '_foto_perfil_pre81';

$created = 0;
$regenerated = 0;
$failed = 0;

foreach ($contributors as $uid => $name) {
    $relative = "avatares/autores/autor-{$uid}.png";
    $absolute = $upload_info['basedir'] . '/' . $relative;

    if (!file_exists($absolute)) {
        echo "MISSING uid=$uid ($name)\n";
        $failed++;
        continue;
    }

    $current = (int) get_user_meta($uid, 'foto_perfil', true);
    // True when the user's current attachment is already this very file,
    // i.e. the PNG was simply overwritten on disk.
    $already_linked = $current && get_post_meta($current, '_wp_attached_file', true) === $relative;

    if ($already_linked) {
        echo "REGEN #$uid $name (attachment $current ya apunta al PNG)\n";
        if ($is_apply) {
            $fresh_meta = wp_generate_attachment_metadata($current, $absolute);
            wp_update_attachment_metadata($current, $fresh_meta);
        }
        $regenerated++;
        continue;
    }

    echo "NUEVO #$uid $name (foto_perfil actual: " . ($current ?: 'ninguna') . ")\n";
    if (!$is_apply) {
        $created++;
        continue;
    }

    // Keep the first backup only; later runs must not overwrite it.
    if (get_user_meta($uid, $backup_key, true) === '') {
        update_user_meta($uid, $backup_key, $current);
    }

    $attachment = [
        'post_mime_type' => 'image/png',
        'post_title' => "Avatar {$name}",
        'post_status' => 'inherit',
        'guid' => $upload_info['baseurl'] . '/' . $relative,
    ];
    $attachment_id = wp_insert_attachment($attachment, $absolute, 0, true);
    if (is_wp_error($attachment_id)) {
        echo " ERR: " . $attachment_id->get_error_message() . "\n";
        $failed++;
        continue;
    }

    $new_meta = wp_generate_attachment_metadata($attachment_id, $absolute);
    wp_update_attachment_metadata($attachment_id, $new_meta);
    update_user_meta($uid, 'foto_perfil', $attachment_id);
    $created++;
}

echo "\n" . ($is_apply ? "APLICADO" : "DRY-RUN") . ": nuevos=$created regen=$regenerated errores=$failed\n";
+320
View File
@@ -0,0 +1,320 @@
#!/usr/bin/env python3
"""
import_new_cartas.py
Importa las cartas de la semana nuevas de ew4r_content (Joomla prod, id > 9043)
al WordPress local (Docker), y luego asigna _carta_id a los artículos K2
correspondientes según la fecha (extra_field id 15).
Categorías WP según catid Joomla:
catid 27 (Carta de la semana) → WP: 6 + 21 + 71
catid 40 (Cartas de otras sem) → WP: 21 + 71
catid 41 (Carta semana pasada) → WP: 21 + 22 + 71
"""
import json
import subprocess
import sys
import re
from datetime import datetime
JOOMLA_SSH_HOST = "134.0.10.170"
JOOMLA_SSH_USER = "feadulta"
JOOMLA_SSH_PASS = "C6c2A!mAl3Wj.BQF"
JOOMLA_DB_HOST = "127.0.0.1"
JOOMLA_DB_USER = "fejoomla3"
JOOMLA_DB_PASS = "5FF-}5^[>7^pK4W9"
JOOMLA_DB_NAME = "fejoomla3"
WP_DOCKER = "wordpress-mysql"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = "wordpress_pass"
WP_DB_NAME = "wordpress_db"
# Import watermark: only ew4r_content rows with a larger id are fetched.
LAST_CONTENT_ID = None # computed in main(): MAX(_fgj2wp_old_content_id) in WP
# WP term_ids; their term_taxonomy_ids are looked up at runtime in main()
CAT_FEADULTA = 71
CAT_CARTA_SEMANA = 6
CAT_CARTAS_OTRAS = 21
CAT_CARTA_PASADA = 22
# Joomla catid -> WP category term_ids assigned to the imported letter
CATID_TO_WP = {
27: [CAT_CARTA_SEMANA, CAT_CARTAS_OTRAS, CAT_FEADULTA],
40: [CAT_CARTAS_OTRAS, CAT_FEADULTA],
41: [CAT_CARTAS_OTRAS, CAT_CARTA_PASADA, CAT_FEADULTA],
}
# With --dry-run, wp_execute() only prints the SQL it would run.
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def joomla_query(query: str) -> list[dict]:
    """Run *query* on the Joomla MySQL over SSH and return its rows.

    The query is sent on stdin to a remote ``mysql -B`` process, so it never
    passes through the remote shell. The tab-separated output is parsed into
    one dict per row, keyed by the header line; every value is a string.

    Returns an empty list when the command fails (the error is printed to
    stderr) or when the query yields no rows.
    """
    import shlex

    # shlex.quote keeps the password a single shell word whatever it contains.
    mysql_cmd = (f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
                 f"-p{shlex.quote(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} "
                 f"--default-character-set=utf8mb4 -B")
    cmd = ['sshpass', '-p', JOOMLA_SSH_PASS,
           'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}', mysql_cmd]
    result = subprocess.run(cmd, input=query, capture_output=True,
                            text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERR SSH] {result.stderr[:300]}", file=sys.stderr)
        return []
    # Only drop trailing newlines: a plain strip() would also remove the tab
    # that precedes an empty last column and lose that field for the last row.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_mysql(query: str) -> list[dict]:
    """Run a read query on the local WordPress MySQL via ``docker exec``.

    The tab-separated ``mysql -B`` output is parsed into one dict per row,
    keyed by the header line; every value is a string.

    Returns an empty list when the command fails or the query yields no rows.
    Failures are reported on stderr so they are not mistaken for "no rows".
    """
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-B', '-e', query]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERR WP] {result.stderr[:300]}", file=sys.stderr)
        return []
    # Only drop trailing newlines: strip() would also eat the tab in front of
    # an empty last column and lose that field for the last row.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_execute(sql: str):
    """Run a write statement on the local WordPress MySQL via ``docker exec``.

    In dry-run mode the statement is only printed (truncated) and ``None`` is
    returned. Otherwise returns ``True`` on success and ``False`` on failure,
    after printing the error to stderr; existing callers may ignore the value.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:110]}")
        return None
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-e', sql]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        return True
    # The password warning is noise; what remains is the real error.
    err = result.stderr.replace('mysql: [Warning] Using a password on the command line interface can be insecure.\n', '').strip()
    if not err:
        # Never fail silently, even when mysql gave no message.
        err = f"mysql exit code {result.returncode}"
    print(f" [ERR] {err[:200]}", file=sys.stderr)
    return False
def esc(s: str) -> str:
    """Backslash-escape backslashes and single quotes for a quoted SQL literal."""
    # One pass: '\' -> '\\' and "'" -> "\'".
    return s.translate({ord('\\'): '\\\\', ord("'"): "\\'"})
def unhex(val: str) -> str:
    """Decode a MySQL ``HEX()`` value to text.

    Empty values and the literal ``NULL`` become ``''``. Undecodable UTF-8
    bytes are replaced; input that is not valid hex is returned unchanged.
    """
    if val and val != 'NULL':
        try:
            raw = bytes.fromhex(val)
            return raw.decode('utf-8', errors='replace')
        except Exception:
            return val
    return ''
# ── Main ───────────────────────────────────────────────────────────────────────
def main():
    """Import new weekly letters from Joomla and link K2 articles to them.

    Steps:
      1. Take the highest ``_fgj2wp_old_content_id`` already in WP as the
         watermark (9043 when none is found).
      2. Fetch published ew4r_content rows above the watermark in the letter
         categories (27, 40, 41) and insert each as a WP post with its metas,
         categories and the Polylang ``es`` language term.
      3. For K2 articles created since the oldest imported letter, add a
         ``_carta_id`` meta pointing at the letter whose date equals the
         article's extra field 15, unless the article already has one.
      4. Recompute the counts of the categories involved.

    With ``--dry-run`` the statements are only printed and steps 3–4 are skipped.
    """
    global LAST_CONTENT_ID
    # The weekly letter is ALWAYS signed by WP user 1048, even though the
    # webmaster creates it in Joomla.
    carta_author = 1048

    # NOTE(review): the watermark is the max over every imported ew4r_content
    # id, not only letters — confirm that another importer writing the same
    # meta key cannot push it past letters that are still pending.
    r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
                 "WHERE meta_key='_fgj2wp_old_content_id'")
    LAST_CONTENT_ID = int(r[0]['m']) if r and r[0].get('m') and r[0]['m'] != 'NULL' else 9043
    print(f"=== Import nuevas cartas (ew4r_content id > {LAST_CONTENT_ID}) "
          f"{'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    # term_id -> term_taxonomy_id for the categories used below
    all_term_ids = [CAT_FEADULTA, CAT_CARTA_SEMANA, CAT_CARTAS_OTRAS, CAT_CARTA_PASADA]
    rows = wp_mysql(
        f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE term_id IN ({','.join(map(str,all_term_ids))}) AND taxonomy='category'"
    )
    tt_ids = {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}
    print(f"TT IDs: {tt_ids}")

    # Polylang language term for Spanish
    pl_rows = wp_mysql(
        "SELECT tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug='es' LIMIT 1"
    )
    pl_es_tt = int(pl_rows[0]['term_taxonomy_id']) if pl_rows else None
    print(f"Polylang ES tt_id: {pl_es_tt}")

    # Text columns travel as HEX so tabs/newlines cannot break the TSV parsing.
    print("\nObteniendo cartas nuevas de Joomla...")
    query = (
        f"SELECT id, HEX(title) title, HEX(alias) alias, "
        f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
        f"catid, created, created_by "
        f"FROM ew4r_content "
        f"WHERE state=1 AND id > {LAST_CONTENT_ID} AND catid IN (27,40,41) "
        f"ORDER BY id;"
    )
    items = joomla_query(query)
    print(f"Cartas a importar: {len(items)}")

    # letter date (YYYY-MM-DD) -> WP post id, used to link K2 articles below
    fecha_a_wp_carta = {}
    for item in items:
        joomla_id = int(item['id'])
        catid = int(item['catid'])
        title = unhex(item.get('title',''))
        alias = unhex(item.get('alias',''))
        intro = unhex(item.get('introtext',''))
        full = unhex(item.get('fulltext_col',''))
        created = item.get('created','') or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        content = intro + ('\n<!--more-->\n' + full if full.strip() else '')
        wp_author = carta_author
        wp_cats = CATID_TO_WP.get(catid, [CAT_CARTAS_OTRAS, CAT_FEADULTA])
        fecha_carta = created[:10]  # YYYY-MM-DD
        print(f"\n [{joomla_id}] {title[:55]} | catid={catid} | fecha={fecha_carta}")
        print(f" → WP cats: {wp_cats}")

        post_slug = esc(alias[:200])
        post_title = esc(title)
        post_content = esc(content)
        insert_sql = (
            f"INSERT INTO wp_posts "
            f"(post_author, post_date, post_date_gmt, post_content, post_title, "
            f"post_excerpt, post_status, comment_status, ping_status, post_name, "
            f"post_type, post_modified, post_modified_gmt, comment_count, "
            f"to_ping, pinged, post_content_filtered) VALUES ("
            f"{wp_author}, '{created}', '{created}', '{post_content}', "
            f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
            f"'post', '{created}', '{created}', 0, '', '', '')"
        )
        if DRY_RUN:
            wp_execute(insert_sql)  # prints the [DRY] line only
            fecha_a_wp_carta[fecha_carta] = f"DRY_WP_ID_for_{joomla_id}"
            continue

        # INSERT and LAST_INSERT_ID() run in the same mysql session, so the id
        # is the one of THIS row. A separate SELECT MAX(ID) would, after a
        # failed INSERT, return an unrelated post and the metas/categories
        # below would be attached to it.
        new_id_rows = wp_mysql(f"{insert_sql}; SELECT LAST_INSERT_ID() new_id")
        try:
            new_wp_id = int(new_id_rows[0]['new_id'])
        except (IndexError, KeyError, ValueError):
            new_wp_id = 0
        if new_wp_id <= 0:
            print(" [ERR] No se pudo obtener ID del post", file=sys.stderr)
            continue
        print(f" → WP post ID={new_wp_id}")
        fecha_a_wp_carta[fecha_carta] = new_wp_id

        # Metas
        wp_execute(f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES ({new_wp_id}, '_fgj2wp_old_content_id', '{joomla_id}')")
        wp_execute(f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES ({new_wp_id}, 'Idioma', '1')")
        # Categories (term_ids that did not resolve to a taxonomy id are skipped)
        for term_id in wp_cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({new_wp_id}, {tt_id})"
                )
        # Polylang ES
        if pl_es_tt:
            wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({new_wp_id}, {pl_es_tt})"
            )

    print(f"\nFecha→WP carta map: {fecha_a_wp_carta}")

    # ── Assign _carta_id to the imported K2 articles ─────────────────────────
    if DRY_RUN or not fecha_a_wp_carta:
        print("\n[SKIP] Asignación _carta_id (dry-run o sin cartas importadas)")
        return
    print("\n=== Asignando _carta_id a artículos K2 ===")
    # Only K2 articles created on/after the oldest letter imported in this run.
    min_fecha = min(fecha_a_wp_carta.keys())
    k2_query = (
        f"SELECT id, HEX(extra_fields) ef "
        f"FROM ew4r_k2_items WHERE published=1 AND created >= '{min_fecha} 00:00:00' "
        f"ORDER BY id;"
    )
    k2_items = joomla_query(k2_query)
    print(f"Artículos K2 a procesar (desde {min_fecha}): {len(k2_items)}")

    assigned = 0
    for k2item in k2_items:
        k2_id = int(k2item['id'])
        # Real WP id, resolved through the migration meta (not a fixed offset).
        wp_rows = wp_mysql(
            f"SELECT post_id FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
            f"AND meta_value='{k2_id}' LIMIT 1"
        )
        if not wp_rows:
            continue
        wp_id = int(wp_rows[0]['post_id'])

        # extra_fields is a JSON list of {id, value}; field 15 holds the date.
        ef_raw = unhex(k2item.get('ef',''))
        try:
            fields = json.loads(ef_raw)
        except ValueError:
            fields = []
        fecha_art = None
        if isinstance(fields, list):
            for f in fields:
                if isinstance(f, dict) and str(f.get('id','')) == '15':
                    fecha_art = str(f.get('value',''))[:10]
                    break
        if not fecha_art:
            continue
        carta_wp_id = fecha_a_wp_carta.get(fecha_art)
        if not carta_wp_id:
            continue

        # Never overwrite an existing _carta_id.
        existing = wp_mysql(
            f"SELECT meta_id FROM wp_postmeta WHERE post_id={wp_id} AND meta_key='_carta_id' LIMIT 1"
        )
        if existing:
            continue
        wp_execute(
            f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
            f"VALUES ({wp_id}, '_carta_id', '{carta_wp_id}')"
        )
        print(f" K2 {k2_id} (WP {wp_id}) → _carta_id={carta_wp_id} [{fecha_art}]")
        assigned += 1
    print(f"\n_carta_id asignado a {assigned} artículos.")

    # Recompute category counts; skipped when no term resolved ("IN ()" is invalid SQL).
    print("\nActualizando counts de categorías...")
    if tt_ids:
        tt_str = ','.join(str(v) for v in tt_ids.values())
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr "
            f"WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_str})"
        )
    print("\nListo.")
if __name__ == '__main__':
main()
+254
View File
@@ -0,0 +1,254 @@
#!/usr/bin/env python3
"""
import_new_content.py
Importa los ew4r_content items no-carta nuevos (id > 9043, catid NOT IN 27,40,41)
al WordPress local (Docker).
Mapping catid → WP term_ids:
54 (Índice multimedia) → 26
77 (Videos) → 58
64 (Noticias de alcance) → 41
52 (Tablón de anuncios) → 1 (uncategorized / sin categoría)
63 (Fechas) → 40
61 (Lista completa de autores) → 38
65 (Cantoral Salomé Arricibita) → 31
otro → 1 (uncategorized)
"""
import subprocess
import sys
from datetime import datetime
JOOMLA_SSH_HOST = "134.0.10.170"
JOOMLA_SSH_USER = "feadulta"
JOOMLA_SSH_PASS = "6Rm2qOF@eundwpda"
JOOMLA_DB_HOST = "127.0.0.1"
JOOMLA_DB_USER = "fejoomla3"
JOOMLA_DB_PASS = "5FF-}5^[>7^pK4W9"
JOOMLA_DB_NAME = "fejoomla3"
WP_DOCKER = "wordpress-mysql"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = "wordpress_pass"
WP_DB_NAME = "wordpress_db"
# Import watermark: only ew4r_content rows with a larger id are considered.
LAST_CONTENT_ID = 9043
# Joomla catids of the weekly letters; excluded from this importer's query.
CARTA_CATIDS = {27, 40, 41}
# Joomla catid -> WP category term_ids (main() falls back to [1] for any other catid)
CATID_TO_WP = {
54: [26],
77: [58],
64: [41],
52: [1],
63: [40],
61: [38],
65: [31],
}
# With --dry-run, wp_execute() only prints the SQL it would run.
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def joomla_query(query: str) -> list[dict]:
    """Run *query* on the Joomla MySQL over SSH and return its rows.

    The query is fed on stdin to a remote ``mysql -B`` process (it never goes
    through the remote shell). Output is parsed into one dict per row, keyed
    by the header line; all values are strings.

    Returns ``[]`` on command failure (error printed to stderr) or when there
    are no rows.
    """
    import shlex

    # Quote the password properly instead of wrapping it in '...' by hand.
    remote_mysql = (f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
                    f"-p{shlex.quote(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} "
                    f"--default-character-set=utf8mb4 -B")
    cmd = ['sshpass', '-p', JOOMLA_SSH_PASS,
           'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}', remote_mysql]
    result = subprocess.run(cmd, input=query, capture_output=True,
                            text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERR SSH] {result.stderr[:300]}", file=sys.stderr)
        return []
    # rstrip('\n') rather than strip(): a trailing tab belongs to an empty last
    # column and must survive, or the last row loses that field.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_mysql(query: str) -> list[dict]:
    """Run a read query on the local WordPress MySQL via ``docker exec``.

    Output of ``mysql -B`` is parsed into one dict per row, keyed by the
    header line; all values are strings.

    Returns ``[]`` when the command fails or there are no rows. A failure is
    printed to stderr so it cannot be confused with an empty result.
    """
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-B', '-e', query]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERR WP] {result.stderr[:300]}", file=sys.stderr)
        return []
    # rstrip('\n') rather than strip(): keep the tab of an empty last column.
    lines = result.stdout.rstrip('\n').split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_execute(sql: str):
    """Run a write statement on the local WordPress MySQL via ``docker exec``.

    Dry-run: prints the (truncated) statement and returns ``None``.
    Live: returns ``True`` on success, ``False`` on failure after printing the
    error to stderr. Existing callers may keep ignoring the return value.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:110]}")
        return None
    cmd = ['docker', 'exec', WP_DOCKER,
           'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
           '--default-character-set=utf8mb4', '-e', sql]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        return True
    # Drop the harmless password warning; report whatever else mysql said.
    err = result.stderr.replace('mysql: [Warning] Using a password on the command line interface can be insecure.\n', '').strip()
    if not err:
        # A failure without a message must still be visible.
        err = f"mysql exit code {result.returncode}"
    print(f" [ERR] {err[:200]}", file=sys.stderr)
    return False
def esc(s: str) -> str:
    """Backslash-escape backslashes and single quotes for a quoted SQL literal."""
    doubled = s.replace('\\', '\\\\')
    # Re-joining the pieces around each quote with \' escapes every quote.
    return "\\'".join(doubled.split("'"))
def unhex(val: str) -> str:
    """Turn a MySQL ``HEX()`` string back into text.

    ``''`` and the literal ``NULL`` map to ``''``; invalid UTF-8 bytes are
    replaced; a value that is not valid hex is handed back untouched.
    """
    is_empty = not val or val == 'NULL'
    if is_empty:
        return ''
    try:
        decoded = bytes.fromhex(val).decode('utf-8', errors='replace')
    except Exception:
        decoded = val
    return decoded
def main():
    """Import new non-letter ew4r_content items from Joomla into WordPress.

    Fetches published rows with id > LAST_CONTENT_ID outside the letter
    categories, skips those whose Joomla id is already recorded in
    ``_fgj2wp_old_content_id``, and inserts the rest as WP posts with the
    migration meta, their mapped categories and the Polylang ``es`` term.
    Category counts are recomputed at the end of a live run that imported
    at least one item. With ``--dry-run`` statements are only printed.
    """
    # Items in catids 54/77/65 are attributed to WP user 890 instead of the
    # Joomla user who uploaded them; every other item keeps its mapped author.
    feadulta_author = 890
    feadulta_catids = {54, 77, 65}

    print(f"=== Import ew4r_content no-cartas (id > {LAST_CONTENT_ID}) "
          f"{'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    # Joomla user id -> WP user id
    user_rows = wp_mysql(
        "SELECT um.meta_value jid, u.ID wid FROM wp_users u "
        "JOIN wp_usermeta um ON um.user_id=u.ID "
        "WHERE um.meta_key='_fgj2wp_old_user_id'"
    )
    user_map = {}
    for r in user_rows:
        try:
            user_map[int(r['jid'])] = int(r['wid'])
        except ValueError:
            pass
    print(f"Usuarios mapeados: {len(user_map)}")

    # term_id -> term_taxonomy_id for every category that can be assigned
    all_term_ids = sorted({t for cats in CATID_TO_WP.values() for t in cats} | {1})
    rows = wp_mysql(
        f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE term_id IN ({','.join(map(str,all_term_ids))}) AND taxonomy='category'"
    )
    tt_ids = {int(r['term_id']): int(r['term_taxonomy_id']) for r in rows}
    print(f"TT IDs: {tt_ids}")

    # Polylang ES
    pl_rows = wp_mysql(
        "SELECT tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug='es' LIMIT 1"
    )
    pl_es_tt = int(pl_rows[0]['term_taxonomy_id']) if pl_rows else None

    # Joomla ids already present in WP (makes the import re-runnable)
    existing_rows = wp_mysql(
        f"SELECT meta_value FROM wp_postmeta "
        f"WHERE meta_key='_fgj2wp_old_content_id' AND meta_value+0 > {LAST_CONTENT_ID}"
    )
    # "meta_value+0" also matches values like '9050x'; ignore anything non-numeric
    # instead of crashing on int().
    existing_ids = {int(r['meta_value']) for r in existing_rows
                    if r.get('meta_value', '').isdigit()}
    print(f"IDs ya importados con id > {LAST_CONTENT_ID}: {len(existing_ids)}")

    # Text columns travel as HEX so tabs/newlines cannot break the TSV parsing.
    print("\nObteniendo items de Joomla...")
    catids_excl = ','.join(str(c) for c in CARTA_CATIDS)
    query = (
        f"SELECT id, HEX(title) title, HEX(alias) alias, "
        f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
        f"catid, created, created_by "
        f"FROM ew4r_content "
        f"WHERE state=1 AND id > {LAST_CONTENT_ID} AND catid NOT IN ({catids_excl}) "
        f"ORDER BY id;"
    )
    items = joomla_query(query)
    print(f"Items a importar: {len(items)}")

    stats = {'ok': 0, 'skip': 0, 'err': 0}
    for item in items:
        joomla_id = int(item['id'])
        catid = int(item['catid'])
        title = unhex(item.get('title', ''))
        alias = unhex(item.get('alias', ''))
        intro = unhex(item.get('introtext', ''))
        full = unhex(item.get('fulltext_col', ''))
        created = item.get('created', '') or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        created_by = int(item.get('created_by', 0) or 0)
        if joomla_id in existing_ids:
            print(f" [SKIP] id={joomla_id} ya existe")
            stats['skip'] += 1
            continue

        content = intro + ('\n<!--more-->\n' + full if full.strip() else '')
        wp_author = feadulta_author if catid in feadulta_catids else user_map.get(created_by, 1)
        wp_cats = CATID_TO_WP.get(catid, [1])
        print(f" [{joomla_id}] catid={catid} | {title[:50]}")

        insert_sql = (
            f"INSERT INTO wp_posts "
            f"(post_author, post_date, post_date_gmt, post_content, post_title, "
            f"post_excerpt, post_status, comment_status, ping_status, post_name, "
            f"post_type, post_modified, post_modified_gmt, comment_count, "
            f"to_ping, pinged, post_content_filtered) VALUES ("
            f"{wp_author}, '{created}', '{created}', '{esc(content)}', "
            f"'{esc(title)}', '', 'publish', 'open', 'open', '{esc(alias[:200])}', "
            f"'post', '{created}', '{created}', 0, '', '', '')"
        )
        if DRY_RUN:
            wp_execute(insert_sql)  # prints the [DRY] line only
            stats['ok'] += 1
            continue

        # INSERT and LAST_INSERT_ID() share one mysql session, so the id is the
        # one of THIS row. Reading MAX(ID) separately would, after a failed
        # INSERT, yield an unrelated post and mis-attach the meta and terms.
        new_id_rows = wp_mysql(f"{insert_sql}; SELECT LAST_INSERT_ID() new_id")
        try:
            new_wp_id = int(new_id_rows[0]['new_id'])
        except (IndexError, KeyError, ValueError):
            new_wp_id = 0
        if new_wp_id <= 0:
            print(f" [ERR] id={joomla_id}: INSERT failed, item not imported", file=sys.stderr)
            stats['err'] += 1
            continue
        print(f" → WP ID={new_wp_id}")

        # Metas
        wp_execute(f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES ({new_wp_id}, '_fgj2wp_old_content_id', '{joomla_id}')")
        # Categories (term_ids that did not resolve to a taxonomy id are skipped)
        for term_id in wp_cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({new_wp_id}, {tt_id})"
                )
        # Polylang ES
        if pl_es_tt:
            wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({new_wp_id}, {pl_es_tt})"
            )
        stats['ok'] += 1

    # Recompute counts; skipped when no term resolved ("IN ()" is invalid SQL).
    if not DRY_RUN and stats['ok'] > 0 and tt_ids:
        print("\nActualizando counts de categorías...")
        tt_str = ','.join(str(v) for v in tt_ids.values())
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr "
            f"WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_str})"
        )
    print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip, {stats['err']} err ===")
if __name__ == '__main__':
main()
+403
View File
@@ -0,0 +1,403 @@
#!/usr/bin/env python3
"""
import_new_k2_items.py
Importa los K2 items nuevos de Joomla prod (id > 17873) al WordPress local (Docker).
Conexión a Joomla: SSH + MySQL en feadulta@134.0.10.170
Conexión a WP: Docker exec wordpress-mysql
Categorías WP asignadas según extra_fields:
- ES + tiene "libro de la biblia" (id 9) → Comentarios al evangelio (1647) + Feadulta (71)
- ES + no id9 + título "DOMINGO/SEMANA SANTA/etc." → Eucaristía (1648) + Feadulta (71)
- ES + no id9 + otro → Artículos (1650) + Feadulta (71)
- No ES → Artículos (1650) + Feadulta (71)
Idioma Polylang asignado según extra_field id 16:
1=es, 2=en, 3=fr, 4=it, 5=pt
"""
import json
import subprocess
import sys
from datetime import datetime
# ── Configuración ──────────────────────────────────────────────────────────────
JOOMLA_SSH_HOST = "134.0.10.170"
JOOMLA_SSH_USER = "feadulta"
JOOMLA_SSH_PASS = "C6c2A!mAl3Wj.BQF"
JOOMLA_DB_HOST = "127.0.0.1"
JOOMLA_DB_USER = "fejoomla3"
JOOMLA_DB_PASS = "5FF-}5^[>7^pK4W9"
JOOMLA_DB_NAME = "fejoomla3"
WP_DOCKER = "wordpress-mysql"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = "wordpress_pass"
WP_DB_NAME = "wordpress_db"
WP_DB_HOST = "wordpress-mysql" # dentro del container
# Import watermark for K2 items.
LAST_K2_ID = None # computed in main(): MAX(_fgj2wp_old_k2_id) in WP
# WP category identifiers. These are term_ids; per the notes here they are
# converted to term_taxonomy_ids further down
# (SELECT tt.term_taxonomy_id FROM wp_term_taxonomy tt WHERE tt.term_id=N).
CAT_FEADULTA = 71 # term_id (converted to term_taxonomy_id below)
CAT_ARTICULOS = 1650
CAT_EVANGELIO = 1647
CAT_EUCARISTIA = 1648
# K2 extra field 16 value -> Polylang language slug
LANG_MAP = {1: 'es', 2: 'en', 3: 'fr', 4: 'it', 5: 'pt'}
# Title keywords; presumably used to classify liturgical (Eucaristía) items — confirm at the use site.
DOMINGO_RE = r'DOMINGO|SEMANA SANTA|SEMANA DE PASCUA|PENTECOST|NAVIDAD|EPIFAN'
# With --dry-run, wp_execute() only prints the SQL it would run.
DRY_RUN = '--dry-run' in sys.argv
# ── Helpers ────────────────────────────────────────────────────────────────────
def ssh_mysql(query: str) -> list[dict]:
    """Run a query on the production Joomla MySQL through sshpass + ssh.

    The remote command line is interpreted by the remote shell, so both the
    database password and the query are quoted with ``shlex.quote``.  The
    previous ``repr()`` / double-quote approach let the shell expand ``$``,
    backticks and embedded double quotes inside the query.

    Args:
        query: SQL text to execute with ``mysql -B -e``.

    Returns:
        One dict per result row (column name -> raw string value), or an
        empty list when the command fails or returns no data rows.
    """
    import shlex  # local import: only this helper needs shell quoting

    remote_cmd = (
        f'mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} '
        f'-p{shlex.quote(JOOMLA_DB_PASS)} {JOOMLA_DB_NAME} '
        f'--default-character-set=utf8mb4 -B -e {shlex.quote(query)}'
    )
    cmd = [
        'sshpass', '-p', JOOMLA_SSH_PASS,
        'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
        remote_cmd,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"[ERROR SSH] {result.stderr[:300]}", file=sys.stderr)
        return []
    # Batch (-B) output is TSV: first line holds the column names.
    lines = result.stdout.strip().split('\n')
    if len(lines) < 2:
        return []
    headers = lines[0].split('\t')
    return [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
def wp_mysql(query: str) -> list[dict]:
    """Run a read query on the local WordPress MySQL through ``docker exec``.

    Returns one dict per row (column name -> raw string value).  An empty
    list is returned when the mysql client fails or yields no data rows.
    """
    proc = subprocess.run(
        [
            'docker', 'exec', WP_DOCKER,
            'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
            '--default-character-set=utf8mb4', '-B', '-e', query,
        ],
        capture_output=True, text=True, encoding='utf-8',
    )
    if proc.returncode != 0:
        print(f"[ERROR WP] {proc.stderr[:300]}", file=sys.stderr)
        return []
    # First TSV line is the header; everything after it is data.
    header_line, *data_lines = proc.stdout.strip().split('\n')
    if not data_lines:
        return []
    columns = header_line.split('\t')
    return [dict(zip(columns, row.split('\t'))) for row in data_lines if row]
def wp_execute(sql: str):
    """Run a write statement (INSERT/UPDATE) on the local WordPress MySQL.

    In dry-run mode the statement is only printed (truncated to 120 chars).

    Returns:
        True when the statement ran (or was only printed in dry-run mode),
        False when the mysql client exited with a non-zero status.  Earlier
        versions returned None in every case, so callers that ignore the
        result keep working.
    """
    if DRY_RUN:
        print(f" [DRY] {sql[:120]}")
        return True
    cmd = [
        'docker', 'exec', WP_DOCKER,
        'mysql', '-u', WP_DB_USER, f'-p{WP_DB_PASS}', WP_DB_NAME,
        '--default-character-set=utf8mb4', '-e', sql
    ]
    # Decode stderr as UTF-8 and never fail on odd bytes: it is only used for logging.
    result = subprocess.run(cmd, capture_output=True, text=True,
                            encoding='utf-8', errors='replace')
    if result.returncode != 0:
        print(f"[ERROR INSERT] {result.stderr[:300]}", file=sys.stderr)
        return False
    return True
def esc(s: str) -> str:
    """Escape backslashes and single quotes so *s* can sit inside a quoted SQL literal."""
    # Single-pass translation: each backslash is doubled, each quote gets a backslash.
    return s.translate({ord('\\'): '\\\\', ord("'"): "\\'"})
# ── Cargar datos auxiliares ────────────────────────────────────────────────────
def load_user_map() -> dict:
    """Build the {joomla_user_id: wp_user_id} map from the ``_fgj2wp_old_user_id`` usermeta."""
    mapping = {}
    for row in wp_mysql(
        "SELECT um.meta_value jid, u.ID wid FROM wp_users u "
        "JOIN wp_usermeta um ON um.user_id=u.ID "
        "WHERE um.meta_key='_fgj2wp_old_user_id'"
    ):
        joomla_id, wp_id = row['jid'], row['wid']
        try:
            mapping[int(joomla_id)] = int(wp_id)
        except ValueError:
            # Non-numeric id in either column: ignore that row.
            continue
    return mapping
def load_term_taxonomy_ids() -> dict:
    """Map each category term_id used by this script to its term_taxonomy_id."""
    wanted = (CAT_FEADULTA, CAT_ARTICULOS, CAT_EVANGELIO, CAT_EUCARISTIA)
    id_list = ','.join(map(str, wanted))
    rows = wp_mysql(
        f"SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE term_id IN ({id_list}) AND taxonomy='category'"
    )
    mapping = {}
    for row in rows:
        mapping[int(row['term_id'])] = int(row['term_taxonomy_id'])
    return mapping
def load_polylang_term_ids() -> dict:
    """Map language slug ('es', 'en', 'fr', 'it', 'pt') to its 'language' term_taxonomy_id."""
    rows = wp_mysql(
        "SELECT t.slug, tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug IN ('es','en','fr','it','pt')"
    )
    by_slug = {}
    for row in rows:
        by_slug[row['slug']] = int(row['term_taxonomy_id'])
    return by_slug
# ── Parsear extra_fields ───────────────────────────────────────────────────────
def parse_extra_fields(ef_json: str) -> dict:
    """Extract the fields this importer cares about from a K2 ``extra_fields`` JSON string.

    Args:
        ef_json: JSON text (expected: a list of ``{"id": ..., "value": ...}``
            objects), or an empty string / the literal ``'NULL'``.

    Returns:
        A dict with keys:
          * ``lang_val``: int value of field id 16, or None.
          * ``has_libro``: True when a field with id 9 is present.
          * ``cita_biblica``: value of field id 14 (lists joined with ','),
            or None when empty/absent.
        Malformed input (invalid JSON, JSON that is not a list, entries that
        are not objects) yields the defaults instead of raising.
    """
    result = {'lang_val': None, 'has_libro': False, 'cita_biblica': None}
    if not ef_json or ef_json == 'NULL':
        return result
    try:
        fields = json.loads(ef_json)
    except json.JSONDecodeError:
        return result
    # Valid JSON is not necessarily a list of objects ("null", "{}", "[1]").
    if not isinstance(fields, list):
        return result
    for f in fields:
        if not isinstance(f, dict):
            continue
        fid = str(f.get('id', ''))
        val = f.get('value')
        if fid == '16' and val is not None:
            try:
                result['lang_val'] = int(val)
            except (ValueError, TypeError):
                pass  # non-numeric language value: keep None
        elif fid == '9':
            result['has_libro'] = True
        elif fid == '14':
            if isinstance(val, list):
                result['cita_biblica'] = ','.join(str(v) for v in val)
            else:
                result['cita_biblica'] = str(val) if val else None
    return result
def determine_categories(ef: dict, title: str) -> list[int]:
    """Return the category term_ids for a post: CAT_FEADULTA plus one specific category.

    Items count as Spanish when ``lang_val`` is 1 or missing.
    """
    import re

    lang = ef.get('lang_val')
    is_spanish = lang is None or lang == 1
    if is_spanish and ef.get('has_libro'):
        specific = CAT_EVANGELIO
    elif is_spanish and re.search(DOMINGO_RE, title, re.IGNORECASE):
        specific = CAT_EUCARISTIA
    else:
        specific = CAT_ARTICULOS
    return [CAT_FEADULTA, specific]
# ── Import principal ───────────────────────────────────────────────────────────
def _max_wp_post_id():
    """Return the current MAX(ID) of wp_posts.

    Returns:
        The highest post ID as an int, 0 when the table is empty (MAX() is
        NULL), or None when the query itself failed (wp_mysql returned no rows).
    """
    rows = wp_mysql("SELECT MAX(ID) as new_id FROM wp_posts")
    if not rows:
        return None
    raw = rows[0].get('new_id')
    if not raw or raw == 'NULL':
        return 0
    return int(raw)


def main():
    """Import published K2 items newer than the last imported one into local WordPress.

    Steps: detect the last imported K2 id, load user/category/language maps,
    fetch the new items from production Joomla (text columns HEX-encoded),
    then insert post, metas, categories and Polylang language for each item,
    and finally recompute the category counts.

    The new post ID is read with MAX(ID) because every ``docker exec`` opens a
    fresh connection.  To avoid attaching metas and terms to an unrelated
    post when the INSERT fails, the value is compared against the MAX(ID)
    seen before the insert and the item is counted as an error unless it grew.
    """
    global LAST_K2_ID

    def unhex(val: str) -> str:
        """Decode a HEX()-encoded column back to text ('' for empty or NULL)."""
        if not val or val == 'NULL':
            return ''
        try:
            return bytes.fromhex(val).decode('utf-8', errors='replace')
        except ValueError:
            # Not valid hex: keep the raw value rather than losing it.
            return val

    # Dynamic detection of the last imported K2 id (avoids hardcoding and
    # re-importing previous deltas).
    r = wp_mysql("SELECT MAX(CAST(meta_value AS UNSIGNED)) m FROM wp_postmeta "
                 "WHERE meta_key='_fgj2wp_old_k2_id'")
    LAST_K2_ID = int(r[0]['m']) if r and r[0].get('m') and r[0]['m'] != 'NULL' else 17873
    print(f"=== Import K2 items > {LAST_K2_ID} → WP local {'[DRY RUN]' if DRY_RUN else '[LIVE]'} ===\n")

    user_map = load_user_map()
    print(f"Usuarios mapeados: {len(user_map)}")
    tt_ids = load_term_taxonomy_ids()
    print(f"Categorías TT IDs: {tt_ids}")
    pl_ids = load_polylang_term_ids()
    print(f"Polylang idiomas: {pl_ids}")

    # K2 ids already present in WP must not be imported again.
    existing = wp_mysql(
        f"SELECT meta_value FROM wp_postmeta WHERE meta_key='_fgj2wp_old_k2_id' "
        f"AND meta_value+0 > {LAST_K2_ID}"
    )
    existing_ids = {int(r['meta_value']) for r in existing}
    print(f"K2 IDs > {LAST_K2_ID} ya en WP: {len(existing_ids)}")

    # Fetch the Joomla items over SSH+MySQL; the query goes through stdin to
    # avoid shell escaping.
    print("\nObteniendo K2 items de Joomla prod...")
    # HEX encoding for text columns keeps HTML with line breaks from breaking
    # the TSV parsing.
    query = (
        f"SELECT id, HEX(title) title, HEX(alias) alias, "
        f"HEX(introtext) introtext, HEX(`fulltext`) fulltext_col, "
        f"created, created_by, HEX(extra_fields) extra_fields, publish_up "
        f"FROM ew4r_k2_items "
        f"WHERE published=1 AND id > {LAST_K2_ID} ORDER BY id;"
    )
    mysql_cmd = (
        f"mysql --skip-ssl -h {JOOMLA_DB_HOST} -u {JOOMLA_DB_USER} "
        f"-p'{JOOMLA_DB_PASS}' {JOOMLA_DB_NAME} "
        f"--default-character-set=utf8mb4 -B"
    )
    cmd = [
        'sshpass', '-p', JOOMLA_SSH_PASS,
        'ssh', f'{JOOMLA_SSH_USER}@{JOOMLA_SSH_HOST}',
        mysql_cmd
    ]
    result = subprocess.run(cmd, input=query, capture_output=True, text=True, encoding='utf-8')
    if result.returncode != 0:
        print(f"ERROR: {result.stderr[:500]}")
        sys.exit(1)

    lines = result.stdout.strip().split('\n')
    if len(lines) < 2:
        print("No se encontraron items nuevos.")
        return
    headers = lines[0].split('\t')
    items = [dict(zip(headers, line.split('\t'))) for line in lines[1:] if line]
    print(f"Items a importar: {len(items)}")

    # Baseline for detecting failed INSERTs (only needed when actually writing).
    last_post_id = None
    if not DRY_RUN:
        last_post_id = _max_wp_post_id()
        if last_post_id is None:
            print("ERROR: no se pudo leer MAX(ID) de wp_posts; se aborta el import",
                  file=sys.stderr)
            sys.exit(1)

    stats = {'ok': 0, 'skip': 0, 'err': 0}
    for item in items:
        k2_id = int(item['id'])
        if k2_id in existing_ids:
            print(f" [SKIP] K2 id={k2_id} ya existe en WP")
            stats['skip'] += 1
            continue

        title = unhex(item.get('title', ''))
        alias = unhex(item.get('alias', ''))
        intro = unhex(item.get('introtext', ''))
        full = unhex(item.get('fulltext_col', ''))
        ef_json = unhex(item.get('extra_fields', '')) or '[]'
        created = item.get('created', '')
        if not created or created == 'NULL':
            created = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        created_by_raw = item.get('created_by', '0')
        created_by = int(created_by_raw) if created_by_raw and created_by_raw != 'NULL' else 0

        # Combined content: intro, then the "more" marker and the full text if any.
        if full and full.strip():
            content = intro + '\n<!--more-->\n' + full
        else:
            content = intro

        # WP author; falls back to user 1 when the Joomla author has no WP user.
        wp_author = user_map.get(created_by, 1)
        if created_by and created_by not in user_map:
            # The post is attributed to «Fe Adulta»; the printed message points
            # to scripts/fix_k2_authors.php as the follow-up fix (#143).
            print(f" ⚠ autor K2 {created_by} sin user WP → queda en 'Fe Adulta' "
                  f"(corregir con fix_k2_authors.php)")

        # Extra fields
        ef = parse_extra_fields(ef_json)
        lang_code = LANG_MAP.get(ef.get('lang_val'), 'es')
        cats = determine_categories(ef, title)
        print(f" [{k2_id}] {title[:50]} | lang={lang_code} | cats={cats}")

        # INSERT post
        post_slug = esc(alias[:200]) if alias else ''
        post_title = esc(title)
        post_content = esc(content)
        post_date = created
        post_date_gmt = created  # simplified: no timezone adjustment
        insert_post = (
            f"INSERT INTO wp_posts "
            f"(post_author, post_date, post_date_gmt, post_content, post_title, "
            f"post_excerpt, post_status, comment_status, ping_status, post_name, "
            f"post_type, post_modified, post_modified_gmt, comment_count, "
            f"to_ping, pinged, post_content_filtered) VALUES ("
            f"{wp_author}, '{post_date}', '{post_date_gmt}', '{post_content}', "
            f"'{post_title}', '', 'publish', 'open', 'open', '{post_slug}', "
            f"'post', '{post_date}', '{post_date_gmt}', 0, '', '', '')"
        )
        wp_execute(insert_post)
        if DRY_RUN:
            stats['ok'] += 1
            continue

        # LAST_INSERT_ID() is useless here (new connection per docker exec),
        # so read MAX(ID) and make sure it actually moved past the baseline.
        new_wp_id = _max_wp_post_id()
        if new_wp_id is None:
            print(f" [ERROR] No se pudo obtener el ID del post para k2_id={k2_id}")
            stats['err'] += 1
            continue
        if new_wp_id <= last_post_id:
            print(f" [ERROR] El INSERT del post falló para k2_id={k2_id}; "
                  f"no se crean metas ni categorías", file=sys.stderr)
            stats['err'] += 1
            continue
        last_post_id = new_wp_id
        print(f" → WP post ID={new_wp_id}")

        # INSERT metas
        metas = [
            ('_fgj2wp_old_k2_id', str(k2_id)),
            ('Idioma', str(ef.get('lang_val') or 1)),
        ]
        for meta_key, meta_val in metas:
            wp_execute(
                f"INSERT INTO wp_postmeta (post_id, meta_key, meta_value) "
                f"VALUES ({new_wp_id}, '{esc(meta_key)}', '{esc(meta_val)}')"
            )

        # Categories
        for term_id in cats:
            tt_id = tt_ids.get(term_id)
            if tt_id:
                wp_execute(
                    f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                    f"VALUES ({new_wp_id}, {tt_id})"
                )

        # Polylang language
        pl_tt = pl_ids.get(lang_code)
        if pl_tt:
            wp_execute(
                f"INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) "
                f"VALUES ({new_wp_id}, {pl_tt})"
            )
        stats['ok'] += 1

    # Recompute category counts
    if not DRY_RUN and stats['ok'] > 0:
        print("\nActualizando counts de categorías...")
        tt_ids_list = ','.join(str(v) for v in tt_ids.values())
        wp_execute(
            f"UPDATE wp_term_taxonomy tt SET count = ("
            f"SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            f") WHERE tt.term_taxonomy_id IN ({tt_ids_list})"
        )
    print(f"\n=== Resultado: {stats['ok']} ok, {stats['skip']} skip, {stats['err']} err ===")
if __name__ == '__main__':
main()
+517
View File
@@ -0,0 +1,517 @@
#!/usr/bin/env python3
"""
Importa a WordPress local el delta visible en Joomla produccion usando HTML publico.
Ruta de contingencia para cuando no hay SSH/DB a produccion. Conserva los IDs
Joomla en `_fgj2wp_old_k2_id` y `_fgj2wp_old_content_id` extraidos de las URLs.
Por defecto es dry-run. Usar `--apply` para escribir en la BD local.
"""
import argparse
import html
import re
import subprocess
import sys
import unicodedata
from dataclasses import dataclass, field
from typing import Optional
from urllib.parse import urljoin
import pymysql
ORIGIN_IP = "134.0.10.170"
HOST = "www.feadulta.com"
BASE = f"https://{HOST}"
WP_DB_USER = "wordpress_user"
WP_DB_PASS = "wordpress_pass"
WP_DB_NAME = "wordpress_db"
TERM_FEADULTA = 71
TERM_CARTA_SEMANA = 6
TERM_CARTAS_OTRAS = 21
TERM_CARTA_PASADA = 22
TERM_INDICE_MULTIMEDIA = 26
TERM_VIDEOS = 58
TERM_LECTURA = 1645
TERM_COMENTARIO_EDITORIAL = 1646
TERM_COMENTARIO = 1647
TERM_EUCARISTIA = 1648
TERM_MULTIMEDIA = 1649
TERM_ARTICULOS = 1650
SECTION_TO_TERM = {
"lectura": TERM_LECTURA,
"comentario_editorial": TERM_COMENTARIO_EDITORIAL,
"comentario": TERM_COMENTARIO,
"articulo": TERM_ARTICULOS,
"eucaristia": TERM_EUCARISTIA,
"multimedia": TERM_MULTIMEDIA,
}
CARTAS = [
{
"content_id": 9136,
"url": "/es/ayuda/otras-semanas/9136-uno-y-trino.html",
"date": "2026-05-28 00:00:00",
"cats": [TERM_CARTAS_OTRAS, TERM_FEADULTA],
},
{
"content_id": 9143,
"url": "/es/ayuda/semana-pasada/9143-20-anos-de-fe-adulta.html",
"date": "2026-06-06 00:00:00",
"cats": [TERM_CARTAS_OTRAS, TERM_CARTA_PASADA, TERM_FEADULTA],
},
{
"content_id": 9150,
"url": "/es/ayuda/esta-semana/9150-la-puerta-pequena.html",
"date": "2026-06-13 00:00:00",
"cats": [TERM_CARTA_SEMANA, TERM_CARTAS_OTRAS, TERM_FEADULTA],
},
]
@dataclass
class Item:
    # One piece of Joomla content discovered for import into WordPress.
    kind: str  # "k2" or "content"; selects which _fgj2wp_old_* meta key stores source_id
    source_id: int  # Joomla id (taken from the URL or from the CARTAS table)
    url: str  # path or URL the page is fetched from
    title: str = ""
    content: str = ""  # cleaned HTML body
    slug: str = ""  # stored as post_name
    date: str = "2026-06-13 00:00:00"  # used for post_date, post_date_gmt and the modified dates
    author_name: Optional[str] = None  # display name, resolved to a WP user id on insert
    term_ids: set[int] = field(default_factory=set)  # WordPress category term_ids
    carta_source_id: Optional[int] = None  # content id of the carta that linked this K2 item
def wp_ip() -> str:
    """Return the IP address(es) Docker reports for the ``wordpress-mysql`` container."""
    inspect_cmd = [
        "docker",
        "inspect",
        "wordpress-mysql",
        "--format",
        "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
    ]
    completed = subprocess.run(inspect_cmd, capture_output=True, text=True, check=True)
    return completed.stdout.strip()
def conn():
    """Open a pymysql connection to the local WordPress DB (dict rows, autocommit off)."""
    settings = {
        "host": wp_ip(),
        "user": WP_DB_USER,
        "password": WP_DB_PASS,
        "database": WP_DB_NAME,
        "charset": "utf8mb4",
        "autocommit": False,
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
def normalize(text: str) -> str:
    """Strip accents, collapse whitespace runs to one space, trim and lowercase *text*."""
    decomposed = unicodedata.normalize("NFKD", text)
    # After NFKD the accents are separate combining characters; drop them.
    without_marks = "".join(ch for ch in decomposed if unicodedata.combining(ch) == 0)
    collapsed = re.sub(r"\s+", " ", without_marks)
    return collapsed.strip().lower()
def slug_from_url(path: str) -> str:
    """Derive the post slug from a Joomla URL or path.

    Takes the last path segment and removes the leading ``<id>-`` prefix and
    a trailing ``.html``.  The ``#fragment`` and ``?query`` parts are removed
    *before* the last segment is selected, so a query containing ``/`` or a
    trailing fragment no longer corrupts the slug.
    """
    path = path.split("#", 1)[0].split("?", 1)[0]
    name = path.rstrip("/").rsplit("/", 1)[-1]
    name = re.sub(r"^\d+-", "", name)
    return re.sub(r"\.html$", "", name)
def id_from_url(path: str) -> Optional[int]:
    """Return the numeric id of the first ``/<id>-<slug>`` path segment, or None."""
    match = re.search(r"/(\d+)-[^/?#]+(?:\.html)?", path)
    if match is None:
        return None
    return int(match.group(1))
def fetch(path: str) -> str:
    """Download *path* (resolved against BASE) with curl and return the response body.

    curl is pinned to ORIGIN_IP for HOST, skips certificate verification (-k),
    follows redirects and gives up after 12 seconds.  A non-zero curl exit
    raises ``subprocess.CalledProcessError`` (check=True).
    """
    target = urljoin(BASE, path)
    print(f"FETCH {path}", file=sys.stderr, flush=True)
    curl_cmd = ["curl", "--resolve", f"{HOST}:443:{ORIGIN_IP}", "-k", "-L"]
    curl_cmd += ["--max-time", "12"]
    curl_cmd += ["-A", "Mozilla/5.0 Codex Feadulta delta importer"]
    curl_cmd += ["-sS", target]
    completed = subprocess.run(curl_cmd, capture_output=True, text=True, check=True)
    return completed.stdout
def clean_fragment(fragment: str) -> str:
    """Clean an HTML fragment extracted from a Joomla page.

    * Removes ``<script>`` and ``<form>`` elements.
    * Drops the ``?tmpl=component...`` query from ``href`` attributes.
    * Normalizes CRLF to LF.
    * Rewrites ``/images/<path>`` in ``src``/``href`` to the WordPress uploads
      URL when the file exists in the local uploads directory; otherwise the
      original ``/images/`` URL is kept.

    Returns the cleaned fragment with surrounding whitespace stripped.
    """
    import os.path  # local import: file existence check without spawning `test -f`

    fragment = re.sub(r"<script\b.*?</script>", "", fragment, flags=re.I | re.S)
    fragment = re.sub(r"<form\b.*?</form>", "", fragment, flags=re.I | re.S)
    fragment = re.sub(r"\s+href=\"([^\"]*)\?tmpl=component[^\"]*\"", r' href="\1"', fragment)
    fragment = fragment.replace("\r\n", "\n")

    # Joomla image paths -> WP uploads when the file exists locally.
    def repl(m):
        attr, path = m.group(1), m.group(2)
        local = f"/home/rafa/joomla-migration/wordpress/wp-content/uploads/{path}"
        if os.path.isfile(local):
            return f'{attr}="/fea/wp-content/uploads/{path}"'
        return f'{attr}="/images/{path}"'

    fragment = re.sub(r'(src|href)="/images/([^"]+)"', repl, fragment)
    return fragment.strip()
def extract_title_and_content(doc: str) -> tuple[str, str]:
    """Return ``(title, content)`` extracted from a Joomla HTML page.

    Patterns are tried in order and the first hit wins.  The title has its
    tags removed and entities unescaped; the content goes through
    ``clean_fragment``.  Either element is '' when nothing matches.
    """
    flags = re.I | re.S
    title_patterns = (
        r'<h2 class="fa-postheader">\s*(.*?)\s*</h2>',
        r'<h2 class="itemTitle">\s*(.*?)\s*</h2>',
        r'<meta property="og:title" content="([^"]+)"',
        r"<title>\s*(.*?)\s*</title>",
    )
    content_patterns = (
        r'<div class="fa-article">\s*(.*?)\s*</div>\s*</div>\s*<div class="cleared"',
        r'<div class="itemFullText">\s*(.*?)\s*</div>',
        r'<div class="fa-article">\s*(.*?)\s*</div>',
    )

    def first_match(patterns):
        for pattern in patterns:
            found = re.search(pattern, doc, flags)
            if found:
                return found
        return None

    title_match = first_match(title_patterns)
    if title_match:
        title = html.unescape(re.sub(r"<.*?>", "", title_match.group(1))).strip()
    else:
        title = ""

    body_match = first_match(content_patterns)
    content = clean_fragment(body_match.group(1)) if body_match else ""
    return title, content
def extract_author(doc: str) -> Optional[str]:
    """Return the author from ``<meta name="author">`` or, failing that, ``<a rel="author">``."""
    meta = re.search(r'<meta name="author" content="([^"]+)"', doc, re.I)
    if meta is not None:
        return html.unescape(meta.group(1)).strip()

    link = re.search(r'<a rel="author"[^>]*>\s*(.*?)\s*</a>', doc, re.I | re.S)
    if link is None:
        return None
    inner_text = re.sub(r"<.*?>", "", link.group(1))
    return html.unescape(inner_text).strip()
def iter_paragraphs(content: str):
    """Yield the inner HTML of every ``<p>...</p>`` element in *content*, in order."""
    paragraph_re = r"<p\b[^>]*>(.*?)</p>"
    yield from (hit.group(1) for hit in re.finditer(paragraph_re, content, flags=re.I | re.S))
def links_by_section(carta: Item) -> list[tuple[str, str, str, Optional[str]]]:
    """Collect the links of a carta, tagged with the section they appear in.

    Paragraphs are scanned in order.  A paragraph containing one of the known
    section headings switches the current section and is not scanned for
    links itself.  Links found before any heading are ignored.  In the gospel
    section the first link is the reading, the second the editorial comment
    and the rest are comments.

    Returns a list of ``(href, category, link_text, author)`` tuples; *author*
    is the text before the first ':' of the link text, or None.
    """
    headings = (
        ("evangelio y comentarios al evangelio", "evangelio"),
        ("articulos seleccionados para la semana", "articulo"),
        ("eucaristias mas participativas", "eucaristia"),
        ("material multimedia", "multimedia"),
    )
    gospel_roles = ("lectura", "comentario_editorial")

    current = None
    gospel_seen = 0
    found = []
    for para in iter_paragraphs(carta.content):
        flat = normalize(re.sub(r"<.*?>", " ", html.unescape(para)))
        heading = next((sec for marker, sec in headings if marker in flat), None)
        if heading is not None:
            current = heading
            if heading == "evangelio":
                gospel_seen = 0
            continue
        if not current:
            continue
        for raw_href, raw_label in re.findall(
            r'<a\b[^>]*href="([^"]+)"[^>]*>(.*?)</a>', para, flags=re.I | re.S
        ):
            href = html.unescape(raw_href)
            label = html.unescape(re.sub(r"<.*?>", " ", raw_label))
            label = re.sub(r"\s+", " ", label).strip()
            if current == "evangelio":
                if gospel_seen < len(gospel_roles):
                    cat = gospel_roles[gospel_seen]
                else:
                    cat = "comentario"
                gospel_seen += 1
            else:
                cat = current
            author = label.split(":", 1)[0].strip() if ":" in label else None
            found.append((href, cat, label, author))
    return found
def load_existing(c, meta_key: str) -> set[int]:
    """Return the set of integer ``meta_value``s stored in wp_postmeta under *meta_key*."""
    sql = "SELECT CAST(meta_value AS UNSIGNED) id FROM wp_postmeta WHERE meta_key=%s"
    with c.cursor() as cur:
        cur.execute(sql, (meta_key,))
        rows = cur.fetchall()
        return {int(row["id"]) for row in rows if row["id"] is not None}
def max_existing(ids: set[int]) -> int:
    """Return the largest id in *ids*, or 0 when the set is empty."""
    return max(ids, default=0)
def load_terms(c) -> dict[int, int]:
    """Map every category term_id used by this importer to its term_taxonomy_id."""
    wanted = [
        TERM_FEADULTA,
        TERM_CARTA_SEMANA,
        TERM_CARTAS_OTRAS,
        TERM_CARTA_PASADA,
        TERM_INDICE_MULTIMEDIA,
        TERM_VIDEOS,
        TERM_LECTURA,
        TERM_COMENTARIO_EDITORIAL,
        TERM_COMENTARIO,
        TERM_EUCARISTIA,
        TERM_MULTIMEDIA,
        TERM_ARTICULOS,
    ]
    # One %s placeholder per id; the driver binds the values.
    placeholders = ",".join("%s" for _ in wanted)
    sql = (
        "SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
        f"WHERE taxonomy='category' AND term_id IN ({placeholders})"
    )
    with c.cursor() as cur:
        cur.execute(sql, wanted)
        rows = cur.fetchall()
        mapping = {}
        for row in rows:
            mapping[int(row["term_id"])] = int(row["term_taxonomy_id"])
        return mapping
def load_lang_es(c) -> Optional[int]:
    """Return the term_taxonomy_id of the 'es' term in the 'language' taxonomy, or None."""
    sql = (
        "SELECT tt.term_taxonomy_id FROM wp_terms t "
        "JOIN wp_term_taxonomy tt ON tt.term_id=t.term_id "
        "WHERE tt.taxonomy='language' AND t.slug='es' LIMIT 1"
    )
    with c.cursor() as cur:
        cur.execute(sql)
        found = cur.fetchone()
    if not found:
        return None
    return int(found["term_taxonomy_id"])
def load_authors(c) -> dict[str, int]:
    """Map the normalized display_name and user_login of every WP user to its ID."""
    with c.cursor() as cur:
        cur.execute("SELECT ID, display_name, user_login FROM wp_users")
        users = cur.fetchall()
    lookup = {}
    for user in users:
        # display_name first, then user_login: later entries overwrite earlier ones.
        for column in ("display_name", "user_login"):
            lookup[normalize(user[column])] = int(user["ID"])
    return lookup
def resolve_author(author_map: dict[str, int], name: Optional[str]) -> int:
    """Resolve an author display name to a WordPress user id.

    Tries an exact match on the normalized name first, then a substring match
    in either direction.  Empty keys in *author_map* and names that normalize
    to an empty string are ignored, because an empty string is a substring of
    everything and would match an arbitrary user.

    Returns:
        The matching user id, or 1 when nothing matches.
    """
    if not name:
        return 1
    n = normalize(name)
    if not n:
        return 1
    if n in author_map:
        return author_map[n]
    for key, uid in author_map.items():
        if key and (n in key or key in n):
            return uid
    return 1
def build_items(c) -> list[Item]:
    """Discover the items that are new with respect to the local WordPress DB.

    Each carta in CARTAS is fetched; the carta itself is kept when its content
    id is above the highest imported one and not yet present.  Its links are
    then walked: K2 item links and multimedia/video links whose id is above
    the corresponding maximum and not yet imported become items as well.
    Pages of discovered items are fetched afterwards to fill title, content
    and (when not already known) author.

    Returns the items sorted by (date, kind, source_id).
    """
    known_k2 = load_existing(c, "_fgj2wp_old_k2_id")
    known_content = load_existing(c, "_fgj2wp_old_content_id")
    top_k2 = max_existing(known_k2)
    top_content = max_existing(known_content)
    print(
        f"WP existentes: K2={len(known_k2)} max={top_k2} "
        f"content={len(known_content)} max={top_content}"
    )

    def is_new(sid, top, known):
        # New means: an id was parsed, it is above the imported maximum and unseen.
        return bool(sid) and sid > top and sid not in known

    found: dict[tuple[str, int], Item] = {}
    for spec in CARTAS:
        page = fetch(spec["url"])
        carta_title, carta_body = extract_title_and_content(page)
        carta = Item(
            kind="content",
            source_id=spec["content_id"],
            url=spec["url"],
            title=carta_title,
            content=carta_body,
            slug=slug_from_url(spec["url"]),
            date=spec["date"],
            term_ids=set(spec["cats"]),
        )
        if carta.source_id > top_content and carta.source_id not in known_content:
            found[(carta.kind, carta.source_id)] = carta

        for href, cat_name, _label, author in links_by_section(carta):
            if "/buscadoravanzado/item/" in href:
                sid = id_from_url(href)
                if not is_new(sid, top_k2, known_k2):
                    continue
                key = ("k2", sid)
                if key not in found:
                    found[key] = Item(
                        kind="k2",
                        source_id=sid,
                        url=href,
                        slug=slug_from_url(href),
                        date=carta.date,
                        author_name=author,
                        term_ids={TERM_FEADULTA},
                        carta_source_id=carta.source_id,
                    )
                # A K2 item linked from several sections accumulates their terms.
                found[key].term_ids.add(SECTION_TO_TERM[cat_name])
            elif "/indice-multimedia/" in href or "/videos/" in href:
                sid = id_from_url(href)
                if not is_new(sid, top_content, known_content):
                    continue
                key = ("content", sid)
                if key not in found:
                    media_term = TERM_VIDEOS if "/videos/" in href else TERM_INDICE_MULTIMEDIA
                    found[key] = Item(
                        kind="content",
                        source_id=sid,
                        url=href,
                        slug=slug_from_url(href),
                        date=carta.date,
                        term_ids={TERM_MULTIMEDIA, media_term},
                    )

    # Fetch item pages after discovery.
    for entry in found.values():
        if entry.title and entry.content:
            continue
        page = fetch(entry.url)
        page_title, page_body = extract_title_and_content(page)
        page_author = extract_author(page)
        entry.title = page_title or entry.slug.replace("-", " ").title()
        entry.content = page_body
        if page_author and not entry.author_name:
            entry.author_name = page_author

    return sorted(found.values(), key=lambda it: (it.date, it.kind, it.source_id))
def insert_item(c, item: Item, term_to_tt: dict[int, int], lang_es_tt: Optional[int], author_map: dict[str, int], dry_run: bool) -> Optional[int]:
    """Insert *item* as a published WordPress post with its metas and term links.

    In dry-run mode only a summary line is printed and None is returned.
    Otherwise the post row, the source-id meta, the ``Idioma`` meta ('1') and
    the category/language relationships are inserted on connection *c*; no
    commit is issued here.

    Returns the new post id, or None in dry-run mode.
    """
    author_id = resolve_author(author_map, item.author_name)
    if dry_run:
        print(
            f"[DRY] {item.kind:7s} {item.source_id:5d} "
            f"terms={sorted(item.term_ids)} author={author_id} {item.title[:70]}"
        )
        return None

    source_meta_key = "_fgj2wp_old_k2_id" if item.kind == "k2" else "_fgj2wp_old_content_id"
    link_sql = "INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) VALUES (%s,%s)"

    # Taxonomy ids to attach: known categories in term_id order, then the language.
    tt_ids = [term_to_tt.get(term_id) for term_id in sorted(item.term_ids)]
    tt_ids = [tt for tt in tt_ids if tt]
    if lang_es_tt:
        tt_ids.append(lang_es_tt)

    post_values = (
        author_id, item.date, item.date, item.content, item.title,
        item.slug, item.date, item.date,
    )
    with c.cursor() as cur:
        cur.execute(
            """
            INSERT INTO wp_posts
            (post_author, post_date, post_date_gmt, post_content, post_title,
            post_excerpt, post_status, comment_status, ping_status, post_name,
            post_type, post_modified, post_modified_gmt, comment_count,
            to_ping, pinged, post_content_filtered)
            VALUES
            (%s,%s,%s,%s,%s,'','publish','open','open',%s,
            'post',%s,%s,0,'','','')
            """,
            post_values,
        )
        post_id = cur.lastrowid
        cur.execute(
            "INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES (%s,%s,%s)",
            (post_id, source_meta_key, str(item.source_id)),
        )
        cur.execute(
            "INSERT INTO wp_postmeta (post_id, meta_key, meta_value) VALUES (%s,'Idioma','1')",
            (post_id,),
        )
        for tt in tt_ids:
            cur.execute(link_sql, (post_id, tt))
    return post_id
def refresh_counts(c, term_to_tt: dict[int, int], lang_es_tt: Optional[int]):
    """Recompute ``wp_term_taxonomy.count`` for the given categories and language.

    The count is set to the number of rows in wp_term_relationships for each
    term_taxonomy_id.  When there is nothing to refresh (no categories
    resolved and no language term) the function returns without touching the
    connection, instead of sending an invalid ``IN ()`` clause.
    """
    ttids = list(term_to_tt.values())
    if lang_es_tt:
        ttids.append(lang_es_tt)
    if not ttids:
        return
    with c.cursor() as cur:
        cur.execute(
            "UPDATE wp_term_taxonomy tt SET count = ("
            "SELECT COUNT(*) FROM wp_term_relationships tr "
            "WHERE tr.term_taxonomy_id=tt.term_taxonomy_id"
            ") WHERE tt.term_taxonomy_id IN (%s)" % ",".join(["%s"] * len(ttids)),
            ttids,
        )
def main():
    """Command-line entry point: dry-run by default, ``--apply`` writes to the local DB.

    All inserts happen in one transaction: committed after the ``_carta_id``
    links and count refresh in apply mode, rolled back in dry-run mode or on
    any exception.  The connection is always closed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--apply", action="store_true", help="escribe en WordPress local")
    dry_run = not parser.parse_args().apply

    c = conn()
    try:
        term_to_tt = load_terms(c)
        lang_es_tt = load_lang_es(c)
        author_map = load_authors(c)
        items = build_items(c)
        kinds = [entry.kind for entry in items]
        print(f"Items nuevos detectados: {len(items)}")
        print(" K2:", kinds.count("k2"), "content:", kinds.count("content"))

        source_to_wp = {}
        for entry in items:
            wp_id = insert_item(c, entry, term_to_tt, lang_es_tt, author_map, dry_run)
            if wp_id:
                source_to_wp[(entry.kind, entry.source_id)] = wp_id

        if dry_run:
            c.rollback()
            print("Dry-run: sin cambios.")
        else:
            # Link every imported K2 item to the WP post of the carta that listed it.
            with c.cursor() as cur:
                for entry in items:
                    if entry.kind == "k2" and entry.carta_source_id:
                        wp_id = source_to_wp.get(("k2", entry.source_id))
                        carta_wp_id = source_to_wp.get(("content", entry.carta_source_id))
                        if wp_id and carta_wp_id:
                            cur.execute(
                                "INSERT IGNORE INTO wp_postmeta (post_id, meta_key, meta_value) VALUES (%s,'_carta_id',%s)",
                                (wp_id, str(carta_wp_id)),
                            )
            refresh_counts(c, term_to_tt, lang_es_tt)
            c.commit()
            print("Import commit OK.")
    except Exception:
        c.rollback()
        raise
    finally:
        c.close()
if __name__ == "__main__":
main()
+84
View File
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
lecturas_apply.py — Casa las lecturas ES sin traducir contra el índice del leccionario
(build_lectionary_index.py) POR REFERENCIA bíblica y vuelca las traducciones a crear.
Entrada: /tmp/lectionary_index.json , /tmp/lecturas_todo.json
Salida: /tmp/lecturas_creadas.json (para que un wp eval cree+asocie+publique)
/tmp/lecturas_skip.json
Uso: python3 lecturas_apply.py [--limit N]
"""
import sys, re, json, unicodedata
from collections import Counter
# Alias de nombre de libro: feadulta -> token usado por evangelizo (último token full_title ES)
ALIAS = {
"HECHOS": "APOSTOLES", "HCH": "APOSTOLES",
"CANTAR": "CANTARES",
"APOC": "APOCALIPSIS", "AP": "APOCALIPSIS",
"QOHELET": "ECLESIASTES",
# abreviaturas litúrgicas
"MT": "MATEO", "MC": "MARCOS", "LC": "LUCAS", "JN": "JUAN",
"RM": "ROMANOS", "GA": "GALATAS", "EF": "EFESIOS", "FLP": "FILIPENSES",
"COL": "COLOSENSES", "HB": "HEBREOS", "ST": "SANTIAGO",
"IS": "ISAIAS", "JR": "JEREMIAS", "EZ": "EZEQUIEL", "GN": "GENESIS",
"EX": "EXODO", "DT": "DEUTERONOMIO", "SAL": "SALMOS", "PR": "PROVERBIOS",
}
def norm(s):
    """Return *s* without accents, uppercased, keeping only the letters A-Z.

    Digits are dropped too, so the book number ("1 Corintios") is discarded.
    """
    ascii_upper = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode().upper()
    return "".join(ch for ch in ascii_upper if "A" <= ch <= "Z")
def title_keys(title):
    """Turn a title with '/'-separated references into ``BOOK|chapter|verse`` index keys.

    Returns None as soon as one part cannot be parsed, so a post is never
    matched on a partial set of references.
    """
    reference_re = r"([0-9]?\s*[A-Za-zÀ-ÿ][A-Za-zÀ-ÿ.\s]+?)\s+(\d{1,3})\s*,\s*(\d{1,3})"
    keys = []
    for part in re.split(r"\s*/\s*", title):
        hit = re.search(reference_re, part)
        if hit is None:
            return None  # unparseable part: do not match the post at all
        raw_book, chapter, verse = hit.groups()
        book = norm(raw_book)
        keys.append(f"{ALIAS.get(book, book)}|{int(chapter)}|{int(verse)}")
    return keys or None
def main():
    """Match untranslated ES readings against the lectionary index and dump the results.

    Reads /tmp/lectionary_index.json and /tmp/lecturas_todo.json, writes
    /tmp/lecturas_creadas.json (matched posts with the concatenated text per
    language) and /tmp/lecturas_skip.json (skipped posts with the reason),
    then prints a summary.  ``--limit N`` restricts processing to the first N
    pending posts.

    All files are handled with context managers and explicit UTF-8, since the
    output is written with ``ensure_ascii=False``.
    """
    limit = 0
    if "--limit" in sys.argv:
        try:
            limit = int(sys.argv[sys.argv.index("--limit") + 1])
        except (IndexError, ValueError):
            sys.exit("Uso: python3 lecturas_apply.py [--limit N]")

    with open("/tmp/lectionary_index.json", encoding="utf-8") as fh:
        idx = json.load(fh)
    with open("/tmp/lecturas_todo.json", encoding="utf-8") as fh:
        todo = json.load(fh)
    if limit:
        todo = todo[:limit]

    creadas, skip = [], []
    for t in todo:
        keys = title_keys(t["title"])
        if not keys:
            skip.append({**t, "why": "título no parseable"})
            continue
        missing = [k for k in keys if k not in idx]
        if missing:
            skip.append({**t, "why": "ref no en índice", "missing": missing})
            continue
        langs = {}
        for wl in ("en", "fr", "it", "pt"):
            langs[wl] = "".join(idx[k][wl] for k in keys)
        creadas.append({"es_id": t["id"], "title": t["title"], "langs": langs})

    with open("/tmp/lecturas_creadas.json", "w", encoding="utf-8") as fh:
        json.dump(creadas, fh, ensure_ascii=False)
    with open("/tmp/lecturas_skip.json", "w", encoding="utf-8") as fh:
        json.dump(skip, fh, ensure_ascii=False)

    print(f"CASADAS: {len(creadas)} / {len(todo)} SKIP: {len(skip)}")
    print("motivos skip:", dict(Counter(s["why"] for s in skip)))
    # Sample of missing references by book, to help extend the aliases/range.
    missing = Counter()
    for s in skip:
        for k in s.get("missing", []):
            missing[k.split("|")[0]] += 1
    print("libros con más misses:", dict(missing.most_common(12)))
if __name__ == "__main__":
main()
+430
View File
@@ -0,0 +1,430 @@
#!/usr/bin/env python3
"""TTS con MiniMax (clonación de voz + síntesis de calidad). Issue #76.
Credenciales en /home/rafa/Feadulta/minimax.txt:
- la API key (línea que empieza por 'sk-api-')
- el GroupId (línea 'GroupId=...' o 'group_id ...' o un número suelto)
Subcomandos:
clone <audio.wav> <voice_id> sube y clona (voice_id: >=8 chars, letras+números)
carta <post_id> <voice_id> [model] [nombre] locuta una carta entera
text "<texto>" <voice_id> [model] [nombre] locuta texto suelto
models: speech-2.8-turbo (barato) | speech-2.8-hd (calidad)
"""
import html
import json
import os
import re
import subprocess
import sys
from pathlib import Path
import requests
CRED = "/home/rafa/Feadulta/minimax.txt"
BASE = "https://api.minimax.io/v1"
OUT = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts-samples"
CONTAINER = "wordpress-web"
def creds():
    """Read the MiniMax API key and GroupId from the CRED file.

    Lines starting with ``sk-`` are API keys (the last one wins).  A line
    mentioning ``groupid``/``group_id`` provides the group id after the first
    ``=``, ``:`` or whitespace separator; a line made only of digits is also
    taken as the group id.  A ``GroupId`` line without a value is ignored
    instead of raising IndexError.

    Returns:
        ``(key, gid)``; either element is None when not found.
    """
    key = gid = None
    with open(CRED) as fh:
        for ln in fh:
            ln = ln.strip()
            if not ln:
                continue
            if ln.startswith("sk-"):
                key = ln  # keep the last key in the file
            elif "groupid" in ln.lower() or "group_id" in ln.lower():
                parts = re.split(r"[=:\s]+", ln, maxsplit=1)
                if len(parts) > 1 and parts[1].strip():
                    gid = parts[1].strip()
            elif ln.isdigit():
                gid = ln
    return key, gid
KEY, GID = creds()
H_JSON = {"Authorization": f"Bearer {KEY}", "Content-Type": "application/json"}
def _q(url):
    """Append the ``GroupId`` query parameter to *url* when a group id is configured."""
    if not GID:
        return url
    return f"{url}?GroupId={GID}"
def upload(path, purpose="voice_clone"):
    """Upload the file at *path* to MiniMax and return its ``file_id``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the request finishes.  Exits the program with the (truncated) JSON
    response when no file id comes back.
    """
    with open(path, "rb") as fh:
        r = requests.post(_q(f"{BASE}/files/upload"),
                          headers={"Authorization": f"Bearer {KEY}"},
                          data={"purpose": purpose},
                          files={"file": fh})
    j = r.json()
    fid = (j.get("file") or {}).get("file_id")
    if not fid:
        sys.exit(f"upload falló: {json.dumps(j)[:400]}")
    print(f" file_id={fid}")
    return fid
def clone(audio, voice_id):
    """Upload *audio* and request a voice clone registered as *voice_id*; print the API reply."""
    print(f"Subiendo {audio}", flush=True)
    file_id = upload(audio, "voice_clone")
    print(f"Clonando como voice_id={voice_id}", flush=True)
    payload = {"file_id": file_id, "voice_id": voice_id, "model": "speech-2.8-hd"}
    response = requests.post(_q(f"{BASE}/voice_clone"), headers=H_JSON, json=payload)
    print(json.dumps(response.json(), ensure_ascii=False)[:500])
def get_post_text(pid):
    """Return ``(title, plain_text)`` for WordPress post *pid*.

    The post is dumped to JSON by a PHP helper inside the container and copied
    out with ``docker cp``.  The HTML is flattened: block closers become line
    breaks, tags and ``[shortcodes]`` are removed, entities are unescaped and
    whitespace is collapsed.  Everything after the author signature is cut.

    The JSON file is read as UTF-8 inside a ``with`` block so the handle is
    closed and decoding does not depend on the locale.
    """
    subprocess.run(["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php", "get", str(pid)],
                   check=True, capture_output=True)
    subprocess.run(["docker", "cp", f"{CONTAINER}:/tmp/fea_es.json", "/tmp/fea_es.json"], check=True)
    with open("/tmp/fea_es.json", encoding="utf-8") as fh:
        d = json.load(fh)
    raw = re.sub(r"(?i)</p>|<br\s*/?>|</h[1-6]>", "\n", d["content"])
    raw = re.sub(r"<[^>]+>", "", raw)
    raw = re.sub(r"\[[^\]]+\]", "", raw)
    raw = html.unescape(raw)
    # Keep only lines with more than one non-blank character.
    paras = [re.sub(r"\s+", " ", p).strip() for p in raw.split("\n") if len(p.strip()) > 1]
    paras = trim_after_author_signature(paras)
    return d["title"], "\n\n".join(paras)
def is_author_signature(text):
    """Heuristically decide whether *text* is the author's closing signature.

    A signature is a short line (at most 80 characters, 2 to 6 words) with no
    digits and none of ':', ';', 'http', 'www.' or '@', in which every word
    is either a lowercase name particle (de, del, la, las, los, y, e) or
    starts with an uppercase letter.
    """
    candidate = text.strip()
    if not candidate:
        return False
    if len(candidate) > 80:
        return False
    if any(ch.isdigit() for ch in candidate):
        return False
    forbidden = (":", ";", "http", "www.", "@")
    if any(mark in candidate for mark in forbidden):
        return False
    tokens = candidate.split()
    if not 2 <= len(tokens) <= 6:
        return False
    particles = {"de", "del", "la", "las", "los", "y", "e"}

    def looks_like_name_part(token):
        # Strip punctuation around the word before inspecting it.
        bare = re.sub(r"[^\wÁÉÍÓÚÜÑáéíóúüñ-]", "", token)
        if not bare:
            return False
        return bare.lower() in particles or bare[0].isupper()

    return all(looks_like_name_part(token) for token in tokens)
def trim_after_author_signature(paras):
    """Return the paragraphs up to and including the first author signature.

    When no paragraph looks like a signature, all paragraphs are returned.
    """
    paragraphs = list(paras)
    for position, paragraph in enumerate(paragraphs):
        if is_author_signature(paragraph):
            return paragraphs[:position + 1]
    return paragraphs
def _sent_pause(n_words, short, long_):
"""Pausa (s) tras un punto, proporcional a la longitud de la frase que cierra:
frase corta → pausa corta; frase larga → el narrador 'respira' más."""
if n_words < short:
return os.environ.get("FEA_PAUSE_SHORT", "0.1")
if n_words <= long_:
return os.environ.get("FEA_PAUSE_MID", "0.2")
return os.environ.get("FEA_PAUSE_LONG", "0.3")
def ensure_terminal_punctuation(block):
    """Return *block* stripped, with a final '.' added when it lacks closing punctuation.

    Blocks already ending in one of ``. ! ? … : ;`` are left as they are, and
    an empty or whitespace-only block yields ''.
    """
    trimmed = block.strip()
    if trimmed and not trimmed.endswith(tuple(".!?…:;")):
        trimmed += "."
    return trimmed
def expand_bible_abbreviations(text):
    """Expand Spanish Bible-book abbreviations that appear as citations.

    Examples:
        - Mt 5, 1-12 -> Mateo 5, 1-12
        - Lc 2, 10   -> Lucas 2, 10
        - Jn 3, 16   -> Juan 3, 16
        - Mc 1, 14   -> Marcos 1, 14

    Only abbreviations followed by whitespace and a digit (chapter/verse)
    are expanded, so non-biblical uses of the same letters are left alone.
    An optional trailing dot after the abbreviation ("Lc. 2") is consumed.
    Matching is case-sensitive.
    """
    books = [
        ("1Cor", "Primera carta a los Corintios"),
        ("2Cor", "Segunda carta a los Corintios"),
        ("1Tes", "Primera carta a los Tesalonicenses"),
        ("2Tes", "Segunda carta a los Tesalonicenses"),
        ("1Tim", "Primera carta a Timoteo"),
        ("2Tim", "Segunda carta a Timoteo"),
        ("1Pe", "Primera carta de Pedro"),
        ("2Pe", "Segunda carta de Pedro"),
        ("1Jn", "Primera carta de Juan"),
        ("2Jn", "Segunda carta de Juan"),
        ("3Jn", "Tercera carta de Juan"),
        ("1Mac", "Primer libro de los Macabeos"),
        ("2Mac", "Segundo libro de los Macabeos"),
        ("1Sam", "Primer libro de Samuel"),
        ("2Sam", "Segundo libro de Samuel"),
        ("1Sm", "Primer libro de Samuel"),
        ("2Sm", "Segundo libro de Samuel"),
        ("1Re", "Primer libro de los Reyes"),
        ("2Re", "Segundo libro de los Reyes"),
        ("1Cr", "Primer libro de las Crónicas"),
        ("2Cr", "Segundo libro de las Crónicas"),
        ("Hch", "Hechos de los Apóstoles"),
        ("Rom", "Romanos"),
        ("Rm", "Romanos"),
        ("Gal", "Gálatas"),
        ("Gál", "Gálatas"),
        ("Ef", "Efesios"),
        ("Flp", "Filipenses"),
        ("Fil", "Filipenses"),
        ("Col", "Colosenses"),
        ("Tit", "Tito"),
        ("Flm", "Filemón"),
        ("Heb", "Hebreos"),
        ("Sant", "Santiago"),
        ("St", "Santiago"),
        ("Sto", "Santiago"),
        ("Jud", "Judas"),
        ("Ap", "Apocalipsis"),
        ("Mt", "Mateo"),
        ("Mc", "Marcos"),
        ("Lc", "Lucas"),
        ("Jn", "Juan"),
        ("Gn", "Génesis"),
        ("Gen", "Génesis"),
        ("Ex", "Éxodo"),
        ("Lv", "Levítico"),
        ("Lev", "Levítico"),
        ("Nm", "Números"),
        ("Num", "Números"),
        ("Dt", "Deuteronomio"),
        ("Jos", "Josué"),
        ("Jue", "Jueces"),
        ("Rut", "Rut"),
        ("Esd", "Esdras"),
        ("Neh", "Nehemías"),
        ("Tob", "Tobías"),
        ("Jdt", "Judit"),
        ("Est", "Ester"),
        ("Job", "Job"),
        ("Sal", "Salmos"),
        ("Prov", "Proverbios"),
        ("Cant", "Cantar de los Cantares"),
        ("Sab", "Sabiduría"),
        ("Eclo", "Eclesiástico"),
        ("Sir", "Eclesiástico"),
        # "Ecl" is Ecclesiastes (Eclesiastés); Sirach (Eclesiástico) is
        # abbreviated "Eclo"/"Sir". The two are different books.
        ("Ecl", "Eclesiastés"),
        ("Isa", "Isaías"),
        ("Is", "Isaías"),
        ("Jer", "Jeremías"),
        ("Jr", "Jeremías"),
        ("Lam", "Lamentaciones"),
        ("Bar", "Baruc"),
        ("Eze", "Ezequiel"),
        ("Ez", "Ezequiel"),
        ("Dan", "Daniel"),
        ("Dn", "Daniel"),
        ("Os", "Oseas"),
        ("Joel", "Joel"),
        ("Am", "Amós"),
        ("Abd", "Abdías"),
        ("Jon", "Jonás"),
        ("Miq", "Miqueas"),
        ("Nah", "Nahúm"),
        ("Hab", "Habacuc"),
        ("Sof", "Sofonías"),
        ("Ag", "Ageo"),
        ("Zac", "Zacarías"),
        ("Mal", "Malaquías"),
    ]
    for short, full in books:
        text = re.sub(rf"\b{re.escape(short)}\.?(?=\s+\d)", full, text)

    # Numbered letters written with a space between the ordinal and the book
    # ("1 Co 13"); these use abbreviations not present in the table above.
    spaced_books = [
        (r"1\s+Co", "Primera carta a los Corintios"),
        (r"2\s+Co", "Segunda carta a los Corintios"),
        (r"1\s+Ts", "Primera carta a los Tesalonicenses"),
        (r"2\s+Ts", "Segunda carta a los Tesalonicenses"),
        (r"1\s+P", "Primera carta de Pedro"),
        (r"2\s+P", "Segunda carta de Pedro"),
    ]
    for pattern, full in spaced_books:
        text = re.sub(rf"\b{pattern}\.?(?=\s+\d)", full, text)
    return text
def add_pauses(text, para=None):
    """Insert MiniMax ``<#seconds#>`` pause markers sized by sentence length.

    - After each sentence end (``. ! ? …``) a pause is inserted whose length
      depends on the word count of that sentence (see ``_sent_pause``;
      thresholds come from FEA_SHORT_WORDS / FEA_LONG_WORDS).
    - Paragraphs/titles without final punctuation get a full stop so that
      MiniMax closes the intonation.
    - Paragraphs are joined with a ``<#para#>`` pause (FEA_PARA_PAUSE,
      default "0.7") unless *para* is given.

    Punctuation immediately followed by a digit is NOT treated as a sentence
    end, so figures such as "10.000" or "12.30" are not split by a pause.
    """
    para = para if para is not None else os.environ.get("FEA_PARA_PAUSE", "0.7")
    short = int(os.environ.get("FEA_SHORT_WORDS", "6"))
    long_ = int(os.environ.get("FEA_LONG_WORDS", "12"))
    text = expand_bible_abbreviations(text)
    out = []
    for p in text.split("\n\n"):
        p = ensure_terminal_punctuation(p)
        if not p:
            continue
        # Split on runs of sentence-ending signs, keeping the signs. The
        # negative lookahead rejects a run that is followed by a digit (or
        # that could only match by cutting a longer run short).
        parts = re.split(r"([.!?…]+(?![.!?…\d]))", p)
        pieces = []
        for i in range(0, len(parts), 2):
            frase = parts[i]
            sign = parts[i + 1] if i + 1 < len(parts) else ""
            pieces.append(frase + sign)
            if sign and frase.strip():
                pieces.append(f" <#{_sent_pause(len(frase.split()), short, long_)}#> ")
        rebuilt = "".join(pieces)
        # Drop the trailing sentence pause: the paragraph separator adds its own.
        rebuilt = re.sub(r"\s*<#[\d.]+#>\s*$", "", rebuilt)
        out.append(rebuilt.strip())
    return f" <#{para}#> ".join(out)
# MiniMax limita a 10.000 car por petición; dejamos margen porque las pausas
# <#seg#> y el language_boost también cuentan.
CHAR_LIMIT = 8000
def _split_for_tts(text, limit=CHAR_LIMIT):
"""Trocea respetando las pausas <#..#> (frase/párrafo). Fallback por palabras
si una frase suelta supera el límite."""
if len(text) <= limit:
return [text]
parts = re.split(r"(\s*<#[\d.]+#>\s*)", text)
chunks, cur = [], ""
for seg in parts:
if not seg:
continue
if len(cur) + len(seg) <= limit:
cur += seg
continue
if cur.strip():
chunks.append(cur.strip())
if len(seg) > limit: # frase gigantesca: parte por palabras
cur = ""
for w in seg.split(" "):
if len(cur) + len(w) + 1 <= limit:
cur += (" " if cur else "") + w
else:
if cur:
chunks.append(cur)
cur = w
else:
cur = seg
if cur.strip():
chunks.append(cur.strip())
return chunks
def _synth_chunk(text, voice_id, model):
    """Send one ``t2a_v2`` request.

    Returns a tuple ``(audio_bytes | None, rc, usage_chars)``: on success the
    decoded audio, ``0`` and the ``usage_characters`` reported by the API; on
    an API-level failure ``None``, the ``base_resp.status_code`` (may be
    ``None``) and ``0``.

    Transport errors and non-JSON responses still raise, as before. The
    request now carries a timeout (10 s to connect; read timeout from
    FEA_T2A_TIMEOUT, default 600 s) so a stalled connection cannot hang the
    run forever.
    """
    body = {
        "model": model,
        "text": text,
        "voice_setting": {"voice_id": voice_id, "speed": 1.0, "vol": 1.0, "pitch": 0},
        "audio_setting": {"sample_rate": 32000, "bitrate": 128000, "format": "mp3", "channel": 1},
        "language_boost": "Spanish",
    }
    read_timeout = float(os.environ.get("FEA_T2A_TIMEOUT", "600"))
    r = requests.post(
        f"{BASE}/t2a_v2",
        headers=H_JSON,
        json=body,
        timeout=(10, read_timeout),
    )
    j = r.json()
    audio_hex = (j.get("data") or {}).get("audio")
    if not audio_hex:
        rc = (j.get("base_resp") or {}).get("status_code")
        print(f"t2a falló: {json.dumps(j, ensure_ascii=False)[:300]}")
        return None, rc, 0
    usage = (j.get("extra_info") or {}).get("usage_characters", 0)
    # The API returns the audio hex-encoded.
    return bytes.fromhex(audio_hex), 0, usage
def t2a(text, voice_id, model, name):
    """Synthesize *text* to ``OUT/<name>.mp3``.

    The text is split with ``_split_for_tts``; several chunks are synthesized
    one by one (sleeping FEA_CHUNK_PAUSE seconds between requests) and
    concatenated with ffmpeg. The result is then finished with low-level brown
    noise plus a fade in/out.

    Returns ``0`` on success, the API status code when a synthesis request
    fails, or ``-1`` when an ffmpeg step fails. ffmpeg return codes are now
    checked: a failed concat or finishing pass is reported instead of printing
    "OK", and the raw audio is kept when the finishing pass fails.
    """
    import os as _os
    import subprocess as sp
    import time as _t

    chunks = _split_for_tts(text)
    print(f"Sintetizando {len(text)} car con {model} / {voice_id} "
          f"({len(chunks)} petición/es)…", flush=True)
    raw = OUT / f"{name}.raw.mp3"
    if len(chunks) == 1:
        audio, rc, _ = _synth_chunk(chunks[0], voice_id, model)
        if audio is None:
            return rc
        raw.write_bytes(audio)
    else:
        parts = []
        try:
            for k, ch in enumerate(chunks):
                if k > 0:
                    # Respect MiniMax's TPM limit.
                    _t.sleep(int(_os.environ.get("FEA_CHUNK_PAUSE", "35")))
                print(f" trozo {k + 1}/{len(chunks)} ({len(ch)} car)…", flush=True)
                audio, rc, _ = _synth_chunk(ch, voice_id, model)
                if audio is None:
                    return rc
                p = OUT / f"{name}.part{k}.mp3"
                p.write_bytes(audio)
                parts.append(p)
            args = ["ffmpeg", "-y"]
            for p in parts:
                args += ["-i", str(p)]
            n = len(parts)
            filt = "".join(f"[{k}:a]" for k in range(n)) + f"concat=n={n}:v=0:a=1[a]"
            args += ["-filter_complex", filt, "-map", "[a]", "-b:a", "128k", str(raw)]
            joined = sp.run(args, capture_output=True, text=True, errors="replace")
        finally:
            # Part files are temporary whether we succeed, fail or raise.
            for p in parts:
                p.unlink(missing_ok=True)
        if joined.returncode != 0 or not raw.exists():
            print(f"ffmpeg (concat) falló: {joined.stderr[-300:]}")
            return -1

    # Finishing pass: brown comfort noise + fade in/out (removes the final "bump").
    probe = sp.run(["ffprobe", "-v", "error", "-show_entries", "format=duration",
                    "-of", "default=noprint_wrappers=1:nokey=1", str(raw)],
                   capture_output=True, text=True, errors="replace")
    try:
        dur = float(probe.stdout.strip() or "0")
    except ValueError:
        # ffprobe printed something non-numeric (e.g. "N/A").
        dur = 0.0
    st = max(0.0, dur - 0.5)
    mp3 = OUT / f"{name}.mp3"
    finished = sp.run(["ffmpeg", "-y", "-i", str(raw), "-filter_complex",
                       "anoisesrc=color=brown:amplitude=0.004:sample_rate=32000[n];"
                       "[n]highpass=f=120,lowpass=f=3800[nf];"
                       "[0:a][nf]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[m];"
                       f"[m]afade=t=in:st=0:d=0.08,afade=t=out:st={st:.2f}:d=0.5[a]",
                       "-map", "[a]", "-b:a", "128k", str(mp3)],
                      capture_output=True, text=True, errors="replace")
    if finished.returncode != 0:
        # Keep the raw file: the synthesis was already paid for.
        print(f"ffmpeg (acabado) falló; conservo {raw}: {finished.stderr[-300:]}")
        return -1
    raw.unlink(missing_ok=True)
    print(f"OK -> {mp3} ({dur:.0f}s)")
    return 0
def main():
    """Command-line entry point.

    Sub-commands (positional arguments):
        clone <arg> <arg>                      -> ``clone(...)``
        carta <post_id> <voice_id> [model] [name]
        text  <text> <voice_id> [model] [name]

    Prints the module docstring and exits when the command is unknown or its
    required arguments are missing. Exits with status 1 when the synthesis
    does not report success.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)
    cmd = sys.argv[1]
    if not KEY:
        sys.exit("No encuentro la API key en " + CRED)
    # Every known command needs two positional arguments after its name.
    if cmd in ("clone", "carta", "text") and len(sys.argv) < 4:
        sys.exit(__doc__)

    if cmd == "clone":
        clone(sys.argv[2], sys.argv[3])
        return

    if cmd == "carta":
        pid, voice_id = sys.argv[2], sys.argv[3]
        model = sys.argv[4] if len(sys.argv) > 4 else "speech-2.8-turbo"
        title, text = get_post_text(int(pid))
        name = sys.argv[5] if len(sys.argv) > 5 else f"carta-minimax-{pid}-{model.split('-')[-1]}"
        text = add_pauses(text)
        print(f"Post #{pid}: «{title}» ({len(text)} car con pausas)")
        rc = t2a(text, voice_id, model, name)
    elif cmd == "text":
        model = sys.argv[4] if len(sys.argv) > 4 else "speech-2.8-turbo"
        name = sys.argv[5] if len(sys.argv) > 5 else "minimax-text"
        rc = t2a(sys.argv[2], sys.argv[3], model, name)
    else:
        sys.exit(__doc__)

    # t2a returns 0 on success; anything else (including None) is a failure.
    if rc != 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
+125
View File
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""Pre-traduce a EN con Haiku los posts del gap que Gemma AÚN no ha alcanzado.
Crea el post EN + enlace Polylang (reutiliza fea_translate_helper.php, igual que
Gemma) ANTES de que Gemma llegue. Cuando Gemma llega, ve la traducción EN ya
enlazada en Polylang y la salta (translate_post.py:233), haciendo solo FR/IT/PT.
Así el EN se hace UNA vez y bien, sin el reprocesado posterior.
Coordinación: recorre los posts en el MISMO orden que Gemma, localiza por dónde
va (último :en en el state) y arranca `--margin` posts por delante para no
colisionar con el que Gemma está procesando ahora. Haiku (API) es mucho más
rápido que Gemma local, así que se aleja y nunca la alcanza.
Uso:
pretranslate_en_haiku.py # PLAN: muestra arranque y pendientes
pretranslate_en_haiku.py --apply # crea los EN
opciones: --margin N (def 2), --limit N
"""
import argparse
import json
import os
import sys
import time
HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, HERE)
import translate_post as tp # read_post, translation_exists, create_translation, carta_article_ids
from translate_haiku import translate # Haiku
# Carta post IDs, in the same order as translate_gap.sh.
CARTAS = "45018 44997 44975 44230 44229 44228 44090 44089 44088 44087 44086 44085 44084 44083 42590".split()
def read_state(attempts=6, delay=0.5):
    """Read the state file (``tp.STATE_FILE``), retrying while it is unusable.

    The file is rewritten live by the other process, so a read may hit a
    missing or half-written file. Each attempt that finds no non-empty
    ``"done"`` entry is followed by a *delay*-second sleep.

    Returns the parsed state dict. Exits the program if no attempt yields a
    state with content in ``"done"``.
    """
    for _ in range(attempts):
        try:
            # Context manager so the handle is closed on every attempt.
            with open(tp.STATE_FILE) as fh:
                d = json.loads(fh.read())
            if d.get("done"):
                return d
        except (json.JSONDecodeError, FileNotFoundError):
            pass
        time.sleep(delay)
    sys.exit("No pude leer el state de Gemma con contenido; aborto por seguridad.")
def build_order():
    """Return the global list of post IDs in processing order.

    For every carta in ``CARTAS``: the carta ID itself, followed by the IDs
    returned by ``tp.carta_article_ids`` for that carta.
    """
    order = []
    for carta_id in map(int, CARTAS):
        order += [carta_id, *tp.carta_article_ids(carta_id)]
    return order
def main():
    """Plan or create the EN pre-translations ahead of the other translator.

    Locates the front (last index in the processing order whose ``<id>:en``
    key is in the state's ``done``), starts ``--margin`` posts after it and,
    with ``--apply``, creates an EN draft translation for each pending post.
    Without ``--apply`` it only prints the plan.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--apply", action="store_true")
    parser.add_argument("--margin", type=int, default=2)
    parser.add_argument("--limit", type=int, default=0)
    opts = parser.parse_args()

    done = read_state()["done"]
    order = build_order()

    # Front = last index whose :en is already done.
    front = max(
        (idx for idx, pid in enumerate(order) if f"{pid}:en" in done),
        default=-1,
    )
    if front < 0:
        sys.exit("No encuentro el frente de Gemma en la lista; aborto.")

    start = front + 1 + opts.margin
    work = order[start:]
    if opts.limit:
        work = work[:opts.limit]

    start_label = order[start] if start < len(order) else ""
    print(f"Gemma va por #{order[front]} (índice {front}/{len(order)-1}).")
    print(f"Margen {opts.margin} → arranco en índice {start} (#{start_label}).")
    print(f"Posts pendientes a pre-traducir: {len(work)}")
    if work:
        print(f" primeros: {work[:5]}")
        print(f" últimos: {work[-5:]}")

    if not opts.apply:
        print("\nMODO PLAN (no se crea nada). Añade --apply para ejecutar.")
        return

    tokens_in = tokens_out = 0.0
    created = skipped = 0
    for pid in work:
        if tp.translation_exists(pid, "en"):
            print(f"#{pid}: EN ya existe (Gemma se adelantó) — salto")
            skipped += 1
            continue

        try:
            src = tp.read_post(pid)
        except Exception as exc:  # noqa: BLE001
            print(f"#{pid}: no pude leer ({exc}) — salto")
            continue

        lang = src.get("lang")
        if lang and lang != "es":
            continue

        body, body_usage = translate(src["content"], "en")
        title, title_usage = translate(src["title"], "en", is_title=True)
        tokens_in += body_usage.input_tokens + title_usage.input_tokens
        tokens_out += body_usage.output_tokens + title_usage.output_tokens

        # Re-check right before creating (race window with the other process).
        if tp.translation_exists(pid, "en"):
            print(f"#{pid}: EN apareció mientras traducía — salto")
            skipped += 1
            continue

        new_id = tp.create_translation(pid, "en", title, body, "draft")
        created += 1
        print(f"#{pid} → EN #{new_id} «{title[:45]}»")

    # Cost estimate: 1.0 per million input tokens, 5.0 per million output tokens.
    cost = tokens_in / 1e6 * 1.0 + tokens_out / 1e6 * 5.0
    print(f"\nCreados: {created} Saltados: {skipped}")
    print(f"Tokens in={int(tokens_in)} out={int(tokens_out)} coste=${cost:.4f}")


if __name__ == "__main__":
    main()
+44
View File
@@ -0,0 +1,44 @@
<?php
/**
 * Converts the `...?p=<id>` links in a carta (every post of its Polylang
 * group) into the slug-based "pretty" permalink.
 *
 * Needed when the carta links were fixed while the articles were still DRAFT:
 * get_permalink() returned `?p=<id>`, which the front-page parser
 * (fea_url_to_post_id, resolves by slug) does NOT recognise, so the articles
 * did not show up in their sections. Once the posts are published,
 * get_permalink() yields the slug. Data only; mu-plugins are not touched.
 * Dry-run by default.
 *
 * Usage: CARTA=<es_id> php prettify_carta_links.php           (dry-run)
 *        APPLY=1 CARTA=<es_id> php prettify_carta_links.php
 *
 * NOTE(review): the pattern matches ANY href containing `?p=<id>`, including
 * links to other sites; confirm the cartas only carry internal `?p=` links.
 */
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
if (!function_exists('pll_get_post_translations')) { fwrite(STDERR, "Polylang no disponible\n"); exit(2); }

$APPLY = getenv('APPLY') === '1';
$CARTA = (int)(getenv('CARTA') ?: 0);
if (!$CARTA) { fwrite(STDERR, "Falta CARTA=<es_id>\n"); exit(1); }
$BAK = "/tmp/prettify_carta_bak"; if ($APPLY) @mkdir($BAK, 0777, true);

$tot = 0;
foreach (pll_get_post_translations($CARTA) as $lang => $pid) {
    $post = get_post($pid); if (!$post) continue;
    $chg = 0;
    $new = preg_replace_callback('~href="([^"]*[?&]p=(\d+)[^"]*)"~i', function($m) use (&$chg) {
        $id = (int) $m[2];
        $url = get_permalink($id);
        if (!$url || strpos($url, '?p=') !== false) return $m[0]; // still ugly -> leave as is
        $chg++;
        return 'href="' . esc_url($url) . '"';
    }, $post->post_content);
    // preg_replace_callback() returns null on a PCRE error; never save that.
    if ($new === null) { fwrite(STDERR, "#$pid: error PCRE al procesar el contenido; se deja intacto\n"); continue; }
    echo sprintf("#%d [%s] «%s» — %d enlaces ?p= → slug\n", $pid, $lang, mb_substr($post->post_title,0,28), $chg);
    if ($APPLY && $chg) {
        // Backup of the original content before overwriting it.
        file_put_contents("$BAK/$pid.html", $post->post_content);
        // wp_update_post() expects slashed data; without wp_slash() every
        // backslash in the content would be stripped on save.
        $r = wp_update_post(wp_slash(['ID'=>$pid, 'post_content'=>$new]), true);
        if (is_wp_error($r)) { fwrite(STDERR, "ERR #$pid: " . $r->get_error_message() . "\n"); continue; }
        clean_post_cache($pid);
    }
    $tot += $chg;
}
// Invalidate the front-page section transients so the changes are picked up.
if ($APPLY) {
    global $wpdb;
    $wpdb->query("DELETE FROM {$wpdb->options} WHERE option_name LIKE '_transient_fea_carta_sections_%'");
    $wpdb->query("DELETE FROM {$wpdb->options} WHERE option_name LIKE '_transient_timeout_fea_carta_sections_%'");
}
echo ($APPLY ? "APLICADO" : "DRY-RUN") . ": $tot enlaces.\n";
+26
View File
@@ -0,0 +1,26 @@
<?php
// Publishes a carta, its ES articles (posts whose `_carta_id` meta equals the
// carta ID) and ALL of their Polylang translations. Posts dated in the future
// are re-dated to "now" so they go live immediately.
// Usage: CARTA=<es_id> php <this file>
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
if (!function_exists('pll_get_post_translations')) { fwrite(STDERR, "Polylang no disponible\n"); exit(2); }
$CARTA = (int)(getenv('CARTA') ?: 46956);

// Target IDs: carta + its ES articles, and ALL their Polylang translations.
$es_ids = [$CARTA];
$arts = get_posts(['post_type'=>'post','numberposts'=>-1,'post_status'=>'any',
    'fields'=>'ids','meta_key'=>'_carta_id','meta_value'=>$CARTA]);
$es_ids = array_merge($es_ids, $arts);
$all = [];
foreach ($es_ids as $id) {
    // Seed with the post itself: a post with no Polylang language yields an
    // empty translation list and would otherwise never be published.
    $all[(int) $id] = true;
    foreach (pll_get_post_translations($id) as $tid) $all[(int) $tid] = true;
}
$all = array_keys($all);

$now = current_time('mysql'); $now_gmt = current_time('mysql', true);
$pub=0; $skip=0; $fixed=0; $other=0;
foreach ($all as $id) {
    $p = get_post($id); if(!$p) continue;
    if ($p->post_status === 'publish') { $skip++; continue; }
    // Same rule the 'any' query above applies to the ES articles: trashed and
    // auto-draft posts are not candidates for publication.
    if (in_array($p->post_status, ['trash', 'auto-draft'], true)) { $other++; continue; }
    $data = ['ID'=>$id, 'post_status'=>'publish'];
    $future = strtotime($p->post_date_gmt) > strtotime($now_gmt); // future date -> now
    if ($future) { $data['post_date']=$now; $data['post_date_gmt']=$now_gmt; }
    $r = wp_update_post($data, true);
    if (is_wp_error($r)) { echo "ERR #$id: ".$r->get_error_message()."\n"; continue; }
    clean_post_cache($id); $pub++;
    if ($future) $fixed++; // counted only once the update has succeeded
}
echo "Publicados: $pub | ya estaban: $skip | fechas futuras corregidas: $fixed"
    . ($other ? " | omitidos (papelera/auto-draft): $other" : "") . "\n";
+14
View File
@@ -0,0 +1,14 @@
<?php
// #4: removes the Multimedia category (1649 + its translations) from the
// "lectura" posts, i.e. published posts whose title ends in a chapter/verse
// citation such as "5, 1-12" (optionally followed by a full stop).
$multi=[1649];
foreach(["en","fr","it","pt"] as $L){
    $t = function_exists('pll_get_term') ? pll_get_term(1649,$L) : 0;
    if($t)$multi[]=(int)$t;
}
global $wpdb;
// `[.]?` instead of `\\.?`: inside this double-quoted PHP string the escape
// reached MySQL as `\.`, whose string parser drops the backslash, turning the
// optional literal dot into "any character".
$rows=$wpdb->get_results("SELECT ID FROM {$wpdb->posts} WHERE post_type='post' AND post_status='publish' AND post_title REGEXP '[0-9]+, *[0-9]+(-[0-9]+)?[.]?$'");
// Fallback category for posts that would otherwise be left with none.
$feadulta=(int)$wpdb->get_var("SELECT term_id FROM {$wpdb->terms} WHERE name='Feadulta' LIMIT 1");
if(!$feadulta) $feadulta=(int)get_option('default_category'); // term 0 would leave the post uncategorised
$changed=0;
foreach($rows as $r){
    $cats=wp_get_post_categories($r->ID);
    $new=array_values(array_diff($cats,$multi));
    if(count($new)!==count($cats)){
        if(!$new)$new=[$feadulta];
        wp_set_post_categories($r->ID,$new);
        $changed++;
    }
}
echo "lecturas con Multimedia eliminada: $changed\n";
+70
View File
@@ -0,0 +1,70 @@
<?php
/**
* reasign_cats.php — #136: reasigna las categorías de los posts traducidos (en/fr/it/pt)
* a la versión Polylang de su idioma. Si la categoría no tiene traducción, crea un
* término espejo (mismo nombre) en ese idioma y lo asocia. Idempotente y resumible
* (marca _cats_reasignadas=1).
*
* Ejecutar: docker exec wordpress-web wp eval-file /tmp/reasign_cats.php [LANG] [LIMIT] --allow-root
* sin args: procesa los 4 idiomas. Con LANG (en/fr/it/pt) y LIMIT acota.
*/
// `wp eval-file` hands positional arguments to the script in $args (0-based),
// not in $argv, so LANG/LIMIT were silently ignored when run as documented.
// Fall back to $argv (minus the script name) for a plain `php file LANG LIMIT`.
$cli_args = (isset($args) && is_array($args)) ? array_values($args) : [];
if (!$cli_args && isset($argv) && is_array($argv)) $cli_args = array_slice($argv, 1);
$only_lang = (isset($cli_args[0]) && in_array($cli_args[0], ['en','fr','it','pt'], true)) ? $cli_args[0] : null;
$limit = isset($cli_args[1]) ? (int) $cli_args[1] : 0;
$mirror_cache = []; // "cat_es|lang" => term_id
/**
 * Returns the ID of category $c in language $lang, creating a "mirror" term
 * (same name, slug `<name>-<lang>-<id>`) linked through Polylang when no
 * translation exists. Results are memoised in $cache under "<id>|<lang>".
 *
 * Falls back to $c itself when the source term cannot be loaded or the
 * mirror cannot be created/recovered.
 *
 * @param int    $c     Source category term ID.
 * @param string $lang  Target language code.
 * @param array  $cache Memo table, passed by reference.
 * @return int Term ID to use for $lang.
 */
function translate_or_mirror($c, $lang, &$cache) {
    $key = $c . '|' . $lang;
    if (isset($cache[$key])) return $cache[$key];

    $t = (int) pll_get_term($c, $lang);
    if ($t) return $cache[$key] = $t;

    $term = get_term($c, 'category');
    if (!$term || is_wp_error($term)) { return $cache[$key] = (int) $c; }

    $slug = sanitize_title($term->name) . '-' . $lang . '-' . $c;
    $res = wp_insert_term($term->name, 'category', ['slug' => $slug]);
    if (is_wp_error($res)) {
        // Only a `term_exists` error carries the existing term ID as data;
        // other errors carry unrelated data that must not be cast to an ID.
        $ex = ($res->get_error_code() === 'term_exists') ? $res->get_error_data() : null;
        $t = is_array($ex) ? (int) ($ex['term_id'] ?? 0) : (int) $ex;
    } else {
        $t = (int) $res['term_id'];
    }

    // Never re-label the source term itself as the translation.
    if (!$t || $t === (int) $c) { return $cache[$key] = (int) $c; }

    pll_set_term_language($t, $lang);
    $tr = pll_get_term_translations($c);
    $tr[$lang] = $t;
    pll_save_term_translations($tr);
    return $cache[$key] = $t;
}
// Main pass: for each target language, take the posts not yet marked with
// `_cats_reasignadas` and give them the categories of their language.
$langs = $only_lang ? [$only_lang] : ['en','fr','it','pt'];
$done = 0; $changed = 0;
foreach ($langs as $lang) {
    // Named $query (not $args) so WP-CLI's positional-argument variable is
    // not overwritten.
    $query = ['lang'=>$lang,'post_type'=>'post','post_status'=>['publish','draft'],
        'fields'=>'ids','posts_per_page'=> $limit ?: -1,'no_found_rows'=>true,
        'meta_query'=>[['key'=>'_cats_reasignadas','compare'=>'NOT EXISTS']]];
    $ids = get_posts($query);
    foreach ($ids as $pid) {
        // Source of truth = categories of the ES original (those of the
        // translated post may be incomplete). They are mapped to the post's
        // language.
        $es = pll_get_post($pid, 'es');
        $source = ($es && $es != $pid) ? wp_get_post_categories($es) : wp_get_post_categories($pid);
        $new = [];
        foreach ($source as $c) {
            $clang = pll_get_term_language($c);
            $new[] = ($clang === $lang) ? $c : translate_or_mirror($c, $lang, $mirror_cache);
        }
        $new = array_values(array_unique($new));
        // Compare as sorted lists so a mere reordering is not a change.
        $cur = wp_get_post_categories($pid);
        sort($cur); $cmp = $new; sort($cmp);
        if ($cur !== $cmp && $new) { wp_set_post_categories($pid, $new); $changed++; }
        // Mark as processed so the script is idempotent and resumable.
        update_post_meta($pid, '_cats_reasignadas', 1);
        $done++;
    }
}
echo "procesados: $done | con cambios: $changed | términos espejo en caché: " . count($mirror_cache) . "\n";
+40
View File
@@ -0,0 +1,40 @@
<?php
// Regenerates the thumbnails of the attachments whose guid is under
// /uploads/autores/joomla/: deletes the old thumbnail files and recreates them
// from the current (face-cropped) source images.
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
require_once ABSPATH . 'wp-admin/includes/image.php';

global $wpdb;
$rows = $wpdb->get_results(
    "SELECT ID FROM {$wpdb->posts}
WHERE post_type='attachment'
AND guid LIKE '%/autores/joomla/%'"
);
echo 'Attachments: ' . count($rows) . PHP_EOL;

$ok = 0; $fail = 0;
foreach ($rows as $r) {
    $aid = (int) $r->ID;
    $file = get_attached_file($aid);
    if (!$file || !file_exists($file)) { $fail++; continue; }

    // Delete the attachment's old thumbnails (every -WxH variant on record).
    $old_meta = wp_get_attachment_metadata($aid);
    if (!empty($old_meta['sizes'])) {
        $dir = dirname($file);
        foreach ($old_meta['sizes'] as $s) {
            $thumb = $dir . '/' . $s['file'];
            // Never remove the source image itself.
            if ($thumb !== $file && file_exists($thumb)) @unlink($thumb);
        }
    }

    // Regenerate.
    $meta = wp_generate_attachment_metadata($aid, $file);
    if ($meta) {
        wp_update_attachment_metadata($aid, $meta);
        $ok++;
    } else {
        $fail++;
    }
}
echo "OK: $ok, FAIL: $fail" . PHP_EOL;
+298
View File
@@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""
Regenera `clasificacion_articulos.csv` recorriendo las cartas semanales y
extrayendo los links que cada una agrupa por encabezado (Artículos, Evangelio,
Eucaristía, Multimedia, EFFA). Paridad con `wp-content/mu-plugins/fea-carta-portada.php`.
Output: post_id, post_title, categoria_propuesta, seccion_original, carta_id, carta_titulo, carta_fecha
ALCANCE (vs. CSV histórico de marzo 2026):
- Cubre las 5 secciones estándar: comentario, articulo, eucaristia, multimedia, effa.
- Sub-clasifica por posición dentro de "Evangelio y comentarios al Evangelio":
· pos 1 → `lectura` (cita del evangelio)
· pos 2 → `comentario_editorial`
· pos 3+ → `comentario`
(regla del editor, confirmada contra el CSV histórico)
- NO cubre todavía:
· `lectura` dentro de eucaristía (lecturas bíblicas — el viejo las separa, aquí van todas a `eucaristia`)
· `otro` (catch-all del catch-all)
· `noticia` (subgrupo poco usado, 12 filas en el viejo)
· Encabezados de fiestas especiales ("Domingo de Resurrección", "Navidad", "Vigilia Pascual", etc.)
Para regenerar el CSV con cobertura completa habría que ampliar el mapping en
SECTION_PATTERNS y SECTION_LABELS con reglas adicionales. El CSV histórico
existente (raíz del repo) sirve como baseline para esa cobertura granular.
Uso:
python3 regenerar_clasificacion_csv.py [--out /path/clasificacion_articulos.csv] [--diff /path/csv_marzo.csv]
Issue: rafa/feadulta#42
"""
import argparse, csv, os, re, subprocess, sys
try:
import pymysql
except ImportError:
sys.exit('pymysql requerido: pip install --user pymysql')
# Mapping section heading → category slug (must mirror fea-carta-portada.php).
# Each entry is (slug, compiled case-insensitive pattern for the heading text).
SECTION_PATTERNS = [
('comentario', re.compile(r'Evangelio\s+y\s+comentarios\s+al\s+Evangelio', re.I)),
('articulo', re.compile(r'Art[ií]culos\s+seleccionados\s+para\s+la\s+semana', re.I)),
('eucaristia', re.compile(r'Para\s+unas\s+eucarist[ií]as\s+m[áa]s\s+participativas', re.I)),
('multimedia', re.compile(r'Material\s+multimedia', re.I)),
('effa', re.compile(r'Escuela\s+EFFA', re.I)),
]
# Display names used in the seccion_original column (verbatim from the historical CSV).
SECTION_LABELS = {
'comentario': 'Evangelio y comentarios al Evangelio',
'articulo': 'Artículos seleccionados para la semana',
'eucaristia': 'Para unas eucaristías más participativas y actuales',
'multimedia': 'Material multimedia',
'effa': 'Escuela EFFA',
}
# Default value of the categoria_propuesta column for each section slug.
CAT_PROPUESTA = {
'comentario': 'comentario',
'articulo': 'articulo',
'eucaristia': 'eucaristia',
'multimedia': 'multimedia',
'effa': 'effa',
}
# Positional sub-classification inside the "Evangelio y comentarios al Evangelio" section.
# The editor ALWAYS places: 1st the gospel reading, 2nd the editorial commentary, 3rd+ commentaries.
SUBCAT_EVANGELIO_BY_POS = ['lectura', 'comentario_editorial'] # everything else = 'comentario'
# href attribute values (single- or double-quoted).
HREF_RX = re.compile(r'href=["\']([^"\']+)["\']', re.I)
# WordPress URL: captures the path segment that directly follows "fea/".
WP_SLUG_RX = re.compile(r'(?:^|/)fea/([a-z0-9\-]+)/?(?:[?#]|$)', re.I)
# ".../item/<id>-<alias>.html" URL: captures the numeric id.
K2_ITEM_RX = re.compile(r'/item/(\d+)-[^/"]+\.html', re.I)
# First path segments that are never treated as post slugs.
RESERVED_SLUGS = {'wp-admin','wp-content','category','tag','author','page','en','fr','it','pt'}
def get_conn():
    """Open a pymysql connection to the database of the ``wordpress-mysql`` container.

    The container IP is read with ``docker inspect``. The template emits one
    IP per attached network separated by spaces, and the first one is used;
    without the separator, a container on several networks produced the IPs
    glued together into an invalid host name.

    Exits the program when the container reports no IP address. Raises
    ``subprocess.CalledProcessError`` if ``docker inspect`` fails.
    """
    ips = subprocess.run(
        ['docker', 'inspect', 'wordpress-mysql', '--format',
         '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
        capture_output=True, text=True, check=True,
    ).stdout.split()
    if not ips:
        sys.exit('wordpress-mysql no tiene dirección IP (¿contenedor parado?)')
    return pymysql.connect(
        host=ips[0], user='wordpress_user', password='wordpress_pass',
        database='wordpress_db', charset='utf8mb4', autocommit=True,
    )
def mysql(query, conn=None):
    """Run *query* and return all rows as a list.

    When *conn* is omitted a connection is opened with ``get_conn()`` and
    closed before returning; a connection passed in by the caller is left
    open.
    """
    owns_conn = conn is None
    if owns_conn:
        conn = get_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            return list(cursor.fetchall())
    finally:
        if owns_conn:
            conn.close()
def fetch_cartas(conn):
    """Return every published carta with its content, newest first.

    A carta is a published post in category term 6, 21 or 22. Each row is
    ``(ID, post_title, DATE(post_date), post_content)``.
    """
    return mysql("""
SELECT p.ID, p.post_title, DATE(p.post_date), p.post_content
FROM wp_posts p
WHERE p.post_status='publish' AND p.post_type='post'
AND p.ID IN (
SELECT DISTINCT tr.object_id FROM wp_term_relationships tr
JOIN wp_term_taxonomy tt ON tt.term_taxonomy_id=tr.term_taxonomy_id
WHERE tt.term_id IN (6, 21, 22) AND tt.taxonomy='category'
)
ORDER BY p.post_date DESC;
""", conn)
def build_lookups(conn):
    """Build the ``slug -> post_id`` and ``k2_id -> post_id`` dictionaries.

    Loading both maps once avoids one database query per link.

    When several published posts share a slug, the MOST RECENT one wins
    (rows arrive ordered by post_date DESC and the first occurrence is kept).
    For K2 ids the highest post_id per ``_fgj2wp_old_k2_id`` value is used;
    values that are not integers are skipped.

    Returns ``(slug_to_id, k2_to_id)``.
    """
    print('Cargando lookups (slug y k2_id) ...', file=sys.stderr, flush=True)

    slug_rows = mysql("""
SELECT p1.post_name, p1.ID
FROM wp_posts p1
WHERE p1.post_status='publish' AND p1.post_type='post' AND p1.post_name<>''
ORDER BY p1.post_date DESC;
""", conn)
    slug_to_id = {}
    for post_name, post_id in slug_rows:
        # First seen (most recent) wins.
        slug_to_id.setdefault(post_name, int(post_id))

    k2_rows = mysql("""
SELECT meta_value, MAX(post_id) FROM wp_postmeta
WHERE meta_key='_fgj2wp_old_k2_id' AND meta_value<>''
GROUP BY meta_value;
""", conn)
    k2_to_id = {}
    for old_k2, post_id in k2_rows:
        try:
            k2_to_id[int(old_k2)] = int(post_id)
        except (ValueError, TypeError):
            continue

    print(f' slugs: {len(slug_to_id)} k2_ids: {len(k2_to_id)}', file=sys.stderr)
    return slug_to_id, k2_to_id
def fetch_titles(ids, conn):
    """Return ``{post_id: post_title}`` for the given post IDs (``{}`` if none)."""
    if not ids:
        return {}
    id_list = ','.join(map(str, ids))
    titles = {}
    for post_id, title in mysql(f"SELECT ID, post_title FROM wp_posts WHERE ID IN ({id_list});", conn):
        titles[int(post_id)] = title
    return titles
def url_to_post_id(url, slug_to_id, k2_to_id):
    """Resolve a link to a post ID, or ``None`` when it cannot be resolved.

    The WordPress slug form is tried first (reserved slugs are ignored), then
    the ``/item/<id>-<alias>.html`` form.
    """
    slug_match = WP_SLUG_RX.search(url)
    if slug_match:
        slug = slug_match.group(1).lower()
        if slug not in RESERVED_SLUGS and slug in slug_to_id:
            return slug_to_id[slug]

    k2_match = K2_ITEM_RX.search(url)
    if k2_match is None:
        return None
    return k2_to_id.get(int(k2_match.group(1)))
def extract_sections(html_content):
    """Return ``{section_slug: [href, ...]}`` based on the section headings.

    Each section runs from the first occurrence of its heading to the start
    of the next heading found (or the end of the content). The hrefs are NOT
    resolved to post IDs here; they are de-duplicated preserving order.
    Returns ``{}`` when no heading is found.
    """
    found = sorted(
        (match.start(), slug)
        for slug, rx in SECTION_PATTERNS
        for match in [rx.search(html_content)]
        if match
    )
    if not found:
        return {}

    # End of each section = start of the next one; the last runs to the end.
    ends = [pos for pos, _ in found[1:]] + [len(html_content)]
    out = {}
    for (start, slug), end in zip(found, ends):
        hrefs = HREF_RX.findall(html_content[start:end])
        out[slug] = list(dict.fromkeys(hrefs))
    return out
def main():
    """Regenerate the classification CSV from the weekly cartas.

    For every carta, the links under each section heading are resolved to
    post IDs and written as one CSV row per (carta, post). Inside the gospel
    section the first two resolved posts get the positional sub-categories
    from ``SUBCAT_EVANGELIO_BY_POS``. With ``--diff`` a summary comparison
    against a reference CSV is printed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--out', default='/tmp/clasificacion_articulos_regen.csv')
    ap.add_argument('--diff', help='CSV de referencia para mostrar diff')
    args = ap.parse_args()

    rows_out = []
    needed_titles = set()
    n_unresolved = 0
    n_resolved = 0

    conn = get_conn()
    try:  # close the connection even if a query or the parsing fails
        slug_to_id, k2_to_id = build_lookups(conn)
        print('Leyendo cartas ...', file=sys.stderr, flush=True)
        cartas = fetch_cartas(conn)
        print(f' cartas: {len(cartas)}', file=sys.stderr)

        for carta_id, carta_title, carta_fecha, content in cartas:
            carta_id = int(carta_id)
            if not content:
                continue
            for slug, urls in extract_sections(content).items():
                label = SECTION_LABELS[slug]
                default_cat = CAT_PROPUESTA[slug]
                # Keep only the links that resolve to a post, preserving order.
                resolved = []
                for url in urls:
                    pid = url_to_post_id(url, slug_to_id, k2_to_id)
                    if pid is None:
                        n_unresolved += 1
                        continue
                    resolved.append(pid)
                for pos, pid in enumerate(resolved):
                    # Positional sub-classification for the gospel section.
                    if slug == 'comentario' and pos < len(SUBCAT_EVANGELIO_BY_POS):
                        cat = SUBCAT_EVANGELIO_BY_POS[pos]
                    else:
                        cat = default_cat
                    n_resolved += 1
                    needed_titles.add(pid)
                    rows_out.append({
                        'post_id': pid,
                        'categoria_propuesta': cat,
                        'seccion_original': label,
                        'carta_id': carta_id,
                        'carta_titulo': carta_title,
                        'carta_fecha': carta_fecha,
                    })

        print(f'Resueltos: {n_resolved} Sin resolver: {n_unresolved}', file=sys.stderr)
        print('Cargando títulos ...', file=sys.stderr, flush=True)
        titles = fetch_titles(list(needed_titles), conn)
    finally:
        conn.close()

    # Write the CSV.
    cols = ['post_id', 'post_title', 'categoria_propuesta', 'seccion_original',
            'carta_id', 'carta_titulo', 'carta_fecha']
    with open(args.out, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=cols, quoting=csv.QUOTE_MINIMAL)
        w.writeheader()
        for r in rows_out:
            r['post_title'] = titles.get(r['post_id'], '')
            w.writerow(r)
    print(f'Escrito: {args.out} ({len(rows_out)} filas)')

    # Optional diff against a reference CSV.
    if args.diff and os.path.exists(args.diff):
        from collections import defaultdict

        def load(path):
            """Return {post_id: set of proposed categories} read from *path*."""
            cats_by_post = defaultdict(set)
            with open(path, encoding='utf-8-sig') as fh:
                for row in csv.DictReader(fh):
                    pid = row.get('post_id', '')
                    cat = row.get('categoria_propuesta', '')
                    if not pid:
                        continue
                    cats_by_post[pid].add(cat)
            return cats_by_post

        old = load(args.diff)
        new = load(args.out)
        old_keys = set(old.keys())
        new_keys = set(new.keys())
        print('\n=== DIFF ===')
        print(f'posts en CSV viejo: {len(old_keys)}')
        print(f'posts en CSV nuevo: {len(new_keys)}')
        print(f'solo en viejo: {len(old_keys - new_keys)}')
        print(f'solo en nuevo: {len(new_keys - old_keys)}')
        common = old_keys & new_keys
        same_cats = sum(1 for k in common if old[k] == new[k])
        diff_cats = len(common) - same_cats
        print(f'común con mismas cats: {same_cats}')
        print(f'común con cats distintas: {diff_cats}')


if __name__ == '__main__':
    main()
+40
View File
@@ -0,0 +1,40 @@
<?php
// Remaps the internal links of a carta's translations: permalinks/IDs that
// point to the ES article are converted to the permalink of the article in the
// language of EACH carta (via Polylang). Needed when the ES carta was
// translated AFTER its links had been fixed to permalinks (the translations
// inherit the ES permalinks). Only the translations are touched (never the
// ES post). Dry-run by default; set APPLY=1 to write.
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
if (!function_exists('pll_get_post_translations')) { fwrite(STDERR, "Polylang no disponible\n"); exit(2); }
global $wpdb;
$APPLY = getenv("APPLY") === "1";
$BAK = "/tmp/remap_carta_tr_bak"; if ($APPLY) @mkdir($BAK,0777,true);
$CARTA = (int)(getenv("CARTA") ?: 53644);
$tr = pll_get_post_translations($CARTA);
$tot = 0;
foreach($tr as $lang=>$pid){
    if($lang === 'es') continue; // the ES post is already correct
    $post = get_post($pid); if(!$post) continue;
    $chg=0; $miss=[];
    $new = preg_replace_callback('~href="([^"]+)"~i', function($m) use($lang,&$chg,&$miss){
        $href = html_entity_decode(trim($m[1]));
        // Only links to this site are candidates.
        if(stripos($href,'farmer.taild3aaf6.ts.net')===false && stripos($href,'/fea/')===false) return $m[0];
        $es = 0;
        if(preg_match('~[?&]p=(\d+)~',$href,$mm)) $es=(int)$mm[1]; // ?p=ID form
        if(!$es) $es = (int)url_to_postid($href); // /slug/ form
        if(!$es){ return $m[0]; }
        if(pll_get_post_language($es) !== 'es'){ return $m[0]; } // only when it points to ES
        $t = pll_get_post($es,$lang);
        if(!$t || $t==$es){ $miss[]=$href; return $m[0]; } // no translation -> leave
        $url = get_permalink($t);
        if(!$url) return $m[0];
        $chg++;
        return 'href="'.esc_url($url).'"';
    }, $post->post_content);
    // preg_replace_callback() returns null on a PCRE error; never write that.
    if ($new === null) { fwrite(STDERR, "#$pid: error PCRE al procesar el contenido; se deja intacto\n"); continue; }
    echo sprintf("#%d [%s] «%s» — %d remapeados%s\n",$pid,$lang,mb_substr($post->post_title,0,30),$chg,
        $miss?(" | sin traducción: ".count($miss)):"");
    if($APPLY && $chg){
        // Backup of the original content, then a direct DB write.
        file_put_contents("$BAK/$pid.html",$post->post_content);
        if ($wpdb->update($wpdb->posts,['post_content'=>$new],['ID'=>$pid]) === false) {
            fwrite(STDERR, "#$pid: fallo al guardar: {$wpdb->last_error}\n");
            continue;
        }
        clean_post_cache($pid);
    }
    $tot+=$chg;
}
echo ($APPLY?"APLICADO":"DRY-RUN").": $tot enlaces.\n";
+42
View File
@@ -0,0 +1,42 @@
<?php
/**
 * Remaps the categories of automatic translations (posts carrying the
 * `traduccion_origen` meta) to the translated terms of their own language.
 * Idempotent, and it never calls Gemma.
 *
 * Fixes translations created before fea_translate_helper.php mapped categories
 * (issue #75): e.g. an EN carta left in the ES category `cartasemana` moves to
 * the EN category `letter-of-the-week`, populating the per-language carta archive.
 *
 * Usage: docker exec wordpress-web php /tmp/remap_translation_cats.php
 */

// Supply the request values WordPress reads when the script runs from the CLI.
if (!isset($_SERVER['REQUEST_URI'])) {
    $_SERVER['REQUEST_URI'] = '/';
}
if (!isset($_SERVER['HTTP_HOST'])) {
    $_SERVER['HTTP_HOST'] = 'farmer.taild3aaf6.ts.net';
}
require_once '/var/www/html/wp-load.php';

if (!function_exists('pll_get_term')) {
    fwrite(STDERR, "Polylang no disponible\n");
    exit(2);
}

global $wpdb;
$ids = $wpdb->get_col("SELECT DISTINCT post_id FROM {$wpdb->postmeta} WHERE meta_key='traduccion_origen'");
$fixed = 0;

foreach ($ids as $raw_id) {
    $pid = (int) $raw_id;
    $lang = pll_get_post_language($pid);
    // Posts without a language, and Spanish posts, keep their categories.
    if (!$lang || $lang === 'es') {
        continue;
    }

    $mapped = [];
    $changed = false;
    foreach (wp_get_post_categories($pid) as $c) {
        $tc = (int) pll_get_term($c, $lang);
        // Swap in the translated term only when one exists and differs.
        if ($tc && $tc !== $c) {
            $mapped[] = $tc;
            $changed = true;
            continue;
        }
        $mapped[] = $c;
    }

    if (!$changed) {
        continue;
    }
    wp_set_post_categories($pid, array_values(array_unique($mapped)));
    $fixed++;
}

echo "Remapeadas categorías en $fixed traducciones (de " . count($ids) . " revisadas)\n";
+41
View File
@@ -0,0 +1,41 @@
<?php
// Bootstrap: load WordPress and read the run options from the environment.
//   FEA_WP_LOAD  path to wp-load.php (default /var/www/html/wp-load.php)
//   APPLY        "1" enables writes; any other value leaves $APPLY false
//   CARTA        post ID to process (default 46956)
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
global $wpdb;
$APPLY = getenv("APPLY")==="1";
$CARTA = (int)(getenv("CARTA") ?: 46956);
// Backup directory; it is created only when $APPLY is set.
$BAK="/tmp/repoint_bak"; if($APPLY) @mkdir($BAK,0777,true);
/**
 * Resolve an internal link to the ID of the post it points at.
 *
 * Two link shapes are understood:
 *   - `?p=ID` / `&p=ID` query form: the numeric ID is returned directly;
 *   - a single-slug path, optionally prefixed by `/fea` and/or a language
 *     segment (en|fr|it|pt|es): the slug is looked up with get_page_by_path().
 *
 * @param string $href Link target as found in the post content.
 * @return int Post ID, or 0 when the link cannot be resolved.
 */
function resolve_post($href){
    // ?p=ID
    if (preg_match('~[?&]p=(\d+)~',$href,$m)) return (int)$m[1];
    // Reduce the URL to its path: drop scheme+host, then query string/fragment.
    $path = preg_replace('~^https?://[^/]+~i','',$href);
    $path = preg_replace('~[?#].*$~','',$path);
    // Strip the /fea prefix only when it is a whole path segment; an unanchored
    // match would also eat the start of slugs such as /feadulta-x/.
    $path = preg_replace('~^/fea(?=/|$)~','',$path);
    $path = preg_replace('~^/(en|fr|it|pt|es)(/|$)~','/',$path);
    $segs = array_values(array_filter(explode('/',$path),'strlen'));
    // Only plain /slug/ paths are handled; anything deeper (or empty) is unresolved.
    if (count($segs)!==1) return 0;
    $p = get_page_by_path($segs[0], OBJECT, 'post');
    return $p ? $p->ID : 0;
}
// For every translation of the carta (the result of pll_get_post_translations),
// repoint links whose target post is in a different language than the carta
// itself to that post's translation in the carta's language.
foreach (array_values(pll_get_post_translations($CARTA)) as $pid) {
    $post=get_post($pid); if(!$post) continue;
    // Posts for which Polylang reports no language are treated as Spanish.
    $lang=pll_get_post_language($pid) ?: 'es';
    $chg=0; // links rewritten in this post (updated by reference in the callback)
    $new=preg_replace_callback('~href="([^"]+)"~i', function($m) use($lang,&$chg){
        $href=$m[1];
        if (stripos($href,'.html')!==false) return $m[0]; // legacy links are handled by the other script
        $tid=resolve_post($href);
        if(!$tid) return $m[0];
        $plang=pll_get_post_language($tid);
        if(!$plang || $plang===$lang) return $m[0]; // already in the right language
        $t=pll_get_post($tid,$lang);
        if(!$t || $t==$tid) return $m[0]; // no translation -> leave as is
        $url=get_permalink($t); if(!$url) return $m[0];
        $chg++; return 'href="'.esc_url($url).'"';
    }, $post->post_content);
    echo "#$pid [$lang] — $chg repuntado(s)\n";
    // When applying: back up the old content, write the new content directly
    // to the posts table and clear the cached copy of the post.
    if($APPLY && $chg){ file_put_contents("$BAK/$pid.html",$post->post_content);
        $wpdb->update($wpdb->posts,['post_content'=>$new],['ID'=>$pid]); clean_post_cache($pid); }
}
echo $APPLY?"APLICADO\n":"DRY-RUN\n";
+128
View File
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Reprocesa traducciones ROTAS con Claude Haiku 4.5 (API directa).
Coge el ES original, lo traduce con Haiku y SOBRESCRIBE la traducción ya
existente (in-place, sin duplicar). Detecta los rotos por ratio de longitud.
Uso:
reprocess_en_haiku.py --auto --langs en --limit 100 # EN rotos
reprocess_en_haiku.py --auto --langs fr,it,pt --limit 50 # otros idiomas
reprocess_en_haiku.py --ids 44205 --langs en # src concretos
añade --apply para ESCRIBIR en la BD (sin él = dry-run).
"""
import argparse
import json
import os
import re
import subprocess
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from translate_haiku import translate # noqa: E402
STATE = "/tmp/feadulta-translate-state.json"
CONTAINER = "wordpress-web"
RATIO_BROKEN = 0.45
def dexec(args):
    """Run *args* inside the WordPress container via ``docker exec``.

    Returns the ``subprocess.CompletedProcess`` with stdout/stderr captured
    as text. A non-zero exit status does not raise; callers inspect
    ``returncode`` themselves.
    """
    command = ["docker", "exec", CONTAINER]
    command.extend(args)
    return subprocess.run(command, capture_output=True, text=True)
def dcp(remote, local):
    """Copy *remote* (a path inside the container) to *local* on the host.

    Raises ``subprocess.CalledProcessError`` if ``docker cp`` exits non-zero.
    """
    source = f"{CONTAINER}:{remote}"
    subprocess.run(["docker", "cp", source, local], check=True)
def dcp_to(local, remote):
    """Copy *local* (a host path) to *remote* inside the container.

    Raises ``subprocess.CalledProcessError`` if ``docker cp`` exits non-zero.
    """
    destination = f"{CONTAINER}:{remote}"
    subprocess.run(["docker", "cp", local, destination], check=True)
def get_post(pid):
    """Fetch post *pid* from WordPress and return it as parsed JSON.

    Runs ``fea_post_io.php get <pid>`` inside the container, copies the
    resulting ``/tmp/fea_es.json`` to the same path on the host and decodes it.
    Callers in this file read the ``"content"`` and ``"title"`` keys of the
    returned object.

    Raises:
        RuntimeError: the PHP helper exited with a non-zero status.
        subprocess.CalledProcessError: ``docker cp`` failed.
    """
    r = dexec(["php", "/tmp/fea_post_io.php", "get", str(pid)])
    if r.returncode != 0:
        raise RuntimeError(f"get {pid}: {r.stderr.strip()}")
    dcp("/tmp/fea_es.json", "/tmp/fea_es.json")
    # Use a context manager so the handle is closed deterministically; this
    # function is called twice per candidate pair when auto-detecting.
    with open("/tmp/fea_es.json", encoding="utf-8") as fh:
        return json.load(fh)
def strip_len(html):
    """Return the length of *html* once every ``<...>`` tag has been removed."""
    text_only = re.sub(r"<[^>]*>", "", html)
    return len(text_only)
def find_broken(state, langs, limit):
    """Return ``[(src, lang), ...]`` for translations that look broken.

    A translation counts as broken when its tag-stripped length divided by the
    tag-stripped length of its source post is below ``RATIO_BROKEN``.

    Args:
        state: decoded state file; ``state["done"]`` maps ``"src:lang"`` keys
            to the ID of the translated post.
        langs: language codes to consider; other entries are ignored.
        limit: the scan stops once this many broken pairs have been collected.

    Returns:
        List of ``(source_post_id, lang)`` tuples.
    """
    out = []
    for key, tid in state["done"].items():
        src, lang = key.split(":")
        if lang not in langs:
            continue
        try:
            es = get_post(int(src))
            tr = get_post(int(tid))
        except RuntimeError:
            # get_post raises RuntimeError when the PHP helper fails; such
            # pairs are skipped rather than aborting the whole scan.
            continue
        olen = strip_len(es["content"])
        # Very short sources are skipped (this also avoids dividing by zero).
        if olen < 40:
            continue
        if strip_len(tr["content"]) / olen < RATIO_BROKEN:
            out.append((int(src), lang))
            if len(out) >= limit:
                break
    return out
def main():
    """Command-line entry point: retranslate translations and optionally save them.

    Options:
        --ids    source post IDs to process (used when --auto is not given)
        --langs  comma-separated target languages (default "en")
        --auto   detect broken translations with find_broken() instead of --ids
        --limit  maximum number of pairs returned by auto-detection
        --apply  write the result back through fea_post_io.php; without it
                 the run is a dry run that only prints what it would do

    For each (source, lang) pair that has a translation recorded in the state
    file, the source title and content are translated with ``translate`` and,
    when applying, written over the existing translation post. Token usage is
    accumulated and a cost estimate is printed at the end.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ids", nargs="*", type=int, default=[])
    ap.add_argument("--langs", default="en")
    ap.add_argument("--auto", action="store_true")
    ap.add_argument("--limit", type=int, default=100)
    ap.add_argument("--apply", action="store_true")
    args = ap.parse_args()

    langs = [code.strip() for code in args.langs.split(",") if code.strip()]
    with open(STATE, encoding="utf-8") as fh:
        state = json.load(fh)

    if args.auto:
        print(f"Autodetectando rotos (ratio<{RATIO_BROKEN}) en {langs}")
        pairs = find_broken(state, langs, args.limit)
        print(f"Encontrados: {pairs}")
    else:
        pairs = [(src, lang) for src in args.ids for lang in langs]

    tot_in = tot_out = 0.0
    for src, lang in pairs:
        tid = state["done"].get(f"{src}:{lang}")
        if not tid:
            print(f"[{src}:{lang}] sin traducción en state; salto")
            continue
        es = get_post(src)
        # translate() returns (text, usage); usage exposes token counts.
        body, u1 = translate(es["content"], lang)
        title, u2 = translate(es["title"], lang, is_title=True)
        tot_in += u1.input_tokens + u2.input_tokens
        tot_out += u1.output_tokens + u2.output_tokens
        print(f"\n===== src #{src} [{lang}] -> #{tid} =====")
        print(f"TÍTULO {lang}: {title}")
        print(f"cuerpo ES={strip_len(es['content'])} -> {lang}={strip_len(body)}")
        if args.apply:
            # Close (and therefore flush) both files before `docker cp` reads
            # them, and write UTF-8 explicitly so the result does not depend on
            # the host locale.
            with open("/tmp/fea_title.txt", "w", encoding="utf-8") as fh:
                fh.write(title)
            with open("/tmp/fea_body.txt", "w", encoding="utf-8") as fh:
                fh.write(body)
            dcp_to("/tmp/fea_title.txt", "/tmp/fea_title.txt")
            dcp_to("/tmp/fea_body.txt", "/tmp/fea_body.txt")
            r = dexec(["php", "/tmp/fea_post_io.php", "update", str(tid),
                       "/tmp/fea_title.txt", "/tmp/fea_body.txt"])
            print(("APLICADO: " + r.stdout.strip()) if r.returncode == 0
                  else ("FALLO: " + r.stderr.strip()))
        else:
            print("(dry-run)")

    # Cost estimate: 1.0 per million input tokens, 5.0 per million output tokens.
    cost = tot_in / 1e6 * 1.0 + tot_out / 1e6 * 5.0
    print(f"\nTOTAL tokens: in={int(tot_in)} out={int(tot_out)} coste=${cost:.4f}")
    print("MODO: " + ("APLICADO a BD" if args.apply else "DRY-RUN"))
if __name__ == "__main__":
main()
+310
View File
@@ -0,0 +1,310 @@
#!/usr/bin/env python3
"""
retranslate_chunks.py
Re-translates posts where content is in the wrong language.
Splits post_content into chunks of ~800 chars (at </p> boundaries)
and translates each chunk independently to avoid model drift.
"""
import pymysql
import json
import re
import html
import urllib.request
import time
import sys
import csv
from langdetect import detect, LangDetectException, DetectorFactory
DetectorFactory.seed = 0
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
LANG_NAMES = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
LANG_NORM = {'es':'es','pt':'pt','fr':'fr','en':'en','it':'it','ca':'es','gl':'es'}
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
CHUNK_SIZE = 800 # max chars per translation chunk
MAX_RETRIES = 2
def strip_html(text):
    """Return *text* as plain text.

    Tags are replaced by spaces, HTML entities are decoded and runs of
    whitespace collapse to one space. Falsy input yields ``''``.
    """
    if not text:
        return ''
    without_tags = re.sub(r'<[^>]+>', ' ', text)
    decoded = html.unescape(without_tags)
    return re.sub(r'\s+', ' ', decoded).strip()
def detect_lang(text, min_len=60):
    """Detect the language of *text* (HTML allowed).

    Only the first 600 characters of the tag-stripped text are examined. The
    detected code is normalised through ``LANG_NORM`` (which maps ``ca`` and
    ``gl`` to ``es``); codes missing from that table are returned unchanged.

    Returns:
        The language code, or ``None`` when the sample is shorter than
        *min_len* or langdetect cannot classify it.
    """
    t = strip_html(text)[:600].strip()
    if len(t) < min_len:
        return None
    try:
        # Detect once and reuse the result for both the lookup and its fallback.
        code = detect(t)
    except LangDetectException:
        # Catch only langdetect's own failure so Ctrl-C and real bugs still surface.
        return None
    return LANG_NORM.get(code, code)
def call_jan(messages, max_tokens=1200, temperature=0.2, timeout=120):
    """POST a chat-completion request to ``JAN_URL`` and return the reply text.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.
        max_tokens, temperature: forwarded in the request body.
        timeout: socket timeout in seconds for the HTTP call.

    Returns:
        The stripped ``content`` of the first choice in the response.
    """
    request_body = {
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    http_headers = {"Content-Type": "application/json", "Authorization": "Bearer dummy"}
    request = urllib.request.Request(
        JAN_URL,
        data=json.dumps(request_body).encode("utf-8"),
        headers=http_headers,
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        decoded = json.loads(response.read())
    first_choice = decoded["choices"][0]
    return first_choice["message"]["content"].strip()
def translate_chunk(chunk, lang_name):
    """Translate a single HTML chunk. Returns translated text or None on failure.

    Up to ``MAX_RETRIES`` attempts are made. For chunks with at least 40
    characters of plain text, a result is accepted when its detected language
    is the target language or cannot be determined. Shorter chunks are
    accepted when the text changed from the original, or unconditionally on a
    retry. After a rejected result the next attempt uses a more explicit
    system prompt.

    Args:
        chunk: HTML fragment in Spanish.
        lang_name: English name of the target language (a ``LANG_NAMES`` value).
    """
    system = (
        f"You are a professional translator. Translate the following Spanish text to {lang_name}. "
        f"Preserve all HTML tags exactly as they are. "
        f"Return ONLY the translated text, nothing else. No preamble, no explanation."
    )
    plain_len = len(strip_html(chunk).strip())
    # Resolve the language code from LANG_NAMES instead of taking the first two
    # letters of the name: "Portuguese"[:2] is "po", which never equals the
    # detector's "pt", so every valid Portuguese chunk used to be rejected.
    target_code = next(
        (code for code, name in LANG_NAMES.items() if name == lang_name),
        lang_name[:2].lower(),
    )
    for attempt in range(MAX_RETRIES):
        try:
            result = call_jan([
                {"role": "system", "content": system},
                {"role": "user", "content": chunk}
            ])
            # For short chunks (headings, short phrases) langdetect is unreliable —
            # accept the result as long as it changed from the original Spanish
            if plain_len < 40:
                changed = strip_html(result).strip().lower() != strip_html(chunk).strip().lower()
                if changed or attempt > 0:
                    return result
            else:
                lang = detect_lang(result, min_len=40)
                if lang is None or lang == target_code:
                    return result
            # Wrong language — retry with more explicit prompt
            system = (
                f"Translate from Spanish to {lang_name}. "
                f"Your response must be entirely in {lang_name}. "
                f"Preserve HTML tags. Return ONLY the translation."
            )
        except Exception:
            # Request/parsing failure: give up on the last attempt, otherwise
            # pause briefly before trying again.
            if attempt == MAX_RETRIES - 1:
                return None
            time.sleep(2)
    return None  # all retries failed
def translate_title(title, lang_name):
    """Translate *title* from Spanish to *lang_name*, asking for ALL CAPS.

    Returns:
        The model's answer with surrounding whitespace and quote characters
        removed, or ``None`` if the request fails.
    """
    try:
        result = call_jan([
            {"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
            {"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{title}"}
        ], max_tokens=120, temperature=0.1, timeout=30)
        return result.strip().strip('"').strip("'")
    except Exception:
        # Best effort: the caller falls back to the existing title on None.
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        return None
def split_into_chunks(content, max_size=CHUNK_SIZE):
    """Split HTML *content* into chunks of at most *max_size* characters.

    The text is cut after closing ``</p>``, ``</li>``, ``</hN>`` and
    ``</blockquote>`` tags and consecutive segments are packed together while
    they fit. A single segment longer than *max_size* is further split at
    sentence ends (whitespace following ``.``, ``!`` or ``?``); a sentence
    that is itself longer than *max_size* is kept whole. Whitespace-only
    chunks are dropped from the result.
    """
    pieces = re.split(r'(</p>|</li>|</h[1-6]>|</blockquote>)', content)
    # With one capture group re.split alternates text / closing tag and always
    # ends on text, so pad the closers with "" to pair every text piece.
    texts = pieces[0::2]
    closers = pieces[1::2] + [""]

    result = []
    buffer = ""
    for text, closer in zip(texts, closers):
        segment = text + closer
        if len(buffer) + len(segment) <= max_size:
            buffer += segment
            continue

        # The segment does not fit: flush what has been packed so far.
        if buffer:
            result.append(buffer)
        if len(segment) <= max_size:
            buffer = segment
            continue

        # Oversized segment: repack it sentence by sentence.
        buffer = ""
        for sentence in re.split(r'(?<=[.!?])\s+', segment):
            if len(buffer) + len(sentence) > max_size:
                if buffer:
                    result.append(buffer.strip())
                buffer = sentence + " "
            else:
                buffer += sentence + " "

    if buffer:
        result.append(buffer)
    return [chunk for chunk in result if chunk.strip()]
def translate_content_chunked(content, lang_name):
    """Translate a whole post body chunk by chunk.

    Chunks without any plain text are passed through untouched. A chunk whose
    translation fails is kept in its original form and counted as a failure.

    Returns:
        ``(translated_content, success_ratio)`` where the content is the
        chunks joined by newlines and the ratio is ``1 - failed / total``.
        Empty or whitespace-only input is returned unchanged with ratio 1.0.
    """
    if not content or not content.strip():
        return content, 1.0

    chunks = split_into_chunks(content)
    output = []
    failures = 0
    for chunk in chunks:
        if strip_html(chunk).strip():
            translated = translate_chunk(chunk, lang_name)
            if translated is None:
                # Keep the original rather than dropping the chunk.
                failures += 1
                translated = chunk
        else:
            # Only markup/whitespace: nothing to translate.
            translated = chunk
        output.append(translated)

    if chunks:
        success_ratio = 1.0 - (failures / len(chunks))
    else:
        success_ratio = 1.0
    return "\n".join(output), success_ratio
def main():
    """Retranslate, chunk by chunk, every post listed in the audit CSV.

    Steps:
      1. Read post IDs from the ``id`` column of ``/tmp/audit_clean.csv``.
      2. Load those published posts with their language slug and translation
         group, and extract the Spanish original's ID from the serialized
         group description.
      3. For each translation, retranslate the Spanish title and content and
         write both straight into ``wp_posts`` with an SQL UPDATE.

    Exits with status 1 when the audit file is missing; returns without
    touching the database when the audit lists no IDs.
    """
    audit_path = '/tmp/audit_clean.csv'
    failed_ids = set()
    try:
        with open(audit_path) as f:
            reader = csv.DictReader(f)
            for row in reader:
                failed_ids.add(int(row['id']))
        print(f"Loaded {len(failed_ids)} post IDs with issues from audit")
    except FileNotFoundError:
        print(f"ERROR: {audit_path} not found. Run audit_translations.py first.")
        sys.exit(1)

    if not failed_ids:
        # An empty list would render as "IN ()", which is a SQL syntax error.
        print("Audit lists no post IDs; nothing to retranslate.")
        return

    db = pymysql.connect(**DB)
    try:
        c = db.cursor()
        # The IDs were parsed with int() above, so interpolating them is safe.
        id_list = ','.join(str(i) for i in sorted(failed_ids))
        c.execute(f"""
SELECT DISTINCT p.ID, p.post_title, p.post_content,
t_lang.slug as lang,
ttg.description as group_desc
FROM wp_posts p
JOIN wp_term_relationships trl ON p.ID=trl.object_id
JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
JOIN wp_terms t_lang ON ttl.term_id=t_lang.term_id
JOIN wp_term_relationships trg ON p.ID=trg.object_id
JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
WHERE p.ID IN ({id_list}) AND p.post_type='post' AND p.post_status='publish'
""")
        raw_posts = c.fetchall()

        # Fetch Spanish originals
        posts = []
        es_cache = {}
        for p in raw_posts:
            desc = p['group_desc'] or ''
            # The group description is a PHP-serialized array; pull out the "es" entry.
            m = re.search(r's:2:"es";i:(\d+);', desc)
            if not m:
                continue
            es_id = int(m.group(1))
            if es_id not in es_cache:
                c.execute("SELECT ID, post_title, post_content FROM wp_posts WHERE ID=%s", (es_id,))
                row = c.fetchone()
                es_cache[es_id] = row
            es = es_cache[es_id]
            if es:
                posts.append({**p, 'es_id': es_id, 'es_title': es['post_title'], 'es_content': es['post_content']})
    finally:
        # Release the connection even if a query fails.
        db.close()

    print(f"Fetched {len(posts)} posts to retranslate\n")

    # Group the translations by their Spanish original.
    by_es = {}
    for p in posts:
        by_es.setdefault(p['es_id'], []).append(p)

    done = errors = skipped = partial = 0
    total = len(posts)
    n = 0
    for es_id, translations in sorted(by_es.items()):
        es_title = translations[0]['es_title'] or ''
        es_content = translations[0]['es_content'] or ''
        content_len = len(strip_html(es_content))
        if content_len < 50:
            print(f" ES:{es_id} — SKIPPING (too short: {content_len} chars)")
            skipped += len(translations)
            n += len(translations)
            continue

        # Show chunk count for visibility
        chunks = split_into_chunks(es_content)
        print(f"\nES:{es_id}{es_title[:50]} ({content_len} chars, {len(chunks)} chunks)")

        for p in translations:
            post_id = p['ID']
            lang = p['lang']
            lang_name = LANG_NAMES.get(lang, lang)
            n += 1
            try:
                t0 = time.time()
                # Translate title
                t_title = translate_title(es_title, lang_name) if es_title else ''
                if not t_title or t_title.upper() == es_title.upper():
                    t_title = p['post_title']  # keep existing if translation failed

                # Translate content chunk by chunk
                t_content, ratio = translate_content_chunked(es_content, lang_name)
                elapsed = time.time() - t0

                # Validate overall content language
                content_lang = detect_lang(t_content, min_len=80)
                lang_ok = (content_lang == lang) or content_lang is None

                # Add AI footer
                if AI_FOOTER.strip() not in t_content:
                    t_content = t_content + AI_FOOTER

                # Update DB on a fresh connection; translating can take long
                # enough that a connection held open meanwhile could go stale.
                db2 = pymysql.connect(**DB)
                try:
                    c2 = db2.cursor()
                    c2.execute("UPDATE wp_posts SET post_title=%s, post_content=%s WHERE ID=%s",
                               (t_title, t_content, post_id))
                    db2.commit()
                finally:
                    db2.close()

                status = "" if (lang_ok and ratio == 1.0) else ("~" if lang_ok else "")
                if ratio < 1.0:
                    partial += 1
                elif lang_ok:
                    done += 1
                else:
                    errors += 1
                print(f" [{lang}] {status} {post_id}: {t_title[:50]} ({elapsed:.0f}s, {ratio:.0%} ok)")
            except Exception as e:
                # One failing post must not abort the batch.
                print(f" [{lang}] ✗ ERROR on {post_id}: {e}")
                errors += 1

    print(f"\n{'='*50}")
    print(f"Done: {done} ✓ partial: {partial} ~ errors/wrong-lang: {errors} ⚠ skipped: {skipped}")
    print(f"Total: {n}/{total}")
if __name__ == "__main__":
main()
+230
View File
@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
retranslate_en_all.py
Retranslates ALL English posts (ID > 42760) from their Spanish originals.
Uses chunk-based translation (~800 chars per chunk) to avoid model drift.
Sequential, single process.
"""
import pymysql, json, re, html, urllib.request, time, sys
from langdetect import detect, LangDetectException, DetectorFactory
DetectorFactory.seed = 0
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
CHUNK_SIZE = 800
MAX_RETRIES = 2
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
def strip_html(text):
    """Reduce *text* to plain text: tags become spaces, entities are decoded,
    whitespace runs collapse to one space. Falsy input returns ``''``."""
    if not text:
        return ''
    plain = html.unescape(re.sub(r'<[^>]+>', ' ', text))
    collapsed = re.sub(r'\s+', ' ', plain)
    return collapsed.strip()
def detect_lang(text, min_len=40):
    """Detect the language of *text* (HTML allowed).

    Only the first 400 characters of the tag-stripped text are examined.

    Returns:
        langdetect's language code, or ``None`` when the sample is shorter
        than *min_len* or langdetect cannot classify it.
    """
    t = strip_html(text)[:400].strip()
    if len(t) < min_len:
        return None
    try:
        # `detect` is already imported at module level; no local re-import needed.
        return detect(t)
    except LangDetectException:
        # Catch only langdetect's own failure so Ctrl-C and real bugs still surface.
        return None
def call_jan(messages, max_tokens=1200, temperature=0.2, timeout=120):
    """POST a chat-completion request to ``JAN_URL`` and return the reply text.

    *messages* is the chat history; *max_tokens* and *temperature* are sent
    in the request body and *timeout* (seconds) applies to the HTTP call.
    The stripped ``content`` of the first choice is returned.
    """
    body = {
        "model": JAN_MODEL, "messages": messages,
        "temperature": temperature, "max_tokens": max_tokens,
    }
    encoded = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        JAN_URL,
        data=encoded,
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        reply = json.loads(response.read())
        return reply["choices"][0]["message"]["content"].strip()
def translate_chunk(chunk, attempt=0):
    """Translate one Spanish HTML chunk to English and return the model output.

    *attempt* selects the system prompt (0 = standard, 1 or more = the more
    insistent one). If a chunk with fewer than 40 plain-text characters comes
    back textually identical on the first attempt, it is retried once with
    the insistent prompt.
    """
    prompts = (
        "You are a professional translator. Translate the following Spanish text to English. Preserve all HTML tags exactly. Return ONLY the translated text, no preamble, no explanation.",
        "Translate from Spanish to English. Your entire response must be in English. Preserve HTML tags. Return ONLY the translation, nothing else.",
    )
    prompt_index = min(attempt, len(prompts) - 1)
    messages = [
        {"role": "system", "content": prompts[prompt_index]},
        {"role": "user", "content": chunk},
    ]
    result = call_jan(messages)

    # Short chunks: retry if output == input (model didn't translate)
    source_text = strip_html(chunk).strip().lower()
    output_text = strip_html(result).strip().lower()
    untranslated = len(source_text) < 40 and source_text == output_text
    if untranslated and attempt == 0:
        return translate_chunk(chunk, attempt=1)
    return result
def translate_title(es_title):
    """Translate a Spanish title to English, asking for ALL CAPS.

    Returns:
        The translated title with surrounding whitespace and quote characters
        removed. The original *es_title* is returned instead when the request
        fails or when the answer equals the original ignoring case.
    """
    try:
        result = call_jan([
            {"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
            {"role": "user", "content": f"Translate from Spanish to English, ALL CAPS:\n\n{es_title}"}
        ], max_tokens=150, temperature=0.1, timeout=30)
        result = result.strip().strip('"').strip("'")
        # Reject if identical to original
        if result.upper() == es_title.upper():
            return es_title
        return result
    except Exception:
        # Best effort: fall back to the Spanish title. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit.
        return es_title
def split_chunks(content):
    """Split HTML *content* into chunks of at most ``CHUNK_SIZE`` characters.

    Cuts are made after closing ``</p>``, ``</li>``, ``</hN>`` and
    ``</blockquote>`` tags and consecutive segments are packed together while
    they fit. A segment longer than ``CHUNK_SIZE`` is split again at sentence
    ends; a sentence that is itself too long is kept whole. Chunks with no
    plain text are dropped.
    """
    pieces = re.split(r'(</p>|</li>|</h[1-6]>|</blockquote>)', content)
    # With one capture group re.split alternates text / closing tag and always
    # ends on text, so pad the closers with "" to pair every text piece.
    texts = pieces[0::2]
    closers = pieces[1::2] + [""]

    packed = []
    buffer = ""
    for text, closer in zip(texts, closers):
        segment = text + closer
        if len(buffer) + len(segment) <= CHUNK_SIZE:
            buffer += segment
            continue

        # The segment does not fit: flush what has been packed so far.
        if buffer:
            packed.append(buffer)
        if len(segment) <= CHUNK_SIZE:
            buffer = segment
            continue

        # Split long segment at sentence boundaries
        buffer = ""
        for sentence in re.split(r'(?<=[.!?])\s+', segment):
            if len(buffer) + len(sentence) > CHUNK_SIZE:
                if buffer:
                    packed.append(buffer.strip())
                buffer = sentence + " "
            else:
                buffer += sentence + " "

    if buffer:
        packed.append(buffer)
    return [chunk for chunk in packed if strip_html(chunk).strip()]
def main():
    """Retranslate every published EN post with ID > 42760 from its Spanish original.

    For each post the Spanish original's ID is read from the serialized
    translation-group description. The Spanish title and content are then
    translated (content chunk by chunk) and both are written straight into
    ``wp_posts`` with an SQL UPDATE. Posts with no Spanish original, an empty
    original, or fewer than 50 plain-text characters are skipped.
    """
    db = pymysql.connect(**DB)
    c = db.cursor()
    # Fetch all EN posts with their Spanish originals
    c.execute("""
SELECT DISTINCT p.ID, p.post_title,
ttg.description as group_desc
FROM wp_posts p
JOIN wp_term_relationships trl ON p.ID=trl.object_id
JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
JOIN wp_terms t_lang ON ttl.term_id=t_lang.term_id AND t_lang.slug='en'
JOIN wp_term_relationships trg ON p.ID=trg.object_id
JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
ORDER BY p.ID
""")
    posts = c.fetchall()
    print(f"Found {len(posts)} EN posts to retranslate\n", flush=True)
    done = errors = skipped = 0
    total = len(posts)
    for n, p in enumerate(posts, 1):
        post_id = p['ID']
        desc = p['group_desc'] or ''
        # The group description is a PHP-serialized array; pull out the "es" entry.
        m = re.search(r's:2:"es";i:(\d+);', desc)
        if not m:
            print(f"[{n}/{total}] {post_id} — SKIP (no ES original in group)", flush=True)
            skipped += 1
            continue
        es_id = int(m.group(1))
        c.execute("SELECT post_title, post_content FROM wp_posts WHERE ID=%s", (es_id,))
        es = c.fetchone()
        if not es or not es['post_content']:
            print(f"[{n}/{total}] {post_id} — SKIP (ES:{es_id} empty)", flush=True)
            skipped += 1
            continue
        es_title = es['post_title'] or ''
        es_content = es['post_content']
        plain_len = len(strip_html(es_content))
        chunks = split_chunks(es_content)
        print(f"\n[{n}/{total}] WP:{post_id} ← ES:{es_id}{es_title[:50]}", flush=True)
        print(f" {plain_len} chars, {len(chunks)} chunks", flush=True)
        if plain_len < 50:
            print(f" SKIP (too short)", flush=True)
            skipped += 1
            continue
        try:
            t0 = time.time()
            # Translate title
            t_title = translate_title(es_title)
            # Translate content chunk by chunk
            translated = []
            chunk_ok = chunk_bad = 0
            for i, chunk in enumerate(chunks):
                try:
                    result = translate_chunk(chunk, attempt=0)
                    lang = detect_lang(result, min_len=40)
                    # A detectably non-English result of reasonable length gets
                    # one retry with the more insistent prompt.
                    if lang and lang != 'en' and len(strip_html(result)) >= 40:
                        result2 = translate_chunk(chunk, attempt=1)
                        lang2 = detect_lang(result2, min_len=40)
                        if lang2 == 'en' or lang2 is None:
                            result = result2
                            chunk_ok += 1
                        else:
                            # Keep the first result but count the chunk as bad.
                            chunk_bad += 1
                    else:
                        chunk_ok += 1
                    translated.append(result)
                except Exception as e:
                    # On any error keep the untranslated chunk so no content is lost.
                    print(f" chunk {i+1} ERROR: {e}", flush=True)
                    translated.append(chunk)
                    chunk_bad += 1
            t_content = "\n".join(translated)
            if AI_FOOTER.strip() not in t_content:
                t_content += AI_FOOTER
            # Validate overall
            content_lang = detect_lang(t_content, min_len=80)
            lang_ok = content_lang in ('en', None)
            elapsed = time.time() - t0
            # Save; the write goes through its own short-lived connection.
            db2 = pymysql.connect(**DB)
            c2 = db2.cursor()
            c2.execute("UPDATE wp_posts SET post_title=%s, post_content=%s WHERE ID=%s",
                       (t_title, t_content, post_id))
            db2.commit()
            db2.close()
            status = "" if lang_ok else ""
            bad_note = f" ({chunk_bad} chunks bad)" if chunk_bad else ""
            print(f" {status} {t_title[:60]} ({elapsed:.0f}s){bad_note}", flush=True)
            # Counted as done even when lang_ok is False; only the status
            # marker in the line above differs.
            done += 1
        except Exception as e:
            # One failing post must not abort the batch.
            print(f" ✗ ERROR: {e}", flush=True)
            errors += 1
    db.close()
    print(f"\n{'='*50}")
    print(f"Done: {done} ✓ errors: {errors} ✗ skipped: {skipped}")
    print(f"Total: {total}")
if __name__ == "__main__":
main()
+233
View File
@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
retranslate_failures.py
Re-translates posts where content is in the wrong language.
Reads the audit CSV (/tmp/audit_clean.csv), fetches Spanish originals,
retranslates content (and title if needed), and updates the DB.
Uses a clean prompt WITHOUT few-shot examples to avoid contamination.
"""
import pymysql
import json
import re
import html
import urllib.request
import urllib.error
import time
import sys
import csv
from langdetect import detect, LangDetectException, DetectorFactory
DetectorFactory.seed = 0
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
LANG_NAMES = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
LANG_NORM = {'es':'es','pt':'pt','fr':'fr','en':'en','it':'it','ca':'es','gl':'es'}
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
def strip_html(text):
    """Return the plain-text version of *text*.

    Tags are replaced by spaces, HTML entities are decoded and whitespace
    runs collapse to one space. Falsy input gives ``''``.
    """
    if text:
        tagless = re.sub(r'<[^>]+>', ' ', text)
        unescaped = html.unescape(tagless)
        return re.sub(r'\s+', ' ', unescaped).strip()
    return ''
def detect_lang(text, min_len=80):
    """Detect the language of *text* (HTML allowed).

    Only the first 600 characters of the tag-stripped text are examined. The
    detected code is normalised through ``LANG_NORM`` (which maps ``ca`` and
    ``gl`` to ``es``); codes missing from that table are returned unchanged.

    Returns:
        The language code, or ``None`` when the sample is shorter than
        *min_len* or langdetect cannot classify it.
    """
    t = strip_html(text)[:600].strip()
    if len(t) < min_len:
        return None
    try:
        # Detect once and reuse the result for both the lookup and its fallback.
        code = detect(t)
    except LangDetectException:
        # Catch only langdetect's own failure so Ctrl-C and real bugs still surface.
        return None
    return LANG_NORM.get(code, code)
def call_jan(messages, max_tokens=4096, temperature=0.3, timeout=300):
    """POST a chat-completion request to ``JAN_URL`` and return the reply text.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.
        max_tokens, temperature: forwarded in the request body.
        timeout: socket timeout in seconds for the HTTP call.

    Returns:
        The stripped ``content`` of the first choice in the response.
    """
    request_body = {
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    request = urllib.request.Request(
        JAN_URL,
        data=json.dumps(request_body).encode("utf-8"),
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    message = json.loads(raw)["choices"][0]["message"]
    return message["content"].strip()
def translate_content(title, content, lang_code, lang_name):
    """Translate a title and body in one request, without few-shot examples.

    The model is asked to answer starting with a ``Title:`` line. The first
    line of the response becomes the title (with the ``Title:`` label removed
    when present) and the remainder becomes the content. For a single-line
    response the content is ``""`` if that line carried the ``Title:`` label,
    otherwise the whole response.

    *lang_code* is accepted but not used by this function.

    Returns:
        ``(translated_title, translated_content)``.
    """
    system = (
        f"You are a professional translator specializing in theological and religious texts. "
        f"Translate from Spanish to {lang_name}. "
        f"Rules: preserve all HTML tags exactly; translate the title literally in ALL CAPS; "
        f"maintain formal theological register; translate standard religious proper nouns (e.g. 'Jesús''Jesus' in English); "
        f"keep person/place names as-is; return ONLY the translation starting with 'Title:'"
    )
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": f"Title: {title}\n\n{content}"},
    ]
    response = call_jan(messages)

    head, *rest = response.split("\n", 2)
    if head.startswith("Title:"):
        t_title = head.replace("Title:", "").strip()
        single_line_content = ""
    else:
        t_title = head.strip()
        single_line_content = response
    if rest:
        t_content = "\n".join(rest).strip()
    else:
        t_content = single_line_content
    return t_title, t_content
def translate_title_only(title, lang_name):
    """Translate *title* from Spanish to *lang_name*, asking for ALL CAPS.

    Returns the model's answer with surrounding whitespace and quote
    characters removed. Errors from the request propagate to the caller.
    """
    messages = [
        {"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
        {"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{title}"},
    ]
    answer = call_jan(messages, max_tokens=120, temperature=0.1, timeout=30)
    return answer.strip().strip('"').strip("'")
def main():
    """Retranslate, in one request per post, every post listed in the audit CSV.

    Steps:
      1. Read post IDs from the ``id`` column of ``/tmp/audit_clean.csv``.
      2. Load those published posts with their language slug and translation
         group, and extract the Spanish original's ID from the serialized
         group description.
      3. For each translation, retranslate title and content from the Spanish
         original (retrying the content once with a simpler prompt when the
         detected language is wrong) and write both straight into
         ``wp_posts`` with an SQL UPDATE.

    Exits with status 1 when the audit file is missing; returns without
    touching the database when the audit lists no IDs.
    """
    # Load audit results
    audit_path = '/tmp/audit_clean.csv'
    failed_ids = set()
    try:
        with open(audit_path) as f:
            reader = csv.DictReader(f)
            for row in reader:
                failed_ids.add(int(row['id']))
        print(f"Loaded {len(failed_ids)} post IDs with issues from audit")
    except FileNotFoundError:
        print(f"ERROR: {audit_path} not found. Run audit_translations.py first.")
        sys.exit(1)

    if not failed_ids:
        # An empty list would render as "IN ()", which is a SQL syntax error.
        print("Audit lists no post IDs; nothing to retranslate.")
        return

    db = pymysql.connect(**DB)
    try:
        c = db.cursor()
        # Fetch failed posts - get lang and translation group description.
        # The IDs were parsed with int() above, so interpolating them is safe.
        id_list = ','.join(str(i) for i in sorted(failed_ids))
        c.execute(f"""
SELECT DISTINCT p.ID, p.post_title, p.post_content,
t_lang.slug as lang,
ttg.description as group_desc
FROM wp_posts p
JOIN wp_term_relationships trl ON p.ID=trl.object_id
JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
JOIN wp_terms t_lang ON ttl.term_id=t_lang.term_id
JOIN wp_term_relationships trg ON p.ID=trg.object_id
JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
WHERE p.ID IN ({id_list}) AND p.post_type='post' AND p.post_status='publish'
""")
        raw_posts = c.fetchall()

        # Extract Spanish ID from group description and fetch Spanish content
        posts = []
        es_cache = {}
        for p in raw_posts:
            desc = p['group_desc'] or ''
            # The group description is a PHP-serialized array; pull out the "es" entry.
            m = re.search(r's:2:"es";i:(\d+);', desc)
            if not m:
                continue
            es_id = int(m.group(1))
            if es_id not in es_cache:
                c.execute("SELECT ID, post_title, post_content FROM wp_posts WHERE ID=%s", (es_id,))
                row = c.fetchone()
                es_cache[es_id] = row
            es = es_cache[es_id]
            if es:
                posts.append({**p, 'es_id': es_id, 'es_title': es['post_title'], 'es_content': es['post_content']})
    finally:
        # Release the connection even if a query fails.
        db.close()

    print(f"Fetched {len(posts)} posts to retranslate\n")

    # Group the translations by their Spanish original.
    by_es = {}
    for p in posts:
        by_es.setdefault(p['es_id'], []).append(p)

    done = errors = skipped = 0
    total = len(posts)
    n = 0
    for es_id, translations in sorted(by_es.items()):
        es_title = translations[0]['es_title']
        es_content = translations[0]['es_content'] or ''
        content_len = len(strip_html(es_content))
        if content_len < 50:
            print(f" ES:{es_id} — SKIPPING (content too short: {content_len} chars)")
            skipped += len(translations)
            n += len(translations)
            continue
        print(f"\nES:{es_id}{(es_title or '')[:50]} ({content_len} chars)")

        for p in translations:
            post_id = p['ID']
            lang = p['lang']
            lang_name = LANG_NAMES.get(lang, lang)
            n += 1
            try:
                t0 = time.time()
                t_title, t_content = translate_content(es_title or '', es_content, lang, lang_name)
                elapsed = time.time() - t0

                # Validate: content should now be in target language
                content_lang = detect_lang(t_content, min_len=80)
                ok = (content_lang == lang) or content_lang is None

                # If still wrong language, retry with simpler prompt
                if not ok and content_lang:
                    print(f" [{lang}] ⚠ Content still {content_lang}, retrying...")
                    retry_response = call_jan([
                        {"role": "system", "content": f"You are a professional translator. Translate the following Spanish text to {lang_name}. Preserve all HTML tags. Return ONLY the translated text, no preamble, no explanation."},
                        {"role": "user", "content": es_content}
                    ])
                    t_content = retry_response
                    content_lang2 = detect_lang(t_content, min_len=80)
                    if content_lang2 == lang or content_lang2 is None:
                        print(f" [{lang}] ✓ Retry succeeded ({content_lang2})")
                        ok = True
                    else:
                        print(f" [{lang}] ✗ Retry still {content_lang2}, saving anyway")

                # Add AI footer if not present
                if AI_FOOTER.strip() not in t_content:
                    t_content = t_content + AI_FOOTER

                # Update DB on a fresh connection; translating can take long
                # enough that a connection held open meanwhile could go stale.
                db2 = pymysql.connect(**DB)
                try:
                    c2 = db2.cursor()
                    c2.execute("UPDATE wp_posts SET post_title=%s, post_content=%s WHERE ID=%s",
                               (t_title, t_content, post_id))
                    db2.commit()
                finally:
                    db2.close()

                status = "" if ok else ""
                print(f" [{lang}] {status} {post_id}: {t_title[:50]} ({elapsed:.0f}s)")
                done += 1
            except Exception as e:
                # One failing post must not abort the batch.
                print(f" [{lang}] ✗ ERROR on {post_id}: {e}")
                errors += 1

    print(f"\n{'='*50}")
    print(f"Done: {done} retranslated, {errors} errors, {skipped} skipped")
    print(f"Total processed: {n}/{total}")
if __name__ == "__main__":
main()
+275
View File
@@ -0,0 +1,275 @@
#!/usr/bin/env python3
"""
retranslate_lang.py
Retranslates ALL posts for a given language (ID > 42760) from their Spanish originals.
Uses chunk-based translation (~800 chars per chunk) to avoid model drift.
Sequential, single process.
Usage: python3 retranslate_lang.py fr
python3 retranslate_lang.py it
python3 retranslate_lang.py pt
"""
import pymysql, json, re, html, urllib.request, time, sys
from langdetect import detect, LangDetectException, DetectorFactory
DetectorFactory.seed = 0
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
LANG_CONFIG = {
"en": {"name": "English", "footer": "<p><em>English version translated with AI</em></p>"},
"fr": {"name": "French", "footer": "<p><em>Version française traduite par IA</em></p>"},
"it": {"name": "Italian", "footer": "<p><em>Versione italiana tradotta con IA</em></p>"},
"pt": {"name": "Portuguese", "footer": "<p><em>Versão portuguesa traduzida com IA</em></p>"},
}
CHUNK_SIZE = 800
MAX_RETRIES = 2
def strip_html(text):
    """Convert *text* to plain text: tags become spaces, entities are decoded
    and whitespace runs collapse to one space. Falsy input gives ``''``."""
    if not text:
        return ''
    stripped = re.sub(r'<[^>]+>', ' ', text)
    stripped = html.unescape(stripped)
    stripped = re.sub(r'\s+', ' ', stripped)
    return stripped.strip()
def detect_lang(text, min_len=40):
    """Detect the language of *text* (HTML allowed).

    Only the first 400 characters of the tag-stripped text are examined.

    Returns:
        langdetect's language code, or ``None`` when the sample is shorter
        than *min_len* or langdetect cannot classify it.
    """
    t = strip_html(text)[:400].strip()
    if len(t) < min_len:
        return None
    try:
        return detect(t)
    except LangDetectException:
        # Catch only langdetect's own failure so Ctrl-C and real bugs still surface.
        return None
def call_jan(messages, max_tokens=1200, temperature=0.2, timeout=150):
    """POST a chat-completion request to ``JAN_URL`` and return the reply text.

    *messages* is the chat history; *max_tokens* and *temperature* are sent
    in the request body and *timeout* (seconds) applies to the HTTP call.
    The stripped ``content`` of the first choice is returned.
    """
    body = {
        "model": JAN_MODEL, "messages": messages,
        "temperature": temperature, "max_tokens": max_tokens,
    }
    headers = {"Content-Type": "application/json", "Authorization": "Bearer dummy"}
    request = urllib.request.Request(
        JAN_URL, data=json.dumps(body).encode("utf-8"), headers=headers, method="POST"
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        choices = json.loads(response.read())["choices"]
        return choices[0]["message"]["content"].strip()
def fix_html_structure(content):
    """Repair common structural mistakes in model-produced HTML.

    Applied in order: markdown bold becomes a <p><strong> paragraph, bare
    text lines are wrapped in <p>, doubled </p> are collapsed, a paragraph
    left open before a blank line and a new <p> is closed, a nested <em>
    inside a "(n. NN)." reference is removed, and finally the <em>/</em>
    counts are heuristically rebalanced.
    """
    def _bold_to_paragraph(match):
        return '<p><strong>' + match.group(1).strip() + '</strong></p>'

    content = re.sub(r'\*\*(.+?)\*\*', _bold_to_paragraph, content)

    # Wrap lines of bare text. A single '<' prefix test is enough: an HTML
    # comment ('<!--') also starts with '<'.
    rebuilt = []
    for raw_line in content.split('\n'):
        stripped = raw_line.strip()
        if stripped and not stripped.startswith('<'):
            rebuilt.append('<p>' + stripped + '</p>')
        else:
            rebuilt.append(raw_line)
    content = '\n'.join(rebuilt)

    # Collapse doubled closing tags.
    content = re.sub(r'</p>\s*</p>', '</p>', content)
    # Close a paragraph left open right before a blank line and a new <p>.
    content = re.sub(r'([^>])\n\n(<p[> ])', r'\1</p>\n\n\2', content)
    # <em>"..."(n. <em>18).</em>  ->  <em>"..."(n. 18).</em>
    content = re.sub(r'\(n\.\s*<em>(\d+\)\.)</em>', r'(n. \1</em>', content)

    surplus_closes = len(re.findall(r'</em>', content)) - len(re.findall(r'<em[ >]', content))
    if surplus_closes > 0:
        # Drop one '</em>' sitting right before '</p>' per surplus closing tag.
        # Done one at a time on purpose: each removal can expose a new match.
        for _ in range(surplus_closes):
            content = content.replace('</em></p>', '</p>', 1)
    elif surplus_closes < 0:
        # Insert a missing '</em>' before a blank line followed by '<p'.
        content = re.sub(r'(<em>[^<]*(?:<(?!/em>)[^<]*)*)\n\n<p', r'\1</em>\n\n<p', content)
    return content
def translate_chunk(chunk, lang_name, attempt=0):
    """Translate one Spanish HTML chunk into *lang_name* via call_jan().

    *attempt* picks the system prompt (0 = default, 1 or more = the stricter
    wording). On the first attempt only, a chunk with under 40 plain-text
    characters whose output equals its input is retried once with the
    stricter prompt.
    """
    system_prompts = [
        f"You are a professional translator. Translate the following Spanish text to {lang_name}. Preserve all HTML tags exactly. Return ONLY the translated text, no preamble, no explanation.",
        f"Translate from Spanish to {lang_name}. Your entire response must be in {lang_name}. Preserve HTML tags. Return ONLY the translation, nothing else.",
    ]
    system_prompt = system_prompts[min(attempt, 1)]
    result = call_jan([
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": chunk},
    ])
    if attempt != 0:
        return result
    source_plain = strip_html(chunk).strip().lower()
    if len(source_plain) >= 40:
        return result
    if source_plain != strip_html(result).strip().lower():
        return result
    # Short chunk came back unchanged: the model did not translate it.
    return translate_chunk(chunk, lang_name, attempt=1)
def translate_title(es_title, lang_name):
    """Translate a post title into *lang_name*, falling back to *es_title*.

    The Spanish title is returned unchanged when the request fails, when the
    model returns nothing usable, or when the output equals the input
    (case-insensitively).
    """
    try:
        result = call_jan([
            {"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
            {"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{es_title}"}
        ], max_tokens=150, temperature=0.1, timeout=30)
    except Exception:
        # Best effort: a failed title request must not abort the post, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return es_title
    result = result.strip().strip('"').strip("'")
    # An empty reply would otherwise be stored as an empty post title.
    if not result or result.upper() == es_title.upper():
        return es_title
    return result
def split_chunks(content):
    """Split HTML *content* into chunks of at most about CHUNK_SIZE characters.

    The text is cut after closing </p>, </li>, </hN> and </blockquote> tags
    and the resulting segments are packed greedily. A segment that alone
    exceeds CHUNK_SIZE is further split on sentence boundaries. Chunks with
    no visible text are dropped.
    """
    pieces = re.split(r'(</p>|</li>|</h[1-6]>|</blockquote>)', content)
    # With a capturing group, re.split alternates text and closing tag:
    # glue each text piece back to the tag that follows it.
    segments = [
        pieces[idx] + (pieces[idx + 1] if idx + 1 < len(pieces) else "")
        for idx in range(0, len(pieces), 2)
    ]
    chunks = []
    buffer = ""
    for segment in segments:
        if len(buffer) + len(segment) <= CHUNK_SIZE:
            buffer += segment
            continue
        if buffer:
            chunks.append(buffer)
        if len(segment) <= CHUNK_SIZE:
            buffer = segment
            continue
        # Oversized segment: pack it sentence by sentence.
        buffer = ""
        for sentence in re.split(r'(?<=[.!?])\s+', segment):
            if len(buffer) + len(sentence) <= CHUNK_SIZE:
                buffer += sentence + " "
            else:
                if buffer:
                    chunks.append(buffer.strip())
                buffer = sentence + " "
    if buffer:
        chunks.append(buffer)
    return [chunk for chunk in chunks if strip_html(chunk).strip()]
def main():
    """Retranslate every published post of one language from its Spanish original.

    The language code is read from ``sys.argv[1]`` and must be a key of
    LANG_CONFIG. For each matching post with ID > 42760 the Spanish source is
    located through the serialized ``post_translations`` term description,
    translated chunk by chunk, repaired with fix_html_structure(), given the
    language footer and written back to ``wp_posts``.

    A chunk whose request fails is kept in Spanish. If *every* chunk request
    fails (e.g. the model server is down) the post is counted as an error and
    left untouched instead of being overwritten with untranslated text.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in LANG_CONFIG:
        print(f"Usage: python3 {sys.argv[0]} [fr|it|pt|en]")
        sys.exit(1)
    lang = sys.argv[1]
    lang_name = LANG_CONFIG[lang]["name"]
    footer = LANG_CONFIG[lang]["footer"]
    # Footers an earlier run may have left behind: the legacy Spanish one plus
    # every configured language footer.
    old_footers = ["<p><em>Traducido con IA</em></p>"] + [
        cfg["footer"] for cfg in LANG_CONFIG.values()
    ]
    done = errors = skipped = 0
    db = pymysql.connect(**DB)
    try:
        c = db.cursor()
        c.execute("""
            SELECT DISTINCT p.ID, p.post_title,
            ttg.description as group_desc
            FROM wp_posts p
            JOIN wp_term_relationships trl ON p.ID=trl.object_id
            JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
            JOIN wp_terms t_lang ON ttl.term_id=t_lang.term_id AND t_lang.slug=%s
            JOIN wp_term_relationships trg ON p.ID=trg.object_id
            JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
            WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
            ORDER BY p.ID
            """, (lang,))
        posts = c.fetchall()
        print(f"Found {len(posts)} {lang_name} posts to retranslate\n", flush=True)
        for n, p in enumerate(posts, 1):
            post_id = p['ID']
            desc = p['group_desc'] or ''
            # The group description is a PHP-serialized map of language -> post ID.
            m = re.search(r's:2:"es";i:(\d+);', desc)
            if not m:
                print(f"[{n}/{len(posts)}] {post_id} — SKIP (no ES original)", flush=True)
                skipped += 1
                continue
            es_id = int(m.group(1))
            c.execute("SELECT post_title, post_content FROM wp_posts WHERE ID=%s", (es_id,))
            es = c.fetchone()
            if not es or not es['post_content']:
                print(f"[{n}/{len(posts)}] {post_id} — SKIP (ES:{es_id} empty)", flush=True)
                skipped += 1
                continue
            es_title = es['post_title'] or ''
            es_content = es['post_content']
            plain_len = len(strip_html(es_content))
            chunks = split_chunks(es_content)
            print(f"\n[{n}/{len(posts)}] WP:{post_id} ← ES:{es_id}{es_title[:50]}", flush=True)
            print(f" {plain_len} chars, {len(chunks)} chunks", flush=True)
            if plain_len < 50:
                print(f" SKIP (too short)", flush=True)
                skipped += 1
                continue
            try:
                t0 = time.time()
                t_title = translate_title(es_title, lang_name)
                translated = []
                chunk_bad = 0
                chunk_failed = 0  # chunks whose request raised (kept in Spanish)
                for i, chunk in enumerate(chunks):
                    try:
                        result = translate_chunk(chunk, lang_name, attempt=0)
                        detected = detect_lang(result, min_len=40)
                        if detected and detected != lang and len(strip_html(result)) >= 40:
                            # Wrong language detected: retry with the stricter prompt.
                            result2 = translate_chunk(chunk, lang_name, attempt=1)
                            detected2 = detect_lang(result2, min_len=40)
                            if detected2 == lang or detected2 is None:
                                result = result2
                            else:
                                chunk_bad += 1
                        translated.append(result)
                    except Exception as e:
                        print(f" chunk {i+1} ERROR: {e}", flush=True)
                        translated.append(chunk)
                        chunk_bad += 1
                        chunk_failed += 1
                if chunks and chunk_failed == len(chunks):
                    # Nothing was translated; do not replace the stored
                    # translation with the Spanish source.
                    raise RuntimeError("all chunks failed; post left untouched")
                t_content = fix_html_structure("\n".join(translated))
                # Remove any old footer variants before adding the correct one
                for old in old_footers:
                    t_content = t_content.replace(old, "")
                t_content = t_content.rstrip() + "\n" + footer
                elapsed = time.time() - t0
                lang_ok = detect_lang(t_content, min_len=80) in (lang, None)
                # NOTE(review): both branches are the empty string as seen here;
                # status markers may have been lost — confirm.
                status = "" if lang_ok else ""
                bad_note = f" ({chunk_bad} chunks bad)" if chunk_bad else ""
                # A fresh connection is used for each write; it is now closed
                # even when the UPDATE or the commit raises.
                db2 = pymysql.connect(**DB)
                try:
                    with db2.cursor() as c2:
                        c2.execute("UPDATE wp_posts SET post_title=%s, post_content=%s WHERE ID=%s",
                                   (t_title, t_content, post_id))
                    db2.commit()
                finally:
                    db2.close()
                print(f" {status} {t_title[:60]} ({elapsed:.0f}s){bad_note}", flush=True)
                done += 1
            except Exception as e:
                print(f" ✗ ERROR: {e}", flush=True)
                errors += 1
    finally:
        db.close()
    print(f"\n{'='*50}")
    print(f"Done: {done} ✓ errors: {errors} ✗ skipped: {skipped}")
    print(f"Total: {len(posts)}")


if __name__ == "__main__":
    main()
+83
View File
@@ -0,0 +1,83 @@
<?php
/**
* Ciclo carta nueva — ROTACIÓN de la "carta de la semana" en TODOS los idiomas.
*
* Rotación (en este orden, para no perder ninguna):
* 1) la que estaba en "semana pasada" -> queda solo en "otras semanas"
* 2) la que estaba en "semana actual" -> pasa a "semana pasada"
* 3) la carta NUEVA -> pasa a "semana actual"
*
* Robusto/autocorrige: la NUEVA es el parámetro CARTA; la "semana pasada" se
* deriva como la carta publicada más reciente que NO es la nueva (por fecha),
* no por quién estuviera en el término (que puede estar roto). Garantiza
* count=1 en "actual" y count=1 en "pasada" por idioma. "Otras semanas" (21)
* es el cajón base que conservan TODAS las cartas.
*
* Términos ES base (se derivan por Polylang a cada idioma):
* actual = 6 (cartasemana) | pasada = 22 (carta-semana-pasada) | otras = 21
*
* Uso: CARTA=<es_id> php rotate_cartas.php (dry-run)
* APPLY=1 CARTA=<es_id> php rotate_cartas.php
*/
// Bootstrap WordPress; FEA_WP_LOAD overrides the default wp-load.php path.
require getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
// Changes are written only when APPLY=1; otherwise the script only reports.
$APPLY = getenv('APPLY') === '1';
// ID of the new letter, taken from the CARTA environment variable (required).
$CARTA = (int)(getenv('CARTA') ?: 0);
if (!$CARTA) { fwrite(STDERR, "Falta CARTA=<es_id>\n"); exit(1); }
// Polylang translations, keyed by language, of the base terms
// 6 ("current week"), 22 ("last week") and 21 ("other weeks").
$actual_terms = pll_get_term_translations(6);
$pasada_terms = pll_get_term_translations(22);
$otras_terms = pll_get_term_translations(21);
// Translations of the new letter, keyed by language.
$carta_tr = pll_get_post_translations($CARTA);
/**
 * Return the IDs of published posts assigned to any of the given category
 * terms, ordered by date, newest first.
 *
 * @param array $terms Category term IDs; falsy entries are dropped.
 * @return array Post IDs.
 */
function cartas_en($terms) { // published posts in any of those terms (same language), by date desc
return get_posts(['post_type'=>'post','post_status'=>'publish','numberposts'=>-1,'fields'=>'ids',
'orderby'=>'date','order'=>'DESC','suppress_filters'=>true,
'tax_query'=>[['taxonomy'=>'category','field'=>'term_id','terms'=>array_values(array_filter($terms))]]]);
}
// Rotate once per language for which the "current week" term has a translation.
foreach ($actual_terms as $lang => $t_actual) {
$t_pasada = (int)($pasada_terms[$lang] ?? 0);
$t_otras = (int)($otras_terms[$lang] ?? 0);
$t_actual = (int)$t_actual;
// Translation of the new letter in this language (0 when there is none).
// NOTE(review): when $new is 0 the most recent letter is still moved to
// "last week" and removed from "current", leaving no current letter for
// this language — confirm whether such languages should be skipped.
$new = (int)($carta_tr[$lang] ?? 0);
// Set of letters for THIS language (the terms are already per-language), by date desc.
$all = cartas_en([$t_actual, $t_pasada, $t_otras]);
// "last week" = the most recent one that is not the new one.
$prev = 0; foreach ($all as $pid) { if ($pid != $new) { $prev = $pid; break; } }
// Posts currently flagged as current/last (small set to clean up).
$flagged = get_posts(['post_type'=>'post','post_status'=>'any','numberposts'=>-1,'fields'=>'ids',
'suppress_filters'=>true,
'tax_query'=>[['taxonomy'=>'category','field'=>'term_id','terms'=>array_values(array_filter([$t_actual,$t_pasada]))]]]);
if ($APPLY) {
// 1) clean up: remove current+last from every post except the two targets.
foreach ($flagged as $pid) {
if ($pid == $new || $pid == $prev) continue;
wp_remove_object_terms($pid, array_values(array_filter([$t_actual,$t_pasada])), 'category');
}
// 2) NEW -> current week (and out of last week). Keep "other weeks".
if ($new) {
wp_set_object_terms($new, [$t_actual], 'category', true);
if ($t_pasada) wp_remove_object_terms($new, [$t_pasada], 'category');
if ($t_otras) wp_set_object_terms($new, [$t_otras], 'category', true);
}
// 3) PREVIOUS -> last week (and out of current week). Keep "other weeks".
if ($prev && $t_pasada) {
wp_set_object_terms($prev, [$t_pasada], 'category', true);
wp_remove_object_terms($prev, [$t_actual], 'category');
if ($t_otras) wp_set_object_terms($prev, [$t_otras], 'category', true);
}
clean_term_cache(array_filter([$t_actual,$t_pasada,$t_otras]), 'category');
}
// Report line (printed in dry-run mode too): flagged posts other than the two targets.
$cleaned = count(array_diff($flagged, [$new, $prev]));
$tn = $new ? get_post($new) : null;
$tp = $prev ? get_post($prev) : null;
printf("%s: actual=#%d «%s» | pasada=#%d «%s» | degradadas a 'otras' %d post(s)%s\n",
strtoupper($lang), $new, $tn?mb_substr($tn->post_title,0,26):'-',
$prev, $tp?mb_substr($tp->post_title,0,26):'-',
$cleaned, $APPLY?'':' [DRY-RUN]');
}
echo $APPLY ? "APLICADO\n" : "DRY-RUN (APPLY=1 para aplicar)\n";
+86
View File
@@ -0,0 +1,86 @@
<?php
/**
* set_search_template.php (#8) — instala un template FSE 'search' con resultados
* COMPACTOS (rejilla de tarjetas título+fecha+extracto), igual que el 'archive'
* (#63), en vez del patrón genérico del tema que muestra los posts «todos seguidos».
*
* Reutiliza las clases fea-archive-grid / fea-archive-card (mismo CSS ya presente).
* Idempotente: crea el wp_template 'search' si no existe, o actualiza su contenido.
*
* Uso (dentro del contenedor / wp-cli):
* wp eval-file scripts/set_search_template.php # DRY-RUN
* APPLY=1 wp eval-file scripts/set_search_template.php # aplica
*/
// Changes are written only when APPLY=1; otherwise this is a dry run.
$apply = getenv('APPLY') === '1';
// Active stylesheet slug; interpolated into the template-part references and
// assigned as the template's wp_theme term.
$theme = get_stylesheet(); // twentytwentyfive
$content = <<<HTML
<!-- wp:template-part {"slug":"header","theme":"{$theme}"} /-->
<!-- wp:group {"tagName":"main","style":{"spacing":{"margin":{"top":"var:preset|spacing|60"}}},"layout":{"type":"constrained"}} -->
<main class="wp-block-group" style="margin-top:var(--wp--preset--spacing--60)"><!-- wp:query-title {"type":"search","align":"wide","fontSize":"x-large"} /-->
<!-- wp:spacer {"height":"var:preset|spacing|40"} -->
<div style="height:var(--wp--preset--spacing--40)" aria-hidden="true" class="wp-block-spacer"></div>
<!-- /wp:spacer -->
<!-- wp:query {"queryId":74,"query":{"perPage":12,"pages":0,"offset":0,"postType":"post","order":"desc","orderBy":"date","author":"","search":"","exclude":[],"sticky":"","inherit":true,"taxQuery":null,"parents":[]},"align":"wide","layout":{"type":"default"}} -->
<div class="wp-block-query alignwide"><!-- wp:group {"layout":{"type":"constrained"}} -->
<div class="wp-block-group"><!-- wp:query-no-results {"align":"wide","fontSize":"medium"} -->
<!-- wp:paragraph -->
<p>Lo siento, no se ha encontrado nada. Por favor, prueba a buscar con otras palabras clave.</p>
<!-- /wp:paragraph -->
<!-- /wp:query-no-results --></div>
<!-- /wp:group -->
<!-- wp:post-template {"align":"wide","className":"fea-archive-grid","style":{"spacing":{"blockGap":"1.5rem"}},"layout":{"type":"grid","columnCount":3}} -->
<!-- wp:group {"className":"fea-archive-card","layout":{"type":"constrained"}} -->
<div class="wp-block-group fea-archive-card"><!-- wp:post-title {"isLink":true,"className":"fea-archive-title","fontSize":"medium"} /-->
<!-- wp:post-date {"isLink":true,"className":"fea-archive-date","fontSize":"small"} /-->
<!-- wp:post-excerpt {"showMoreOnNewLine":false,"excerptLength":22,"className":"fea-archive-excerpt"} /--></div>
<!-- /wp:group -->
<!-- /wp:post-template -->
<!-- wp:spacer {"height":"var:preset|spacing|30"} -->
<div style="height:var(--wp--preset--spacing--30)" aria-hidden="true" class="wp-block-spacer"></div>
<!-- /wp:spacer -->
<!-- wp:group {"align":"full","style":{"spacing":{"margin":{"top":"var:preset|spacing|40","bottom":"var:preset|spacing|40"}}},"layout":{"type":"constrained"}} -->
<div class="wp-block-group alignfull" style="margin-top:var(--wp--preset--spacing--40);margin-bottom:var(--wp--preset--spacing--40)"><!-- wp:query-pagination {"align":"full","style":{"typography":{"fontStyle":"normal","fontWeight":"400"}},"layout":{"type":"flex","justifyContent":"space-between","flexWrap":"wrap"}} -->
<!-- wp:query-pagination-previous /-->
<!-- wp:query-pagination-numbers /-->
<!-- wp:query-pagination-next /-->
<!-- /wp:query-pagination --></div>
<!-- /wp:group --></div>
<!-- /wp:query --></main>
<!-- /wp:group -->
<!-- wp:template-part {"slug":"footer","theme":"{$theme}"} /-->
HTML;
// Look for an existing wp_template post whose slug is 'search'.
// NOTE(review): the lookup is not restricted by wp_theme, so a 'search'
// template belonging to another theme would also match — confirm.
$existing = get_posts(['post_type' => 'wp_template', 'name' => 'search', 'post_status' => 'any', 'posts_per_page' => 1]);
if ($existing) {
// Update path: only post_content is replaced.
$id = $existing[0]->ID;
echo "Template 'search' existe (post $id) → " . ($apply ? "actualizando" : "[dry] actualizaría") . "\n";
if ($apply) wp_update_post(['ID' => $id, 'post_content' => $content]);
} else {
// Create path: insert the template and attach it to the active theme.
echo ($apply ? "Creando" : "[dry] crearía") . " wp_template 'search' (theme $theme)\n";
if ($apply) {
$id = wp_insert_post([
'post_type' => 'wp_template',
'post_status' => 'publish',
'post_name' => 'search',
'post_title' => 'Search Results',
'post_content' => $content,
], true);
// With the second argument true, wp_insert_post returns a WP_Error on failure.
if (is_wp_error($id)) { echo "ERROR: " . $id->get_error_message() . "\n"; return; }
wp_set_object_terms($id, $theme, 'wp_theme');
echo " creado post $id\n";
}
}
// The object cache is flushed in dry-run mode as well.
if (function_exists('wp_cache_flush')) wp_cache_flush();
echo ($apply ? "APLICADO" : "DRY-RUN") . "\n";
+88
View File
@@ -0,0 +1,88 @@
#!/bin/bash
# Automated WordPress setup script
# Fe Adulta - Migration from Joomla
#
# Optional environment overrides (the defaults reproduce the values that were
# previously hard-coded): WP_CONTAINER, WP_URL, WP_ADMIN_USER,
# WP_ADMIN_PASSWORD, WP_ADMIN_EMAIL.
set -euo pipefail

WP_CONTAINER="${WP_CONTAINER:-wordpress-web}"
WP_URL="${WP_URL:-http://localhost:8081}"
WP_ADMIN_USER="${WP_ADMIN_USER:-admin}"
# NOTE(review): the fallback password is committed to the repository; prefer
# exporting WP_ADMIN_PASSWORD for anything other than a throwaway local install.
WP_ADMIN_PASSWORD="${WP_ADMIN_PASSWORD:-FeAdulta2024!}"
WP_ADMIN_EMAIL="${WP_ADMIN_EMAIL:-inma@tyve.es}"

# Run a WP-CLI command inside the WordPress container as root.
wp_cli() {
    docker exec "$WP_CONTAINER" wp "$@" --allow-root
}

echo "🚀 Instalando WordPress..."
# Install WordPress
wp_cli core install \
    --url="$WP_URL" \
    --title="Fe Adulta - Para poner al día la Fe" \
    --admin_user="$WP_ADMIN_USER" \
    --admin_password="$WP_ADMIN_PASSWORD" \
    --admin_email="$WP_ADMIN_EMAIL" \
    --skip-email
echo "✅ WordPress instalado"

# Set the site language to Spanish
echo "🌍 Configurando idioma español..."
wp_cli language core install es_ES --activate
# Set the timezone
wp_cli option update timezone_string "Europe/Madrid"
# Set the permalink structure (important for SEO)
wp_cli rewrite structure '/%postname%/'

echo "📦 Instalando plugins esenciales..."
# Migration plugins
wp_cli plugin install fg-joomla-to-wordpress --activate
# SEO plugins
wp_cli plugin install wordpress-seo --activate
# Cache and optimisation plugins
wp_cli plugin install wp-super-cache
# Security plugins
wp_cli plugin install wordfence
# AdSense plugins
wp_cli plugin install advanced-ads
# Text-to-Speech - several options, installed for evaluation
wp_cli plugin install speech-kit
wp_cli plugin install gspeech
# Improved editor
wp_cli plugin install classic-editor
# WordPress importer
wp_cli plugin install wordpress-importer --activate

echo "🎨 Instalando temas..."
# Lightweight theme optimised for content
wp_cli theme install astra --activate
# Alternative themes to evaluate
wp_cli theme install generatepress
wp_cli theme install kadence

echo "⚙️ Configuraciones finales..."
# Disable comments by default (they can be enabled later)
wp_cli option update default_comment_status "closed"
# Set posts per page
wp_cli option update posts_per_page 20
# Delete the sample content; it may already be gone, so failures are ignored
wp_cli post delete 1 --force || true
wp_cli post delete 2 --force || true
wp_cli comment delete 1 --force || true

echo "✨ WordPress configurado correctamente!"
echo ""
echo "🔑 Credenciales de acceso:"
echo " URL: ${WP_URL}/wp-admin"
echo " Usuario: ${WP_ADMIN_USER}"
echo " Contraseña: ${WP_ADMIN_PASSWORD}"
echo ""
echo "📊 Próximo paso: Accede al panel y revisa la configuración"
+325
View File
@@ -0,0 +1,325 @@
#!/usr/bin/env python3
"""
sync_translations_to_prod.py — Sincroniza contenido local a PROD reutilizando el
texto ya verificado en local.
Tiene dos modos:
1. Legado: sincroniza traducciones automáticas (`traduccion_origen`) suponiendo que
el post ES origen ya existe en prod con el mismo ID.
2. IDs preservados: clona posts locales a prod con ID explícito, copiando contenido,
slug, metas y categorías, y después reconstruye los grupos Polylang exactos.
El modo 2 es el que usa el handoff de la carta 46956 para evitar romper la
coincidencia local↔prod cuando prod va por detrás.
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import time
from pathlib import Path
# ── Config ───────────────────────────────────────────────────────────────────
# Local Docker containers and database credentials (each overridable via environment).
WP_CONTAINER = os.environ.get("FEA_WP_CONTAINER", "wordpress-web")
DB_CONTAINER = os.environ.get("FEA_DB_CONTAINER", "wordpress-mysql")
DB_NAME = os.environ.get("FEA_DB_NAME", "wordpress_db")
DB_USER = os.environ.get("FEA_DB_USER", "wordpress_user")
DB_PASS = os.environ.get("FEA_DB_PASS", "wordpress_pass")
# Production SSH target and the wp-load.php path handed to the helper there.
PROD_HOST = os.environ.get("FEA_PROD_HOST", "feadulta@134.0.10.170")
# NOTE(review): a real-looking production password is committed as the fallback
# default — consider requiring FEA_PROD_PASS and rotating this credential.
PROD_PASS = os.environ.get("FEA_PROD_PASS", "C6c2A!mAl3Wj.BQF")
PROD_WPLOAD = os.environ.get("FEA_PROD_WPLOAD", "/web/wp-nuevo/wp-load.php")
# The PHP helper next to this script is copied to these paths before first use.
PROD_HELPER = "/tmp/fea_translate_helper.php"
HELPER_SRC = Path(__file__).resolve().parent / "fea_translate_helper.php"
LOCAL_HELPER_DST = "/tmp/fea_translate_helper.php"
# Resume state and log file used by this script.
STATE_FILE = Path(os.environ.get("FEA_SYNC_STATE", "/tmp/feadulta-sync-state.json"))
LOG_FILE = Path(os.environ.get("FEA_SYNC_LOG", "/tmp/feadulta-sync.log"))
# Status argument passed to the helper's create/clone subcommands.
STATUS = os.environ.get("FEA_SYNC_STATUS", "draft")
# Absolute URLs of the local environment that must NOT reach prod (issue #91): the
# local post_content carries the Tailscale host with the /fea prefix; in prod the
# installation hangs off the root. They are rewritten on deploy so that no broken
# links are left behind (Tailscale is unreachable for visitors).
LOCAL_BASE = os.environ.get("FEA_LOCAL_BASE", "https://farmer.taild3aaf6.ts.net/fea")
PROD_BASE = os.environ.get("FEA_PROD_BASE", "https://wp-nuevo.feadulta.com")
def log(msg: str) -> None:
    """Print a timestamped line and append it to LOG_FILE.

    Logging to the file is best effort: an OSError while opening or writing
    is ignored so a read-only or missing log location never aborts a sync.
    """
    line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(line, flush=True)
    try:
        # The context manager closes the handle deterministically; the
        # previous form left that to garbage collection.
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError:
        pass
def sh(cmd: list[str], *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *cmd* without a shell and return its standard output as text.

    *stdin* is fed to the process when given. A non-zero exit status raises
    RuntimeError carrying the exit code, the first three words of the command
    and up to 400 characters of stderr.
    """
    proc = subprocess.run(cmd, input=stdin, capture_output=True, text=True, timeout=timeout)
    if proc.returncode == 0:
        return proc.stdout
    head = ' '.join(cmd[:3])
    detail = proc.stderr.strip()[:400]
    raise RuntimeError(f"cmd falló ({proc.returncode}): {head}\n{detail}")
def parse_csv_ints(raw: str) -> list[int]:
    """Parse a comma-separated string into a list of non-negative ints.

    Items are stripped of surrounding whitespace; anything that is not made
    of digits only (empty items, signs, words) is silently dropped.
    """
    tokens = (piece.strip() for piece in raw.split(","))
    return [int(token) for token in tokens if token.isdigit()]
def localize_urls(text: str | None) -> tuple[str, int]:
    """Rewrite absolute local URLs to their prod form before uploading content.

    Every occurrence of LOCAL_BASE is replaced by PROD_BASE at the source,
    so the content already reaches prod with correct links (issue #91).

    Returns ``(text, number_of_replacements)``; falsy *text* becomes ``""``.
    """
    if not (text and LOCAL_BASE):
        return text or "", 0
    hits = text.count(LOCAL_BASE)
    if hits == 0:
        return text, 0
    return text.replace(LOCAL_BASE, PROD_BASE), hits
# ── Local ────────────────────────────────────────────────────────────────────
_local_ready = False


def local_helper(subcmd: str, *args: str) -> str:
    """Run the PHP helper inside the local WordPress container.

    The helper script is copied into the container on the first call only.
    Returns the helper's standard output.
    """
    global _local_ready
    if not _local_ready:
        copy_cmd = ["docker", "cp", str(HELPER_SRC), f"{WP_CONTAINER}:{LOCAL_HELPER_DST}"]
        sh(copy_cmd)
        _local_ready = True
    run_cmd = ["docker", "exec", "-i", WP_CONTAINER, "php", LOCAL_HELPER_DST, subcmd]
    run_cmd.extend(args)
    return sh(run_cmd, timeout=180)
def local_read(post_id: int) -> dict:
    """Return the decoded JSON output of the local helper's ``read`` subcommand."""
    raw = local_helper("read", str(post_id))
    return json.loads(raw)
def local_read_full(post_id: int) -> dict:
    """Return the decoded JSON output of the local helper's ``read_full`` subcommand."""
    raw = local_helper("read_full", str(post_id))
    return json.loads(raw)
def local_translation_pairs() -> list[tuple[int, int]]:
    """List ``(post_id, origin_id)`` pairs from the ``traduccion_origen`` meta.

    The local database is queried through the mysql client in the DB
    container; rows that are not two numeric, tab-separated fields are ignored.
    """
    query = ("SELECT post_id, meta_value FROM wp_postmeta "
             "WHERE meta_key='traduccion_origen' ORDER BY CAST(meta_value AS UNSIGNED), post_id;")
    mysql_cmd = ["docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
                 DB_NAME, "-N", "-e", query]
    rows = (line.split("\t") for line in sh(mysql_cmd).splitlines())
    return [
        (int(row[0]), int(row[1]))
        for row in rows
        if len(row) == 2 and row[0].isdigit() and row[1].isdigit()
    ]
def carta_article_ids(carta_id: int) -> list[int]:
    """Return the IDs of local posts whose ``_carta_id`` meta equals *carta_id*."""
    query = ("SELECT post_id FROM wp_postmeta "
             f"WHERE meta_key='_carta_id' AND meta_value='{carta_id}' ORDER BY post_id;")
    mysql_cmd = ["docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
                 DB_NAME, "-N", "-e", query]
    tokens = sh(mysql_cmd).split()
    return [int(token) for token in tokens if token.isdigit()]
def collect_related_posts(seed_ids: list[int]) -> tuple[dict[int, dict], list[dict[str, int]]]:
    """Read the seed posts plus every post in their translation groups.

    Returns ``(posts, groups)``: *posts* maps post ID to its ``read_full``
    record, *groups* is the de-duplicated list of ``{lang: post_id}``
    translation groups. A seed without a usable group forms a single-member
    group under its own language (``"es"`` when unknown).
    """
    posts: dict[int, dict] = {}
    groups: dict[tuple[tuple[str, int], ...], dict[str, int]] = {}
    for seed in seed_ids:
        info = local_read_full(seed)
        posts[seed] = info
        group: dict[str, int] = {}
        for lang, pid in (info.get("translations") or {}).items():
            if str(pid).isdigit():
                group[lang] = int(pid)
        if not group:
            group = {info.get("lang") or "es": seed}
        # The sorted item tuple identifies a group regardless of key order.
        groups[tuple(sorted(group.items()))] = group
    member_ids: set[int] = set()
    for members in groups.values():
        member_ids.update(members.values())
    for pid in sorted(member_ids):
        if pid not in posts:
            posts[pid] = local_read_full(pid)
    return posts, list(groups.values())
# ── Prod ─────────────────────────────────────────────────────────────────────
_prod_ready = False


def _ssh(remote_cmd: str, *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run *remote_cmd* on PROD_HOST over ssh (password auth via sshpass).

    Returns the remote command's standard output; failures surface as the
    RuntimeError raised by sh().
    """
    # Hand the password to sshpass through $SSHPASS (-e) instead of "-p PASS":
    # a command-line password is visible in the process list and ended up in
    # sh()'s error text, and from there in the sync log and state file.
    os.environ["SSHPASS"] = PROD_PASS
    cmd = ["sshpass", "-e", "ssh", "-o", "StrictHostKeyChecking=accept-new",
           "-o", "ConnectTimeout=20", PROD_HOST, remote_cmd]
    return sh(cmd, stdin=stdin, timeout=timeout)
def prod_helper(subcmd: str, *args: str, stdin: str | None = None) -> str:
    """Run the PHP helper on prod and return its standard output.

    The helper source is uploaded to PROD_HELPER on the first call only.
    *stdin* is forwarded to the helper process.
    """
    global _prod_ready
    import shlex  # local import: keeps this fix independent of the file's import block

    if not _prod_ready:
        _ssh(f"cat > {shlex.quote(PROD_HELPER)}", stdin=HELPER_SRC.read_text(encoding="utf-8"))
        _prod_ready = True
    # Every word is quoted for the remote shell, so values containing spaces
    # or metacharacters (and empty arguments) reach the helper intact.
    words = [
        f"FEA_WP_LOAD={shlex.quote(PROD_WPLOAD)}",
        "php",
        shlex.quote(PROD_HELPER),
        shlex.quote(subcmd),
        *(shlex.quote(arg) for arg in args),
    ]
    return _ssh(" ".join(words), stdin=stdin, timeout=180)
def prod_create(origin: int, lang: str, title: str, content: str) -> int:
    """Create a translation of *origin* on prod through the helper's ``create``.

    Local URLs in *content* are rewritten first. Returns the integer printed
    by the helper.
    """
    content, replaced = localize_urls(content)
    if replaced:
        log(f" localize origin={origin} [{lang}]: {replaced} URL(s) Tailscale→prod")
    body = {"title": title, "content": content, "model": "google/gemma-4-e4b (sync)"}
    reply = prod_helper("create", str(origin), lang, STATUS, stdin=json.dumps(body))
    return int(reply.strip())
def prod_clone(post: dict) -> int:
    """Clone a local post record to prod through the helper's ``clone``.

    Local URLs in the content and excerpt are rewritten first. Returns the
    integer printed by the helper.
    """
    content, content_hits = localize_urls(post.get("content", ""))
    excerpt, excerpt_hits = localize_urls(post.get("excerpt", ""))
    if content_hits or excerpt_hits:
        log(f" localize #{post['id']} [{post.get('lang','?')}]: {content_hits + excerpt_hits} URL(s) Tailscale→prod")
    payload = {"title": post["title"], "content": content, "excerpt": excerpt}
    # Remaining fields are copied as-is, with these fallbacks when absent.
    optional_fields = (
        ("slug", ""),
        ("type", "post"),
        ("author", 1),
        ("date", None),
        ("date_gmt", None),
        ("status", None),
        ("cats", []),
        ("cat_slugs", []),
        ("meta", {}),
    )
    for field, fallback in optional_fields:
        payload[field] = post.get(field, fallback)
    reply = prod_helper("clone", str(post["id"]), post["lang"], STATUS, stdin=json.dumps(payload))
    return int(reply.strip())
def prod_save_group(group: dict[str, int]) -> dict[str, int]:
    """Send a translation group to the helper's ``save_translations`` on prod.

    Returns the helper's reply decoded from JSON.
    """
    request_body = json.dumps({"translations": group})
    reply = prod_helper("save_translations", stdin=request_body)
    return json.loads(reply.strip())
# ── Estado ───────────────────────────────────────────────────────────────────
def load_state() -> dict:
    """Load the resume state from STATE_FILE.

    Always returns a dict containing the ``"done"`` and ``"errors"`` keys.
    A missing file, unparsable JSON or a non-object document yields a fresh
    state; a file lacking one of the keys has it filled in, so callers can
    index both keys without a KeyError.
    """
    state: dict = {}
    if STATE_FILE.exists():
        try:
            loaded = json.loads(STATE_FILE.read_text())
        except json.JSONDecodeError:
            loaded = None
        if isinstance(loaded, dict):
            state = loaded
    state.setdefault("done", {})
    state.setdefault("errors", {})
    return state
def save_state(state: dict) -> None:
    """Write *state* to STATE_FILE as indented JSON, keeping non-ASCII text as-is."""
    serialized = json.dumps(state, ensure_ascii=False, indent=2)
    STATE_FILE.write_text(serialized)
# ── Modo IDs preservados ─────────────────────────────────────────────────────
def deploy_fixed_ids(seed_ids: list[int], *, keep_existing: set[int], dry_run: bool) -> int:
    """Clone the seed posts and their translation groups to prod, then link them.

    Posts listed in *keep_existing* are not cloned. Spanish posts are cloned
    first, then the rest, each in ascending ID order. With *dry_run* the
    plan is only logged. Returns 0.
    """
    posts, groups = collect_related_posts(seed_ids)

    def _es_first(pid: int) -> tuple[int, int]:
        return (0 if posts[pid].get("lang") == "es" else 1, pid)

    clone_ids = sorted((pid for pid in posts if pid not in keep_existing), key=_es_first)
    log(f"Plan IDs preservados: seeds={seed_ids} clone={len(clone_ids)} grupos={len(groups)} status={STATUS}")
    if keep_existing:
        log(f"IDs marcados como ya existentes en prod: {sorted(keep_existing)}")

    if dry_run:
        for pid in clone_ids:
            record = posts[pid]
            log(f" CLONE #{pid} [{record.get('lang','?')}] slug={record.get('slug','')} cats={len(record.get('cat_slugs', []))}")
        for group in groups:
            log(f" GROUP {group}")
        return 0

    for pid in clone_ids:
        record = posts[pid]
        new_id = prod_clone(record)
        log(f" clone #{pid} [{record.get('lang','?')}] → prod #{new_id} «{record['title'][:45]}»")
    for group in groups:
        linked = prod_save_group(group)
        log(f" group enlazado {linked}")
    log("FIN sync IDs preservados.")
    return 0
# ── Main legado ──────────────────────────────────────────────────────────────
def legacy_sync(limit: int, origin: int) -> int:
    """Legacy mode: create on prod every local automatic translation not yet synced.

    *limit* caps the number of processed translations (successes plus
    errors; 0 means no cap). *origin* restricts the run to translations of
    that Spanish post (0 means all). Progress is kept in the state file
    under ``"<origin>:<lang>"`` keys, so a rerun skips finished items.
    Returns 0.
    """
    state = load_state()
    pairs = local_translation_pairs()
    if origin:
        pairs = [p for p in pairs if p[1] == origin]
    log(f"Traducciones locales a sincronizar: {len(pairs)} (status={STATUS})")
    n_ok = n_skip = n_err = 0
    for tid, src_origin in pairs:
        if limit and (n_ok + n_err) >= limit:
            break
        try:
            t = local_read(tid)
        except Exception as exc:  # noqa: BLE001
            log(f" local read #{tid} ERROR: {exc}")
            n_err += 1
            continue
        lang = t.get("lang", "")
        if lang in ("", "es"):
            continue
        key = f"{src_origin}:{lang}"
        if key in state["done"]:
            n_skip += 1
            continue
        try:
            new_id = prod_create(src_origin, lang, t["title"], t["content"])
            state["done"][key] = new_id
            # A failure recorded by an earlier run is stale once the retry
            # succeeds; drop it so the state file reflects reality.
            state["errors"].pop(key, None)
            save_state(state)
            n_ok += 1
            log(f" {key} → prod #{new_id} «{t['title'][:45]}»")
        except Exception as exc:  # noqa: BLE001
            state["errors"][key] = str(exc)[:300]
            save_state(state)
            n_err += 1
            log(f" {key} ERROR: {exc}")
    save_state(state)
    log(f"FIN sync legado. nuevos={n_ok} saltados={n_skip} errores={n_err}. Estado: {STATE_FILE}")
    log("Recuerda en prod: ejecutar remap_translation_cats.php si alguna quedó sin categoría traducida.")
    return 0
def main() -> int:
    """Parse the command line and dispatch to the matching sync mode.

    ``--carta`` or ``--ids`` select the preserved-IDs mode; without seed
    posts the legacy translation sync runs. Returns the exit status.
    """
    parser = argparse.ArgumentParser(description="Sincroniza contenido local→prod reutilizando el texto local.")
    parser.add_argument("--limit", type=int, default=0, help="Modo legado: máximo de traducciones a sincronizar.")
    parser.add_argument("--origin", type=int, default=0, help="Modo legado: solo traducciones de este ES.")
    parser.add_argument("--carta", type=int, default=0, help="Modo IDs preservados: carta ES y todo su cluster.")
    parser.add_argument("--ids", default="", help="Modo IDs preservados: lista CSV de posts semilla a clonar/enlazar.")
    parser.add_argument("--keep-existing", default="", help="IDs que ya existen en prod y no deben clonarse.")
    parser.add_argument("--dry-run", action="store_true", help="Solo muestra el plan; no toca prod.")
    opts = parser.parse_args()

    if opts.carta:
        seeds = [opts.carta, *carta_article_ids(opts.carta)]
    elif opts.ids:
        seeds = parse_csv_ints(opts.ids)
    else:
        seeds = []

    if not seeds:
        return legacy_sync(opts.limit, opts.origin)
    already_on_prod = set(parse_csv_ints(opts.keep_existing))
    return deploy_fixed_ids(seeds, keep_existing=already_on_prod, dry_run=opts.dry_run)


if __name__ == "__main__":
    raise SystemExit(main())
+178
View File
@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
test_5articles.py
Translates 5 specific articles ES→EN using chunk approach.
Prints per-chunk results so we can verify quality before full batch.
"""
import pymysql, json, re, html, urllib.request, time
from langdetect import detect, LangDetectException, DetectorFactory
DetectorFactory.seed = 0
# Chat-completions endpoint and model name that call_jan() sends requests to.
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
# Keyword arguments for pymysql.connect(); rows come back as dicts (DictCursor).
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
# Posts exercised by this test run, as (English post ID, Spanish source ID)
# pairs; the trailing comments give the approximate source length.
# (wp_id_EN, es_id)
TEST_POSTS = [
(43127, 42557), # ~3k chars
(43132, 42547), # ~4k chars
(43114, 42570), # ~4k chars
(43139, 42536), # ~5k chars
(42987, 42535), # ~15k chars
]
# Maximum number of characters split_chunks() packs into one chunk.
CHUNK_SIZE = 800
# Notice appended to the translated content when it is not already present.
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
def strip_html(text):
    """Reduce *text* to plain text: no tags, decoded entities, single spaces.

    Falsy input yields ''.
    """
    if not text:
        return ''
    tagless = re.sub(r'<[^>]+>', ' ', text)
    unescaped = html.unescape(tagless)
    single_spaced = re.sub(r'\s+', ' ', unescaped)
    return single_spaced.strip()
def detect_lang(text, min_len=40):
    """Detect the language of the first 400 plain-text characters of *text*.

    Returns the code reported by langdetect's ``detect``, or None when the
    sample is shorter than *min_len* characters or detection fails.
    """
    sample = strip_html(text)[:400].strip()
    if len(sample) < min_len:
        return None
    try:
        return detect(sample)
    except LangDetectException:
        # Only detection failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt/SystemExit.
        return None
def call_jan(messages, max_tokens=1200, temperature=0.2, timeout=120):
    """POST a chat-completion request to JAN_URL and return the reply text.

    The content of the first choice is returned, stripped of surrounding
    whitespace. Network and parsing errors propagate to the caller.
    """
    body = {
        "model": JAN_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    request = urllib.request.Request(
        JAN_URL,
        data=json.dumps(body).encode("utf-8"),
        headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        reply = json.loads(response.read())
    return reply["choices"][0]["message"]["content"].strip()
def translate_chunk(chunk, attempt=0):
    """Translate one Spanish HTML chunk into English via call_jan().

    *attempt* picks the system prompt (0 = default, 1 or more = the stricter
    wording). On the first attempt only, a chunk with under 40 plain-text
    characters whose output equals its input is retried once with the
    stricter prompt.
    """
    system_prompts = [
        "You are a professional translator. Translate the following Spanish text to English. Preserve all HTML tags exactly. Return ONLY the translated text, no preamble.",
        "Translate from Spanish to English. Your response must be entirely in English. Preserve HTML tags. Return ONLY the translation.",
    ]
    chosen = system_prompts[min(attempt, len(system_prompts)-1)]
    result = call_jan([
        {"role": "system", "content": chosen},
        {"role": "user", "content": chunk},
    ])
    if attempt != 0:
        return result
    source_plain = strip_html(chunk).strip().lower()
    if len(source_plain) >= 40:
        return result
    if source_plain != strip_html(result).strip().lower():
        return result
    # Very short chunk came back unchanged: the model did not translate it.
    return translate_chunk(chunk, attempt=1)
def split_chunks(content):
    """Split HTML *content* into chunks of at most about CHUNK_SIZE characters.

    The text is cut after closing </p>, </li>, </hN> and </blockquote> tags
    and the segments are packed greedily; a single segment longer than
    CHUNK_SIZE becomes a chunk of its own. Chunks with no visible text are
    dropped.
    """
    pieces = re.split(r'(</p>|</li>|</h[1-6]>|</blockquote>)', content)
    # With a capturing group, re.split alternates text and closing tag:
    # glue each text piece back to the tag that follows it.
    segments = [
        pieces[idx] + (pieces[idx + 1] if idx + 1 < len(pieces) else "")
        for idx in range(0, len(pieces), 2)
    ]
    chunks = []
    buffer = ""
    for segment in segments:
        if len(buffer) + len(segment) <= CHUNK_SIZE:
            buffer += segment
            continue
        if buffer:
            chunks.append(buffer)
        buffer = segment
    if buffer:
        chunks.append(buffer)
    return [chunk for chunk in chunks if strip_html(chunk).strip()]
def main():
# Test-batch driver. For every (wp_en_id, es_id) pair in TEST_POSTS it reads
# the Spanish post es_id, translates its title and content to English through
# the Jan helpers, and overwrites post_title/post_content of post wp_en_id.
# Each post is committed individually.
db = pymysql.connect(**DB)
c = db.cursor()
for wp_en_id, es_id in TEST_POSTS:
c.execute("SELECT post_title, post_content FROM wp_posts WHERE ID=%s", (es_id,))
es = c.fetchone()
if not es:
print(f"\n[SKIP] ES:{es_id} not found"); continue
# Rows are read by column name, so DB presumably configures a dict cursor
# (DB is defined outside this view) — TODO confirm.
es_title = es['post_title'] or ''
es_content = es['post_content'] or ''
chunks = split_chunks(es_content)
plain_len = len(strip_html(es_content))
print(f"\n{'='*60}")
print(f"WP:{wp_en_id} ← ES:{es_id}")
print(f"Title: {es_title[:60]}")
print(f"Content: {plain_len} chars, {len(chunks)} chunks")
print(f"{'='*60}")
# Translate title
# On any failure the Spanish title is kept as the title to save.
try:
t0 = time.time()
t_title = call_jan([
{"role": "system", "content": "You are a translator. Respond ONLY with the translated text."},
{"role": "user", "content": f"Translate from Spanish to English, ALL CAPS:\n\n{es_title}"}
], max_tokens=120, temperature=0.1, timeout=30)
t_title = t_title.strip().strip('"').strip("'")
print(f"Title [{detect_lang(t_title) or '?'}]: {t_title[:70]} ({time.time()-t0:.0f}s)")
except Exception as e:
t_title = es_title
print(f"Title ERROR: {e}")
# Translate chunks
# ok/bad count chunks judged fine vs. chunks kept despite a problem.
translated = []
ok = bad = 0
for i, chunk in enumerate(chunks):
try:
t0 = time.time()
result = translate_chunk(chunk, attempt=0)
# '?' stands for "language could not be detected" (detect_lang gave None).
lang = detect_lang(result) or '?'
# A detected non-English result longer than 40 plain-text characters
# triggers one retry with the stricter prompt.
if lang not in ('en', None, '?') and len(strip_html(result)) > 40:
# Retry
result2 = translate_chunk(chunk, attempt=1)
lang2 = detect_lang(result2) or '?'
if lang2 == 'en' or lang2 in ('?', None):
result, lang = result2, lang2
print(f" chunk {i+1}/{len(chunks)} [retry→{lang}] {time.time()-t0:.0f}s ✓")
else:
print(f" chunk {i+1}/{len(chunks)} [STILL {lang2}] {time.time()-t0:.0f}s ⚠ — keeping anyway")
bad += 1
else:
print(f" chunk {i+1}/{len(chunks)} [{lang}] {time.time()-t0:.0f}s ✓")
ok += 1
translated.append(result)
except Exception as e:
print(f" chunk {i+1}/{len(chunks)} ERROR: {e}")
# A failed chunk is saved untranslated (original Spanish HTML).
translated.append(chunk) # keep original
bad += 1
t_content = "\n".join(translated)
# Append the "translated with AI" footer only if it is not already present.
if AI_FOOTER.strip() not in t_content:
t_content += AI_FOOTER
# Save to DB
c.execute("UPDATE wp_posts SET post_title=%s, post_content=%s WHERE ID=%s",
(t_title, t_content, wp_en_id))
db.commit()
# With no chunks the ratio is reported as 100%.
ratio = ok / len(chunks) if chunks else 1.0
print(f" → Saved. {ok}/{len(chunks)} chunks ok ({ratio:.0%})")
print(f" → Check: http://localhost:8081/?p={wp_en_id}")
db.close()
print(f"\n{'='*60}")
# NOTE(review): the message hard-codes "5 posts"; the length of TEST_POSTS is
# not visible here — confirm they agree.
print("Done. Review the 5 posts in WP admin before running full batch.")
print("URLs to check:")
for wp_en_id, _ in TEST_POSTS:
print(f" http://localhost:8081/?p={wp_en_id}")
if __name__ == "__main__":
main()
+38
View File
@@ -0,0 +1,38 @@
<?php
/*
 * Creates the missing EN/FR/IT/PT translations of the structural content
 * categories and links them to their Spanish term through Polylang.
 *
 * $D maps es_term_id => [es, en, fr, it, pt] names. Index 0 is the Spanish
 * name (used only in the report line); indexes 1..4 follow the language
 * order en, fr, it, pt.
 */
$D = [
    410  => ['Nuevo Testamento', 'New Testament','Nouveau Testament','Nuovo Testamento','Novo Testamento'],
    411  => ['Antiguo Testamento','Old Testament','Ancien Testament','Antico Testamento','Antigo Testamento'],
    49   => ['Adviento y Navidad','Advent and Christmas','Avent et Noël','Avvento e Natale','Advento e Natal'],
    12   => ['In memoriam','In Memoriam','In Memoriam','In Memoriam','In Memoriam'],
    1651 => ['Noticias','News','Actualités','Notizie','Notícias'],
    61   => ['Comunidades cristianas','Christian Communities','Communautés chrétiennes','Comunità cristiane','Comunidades cristãs'],
    23   => ['Cartas que nos llegan','Letters We Receive','Lettres reçues','Lettere che riceviamo','Cartas que recebemos'],
    39   => ['Temas','Topics','Thèmes','Temi','Temas'],
    27   => ['Índice cronológico','Chronological Index','Index chronologique','Indice cronologico','Índice cronológico'],
    63   => ['EFFA','EFFA','EFFA','EFFA','EFFA'],
];
$LangSlug = ['en'=>'en','fr'=>'fr','it'=>'it','pt'=>'pt'];
$created = 0;

foreach ($D as $es_id => $names) {
    $es_term = get_term($es_id, 'category');
    if (!$es_term || is_wp_error($es_term)) {
        echo "skip $es_id (no existe)\n";
        continue;
    }

    // Start from whatever translation group Polylang already knows about.
    $existing = pll_get_term_translations($es_id);
    $group = $existing;

    // $pos is 0..3 for en, fr, it, pt; the matching name sits one slot later
    // in $names because slot 0 holds the Spanish name.
    foreach (['en','fr','it','pt'] as $pos => $L) {
        if (!empty($existing[$L])) {
            $group[$L] = $existing[$L];
            continue;
        }
        $name = $names[$pos + 1];
        $slug = sanitize_title($name).'-'.$L;
        $res = wp_insert_term($name, 'category', ['slug'=>$slug]);
        if (is_wp_error($res)) {
            echo " $es_id $L ERROR: ".$res->get_error_message()."\n";
            continue;
        }
        $tid = $res['term_id'];
        pll_set_term_language($tid, $L);
        $group[$L] = $tid;
        $created++;
    }

    pll_save_term_translations($group);

    // Report line: "lang:term_id" pairs for the whole group.
    $pairs = [];
    foreach ($group as $k => $v) {
        $pairs[] = $k.":".$v;
    }
    echo "#$es_id ".$names[0]."".implode(",", $pairs)."\n";
}
echo "términos traducidos creados: $created\n";
+401
View File
@@ -0,0 +1,401 @@
#!/usr/bin/env python3
"""
translate_cartas.py
Traduce artículos españoles de las cartas semanales listadas en SPANISH_IDS usando Jan (Gemma 12B).
Crea los posts traducidos en WordPress local (Docker) y los vincula con Polylang.
Uso:
1. Arranca Jan con Gemma 12B
2. python3 translate_cartas.py --check-api # verifica conexión a Jan
3. python3 translate_cartas.py --dry-run # muestra qué se traduciría
4. python3 translate_cartas.py # traduce todo
5. python3 translate_cartas.py --lang en # solo un idioma
6. python3 translate_cartas.py --id 42579 # solo un artículo
"""
import subprocess
import json
import re
import sys
import time
import argparse
import pymysql
# ── Configuración ─────────────────────────────────────────────────────────────
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB_HOST = "172.18.0.2"
DB_PORT = 3306
DB_NAME = "wordpress_db"
DB_USER = "wordpress_user"
DB_PASS = "wordpress_pass"
WP_CONTAINER = "wordpress-web"
TARGET_LANGS = {
"en": "English",
"fr": "French",
"it": "Italian",
"pt": "Portuguese",
}
# IDs de artículos en español de todas las cartas de 2026
# (26899 = 42k chars está en la lista, pero se salta al superar MAX_CONTENT_LEN)
SPANISH_IDS = [
# Carta 2026-03-05 (Agua Viva) — las 2 últimas ya traducidas, se saltarán automáticamente
42732, 42731, 42730, 42729, 42728, 42727, 42726, 42590,
42579, 42578, 42577, 42576, 42575, 42574, 42573, 42572, 42571,
42570, 42569, 42568, 42567, 42566, 42565, 42564, 42563, 42562,
42561, 42560, 42559, 42558, 42557, 42556,
# Carta 2026-02-26 (¿Creemos en el evangelio?)
42594, 42555, 42554, 42553, 42552, 42551, 42550, 42549, 42548, 42547,
42546, 42545, 42544, 42543, 42542, 42541, 42540, 42539, 42538,
42537, 42536, 42535, 42534, 42533, 42532, 42531, 42530, 42529,
42528, 42527, 42526, 42525, 42524, 42523,
# Carta 2026-02-19 (Seres limitados)
42589, 42517, 42516, 42515, 42514, 42513, 42512, 42511,
42510, 42509, 42508, 42507, 42506, 42518, 42505, 42504, 42503,
42502, 42501,
# Carta 2026-02-12 (Más allá de la ley)
42588, 42500, 42499, 42498, 42497, 42496, 42495, 42490,
42489, 42488, 42487, 42486, 42485, 42484, 42587, 42478,
# Carta 2026-02-05 (Ser sal, ser luz)
42477, 42476, 42475, 42474, 42473, 42472, 42471, 42470,
42469, 42468, 42467, 42466, 42465, 42464, 42586, 42479,
# Carta 2026-01-29 (Bienaventurados)
42459, 42458, 42457, 42456, 42455, 42454, 42453, 42452,
42451, 42585, 42450, 42463, 42462, 42461, 42460, 42445, 42444,
# Carta 2026-01-22 (Nuevos caminos)
42584, 42443, 42442, 42441, 42440, 42439, 42438, 42437,
42436, 42431, 42430, 42429, 42428, 42427, 42426, 42425, 42424,
# Carta 2026-01-15 (La ley del Oeste)
26899, # 42k chars — se saltará por tamaño
26898, 26897, 26896, 26895, 26894, 26893, 26892,
26714, 26713, 26712, 26711, 26710, 26717, 26887, 26716, 26886, 26715,
# Carta 2026-01-08 (Hakuna / Avivando ilusiones)
26885, 26884, 26883, 26882, 26881, 26880, 26875, 26708,
26707, 26706, 26705, 26704, 26703, 26702, 26874, 26873,
26872, 26871, 26870, 26869, 26868, 26867, 26866, 26865,
# Carta 2026-01-01
26864, 26863, 26862, 26861, 26860, 26859, 26858, 26857,
26856, 26855, 26709,
]
# Tamaño máximo de contenido para traducción automática (chars)
MAX_CONTENT_LEN = 35000
AI_FOOTER = "\n<p><em>Traducido con IA</em></p>"
# ── Detectar modelo Jan ───────────────────────────────────────────────────────
def get_jan_model():
    """Return the id of the first model listed by the Jan server.

    The models endpoint is derived from JAN_URL. On any failure a hint is
    printed and the process exits with status 1. If the server answers with
    an empty model list, the fallback name ``"gemma"`` is returned.
    """
    import urllib.request

    models_url = JAN_URL.replace("/chat/completions", "/models")
    try:
        request = urllib.request.Request(models_url, headers={"Authorization": "Bearer dummy"})
        with urllib.request.urlopen(request, timeout=5) as response:
            listing = json.loads(response.read())
        available = listing.get("data", [])
        if available:
            return available[0]["id"]
    except Exception as e:
        for message in (
            f"ERROR: No se puede conectar a Jan en {JAN_URL}",
            f" {e}",
            " Asegúrate de que Jan está corriendo con Gemma 12B cargado.",
        ):
            print(message)
        sys.exit(1)
    return "gemma"
# ── Traducción via Jan ────────────────────────────────────────────────────────
def translate(title, content, lang_code, lang_name):
# Translate one article (title + HTML content) from Spanish to lang_name with
# a single chat-completion request to JAN_URL, followed by up to two targeted
# retries (title only / content only).
#   title, content: Spanish title and HTML body.
#   lang_code:      key into the few-shot table ("en", "fr", "it", "pt").
#   lang_name:      language name interpolated into the prompts.
# Returns (translated_title, translated_content).
# Raises RuntimeError when urllib reports a URLError; other exceptions
# (JSON decoding, missing keys) propagate unchanged.
import urllib.request, urllib.error
# Few-shot examples from existing human translations (Pagola) to guide style
few_shot = {
"en": [
("NO SABEMOS SABOREAR LA FE", "WE DON'T KNOW HOW TO SAVOR FAITH"),
("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "LISTENING TO JESUS IN TODAY'S SOCIETY"),
("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FAITHFUL TO JESUS IN TEMPTATIONS"),
],
"fr": [
("NO SABEMOS SABOREAR LA FE", "NOUS NE SAVONS PAS APPRÉCIER LA FOI"),
("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ÉCOUTER JÉSUS DANS LA SOCIÉTÉ ACTUELLE"),
("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIDÈLES À JÉSUS AU MILIEU DES TENTATIONS"),
],
"it": [
("NO SABEMOS SABOREAR LA FE", "NON SAPPIAMO ASSAPORARE LA FEDE"),
("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "ASCOLTARE GESÙ NELLA SOCIETÀ ATTUALE"),
("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FEDELI A GESÙ NELLE TENTAZIONI"),
],
"pt": [
("NO SABEMOS SABOREAR LA FE", "NÃO SABEMOS SABOREAR A FÉ"),
("ESCUCHAR A JESÚS EN LA SOCIEDAD ACTUAL", "OUVIR JESUS NA SOCIEDADE ATUAL"),
("FIELES A JESÚS EN MEDIO DE LAS TENTACIONES", "FIÉIS A JESUS NO MEIO DAS TENTAÇÕES"),
],
}
# One "ES: … / <LANG>: …" pair per example; empty for languages without examples.
example_lines = "\n".join(
f" ES: {e}\n {lang_code.upper()}: {t}"
for e, t in few_shot.get(lang_code, [])
)
example_block = f"\n\nTitle translation examples (be exactly this literal):\n{example_lines}" if example_lines else ""
system_prompt = f"""You are a professional translator specializing in theological and religious texts.
Translate from Spanish to {lang_name}.
Rules:
- Preserve all HTML tags exactly as they appear
- Translate the title LITERALLY — never paraphrase or summarize it
- Keep the full title including everything after colons and quoted subtitles
- Titles must be in ALL CAPS
- Maintain formal theological register
- Standard religious proper nouns: translate them (e.g. "Jesús""Jesus" in English)
- Other proper nouns (person names, place names): keep as-is
- Return ONLY the translation, starting with 'Title:'{example_block}"""
# Title and body go in one user message; the model is asked to answer with a
# first line starting with 'Title:'.
payload = json.dumps({
"model": JAN_MODEL,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": f"Title: {title}\n\n{content}"}
],
"temperature": 0.3,
"max_tokens": 4096,
}).encode("utf-8")
req = urllib.request.Request(
JAN_URL,
data=payload,
headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
method="POST"
)
try:
with urllib.request.urlopen(req, timeout=300) as r:
result = json.loads(r.read())
full = result["choices"][0]["message"]["content"].strip()
# Split the translated title from the content
# split("\n", 2) yields at most three parts: first line, second line, rest.
lines = full.split("\n", 2)
if lines[0].startswith("Title:"):
translated_title = lines[0].replace("Title:", "").strip()
translated_content = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
else:
# No 'Title:' marker: the first line is still taken as the title; a
# single-line reply is used as the content in full.
translated_title = lines[0].strip()
translated_content = "\n".join(lines[1:]).strip() if len(lines) > 1 else full
# If the title came back identical to the original (untranslated), retry the title alone
# (the comparison ignores case and surrounding whitespace)
if translated_title.strip().upper() == title.strip().upper():
title_payload = json.dumps({
"model": JAN_MODEL,
"messages": [
{"role": "user", "content": f"Translate this title from Spanish to {lang_name}. Return ONLY the translated title in ALL CAPS, nothing else: {title}"}
],
"temperature": 0.2,
"max_tokens": 50,
}).encode("utf-8")
title_req = urllib.request.Request(JAN_URL, data=title_payload,
headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"}, method="POST")
with urllib.request.urlopen(title_req, timeout=30) as tr:
title_result = json.loads(tr.read())
translated_title = title_result["choices"][0]["message"]["content"].strip().strip('"')
# If the translated content is empty or very short, retry with a more direct prompt
# (only when the source content itself is longer than 50 characters)
if len(translated_content.strip()) < 50 and len(content.strip()) > 50:
retry_payload = json.dumps({
"model": JAN_MODEL,
"messages": [
{"role": "system", "content": f"You are a professional translator. Translate the following text from Spanish to {lang_name}. Preserve all HTML tags. Return ONLY the translated text, no preamble."},
{"role": "user", "content": content}
],
"temperature": 0.3,
"max_tokens": 4096,
}).encode("utf-8")
retry_req = urllib.request.Request(JAN_URL, data=retry_payload,
headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"}, method="POST")
with urllib.request.urlopen(retry_req, timeout=300) as rr:
retry_result = json.loads(rr.read())
translated_content = retry_result["choices"][0]["message"]["content"].strip()
return translated_title, translated_content
except urllib.error.URLError as e:
raise RuntimeError(f"Error llamando a Jan: {e}")
# ── Base de datos WordPress ───────────────────────────────────────────────────
def get_db():
    """Open a new pymysql connection to the WordPress database.

    Connection settings come from the module-level DB_* constants. Cursors
    created from this connection use ``DictCursor``.
    """
    settings = {
        "host": DB_HOST,
        "port": DB_PORT,
        "user": DB_USER,
        "password": DB_PASS,
        "database": DB_NAME,
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
def get_article(db, wp_id):
    """Fetch one post row plus the comma-joined ids of its category terms.

    Returns whatever ``fetchone()`` yields for the post with ID *wp_id*
    (``None`` when no such post exists). The row carries ID, post_title,
    post_content, post_author, post_date, post_name and ``term_ids``.
    """
    query = """
SELECT p.ID, p.post_title, p.post_content, p.post_author,
p.post_date, p.post_name,
GROUP_CONCAT(t.term_id) as term_ids
FROM wp_posts p
LEFT JOIN wp_term_relationships tr ON p.ID=tr.object_id
LEFT JOIN wp_term_taxonomy tt ON tr.term_taxonomy_id=tt.term_taxonomy_id
AND tt.taxonomy='category'
LEFT JOIN wp_terms t ON tt.term_id=t.term_id
WHERE p.ID=%s
GROUP BY p.ID
"""
    with db.cursor() as cursor:
        cursor.execute(query, (wp_id,))
        row = cursor.fetchone()
    return row
def get_existing_translation(db, original_id, lang_code):
    """Return the WP ID of the existing *lang_code* translation, or ``None``.

    Looks for another post that shares a ``post_translations`` term with
    *original_id* and that is attached to the ``language`` term whose slug is
    *lang_code*. Everything is resolved in one query instead of one language
    lookup per candidate post.
    """
    query = """
SELECT tr2.object_id AS translated_id
FROM wp_term_relationships tr1
JOIN wp_term_taxonomy tt1 ON tr1.term_taxonomy_id=tt1.term_taxonomy_id
JOIN wp_term_relationships tr2 ON tr2.term_taxonomy_id=tr1.term_taxonomy_id
JOIN wp_term_relationships trl ON trl.object_id=tr2.object_id
JOIN wp_term_taxonomy ttl ON ttl.term_taxonomy_id=trl.term_taxonomy_id
JOIN wp_terms tl ON tl.term_id=ttl.term_id
WHERE tt1.taxonomy='post_translations'
AND ttl.taxonomy='language'
AND tr1.object_id=%s AND tr2.object_id!=%s
AND tl.slug=%s
ORDER BY tr2.object_id
LIMIT 1
"""
    with db.cursor() as c:
        c.execute(query, (original_id, original_id, lang_code))
        row = c.fetchone()
    # Rows are accessed by column name, as in the rest of this file.
    return row['translated_id'] if row else None
# ── Crear post vía WP-CLI en Docker ──────────────────────────────────────────
def _php_string_literal(value):
    """Return *value* as a single-quoted PHP string literal.

    Single-quoted PHP strings perform no variable interpolation, so ``$name``
    or ``{$expr}`` sequences in machine-translated text stay literal text.
    Only backslash and single quote need escaping.
    """
    text = str(value)
    return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"


def create_wp_post(article, translated_title, translated_content, lang_code, original_id, dry_run=False):
    """Create the translated post through ``wp eval`` inside the WP container.

    The new post copies author and date from *article*, is published, gets
    *lang_code* as its Polylang language, is linked to *original_id* as a
    translation and receives the original's categories.

    Returns the new post ID, or 0 when *dry_run* is true (nothing is created).
    Raises RuntimeError when the command output has no ``CREATED:<id>`` marker.
    """
    if dry_run:
        print(f" [DRY] Crearía post '{translated_title[:60]}' en {lang_code}")
        return 0

    content_with_footer = translated_content + AI_FOOTER
    post_date = article['post_date']
    if hasattr(post_date, 'strftime'):
        post_date_text = post_date.strftime('%Y-%m-%d %H:%M:%S')
    else:
        post_date_text = str(post_date)
    # Numeric values are forced to int so nothing else can reach the PHP source.
    author_id = int(article['post_author'])
    source_id = int(original_id)
    lang_php = _php_string_literal(lang_code)

    php = f"""
global $wpdb;
$post_id = wp_insert_post([
'post_title' => {_php_string_literal(translated_title)},
'post_content' => {_php_string_literal(content_with_footer)},
'post_author' => {author_id},
'post_status' => 'publish',
'post_type' => 'post',
'post_date' => {_php_string_literal(post_date_text)},
]);
if (is_wp_error($post_id)) {{ echo 'ERROR: ' . $post_id->get_error_message(); exit; }}
// Asignar idioma Polylang
if (function_exists('pll_set_post_language')) {{
pll_set_post_language($post_id, {lang_php});
}}
// Vincular traducciones
if (function_exists('pll_save_post_translations')) {{
$translations = pll_get_post_translations({source_id});
$translations[{lang_php}] = $post_id;
$translations['es'] = {source_id};
pll_save_post_translations($translations);
}}
// Copiar categorías del original (excepto las de idioma)
$cats = wp_get_post_categories({source_id}, ['fields' => 'ids']);
if (!empty($cats)) wp_set_post_categories($post_id, $cats);
echo 'CREATED:' . $post_id;
"""
    cmd = ["docker", "exec", WP_CONTAINER, "wp", "eval", php, "--allow-root"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # Read only the digits after the marker so extra output printed around it
    # (PHP notices, plugin chatter) cannot break the int conversion.
    marker = re.search(r"CREATED:(\d+)", result.stdout)
    if marker is None:
        raise RuntimeError(f"Error creando post: {result.stdout} {result.stderr}")
    return int(marker.group(1))
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
# Command-line entry point: parses the flags, checks the Jan server, then
# translates every selected article into every selected language and creates
# the translated posts. Prints a summary of created/skipped/failed items.
parser = argparse.ArgumentParser()
parser.add_argument("--check-api", action="store_true", help="Verificar conexión a Jan")
parser.add_argument("--dry-run", action="store_true", help="Simular sin crear posts")
parser.add_argument("--lang", help="Solo traducir a este idioma (en/fr/it/pt)")
parser.add_argument("--id", type=int, help="Solo traducir este WP ID")
args = parser.parse_args()
# The configured model name is replaced by whatever the server reports;
# get_jan_model() exits the process if the server cannot be reached.
global JAN_MODEL
JAN_MODEL = get_jan_model()
print(f"Jan API OK — modelo: {JAN_MODEL}")
if args.check_api:
print("Probando traducción...")
t, c = translate("Prueba", "<p>Hola mundo</p>", "en", "English")
print(f" Título: {t}")
print(f" Contenido: {c}")
return
# NOTE(review): --lang is not validated; a value missing from TARGET_LANGS
# raises KeyError on the next line.
langs = {args.lang: TARGET_LANGS[args.lang]} if args.lang else TARGET_LANGS
ids = [args.id] if args.id else SPANISH_IDS
db = get_db()
total = len(ids) * len(langs)
done = 0
skipped = 0
errors = 0
print(f"\nArtículos: {len(ids)} | Idiomas: {list(langs.keys())} | Total: {total} traducciones\n")
for wp_id in ids:
article = get_article(db, wp_id)
if not article:
print(f" ⚠ ID {wp_id} no encontrado, saltando")
continue
title = article['post_title']
content = article['post_content']
print(f"\n[{wp_id}] {title[:70]}")
# Oversized articles are skipped entirely (counted once, not per language).
if len(content) > MAX_CONTENT_LEN:
print(f" ⚠ Contenido demasiado largo ({len(content)} chars), saltando")
skipped += 1
continue
for lang_code, lang_name in langs.items():
# Skip languages that already have a linked translation.
existing = get_existing_translation(db, wp_id, lang_code)
if existing:
print(f"{lang_code.upper()}: ya existe (ID {existing}), saltando")
skipped += 1
continue
try:
# Dry run: no Jan call and no post creation, but the item is counted in `done`.
if args.dry_run:
print(f"{lang_code.upper()}: [DRY] se traduciría y crearía post")
done += 1
continue
print(f"{lang_code.upper()}: traduciendo... ", end="", flush=True)
t0 = time.time()
trans_title, trans_content = translate(title, content, lang_code, lang_name)
elapsed = time.time() - t0
print(f"{elapsed:.0f}s")
print(f" Título: {trans_title[:60]}")
new_id = create_wp_post(article, trans_title, trans_content, lang_code, wp_id, False)
print(f" Post creado: ID {new_id}")
done += 1
except Exception as e:
# A failure is reported and counted; processing continues.
print(f" ERROR: {e}")
errors += 1
time.sleep(2)
db.close()
print(f"\n{'='*50}")
print(f"Completado: {done} creados, {skipped} saltados, {errors} errores")
if errors:
print("Puedes volver a ejecutar — los ya creados se saltarán automáticamente.")
if __name__ == "__main__":
main()
+76
View File
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
# ============================================================================
# translate_gap.sh — Translates the feadulta "gap" (March→now) into EN/FR/IT/PT.
#
# Does everything in one go: preflight checks, translation of the weekly
# letters listed in CARTAS (and their articles) with the local Gemma model via
# scripts/translate_post.py, category remap, and a final per-language count.
# Intended to be re-runnable (already-translated items are expected to be
# skipped by translate_post.py). Posts are created with --status draft.
#
# USAGE (single command, in the background):
#   nohup bash scripts/translate_gap.sh > /tmp/feadulta-gap.out 2>&1 &
# To follow progress:
#   tail -f /tmp/feadulta-gap.log
#
# See issue rafa/feadulta#75.
# ============================================================================
set -u

cd "$(dirname "$0")/.." || { echo "No puedo entrar en el repo"; exit 1; }

LOG=/tmp/feadulta-gap.log
LANGS="en,fr,it,pt"
# Letters of the gap (newest to oldest). Optional override: CARTAS="45018" bash ...
CARTAS="${CARTAS:-45018 44997 44975 44230 44229 44228 44090 44089 44088 44087 44086 44085 44084 44083 42590}"

ts() { date '+%Y-%m-%d %H:%M:%S'; }
say() { echo "[$(ts)] $*" | tee -a "$LOG"; }

say "================ INICIO batch del gap (draft) ================"

# 1) LM Studio reachable and Gemma loaded
say "Preflight 1/2: LM Studio / Gemma..."
if ! curl -s --max-time 10 http://172.19.128.1:1234/v1/models 2>/dev/null | grep -q 'gemma-4-e4b'; then
  say "ERROR: LM Studio no responde o 'google/gemma-4-e4b' no está cargado."
  say " -> En Windows: abre LM Studio, carga 'google/gemma-4-e4b', server en 0.0.0.0:1234."
  exit 1
fi
say " OK: Gemma disponible."

# 2) Docker containers running
say "Preflight 2/2: contenedores docker..."
for cnt in wordpress-web wordpress-mysql; do
  if ! docker ps --format '{{.Names}}' | grep -qx "$cnt"; then
    say "ERROR: el contenedor '$cnt' no está arriba. Arranca el stack (docker compose up -d) y reintenta."
    exit 1
  fi
done
say " OK: wordpress-web y wordpress-mysql arriba."

# 3) Translate each letter and its articles.
# Word splitting of the ID list is intentional here.
# shellcheck disable=SC2086
set -- $CARTAS
N=$#
i=0
failed=""
for c in "$@"; do
  i=$((i+1))
  say "=== Carta $c ($i/$N) -> $LANGS (draft) ==="
  python3 scripts/translate_post.py --carta "$c" --langs "$LANGS" --status draft 2>&1 | tee -a "$LOG"
  # tee always succeeds, so read the translator's own status from PIPESTATUS.
  rc=${PIPESTATUS[0]}
  if [ "$rc" -ne 0 ]; then
    say "AVISO: translate_post.py terminó con código $rc para la carta $c (se continúa con la siguiente)."
    failed="$failed $c"
  fi
done

# 4) Category remap (no Gemma involved): puts each translation into the
#    category of its language. Only run it if the script actually reached the
#    container; otherwise a stale or missing copy would be executed.
say "Remapeando categorías de todas las traducciones..."
if docker cp scripts/remap_translation_cats.php wordpress-web:/tmp/remap_translation_cats.php >/dev/null 2>&1; then
  docker exec wordpress-web php /tmp/remap_translation_cats.php 2>&1 | tee -a "$LOG"
else
  say "ERROR: no se pudo copiar scripts/remap_translation_cats.php al contenedor; se omite el remap."
fi

# 5) Final count per language
say "Recuento final de traducciones por idioma (meta traduccion_origen):"
docker exec wordpress-mysql mysql -uwordpress_user -pwordpress_pass wordpress_db -N -e "
SELECT t.slug, COUNT(*) FROM wp_postmeta m
JOIN wp_term_relationships tr ON m.post_id=tr.object_id
JOIN wp_term_taxonomy tt ON tr.term_taxonomy_id=tt.term_taxonomy_id AND tt.taxonomy='language'
JOIN wp_terms t ON tt.term_id=t.term_id
WHERE m.meta_key='traduccion_origen' GROUP BY t.slug;" 2>/dev/null | tee -a "$LOG"

if [ -n "$failed" ]; then
  say "AVISO: cartas con errores:$failed — re-ejecuta el script para reintentarlas."
fi
say "================ FIN batch del gap ================"
say "Todo en DRAFT. No se ha publicado nada. Avisa a Rafa para revisar antes de publicar."
say "Log completo: $LOG"
+87
View File
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""Traduce ES->EN con Claude Haiku 4.5 vía API directa. Prueba de coste/calidad.
Lee la ANTHROPIC_API_KEY de portfolio-tracker/.env (la misma que usa el
portfolio tracker para trade setups). Reporta tokens reales de la API.
"""
import os
import re
import sys
# Load the API key from portfolio-tracker's .env without overriding a value
# that is already present in the environment.
ENV_PATH = "/home/rafa/portfolio-tracker/.env"
if "ANTHROPIC_API_KEY" not in os.environ:
    # The context manager closes the file even when the key is found early;
    # a missing file still raises, exactly as before.
    with open(ENV_PATH, encoding="utf-8") as env_file:
        for line in env_file:
            line = line.strip()
            if line.startswith("ANTHROPIC_API_KEY="):
                # Drop surrounding whitespace and optional quotes around the value.
                os.environ["ANTHROPIC_API_KEY"] = line.split("=", 1)[1].strip().strip('"').strip("'")
                break
import anthropic
MODEL = "claude-haiku-4-5"
LANG_NAMES = {"en": "English", "fr": "French (français)",
"it": "Italian (italiano)", "pt": "Portuguese (português)"}
def system_prompt(lang: str) -> str:
    """Build the (Spanish) system prompt for translating into *lang*.

    *lang* must be a key of LANG_NAMES; a ``KeyError`` is raised otherwise.
    The prompt is an introduction followed by six numbered rules, one per line.
    """
    language = LANG_NAMES[lang]
    intro = (
        "Eres un traductor profesional de textos religiosos cristianos "
        "(espiritualidad y teología católica). Traduce del español al "
        + language
        + ". REGLAS ESTRICTAS:"
    )
    rules = (
        "1. Conserva EXACTAMENTE el marcado HTML (etiquetas y atributos) y los "
        "shortcodes entre [ ] y { }. No los traduzcas ni los reordenes.",
        "2. NO traduzcas las referencias bíblicas ni sus abreviaturas "
        "(p.ej. 'Jn 3, 16', 'Mt 5'). Déjalas idénticas.",
        "3. Conserva los nombres propios de persona y lugar (salvo exónimos establecidos).",
        "4. Términos litúrgicos correctos (p.ej. 'Cuaresma' = Lent/Carême/Quaresima/Quaresma; "
        "NO inventes palabras).",
        "5. Traducción FIEL: no resumas, no añadas, no comentes.",
        "6. Devuelve SOLO la traducción entre las marcas <<<INI>>> y <<<FIN>>>, sin nada más.",
    )
    return "\n".join((intro, *rules))
def extract(text: str) -> str:
    """Return the translation contained in a model reply.

    The longest ``<<<INI>>>…<<<FIN>>>`` block wins (the model sometimes
    re-mentions the markers). When there is no complete block — for example a
    reply cut off before ``<<<FIN>>>`` — the text after the opening marker and
    before any closing marker is used, so stray markers never reach the
    output. A surrounding Markdown code fence is removed.
    """
    blocks = re.findall(r"<<<INI>>>(.*?)<<<FIN>>>", text, re.S)
    if blocks:
        out = max(blocks, key=len)
    else:
        _, opener, tail = text.partition("<<<INI>>>")
        out = tail if opener else text
        out = out.partition("<<<FIN>>>")[0]
    out = out.strip()
    out = re.sub(r"^```[a-z]*\n?", "", out)
    out = re.sub(r"\n?```$", "", out)
    return out.strip()
def translate(text: str, lang: str, *, is_title: bool = False) -> tuple[str, object]:
    """Translate *text* from Spanish into *lang* with one Messages API call.

    *is_title* only changes the wording of the request ("el TÍTULO" instead
    of "el texto"). Returns ``(translation, usage)``, where *usage* is the
    ``usage`` attribute of the API response.
    """
    client = anthropic.Anthropic()
    subject = "el TÍTULO" if is_title else "el texto"
    request_text = "".join((
        f"Traduce {subject} que va entre las marcas. ",
        f"Debe quedar en {LANG_NAMES[lang]} de forma natural.\n",
        f"<<<INI>>>{text}<<<FIN>>>",
    ))
    # Output budget: 0.9 tokens per input character, clamped to [1024, 16000].
    wanted = int(len(text) * 0.9)
    if wanted < 1024:
        wanted = 1024
    if wanted > 16000:
        wanted = 16000
    response = client.messages.create(
        model=MODEL,
        max_tokens=wanted,
        system=system_prompt(lang),
        messages=[{"role": "user", "content": request_text}],
    )
    text_parts = [part.text for part in response.content if part.type == "text"]
    return extract("".join(text_parts)), response.usage
if __name__ == "__main__":
    # Usage: script [input_html [lang]]; defaults: /tmp/orig_es.html and "en".
    path = sys.argv[1] if len(sys.argv) > 1 else "/tmp/orig_es.html"
    lang = sys.argv[2] if len(sys.argv) > 2 else "en"
    # Explicit UTF-8 and context managers: the files are closed deterministically
    # and accented text does not depend on the locale's default encoding.
    with open(path, encoding="utf-8") as src_file:
        src = src_file.read()
    out, usage = translate(src, lang)
    with open("/tmp/trad_haiku.html", "w", encoding="utf-8") as out_file:
        out_file.write(out)
    # Cost estimate at 1.0 USD per million input tokens and 5.0 USD per million
    # output tokens (presumably the model's list price — confirm before relying on it).
    cost = usage.input_tokens / 1e6 * 1.0 + usage.output_tokens / 1e6 * 5.0
    print(f"MODEL={MODEL} lang={lang}")
    print(f"input_tokens={usage.input_tokens} output_tokens={usage.output_tokens}")
    print(f"coste_articulo=${cost:.5f}")
    print(f"chars_in={len(src)} chars_out={len(out)}")
    print("--- primeras 500 car ---")
    print(out[:500])
+218
View File
@@ -0,0 +1,218 @@
<?php
/**
* translate_lectura_titles.php (issue Gitea #140)
*
* Traduce SOLO los nombres de libros bíblicos en las citas del post_title de los
* posts no-ES (EN/FR/IT/PT): cada token «<LIBRO> <num>» del título, esté al inicio
* o no (p.ej. tras «/» o tras un prefijo de fiesta).
* El cuerpo ya está traducido; esto es title-only.
*
* - Determinista: mapa fijo de libros ES -> {en,fr,it,pt}.
* - Idempotente: si el token inicial ya está en el idioma destino, no toca nada.
* - Seguro: exige número tras el libro (excluye «Juan Pablo II», «Domingo 30...»,
* etc. — DOMINGO/SEMANA no son libros, no están en el mapa).
* - Cotejo insensible a acentos (fold a ASCII-mayúsculas) para casar variantes;
* el valor canónico por idioma garantiza que ES==destino sea un no-op.
*
* Uso (local): docker exec wordpress-web php /var/www/html/scripts/... (o vía cwd)
* php scripts/translate_lectura_titles.php # dry-run + reporte
* APPLY=1 php scripts/translate_lectura_titles.php # aplica
* Prod: FEA_WP_LOAD=/web/wp-nuevo/wp-load.php php translate_lectura_titles.php
*/
// Report everything except deprecations and notices.
error_reporting(E_ALL & ~E_DEPRECATED & ~E_NOTICE);
// Location of wp-load.php: the FEA_WP_LOAD environment variable when set,
// otherwise the default path below.
$WP_LOAD = getenv('FEA_WP_LOAD') ?: '/var/www/html/wp-load.php';
if (!file_exists($WP_LOAD)) {
fwrite(STDERR, "No encuentro wp-load.php en $WP_LOAD\n");
exit(1);
}
// Load WordPress without the theme layer.
define('WP_USE_THEMES', false);
require $WP_LOAD;
global $wpdb;
// Changes are only applied when APPLY=1 is set in the environment.
$APPLY = getenv('APPLY') === '1';
// Target languages; the order matches the value arrays of the book map.
$LANGS = ['en', 'fr', 'it', 'pt'];
/*
* Mapa de libros: clave en español (display) => [en, fr, it, pt] (Title case canónico).
* El cotejo es accent-insensitive; los valores destino son los litúrgicos católicos.
*/
$BOOKS = [
// --- Antiguo Testamento ---
'Génesis' => ['Genesis', 'Genèse', 'Genesi', 'Gênesis'],
'Éxodo' => ['Exodus', 'Exode', 'Esodo', 'Êxodo'],
'Levítico' => ['Leviticus', 'Lévitique', 'Levitico', 'Levítico'],
'Números' => ['Numbers', 'Nombres', 'Numeri', 'Números'],
'Deuteronomio' => ['Deuteronomy', 'Deutéronome', 'Deuteronomio', 'Deuteronômio'],
'Josué' => ['Joshua', 'Josué', 'Giosuè', 'Josué'],
'Jueces' => ['Judges', 'Juges', 'Giudici', 'Juízes'],
'Rut' => ['Ruth', 'Ruth', 'Rut', 'Rute'],
'Samuel' => ['Samuel', 'Samuel', 'Samuele', 'Samuel'],
'Reyes' => ['Kings', 'Rois', 'Re', 'Reis'],
'Crónicas' => ['Chronicles', 'Chroniques', 'Cronache', 'Crônicas'],
'Esdras' => ['Ezra', 'Esdras', 'Esdra', 'Esdras'],
'Nehemías' => ['Nehemiah', 'Néhémie', 'Neemia', 'Neemias'],
'Tobías' => ['Tobit', 'Tobie', 'Tobia', 'Tobias'],
'Judit' => ['Judith', 'Judith', 'Giuditta', 'Judite'],
'Ester' => ['Esther', 'Esther', 'Ester', 'Ester'],
'Macabeos' => ['Maccabees', 'Maccabées', 'Maccabei', 'Macabeus'],
'Job' => ['Job', 'Job', 'Giobbe', 'Jó'],
'Salmos' => ['Psalms', 'Psaumes', 'Salmi', 'Salmos'],
'Salmo' => ['Psalm', 'Psaume', 'Salmo', 'Salmo'],
'Proverbios' => ['Proverbs', 'Proverbes', 'Proverbi', 'Provérbios'],
'Eclesiastés' => ['Ecclesiastes', 'Ecclésiaste', 'Ecclesiaste', 'Eclesiastes'],
'Eclesiástico' => ['Ecclesiasticus', 'Siracide', 'Siracide', 'Eclesiástico'],
'Sabiduría' => ['Wisdom', 'Sagesse', 'Sapienza', 'Sabedoria'],
'Isaías' => ['Isaiah', 'Isaïe', 'Isaia', 'Isaías'],
'Jeremías' => ['Jeremiah', 'Jérémie', 'Geremia', 'Jeremias'],
'Lamentaciones' => ['Lamentations', 'Lamentations', 'Lamentazioni', 'Lamentações'],
'Baruc' => ['Baruch', 'Baruch', 'Baruc', 'Baruc'],
'Ezequiel' => ['Ezekiel', 'Ézéchiel', 'Ezechiele', 'Ezequiel'],
'Daniel' => ['Daniel', 'Daniel', 'Daniele', 'Daniel'],
'Oseas' => ['Hosea', 'Osée', 'Osea', 'Oseias'],
'Joel' => ['Joel', 'Joël', 'Gioele', 'Joel'],
'Amós' => ['Amos', 'Amos', 'Amos', 'Amós'],
'Abdías' => ['Obadiah', 'Abdias', 'Abdia', 'Abdias'],
'Jonás' => ['Jonah', 'Jonas', 'Giona', 'Jonas'],
'Miqueas' => ['Micah', 'Michée', 'Michea', 'Miqueias'],
'Nahúm' => ['Nahum', 'Nahum', 'Naum', 'Naum'],
'Habacuc' => ['Habakkuk', 'Habacuc', 'Abacuc', 'Habacuc'],
'Sofonías' => ['Zephaniah', 'Sophonie', 'Sofonia', 'Sofonias'],
'Ageo' => ['Haggai', 'Aggée', 'Aggeo', 'Ageu'],
'Zacarías' => ['Zechariah', 'Zacharie', 'Zaccaria', 'Zacarias'],
'Malaquías' => ['Malachi', 'Malachie', 'Malachia', 'Malaquias'],
// --- Nuevo Testamento ---
'Mateo' => ['Matthew', 'Matthieu', 'Matteo', 'Mateus'],
'Marcos' => ['Mark', 'Marc', 'Marco', 'Marcos'],
'Lucas' => ['Luke', 'Luc', 'Luca', 'Lucas'],
'Juan' => ['John', 'Jean', 'Giovanni', 'João'],
'Hechos' => ['Acts', 'Actes', 'Atti', 'Atos'],
'Romanos' => ['Romans', 'Romains', 'Romani', 'Romanos'],
'Corintios' => ['Corinthians', 'Corinthiens', 'Corinzi', 'Coríntios'],
'Gálatas' => ['Galatians', 'Galates', 'Galati', 'Gálatas'],
'Efesios' => ['Ephesians', 'Éphésiens', 'Efesini', 'Efésios'],
'Filipenses' => ['Philippians', 'Philippiens', 'Filippesi', 'Filipenses'],
'Colosenses' => ['Colossians', 'Colossiens', 'Colossesi', 'Colossenses'],
'Tesalonicenses' => ['Thessalonians', 'Thessaloniciens', 'Tessalonicesi', 'Tessalonicenses'],
'Timoteo' => ['Timothy', 'Timothée', 'Timoteo', 'Timóteo'],
'Tito' => ['Titus', 'Tite', 'Tito', 'Tito'],
'Filemón' => ['Philemon', 'Philémon', 'Filemone', 'Filêmon'],
'Hebreos' => ['Hebrews', 'Hébreux', 'Ebrei', 'Hebreus'],
'Santiago' => ['James', 'Jacques', 'Giacomo', 'Tiago'],
'Pedro' => ['Peter', 'Pierre', 'Pietro', 'Pedro'],
'Judas' => ['Jude', 'Jude', 'Giuda', 'Judas'],
'Apocalipsis' => ['Revelation', 'Apocalypse', 'Apocalisse', 'Apocalipse'],
];
// Accent-insensitive comparison key: trims the input, upper-cases it (UTF-8)
// and replaces the accented capitals listed below with their ASCII base letter.
function fold($s) {
    $ascii = [
        'Á'=>'A','À'=>'A','Ä'=>'A','Â'=>'A','Ã'=>'A',
        'É'=>'E','È'=>'E','Ë'=>'E','Ê'=>'E',
        'Í'=>'I','Ì'=>'I','Ï'=>'I','Î'=>'I',
        'Ó'=>'O','Ò'=>'O','Ö'=>'O','Ô'=>'O','Õ'=>'O',
        'Ú'=>'U','Ù'=>'U','Ü'=>'U','Û'=>'U',
        'Ñ'=>'N','Ç'=>'C',
    ];
    $upper = mb_strtoupper(trim($s), 'UTF-8');
    // Every replacement is a plain ASCII letter that never appears among the
    // search keys, so sequential replacement gives the same result as strtr().
    return str_replace(array_keys($ascii), array_values($ascii), $upper);
}
$langIdx = array_flip($LANGS); // en=>0, fr=>1, it=>2, pt=>3
// lookup index: folded Spanish book name => [en,fr,it,pt]
$LOOKUP = [];
foreach ($BOOKS as $es => $tr) {
$LOOKUP[fold($es)] = $tr;
}
// All non-ES posts (filtering/transforming happens in PHP, where the /u regex is reliable).
// One %s placeholder per target language for the IN (...) list.
$placeholders = implode(',', array_fill(0, count($LANGS), '%s'));
$sql = $wpdb->prepare(
"SELECT p.ID, t.slug AS lang, p.post_title
FROM {$wpdb->posts} p
JOIN {$wpdb->term_relationships} tr ON tr.object_id = p.ID
JOIN {$wpdb->term_taxonomy} tt ON tt.term_taxonomy_id = tr.term_taxonomy_id AND tt.taxonomy='language'
JOIN {$wpdb->terms} t ON t.term_id = tt.term_id
WHERE p.post_type='post'
AND p.post_status IN ('publish','draft','future','pending','private')
AND t.slug IN ($placeholders)",
$LANGS
);
$rows = $wpdb->get_results($sql);
$changes = []; // [ID => [lang, old, new]]
$per_lang = array_fill_keys($LANGS, 0);
$skipped_already = 0;
$candidates = 0; // titles in which at least one biblical citation was detected
/*
* Translates EVERY book token of a biblical citation inside the title:
* - a word (letters, dots allowed after the first letter) NOT preceded by a
*   letter or a number
* - followed by «<whitespace><digit>» (the chapter of the citation).
* Covers the start of the title, the compounds «1ª lectura / 2ª lectura /
* evangelio» (after «/»), a preceding ordinal («2 Timoteo 4») and feast
* prefixes («Epifanía - Isaías 60»).
* Only words whose folded form is a key of the lookup index (built from the
* Spanish names of the map) are replaced; any other matched word is left
* untouched.
*/
$BOOK_RE = '/(?<![\p{L}\p{N}])(\p{L}[\p{L}.]*)(?=\s+\d)/u';
foreach ($rows as $r) {
$lang = $r->lang;
$li = $langIdx[$lang];
$hit = false;
$new_title = preg_replace_callback($BOOK_RE, function ($m) use ($LOOKUP, $li, &$hit) {
$book = $m[1];
$key = fold($book);
if (!isset($LOOKUP[$key])) {
return $m[0]; // no es libro conocido -> intacto
}
$hit = true;
$canon = $LOOKUP[$key][$li];
$isUpper = (mb_strtoupper($book, 'UTF-8') === $book);
return $isUpper ? mb_strtoupper($canon, 'UTF-8') : $canon;
}, $r->post_title);
if (!$hit) { continue; }
$candidates++;
if ($new_title === $r->post_title) { $skipped_already++; continue; } // idempotente
$changes[$r->ID] = [$lang, $r->post_title, $new_title];
$per_lang[$lang]++;
}
// --- Report ---
// Prints the totals gathered by the scan and, per language, up to $SAMPLE
// example rewrites (SAMPLE environment variable, default 8).
echo "== translate_lectura_titles.php (issue #140) ==\n";
echo "WP_LOAD: $WP_LOAD\n";
echo "Posts no-ES escaneados: " . count($rows) . "\n";
echo " - con cita bíblica detectada: $candidates\n";
echo " - ya en idioma destino (idempotente): $skipped_already\n";
echo " - A CAMBIAR: " . count($changes) . "\n";
echo " por idioma: ";
foreach ($LANGS as $l) echo strtoupper($l) . "=" . $per_lang[$l] . " ";
echo "\n\n";
$SAMPLE = (int)(getenv('SAMPLE') ?: 8);
foreach ($LANGS as $l) {
    $shown = 0;
    echo "--- muestra $l ---\n";
    foreach ($changes as $id => $c) {
        if ($c[0] !== $l) continue;
        echo sprintf(" %d «%s» -> «%s»\n", $id, $c[1], $c[2]);
        // A SAMPLE of 0 (or less) disables the cap and lists every change.
        if ($SAMPLE > 0 && ++$shown >= $SAMPLE) break;
    }
}
echo "\n";
// Without $APPLY the script stops here: the report above is the whole dry run.
if (!$APPLY) {
    echo "DRY-RUN (no se ha tocado nada). Ejecuta con APPLY=1 para aplicar.\n";
    exit(0);
}
// --- Apply (title only; slug and body are not touched) ---
$done = 0;
foreach ($changes as $id => $c) {
    // Direct table update of post_title only; a false return is treated as failure.
    $ok = $wpdb->update($wpdb->posts, ['post_title' => $c[2]], ['ID' => $id], ['%s'], ['%d']);
    if ($ok !== false) {
        clean_post_cache($id); // drop the cached copy so the new title is served
        $done++;
    } else {
        fwrite(STDERR, "ERROR al actualizar ID $id\n");
    }
}
echo "APLICADOS: $done de " . count($changes) . "\n";
+340
View File
@@ -0,0 +1,340 @@
#!/usr/bin/env python3
"""
translate_post.py — Traduce posts de feadulta (ES → EN/FR/IT/PT) con Gemma 4B local
y los enlaza como traducciones de Polylang, SIN servicios de pago.
Diseño (issue rafa/feadulta#75, fase 1):
- Gemma (LM Studio, http://172.19.128.1:1234/v1) traduce título + contenido HTML.
- Reglas estrictas: preserva HTML/shortcodes, NO traduce referencias bíblicas, respeta
nombres propios y un glosario fijo del proyecto. Traducción fiel (sin resumir).
- La lógica WordPress/Polylang vive en fea_translate_helper.php (corre dentro del
contenedor cargando wp-load.php; no necesita wp-cli ni proc_open).
- Idempotente y reanudable: si ya existe la traducción en ese idioma, se salta.
Uso:
python3 scripts/translate_post.py --post-id 45018 --langs en,fr,it,pt
python3 scripts/translate_post.py --carta 45018 # carta + sus _carta_id
python3 scripts/translate_post.py --post-id 45018 --langs en --status publish --force
Pensado para que Codex lo lance en lote sobre la cola priorizada (cartas/destacados).
"""
from __future__ import annotations
import argparse
import fcntl
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path
# ── Configuration ────────────────────────────────────────────────────────────
# Container names and DB credentials; each one can be overridden through the
# environment variable named in its call.
WP_CONTAINER = os.environ.get("FEA_WP_CONTAINER", "wordpress-web")
DB_CONTAINER = os.environ.get("FEA_DB_CONTAINER", "wordpress-mysql")
DB_NAME = os.environ.get("FEA_DB_NAME", "wordpress_db")
DB_USER = os.environ.get("FEA_DB_USER", "wordpress_user")
DB_PASS = os.environ.get("FEA_DB_PASS", "wordpress_pass")
LM_BASE_URL = os.environ.get("OPENAI_BASE_URL", "http://172.19.128.1:1234/v1")
MODEL = os.environ.get("LOCAL_MODEL", "google/gemma-4-e4b")
# Translation engine: "gemma" (local, default) or "haiku" (Claude Haiku 4.5 via API).
# Original note: Haiku gives better quality and needs no chunking (200k context);
# it reuses translate_haiku.py. A third value, "minimax", is handled below.
ENGINE = os.environ.get("FEA_ENGINE", "gemma").lower()
if ENGINE == "haiku":
    MODEL = "claude-haiku-4-5"
    # Make the sibling module importable regardless of the current directory.
    sys.path.insert(0, str(Path(__file__).resolve().parent))
    import translate_haiku # original note: loads the API key from portfolio-tracker/.env
elif ENGINE == "minimax":
    MODEL = os.environ.get("LOCAL_MODEL", "MiniMax-Text-01")
    MINIMAX_URL = os.environ.get("MINIMAX_URL", "https://api.minimax.io/v1/text/chatcompletion_v2")
    # The key file is read at import time; the LAST line starting with "sk-" wins,
    # and the key is left empty when no such line exists.
    _kf = Path(os.environ.get("MINIMAX_KEY_FILE", "/home/rafa/Feadulta/minimax.txt"))
    _keys = [l.strip() for l in _kf.read_text().splitlines() if l.strip().startswith("sk-")]
    MINIMAX_KEY = _keys[-1] if _keys else ""
# PHP helper: local source next to this script, and its destination path inside the container.
HELPER_SRC = Path(__file__).resolve().parent / "fea_translate_helper.php"
HELPER_DST = "/tmp/fea_translate_helper.php"
STATE_FILE = Path(os.environ.get("FEA_TR_STATE", "/tmp/feadulta-translate-state.json"))
LOG_FILE = Path(os.environ.get("FEA_TR_LOG", "/tmp/feadulta-translate.log"))
# Supported target languages; the values are the names used inside the prompts.
LANG_NAMES = {"en": "English", "fr": "French (français)", "it": "Italian (italiano)", "pt": "Portuguese (português)"}
# Glossary: terms that are NOT translated or have a fixed rendering.
GLOSSARY = {
    "Fe Adulta": {"en": "Fe Adulta", "fr": "Fe Adulta", "it": "Fe Adulta", "pt": "Fe Adulta"},
    "EFFA": {"en": "EFFA", "fr": "EFFA", "it": "EFFA", "pt": "EFFA"},
}
CHUNK_LIMIT = 5000 # characters per Gemma call (content is split at </p> when exceeded)
# ── Utilidades de proceso ────────────────────────────────────────────────────
def log(msg: str) -> None:
    """Print a timestamped message and append it to ``LOG_FILE``.

    Writing to the log file is best-effort: an ``OSError`` (unwritable path,
    full disk, ...) is ignored so that logging never aborts a translation run.

    Args:
        msg: Text to record; a ``[YYYY-MM-DD HH:MM:SS]`` prefix is added.
    """
    line = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    print(line, flush=True)
    try:
        # Context manager so the handle is closed right away instead of being
        # left to the garbage collector on every call.
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except OSError:
        pass
def sh(cmd: list[str], *, stdin: str | None = None, timeout: int = 120) -> str:
    """Run a command and return its standard output as text.

    Args:
        cmd: Argument vector (no shell is involved).
        stdin: Optional text fed to the process on standard input.
        timeout: Seconds before ``subprocess.run`` gives up.

    Returns:
        Whatever the process wrote to stdout.

    Raises:
        RuntimeError: The process exited with a non-zero status; the message
            carries the exit code, the command line and the stripped stderr.
    """
    proc = subprocess.run(
        cmd,
        input=stdin,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if proc.returncode == 0:
        return proc.stdout
    raise RuntimeError(f"cmd falló ({proc.returncode}): {' '.join(cmd)}\n{proc.stderr.strip()}")
_helper_ready = False


def php_helper(subcmd: str, *args: str, stdin: str | None = None) -> str:
    """Run a sub-command of the PHP helper inside the WordPress container.

    The helper script is copied into the container on the first call of the
    process; later calls reuse that copy.

    Args:
        subcmd: Helper sub-command name.
        *args: Extra positional arguments passed to the helper verbatim.
        stdin: Optional text piped to the helper's standard input.

    Returns:
        The helper's standard output.
    """
    global _helper_ready
    if not _helper_ready:
        container_target = f"{WP_CONTAINER}:{HELPER_DST}"
        sh(["docker", "cp", str(HELPER_SRC), container_target])
        _helper_ready = True
    return sh(
        ["docker", "exec", "-i", WP_CONTAINER, "php", HELPER_DST, subcmd, *args],
        stdin=stdin,
        timeout=180,
    )
# ── Gemma (LM Studio) ────────────────────────────────────────────────────────
def gemma(messages: list[dict], *, max_tokens: int) -> str:
    """Send a chat-completion request to ``LM_BASE_URL`` and return the reply text.

    Args:
        messages: Chat messages (``role``/``content`` dicts).
        max_tokens: Upper bound passed through as ``max_tokens``.

    Returns:
        The ``content`` of the first choice in the JSON response.
    """
    import urllib.request

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": 0.2,
        "max_tokens": max_tokens,
        "reasoning_effort": "none",
    }
    request = urllib.request.Request(
        f"{LM_BASE_URL}/chat/completions",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=300) as resp:
        reply = json.loads(resp.read().decode("utf-8"))
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
def minimax(messages: list[dict], *, max_tokens: int) -> str:
    """Send a chat-completion request to ``MINIMAX_URL`` and return the reply text.

    The request is authenticated with ``MINIMAX_KEY`` as a bearer token.

    Args:
        messages: Chat messages (``role``/``content`` dicts).
        max_tokens: Upper bound passed through as ``max_tokens``.

    Returns:
        The ``content`` of the first choice in the JSON response.
    """
    import urllib.request

    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": 0.2,
        "max_tokens": max_tokens,
    }
    http_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {MINIMAX_KEY}",
    }
    request = urllib.request.Request(
        MINIMAX_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers=http_headers,
    )
    with urllib.request.urlopen(request, timeout=300) as resp:
        reply = json.loads(resp.read().decode("utf-8"))
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
def _extract(text: str) -> str:
"""Extrae la traducción del ÚLTIMO bloque <<<INI>>>…<<<FIN>>>.
Gemma (modo reasoning) escribe un preámbulo que MENCIONA las propias marcas,
así que hay que quedarse con la última ocurrencia, no la primera.
"""
start_tok, end_tok = "<<<INI>>>", "<<<FIN>>>"
i = text.rfind(start_tok)
if i != -1:
rest = text[i + len(start_tok):]
j = rest.find(end_tok)
out = (rest[:j] if j != -1 else rest).strip()
else:
out = text.strip()
# Quita vallas de código markdown si Gemma las añade.
out = re.sub(r"^```[a-z]*\n?", "", out)
out = re.sub(r"\n?```$", "", out)
return out.strip()
def _system_prompt(lang: str) -> str:
    """Build the system prompt that fixes the translation rules for ``lang``.

    Args:
        lang: Target language code; must be a key of ``LANG_NAMES`` and of
            every ``GLOSSARY`` entry.

    Returns:
        The prompt text (written in Spanish, as the source posts are).

    Raises:
        KeyError: ``lang`` is not a known language code.
    """
    target = LANG_NAMES[lang]
    # Each glossary pair is rendered as "source" → "target". Without a
    # separator the two quoted terms run together ("Fe Adulta""Fe Adulta")
    # and the rule is unreadable for the model.
    glos = "; ".join(f'"{k}" → "{v[lang]}"' for k, v in GLOSSARY.items())
    return (
        f"Eres un traductor profesional de textos religiosos cristianos (espiritualidad y "
        f"teología católica). Traduce del español al {target}. REGLAS ESTRICTAS:\n"
        f"1. Conserva EXACTAMENTE el marcado HTML (etiquetas y atributos) y los shortcodes "
        f"entre [ ] y {{ }}. No los traduzcas ni los reordenes.\n"
        f"2. NO traduzcas las referencias bíblicas ni sus abreviaturas (p.ej. 'Jn 3, 16', "
        f"'Isaías 5, 1-7', 'Mt 5'). Déjalas idénticas.\n"
        f"3. Conserva los nombres propios de persona y lugar (salvo exónimos establecidos).\n"
        f"4. Glosario fijo: {glos}.\n"
        f"5. Traducción FIEL: no resumas, no añadas, no comentes.\n"
        f"6. Devuelve SOLO la traducción entre las marcas <<<INI>>> y <<<FIN>>>, sin nada más."
    )
def translate_text(text: str, lang: str, *, is_title: bool = False) -> str:
    """Translate one piece of Spanish text into ``lang`` with the configured engine.

    Args:
        text: Source text (plain or HTML); surrounding whitespace is ignored.
        lang: Target language code (key of ``LANG_NAMES``).
        is_title: Use the title-specific instructions instead of the body ones.

    Returns:
        The translated text, or ``""`` when ``text`` is blank.
    """
    text = text.strip()
    if not text:
        return ""
    if ENGINE == "haiku":
        out, _usage = translate_haiku.translate(text, lang, is_title=is_title)
        return out
    user = f"<<<INI>>>{text}<<<FIN>>>"
    if is_title:
        natural = f"Debe quedar en {LANG_NAMES[lang]} de forma natural."
        if lang != "en":
            # The "do not leave it in English" guard only makes sense for
            # non-English targets; for English it contradicted the sentence
            # right before it.
            natural += (
                " No lo dejes en inglés salvo que el original ya sea un nombre propio o una marca."
            )
        task = (
            f"Traduce el TÍTULO que va entre las marcas.\n"
            f"{natural}\n"
            f"Responde solo con el título traducido entre las marcas:\n{user}"
        )
    else:
        task = f"Traduce el texto que va entre las marcas:\n{user}"
    messages = [
        {"role": "system", "content": _system_prompt(lang)},
        {"role": "user", "content": task},
    ]
    # Output budget grows with the input length, with a floor of 800 tokens.
    max_tokens = max(800, int(len(text) * 1.6))
    engine_fn = minimax if ENGINE == "minimax" else gemma
    raw = engine_fn(messages, max_tokens=max_tokens)
    return _extract(raw)
def translate_html(html: str, lang: str) -> str:
    """Translate post HTML, splitting long content into paragraph-aligned chunks.

    Content is sent in a single call when the engine is "haiku" or when it
    fits in ``CHUNK_LIMIT`` characters. Otherwise it is cut right after each
    ``</p>`` and the pieces are packed into chunks of at most ``CHUNK_LIMIT``
    characters (a single piece longer than the limit stays whole); chunks are
    translated in order and concatenated.

    Args:
        html: Source HTML.
        lang: Target language code.

    Returns:
        The translated HTML.
    """
    if ENGINE == "haiku" or len(html) <= CHUNK_LIMIT:
        return translate_text(html, lang)

    chunks: list[str] = []
    pending = ""
    for piece in re.split(r"(?<=</p>)", html):
        # Close the current chunk before it would overflow the limit.
        if pending and len(pending) + len(piece) > CHUNK_LIMIT:
            chunks.append(pending)
            pending = ""
        pending += piece
    if pending:
        chunks.append(pending)

    log(f" contenido largo ({len(html)} car) → {len(chunks)} trozos")
    translated = [translate_text(chunk, lang) for chunk in chunks]
    return "".join(translated)
# ── Datos / estado ───────────────────────────────────────────────────────────
def read_post(post_id: int) -> dict:
    """Return the post as the dict decoded from the helper's ``read`` output."""
    raw_json = php_helper("read", str(post_id))
    return json.loads(raw_json)
def translation_exists(es_id: int, lang: str) -> int:
    """Return the integer printed by the helper's ``exists`` command (0 if blank).

    Callers treat a non-zero result as the ID of an existing translation.
    """
    answer = php_helper("exists", str(es_id), lang).strip()
    if not answer:
        return 0
    return int(answer)
WP_LOCK_FILE = Path(os.environ.get("FEA_TR_LOCK", "/tmp/feadulta-translate.lock"))


def create_translation(es_id: int, lang: str, title: str, content: str, status: str) -> int:
    """Create the translated post through the PHP helper and return its ID.

    The helper call runs under an exclusive inter-process file lock. Only this
    quick write/link step is serialized, not the slow LLM translation, so that
    parallel per-language runs do not trample the same translation group.

    Args:
        es_id: ID of the Spanish source post.
        lang: Target language code.
        title: Translated title.
        content: Translated content.
        status: Post status passed to the helper.

    Returns:
        The integer printed by the helper's ``create`` command.
    """
    body = json.dumps({"title": title, "content": content, "model": MODEL})
    with WP_LOCK_FILE.open("w") as lock_fh:
        fcntl.flock(lock_fh, fcntl.LOCK_EX)
        try:
            created = php_helper("create", str(es_id), lang, status, stdin=body)
            return int(created.strip())
        finally:
            fcntl.flock(lock_fh, fcntl.LOCK_UN)
def carta_article_ids(carta_id: int) -> list[int]:
    """Return the IDs of the posts whose ``_carta_id`` meta equals ``carta_id``.

    The query runs through the ``mysql`` client inside the DB container and
    the IDs come back ordered by post ID.
    """
    query = (
        "SELECT post_id FROM wp_postmeta WHERE meta_key='_carta_id' "
        f"AND meta_value='{carta_id}' ORDER BY post_id;"
    )
    mysql_cmd = [
        "docker", "exec", DB_CONTAINER, "mysql", f"-u{DB_USER}", f"-p{DB_PASS}",
        DB_NAME, "-N", "-e", query,
    ]
    ids = []
    for token in sh(mysql_cmd).split():
        # Keep only purely numeric tokens of the client output.
        if token.strip().isdigit():
            ids.append(int(token))
    return ids
def load_state() -> dict:
    """Load the resumable run state from ``STATE_FILE``.

    Returns:
        The stored dict, always containing a ``"done"`` dict. A missing file,
        undecodable content or a JSON value that is not an object yields a
        fresh ``{"done": {}}``.
    """
    state: dict = {}
    if STATE_FILE.exists():
        try:
            loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None
        if isinstance(loaded, dict):
            state = loaded
    # Callers index state["done"] directly, so guarantee that it exists even
    # when the file on disk is valid JSON without that key.
    if not isinstance(state.get("done"), dict):
        state["done"] = {}
    return state


def save_state(state: dict) -> None:
    """Persist ``state`` to ``STATE_FILE`` as UTF-8 JSON.

    The data is written to a per-process temporary file next to the target
    and then moved over it, so an interrupted run never leaves a half-written
    state file behind.
    """
    tmp = STATE_FILE.with_name(f"{STATE_FILE.name}.{os.getpid()}.tmp")
    tmp.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
    os.replace(tmp, STATE_FILE)
# ── Orquestación ─────────────────────────────────────────────────────────────
def process_post(post_id: int, langs: list[str], status: str, force: bool, state: dict) -> None:
    """Translate one Spanish post into every requested language.

    Posts whose language is set and is not "es" are skipped. For each
    language an existing translation is kept unless ``force`` is set. With
    ``force`` the previous translation is removed only AFTER the new text has
    been produced, so a failed LLM call does not lose the old translation.

    Errors are recorded per language in ``state["errors"]`` and do not stop
    the remaining languages.

    Args:
        post_id: ID of the Spanish source post.
        langs: Target language codes.
        status: Post status for the created translations.
        force: Regenerate even if a translation already exists.
        state: Mutable run state; updated in place and saved after each
            created translation or error.
    """
    src = read_post(post_id)
    if src.get("lang") and src["lang"] != "es":
        log(f"#{post_id} no es ES (lang={src['lang']}) — saltado")
        return
    log(f"#{post_id} «{src['title'][:60]}»")
    for lang in langs:
        key = f"{post_id}:{lang}"
        existing = translation_exists(post_id, lang)
        if existing and not force:
            log(f" {lang}: ya existe (#{existing}) — saltado")
            state["done"][key] = existing
            continue
        try:
            t0 = time.time()
            title = translate_text(src["title"], lang, is_title=True)
            content = translate_html(src["content"], lang)
            if existing:
                # Reached only with --force: drop the old translation now
                # that its replacement is ready.
                php_helper("unlink", str(post_id), lang)
                log(f" {lang}: --force, eliminada traducción previa #{existing}")
            new_id = create_translation(post_id, lang, title, content, status)
            dt = time.time() - t0
            log(f" {lang}: creado #{new_id} ({dt:.0f}s) → «{title[:50]}»")
            state["done"][key] = new_id
            # A success supersedes an error recorded by an earlier run.
            state.get("errors", {}).pop(key, None)
            save_state(state)
        except Exception as exc:  # noqa: BLE001
            log(f" {lang}: ERROR {exc}")
            state.setdefault("errors", {})[key] = str(exc)
            save_state(state)
def main() -> int:
    """Parse the command line, build the list of post IDs and translate them.

    Returns:
        Process exit code: 1 when no valid target language was given, else 0.
    """
    parser = argparse.ArgumentParser(description="Traduce posts de feadulta con Gemma local + Polylang.")
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--post-id", type=int, help="ID de un post ES a traducir.")
    source.add_argument("--carta", type=int, help="ID de carta: traduce la carta y todos sus artículos (_carta_id).")
    source.add_argument("--ids-file", help="Fichero con un ID de post ES por línea.")
    parser.add_argument("--langs", default="en,fr,it,pt", help="Idiomas destino separados por coma.")
    parser.add_argument("--status", default="draft", choices=["draft", "publish"], help="Estado de la traducción.")
    parser.add_argument("--force", action="store_true", help="Regenera aunque ya exista la traducción.")
    args = parser.parse_args()

    # Unknown language codes are dropped silently.
    langs = []
    for code in args.langs.split(","):
        code = code.strip()
        if code in LANG_NAMES:
            langs.append(code)
    if not langs:
        log("Sin idiomas válidos.")
        return 1

    if args.post_id:
        ids = [args.post_id]
    elif args.ids_file:
        tokens = Path(args.ids_file).read_text().split()
        ids = [int(tok) for tok in tokens if tok.strip().isdigit()]
        log(f"ids-file {args.ids_file}: {len(ids)} posts")
    else:
        # The carta itself goes first, followed by its articles.
        ids = [args.carta] + carta_article_ids(args.carta)
        log(f"Carta {args.carta}: {len(ids)} posts (carta + {len(ids)-1} artículos)")

    state = load_state()
    for pid in ids:
        process_post(pid, langs, args.status, args.force, state)
    save_state(state)
    log(f"FIN. {len(state['done'])} traducciones registradas, "
        f"{len(state.get('errors', {}))} errores. Estado: {STATE_FILE}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""Locuta una carta/artículo entero de feadulta con la voz clonada (XTTS-v2 + GPU).
Saca el texto del post ES, lo trocea por párrafos, lo locuta con la voz de
referencia (calculando los latents del hablante UNA sola vez), concatena con
pausas y añade comfort noise. Issue #76.
Uso:
tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]
"""
import html
import json
import os
import re
import subprocess
import sys
from pathlib import Path
os.environ.setdefault("COQUI_TOS_AGREED", "1")
import numpy as np # noqa: E402
import soundfile as sf # noqa: E402
import torch # noqa: E402
from TTS.api import TTS # noqa: E402
# Use the GPU when CUDA is available, unless the FEA_CPU environment variable is set.
DEVICE = "cuda" if torch.cuda.is_available() and not os.environ.get("FEA_CPU") else "cpu"
# Output directory, resolved relative to the parent of this script's directory.
OUT = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts-samples"
# Sample rate (Hz) used for the pause buffer, the duration estimate and the written wav.
SR = 24000
# Docker container in which the PHP post reader is executed.
CONTAINER = "wordpress-web"
def get_post_text(pid):
    """Fetch a post through the PHP helper and return its title and paragraphs.

    The helper is run inside the container, the JSON file it leaves in
    ``/tmp/fea_es.json`` is copied to the host, and the post content is
    reduced to plain-text paragraphs.

    Args:
        pid: Post ID.

    Returns:
        ``(title, paragraphs)``; paragraphs are whitespace-normalized strings
        longer than one character.

    Raises:
        subprocess.CalledProcessError: A docker command failed.
    """
    subprocess.run(["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php", "get", str(pid)],
                   check=True, capture_output=True)
    subprocess.run(["docker", "cp", f"{CONTAINER}:/tmp/fea_es.json", "/tmp/fea_es.json"], check=True)
    # Close the handle deterministically and decode the JSON as UTF-8
    # regardless of the host locale.
    with open("/tmp/fea_es.json", encoding="utf-8") as fh:
        d = json.load(fh)
    raw = d["content"]
    # Keep paragraph boundaries before the tags are removed.
    raw = re.sub(r"(?i)</p>|<br\s*/?>|</h[1-6]>", "\n", raw)
    raw = re.sub(r"<[^>]+>", "", raw)  # strip tags
    raw = re.sub(r"\[[^\]]+\]", "", raw)  # strip shortcodes
    raw = html.unescape(raw)
    paras = [re.sub(r"\s+", " ", p).strip() for p in raw.split("\n")]
    paras = [p for p in paras if len(p) > 1]
    return d["title"], paras
def main():
    """Synthesize a whole post with the cloned voice and write ``<name>.mp3``.

    Command line: ``tts_carta.py <post_id> <voice_sample.wav> [output_name]``.

    The speaker latents are computed once, each paragraph is synthesized
    separately and joined with 0.35 s pauses, comfort noise is mixed in with
    ffmpeg, and the result is encoded to mp3. If an ffmpeg step fails the
    last good intermediate file is kept and the script exits with an error
    instead of reporting success.
    """
    import time

    if len(sys.argv) < 3:
        sys.exit("uso: tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]")
    pid = int(sys.argv[1])
    spk = sys.argv[2]
    name = sys.argv[3] if len(sys.argv) > 3 else f"carta-{pid}"
    title, paras = get_post_text(pid)
    print(f"Post #{pid}: «{title}» ({len(paras)} párrafos, {sum(len(p) for p in paras)} car)")
    if not paras:
        # Nothing to read aloud; np.concatenate would fail on an empty list.
        sys.exit(f"Post #{pid}: sin texto locutable")
    # Create the output directory before the expensive synthesis so the
    # final write cannot fail on a missing folder.
    OUT.mkdir(parents=True, exist_ok=True)
    print(f"Cargando XTTS-v2 en {DEVICE}…", flush=True)
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(DEVICE)
    model = tts.synthesizer.tts_model
    print("Calculando timbre del hablante (1 vez)…", flush=True)
    gpt_cond, spk_emb = model.get_conditioning_latents(audio_path=[spk])
    pause = np.zeros(int(SR * 0.35), dtype=np.float32)
    pieces = []
    t0 = time.time()
    for i, para in enumerate(paras, 1):
        out = model.inference(
            para, "es", gpt_cond, spk_emb,
            temperature=0.65, repetition_penalty=5.0, top_k=50, top_p=0.85,
            enable_text_splitting=True,
        )
        pieces.append(np.asarray(out["wav"], dtype=np.float32))
        pieces.append(pause)
        print(f" párrafo {i}/{len(paras)} ({len(para)} car) ok", flush=True)
    audio = np.concatenate(pieces)
    dt = time.time() - t0
    dur = len(audio) / SR
    print(f"Síntesis: {dt:.1f}s para {dur:.1f}s de audio (x{dur/dt:.1f} tiempo real) en {DEVICE}")
    raw = OUT / f"{name}.raw.wav"
    sf.write(raw, audio, SR)
    wav = OUT / f"{name}.wav"
    # Comfort noise: low-level brown noise mixed under the speech.
    mix = subprocess.run([
        "ffmpeg", "-y", "-i", str(raw), "-filter_complex",
        "anoisesrc=color=brown:amplitude=0.004:sample_rate=24000[n];"
        "[n]highpass=f=120,lowpass=f=3800[nf];"
        "[0:a][nf]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[a]",
        "-map", "[a]", "-ar", "24000", str(wav),
    ], capture_output=True)
    if mix.returncode != 0 or not wav.exists():
        # Keep the raw synthesis: it is the only copy of the audio.
        sys.exit(f"ffmpeg falló al mezclar el ruido de confort; se conserva {raw}")
    raw.unlink(missing_ok=True)
    mp3 = OUT / f"{name}.mp3"
    enc = subprocess.run(["ffmpeg", "-y", "-i", str(wav), "-b:a", "96k", str(mp3)], capture_output=True)
    if enc.returncode != 0:
        sys.exit(f"ffmpeg falló al codificar el mp3; se conserva {wav}")
    print(f"OK -> {mp3} ({dur:.0f}s de audio)")


if __name__ == "__main__":
    main()
+51
View File
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""Locuta una carta entera con edge-tts (online, gratis). Para comparar con XTTS.
Uso: tts_carta_edge.py <post_id> [voz] [nombre_salida]
voz por defecto: es-ES-XimenaNeural
"""
import html
import json
import os
import re
import subprocess
import sys
from pathlib import Path
# Path of the edge-tts executable inside a virtualenv under the user's home directory.
EDGE = os.path.expanduser("~/.hermes/hermes-agent/venv/bin/edge-tts")
# Output directory, resolved relative to the parent of this script's directory.
OUT = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts-samples"
# Docker container in which the PHP post reader is executed.
CONTAINER = "wordpress-web"
def get_post_text(pid):
    """Fetch a post through the PHP helper and return its title and paragraphs.

    The helper is run inside the container, the JSON file it leaves in
    ``/tmp/fea_es.json`` is copied to the host, and the post content is
    reduced to plain-text paragraphs.

    Args:
        pid: Post ID.

    Returns:
        ``(title, paragraphs)``; paragraphs are whitespace-normalized strings
        longer than one character.

    Raises:
        subprocess.CalledProcessError: A docker command failed.
    """
    subprocess.run(["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php", "get", str(pid)],
                   check=True, capture_output=True)
    subprocess.run(["docker", "cp", f"{CONTAINER}:/tmp/fea_es.json", "/tmp/fea_es.json"], check=True)
    # Close the handle deterministically and decode the JSON as UTF-8
    # regardless of the host locale.
    with open("/tmp/fea_es.json", encoding="utf-8") as fh:
        d = json.load(fh)
    raw = d["content"]
    # Turn block boundaries into newlines, then drop tags and shortcodes.
    raw = re.sub(r"(?i)</p>|<br\s*/?>|</h[1-6]>", "\n", raw)
    raw = re.sub(r"<[^>]+>", "", raw)
    raw = re.sub(r"\[[^\]]+\]", "", raw)
    raw = html.unescape(raw)
    paras = [re.sub(r"\s+", " ", p).strip() for p in raw.split("\n")]
    return d["title"], [p for p in paras if len(p) > 1]
def main():
    """Read a whole post aloud with edge-tts and write ``<name>.mp3``.

    Command line: ``tts_carta_edge.py <post_id> [voice] [output_name]``;
    the voice defaults to ``es-ES-XimenaNeural``.

    Raises:
        subprocess.CalledProcessError: edge-tts (or a docker step) failed.
    """
    if len(sys.argv) < 2:
        # Print the usage instead of failing with an IndexError.
        sys.exit("uso: tts_carta_edge.py <post_id> [voz] [nombre_salida]")
    pid = int(sys.argv[1])
    voice = sys.argv[2] if len(sys.argv) > 2 else "es-ES-XimenaNeural"
    name = sys.argv[3] if len(sys.argv) > 3 else f"carta-edge-{pid}"
    title, paras = get_post_text(pid)
    text = "\n\n".join(paras)
    txt_path = "/tmp/carta_text.txt"
    # Write and close the file before edge-tts reads it, always as UTF-8.
    with open(txt_path, "w", encoding="utf-8") as fh:
        fh.write(text)
    print(f"Post #{pid}: «{title}» ({len(text)} car) → {voice}")
    OUT.mkdir(parents=True, exist_ok=True)
    mp3 = OUT / f"{name}.mp3"
    subprocess.run([EDGE, "--voice", voice, "--file", txt_path,
                    "--write-media", str(mp3)], check=True)
    print(f"OK -> {mp3}")


if __name__ == "__main__":
    main()
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
tts_eval.py — Genera la MISMA frase de feadulta con varias voces/modelos TTS para
compararlas (evaluación de voz, issue #76). Incluye:
- edge-tts Ximena (referencia, gratis, ya la usamos) — siempre.
- Modelos premium vía Hugging Face Inference Providers (consume crédito HF) — opcional.
Objetivo: ELEGIR voz. Para producción en masa NO se usa HF (sale caro); el modelo
abierto ganador se corre en LOCAL (RTX 5060 Ti) gratis. Ver análisis en #76.
Uso:
# Solo la referencia local (gratis):
python3 scripts/tts_eval.py
# Con modelos HF (necesita token; gasta unos céntimos del crédito):
HF_TOKEN=hf_xxx python3 scripts/tts_eval.py --hf
Salida: ./tts-eval/<nombre>.mp3 (escúchalos y elige).
"""
from __future__ import annotations
import argparse, os, subprocess, sys
from pathlib import Path
# Sentence synthesized with every voice/model so the outputs are directly comparable.
SAMPLE = (
    "Bienvenido a Fe Adulta. La humanidad abriga una esperanza: verse liberada de la "
    "esclavitud y alcanzar la libertad de los hijos de Dios. Una fe adulta es una fe "
    "personal, valiente, sin miedos infantiles. Detente un instante y respira."
)
# Output directory, resolved relative to the parent of this script's directory.
OUT = Path(__file__).resolve().parent.parent / "tts-eval"
# Path of the edge-tts executable inside a virtualenv under the user's home directory.
EDGE = os.path.expanduser("~/.hermes/hermes-agent/venv/bin/edge-tts")
# Candidates via HF Inference Providers, as (provider, model) pairs. Check availability
# in the "Inference Providers" tab of each model on huggingface.co — routing changes.
HF_CANDIDATES = [
    ("fal-ai", "fal-ai/f5-tts"),
    ("fal-ai", "fal-ai/chatterbox/text-to-speech"),
    ("hf-inference", "myshell-ai/MeloTTS-Spanish"),
]
def edge_samples():
    """Generate the sample sentence with three edge-tts voices.

    Each voice is written to ``OUT/edge-<voice>.mp3``. A voice that fails
    (edge-tts missing, not executable, or exiting non-zero) is reported and
    skipped so the remaining voices are still produced.
    """
    OUT.mkdir(exist_ok=True)
    for voz in ("es-ES-XimenaNeural", "es-ES-ElviraNeural", "es-MX-JorgeNeural"):
        dst = OUT / f"edge-{voz}.mp3"
        print(f"edge-tts {voz} ...", flush=True)
        try:
            res = subprocess.run(
                [EDGE, "--voice", voz, "--text", SAMPLE, "--write-media", str(dst)],
                capture_output=True, text=True,
            )
        except OSError as exc:
            # E.g. the edge-tts executable does not exist at EDGE.
            print(f" FALLO {voz}: {exc}")
            continue
        if res.returncode != 0:
            # Show only the tail of stderr to keep the report short.
            print(f" FALLO {voz}: {res.stderr.strip()[-300:]}")
    print(f" -> {OUT}")
def hf_samples():
    """Generate the sample sentence with every model in ``HF_CANDIDATES``.

    Requires the ``huggingface_hub`` package and the ``HF_TOKEN`` environment
    variable; the script exits with a message when either is missing. Each
    candidate is tried independently and a failure is printed, not raised.
    """
    try:
        from huggingface_hub import InferenceClient
    except ImportError:
        sys.exit("Falta huggingface_hub: pip install huggingface_hub")

    token = os.environ.get("HF_TOKEN")
    if not token:
        sys.exit("Define HF_TOKEN para usar --hf")

    OUT.mkdir(exist_ok=True)
    for provider, model in HF_CANDIDATES:
        # Model IDs contain slashes; flatten them for the file name.
        name = model.replace("/", "_")
        try:
            client = InferenceClient(provider=provider, api_key=token)
            audio = client.text_to_speech(SAMPLE, model=model)
            dst = OUT / f"hf-{name}.mp3"
            dst.write_bytes(audio)
        except Exception as exc:  # noqa: BLE001
            print(f"FALLO {provider}:{model} -> {exc}")
        else:
            print(f"OK {provider}:{model} -> {dst.name}")
def main():
    """Generate the edge-tts samples and, with ``--hf``, the Hugging Face ones."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--hf", action="store_true", help="También generar con modelos HF (gasta crédito).")
    opts = parser.parse_args()

    # The free reference voices are always produced.
    edge_samples()
    if opts.hf:
        hf_samples()
    print("\nEscucha los .mp3 en", OUT, "y elige. Para producción: correr el modelo abierto ganador en local.")


if __name__ == "__main__":
    raise SystemExit(main())
+41
View File
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Genera la muestra de feadulta con Kokoro (TTS local, gratis). Issue #76.
Voces español: ef_dora (fem), em_alex / em_santa (masc). lang_code 'e' = español.
Salida en uploads/tts-samples/ como kokoro-<voz>.wav (+ mp3 si hay ffmpeg).
"""
import subprocess
import sys
from pathlib import Path
import numpy as np
import soundfile as sf
from kokoro import KPipeline
# Sentence synthesized with every requested voice.
SAMPLE = (
    "Bienvenido a Fe Adulta. La humanidad abriga una esperanza: verse liberada de la "
    "esclavitud y alcanzar la libertad de los hijos de Dios. Una fe adulta es una fe "
    "personal, valiente, sin miedos infantiles. Detente un instante y respira."
)
# Output directory, resolved relative to the parent of this script's directory.
OUT = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts-samples"
# Sample rate (Hz) used when writing the wav and computing its duration.
SR = 24000
# Voices come from the command-line arguments; with none given, two defaults are used.
VOICES = sys.argv[1:] or ["ef_dora", "em_alex"]
def main():
    """Synthesize ``SAMPLE`` with every voice in ``VOICES``.

    Each voice is written to ``OUT/kokoro-<voice>.wav``. An mp3 copy is made
    when ffmpeg is installed and succeeds; otherwise only the wav is kept and
    reported. A voice for which the pipeline yields no audio is skipped.
    """
    OUT.mkdir(parents=True, exist_ok=True)
    pipe = KPipeline(lang_code="e")  # Spanish
    for voice in VOICES:
        chunks = [audio for _, _, audio in pipe(SAMPLE, voice=voice)]
        if not chunks:
            # Nothing was generated; indexing chunks[0] would raise IndexError.
            print(f"SIN AUDIO {voice}: el pipeline no devolvió nada")
            continue
        audio = np.concatenate(chunks) if len(chunks) > 1 else chunks[0]
        wav = OUT / f"kokoro-{voice}.wav"
        sf.write(wav, audio, SR)
        dur = len(audio) / SR
        mp3 = OUT / f"kokoro-{voice}.mp3"
        try:
            enc = subprocess.run(["ffmpeg", "-y", "-i", str(wav), "-b:a", "96k", str(mp3)],
                                 capture_output=True)
            made_mp3 = enc.returncode == 0
        except FileNotFoundError:
            # ffmpeg is optional: without it only the wav is produced.
            made_mp3 = False
        target = mp3 if made_mp3 else wav
        print(f"OK {voice}: {dur:.1f}s -> {target.name}")


if __name__ == "__main__":
    main()
+118
View File
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""Orquestador nocturno: locuta cartas ES del gap con MiniMax (voz Nico), una a
una, repartido en el tiempo. Reanudable (meta fea_audio_done) y con freno ante
la cuota (para tras N fallos seguidos). NO toca el front; solo genera el mp3 y
asocia la URL al post (meta fea_audio_url).
Lanzar: nohup ~/tts-local/xtts-venv/bin/python scripts/tts_produce.py > /tmp/feadulta-tts-prod.out 2>&1 &
Log: /tmp/feadulta-tts-prod.log
"""
import os
import shutil
import subprocess
import sys
import time
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import minimax_tts as mm # get_post_text, add_pauses, t2a, OUT
import translate_post as tp # carta_article_ids
# Voice and model identifiers passed to the MiniMax text-to-audio call.
VOICE = "NicoFeadulta2026"
MODEL = "speech-2.8-hd"
# Docker container in which the PHP post helper is executed.
CONTAINER = "wordpress-web"
# Directory for the produced audio files, relative to the parent of this script's directory.
PROD = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts"
LOG = Path("/tmp/feadulta-tts-prod.log")
INTERVAL = 180 # seconds between successful posts (spreads out the pace)
BACKOFF = 1800 # seconds to wait after a failure before retrying
MAX_CONSEC_FAIL = 3 # consecutive failures → stop (quota probably exhausted)
MIN_CHARS = 200 # below this, the post is considered to have no content to read aloud
# Queue of cartas to process. Can be overridden through the environment
# (FEA_TTS_CARTAS) to prioritize the new carta of the week; otherwise it falls
# back to the order of the historical gap.
_DEFAULT_CARTAS = "45018 44997 44975 44230 44229 44228 44090 44089 44088 44087 44086 44085 44084 44083 42590"
# Commas and whitespace are both accepted as separators.
CARTAS = os.environ.get("FEA_TTS_CARTAS", _DEFAULT_CARTAS).replace(",", " ").split()
def log(msg):
    """Print a timestamped message and append the same line to ``LOG``."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line, flush=True)
    with LOG.open("a") as log_fh:
        log_fh.write(f"{line}\n")
def php(*args):
    """Run the PHP post helper in the container with ``args``.

    Returns:
        The ``subprocess.CompletedProcess`` with text stdout/stderr; the exit
        status is not checked here.
    """
    cmd = ["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php"]
    cmd.extend(args)
    return subprocess.run(cmd, capture_output=True, text=True)
def meta(pid, key):
    """Return the stripped output of the helper's ``getmeta`` for ``pid``/``key``."""
    result = php("getmeta", str(pid), key)
    return result.stdout.strip()
def build_queue():
    """Build the ordered list of post IDs to process.

    When the ``FEA_TTS_IDS`` environment variable holds any tokens, its
    numeric tokens are used literally (already filtered and ordered by the
    caller). Otherwise the articles of every carta in ``CARTAS`` are
    collected in order, keeping the first occurrence of each post ID.
    """
    raw_ids = os.environ.get("FEA_TTS_IDS", "").replace(",", " ").split()
    if raw_ids:
        return [int(tok) for tok in raw_ids if tok.strip().isdigit()]

    queue = []
    for carta in CARTAS:
        for pid in tp.carta_article_ids(int(carta)):
            if pid in queue:
                continue
            queue.append(pid)
    return queue
def main():
    """Produce the audio for every queued post, one at a time.

    For each post not yet flagged ``fea_audio_done`` / ``fea_audio_skip``:
    read its text, synthesize it, move the mp3 into ``PROD`` and register its
    URL on the post. Unreadable or too-short posts are flagged to be skipped.
    After a failed synthesis the same post is retried after ``BACKOFF``
    seconds; ``MAX_CONSEC_FAIL`` failures in a row stop the run.

    The run also stops at once when the PHP helper cannot be copied into the
    container, because every later step depends on it.
    """
    PROD.mkdir(parents=True, exist_ok=True)
    # Resolve the helper next to this script so the run does not depend on
    # the directory it is launched from.
    helper = Path(__file__).resolve().parent / "fea_post_io.php"
    cp = subprocess.run(["docker", "cp", str(helper), f"{CONTAINER}:/tmp/fea_post_io.php"],
                        capture_output=True, text=True)
    if cp.returncode != 0:
        log(f"No se pudo copiar {helper} al contenedor: {cp.stderr.strip()}. PARO.")
        return
    queue = build_queue()
    log(f"=== INICIO orquestador TTS. Cola: {len(queue)} posts ES del gap ===")
    i = consec = ok = 0
    while i < len(queue):
        pid = queue[i]
        if meta(pid, "fea_audio_done") == "1" or meta(pid, "fea_audio_skip") == "1":
            i += 1
            continue
        try:
            title, text = mm.get_post_text(pid)
        except Exception as e:  # noqa: BLE001
            log(f"#{pid}: error leyendo ({e}); skip")
            php("setflag", str(pid), "fea_audio_skip", "1")
            i += 1
            continue
        if len(text) < MIN_CHARS:
            log(f"#{pid}: sin contenido ({len(text)} car); skip")
            php("setflag", str(pid), "fea_audio_skip", "1")
            i += 1
            continue
        rc = mm.t2a(mm.add_pauses(text), VOICE, MODEL, f"prod-{pid}")
        if rc == 0:
            src = mm.OUT / f"prod-{pid}.mp3"
            dst = PROD / f"{pid}.mp3"
            shutil.move(str(src), str(dst))
            php("setaudio", str(pid), f"/wp-content/uploads/tts/{pid}.mp3")
            ok += 1
            consec = 0
            log(f"#{pid} OK «{title[:45]}» → tts/{pid}.mp3 (total {ok})")
            i += 1
            time.sleep(INTERVAL)
        else:
            consec += 1
            log(f"#{pid} FALLO rc={rc} (fallo seguido {consec}/{MAX_CONSEC_FAIL})")
            php("setflag", str(pid), "fea_audio_error", str(rc))
            if consec >= MAX_CONSEC_FAIL:
                log("Demasiados fallos seguidos → cuota agotada probablemente. PARO. "
                    "Reanudable: relanzar el script más tarde (salta lo ya hecho).")
                break
            time.sleep(BACKOFF)  # i is not advanced: the same post is retried after the wait
    log(f"=== FIN tanda. {ok} audios generados esta ejecución. ===")


if __name__ == "__main__":
    main()
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""Clona una voz con XTTS-v2 (local) y locuta la muestra de feadulta. Issue #76.
Uso:
tts_xtts.py <muestra_voz.wav|mp3> [nombre_salida]
La muestra: 6-20s de voz limpia en español. Salida en uploads/tts-samples/.
NOTA: XTTS-v2 tiene licencia no comercial (CPML). En CPU tarda ~1-2 min por
muestra; con GPU sería casi instantáneo.
"""
import os
import subprocess
import sys
from pathlib import Path
os.environ.setdefault("COQUI_TOS_AGREED", "1") # acepta licencia CPML no-interactivo
import torch # noqa: E402
from TTS.api import TTS # noqa: E402
# Use the GPU when CUDA is available, unless the FEA_CPU environment variable is set.
DEVICE = "cuda" if torch.cuda.is_available() and not os.environ.get("FEA_CPU") else "cpu"
# Sentence read aloud with the cloned voice.
SAMPLE = (
    "Bienvenido a Fe Adulta. La humanidad abriga una esperanza: verse liberada de la "
    "esclavitud y alcanzar la libertad de los hijos de Dios. Una fe adulta es una fe "
    "personal, valiente, sin miedos infantiles. Detente un instante y respira."
)
# Output directory, resolved relative to the parent of this script's directory.
OUT = Path(__file__).resolve().parent.parent / "wordpress/wp-content/uploads/tts-samples"
def main():
    """Clone the voice of the given sample and read ``SAMPLE`` aloud with it.

    Command line: ``tts_xtts.py <voice_sample.wav|mp3> [output_name]``.

    Writes ``<name>.wav`` and ``<name>.mp3`` into ``OUT``. Comfort noise is
    mixed in unless the ``FEA_NO_COMFORT`` environment variable is set. If an
    ffmpeg step fails the last good intermediate file is kept and the script
    exits with an error instead of reporting success.
    """
    if len(sys.argv) < 2:
        sys.exit("uso: tts_xtts.py <muestra_voz.wav|mp3> [nombre_salida]")
    spk = sys.argv[1]
    name = sys.argv[2] if len(sys.argv) > 2 else "xtts-clon"
    OUT.mkdir(parents=True, exist_ok=True)
    print(f"Cargando XTTS-v2 en {DEVICE}…", flush=True)
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(DEVICE)
    raw = OUT / f"{name}.raw.wav"
    print(f"Clonando voz de {spk} y locutando…", flush=True)
    tts.tts_to_file(
        text=SAMPLE, speaker_wav=spk, language="es", file_path=str(raw),
        temperature=0.65,  # less randomness → more stable
        length_penalty=1.0,
        repetition_penalty=5.0,  # reduces artifacts/stammering in Spanish
        top_k=50,
        top_p=0.85,
        enable_text_splitting=True,  # split by sentence → better prosody
    )
    # Comfort noise: soft, constant brown noise that fills the silences at
    # commas/full stops so they do not contrast with the noise floor of the
    # cloned speech.
    wav = OUT / f"{name}.wav"
    if os.environ.get("FEA_NO_COMFORT"):
        ffmpeg_cmd = ["ffmpeg", "-y", "-i", str(raw), str(wav)]
    else:
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-i", str(raw), "-filter_complex",
            "anoisesrc=color=brown:amplitude=0.004:sample_rate=24000[n];"
            "[n]highpass=f=120,lowpass=f=3800[nf];"
            "[0:a][nf]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[a]",
            "-map", "[a]", "-ar", "24000", str(wav),
        ]
    mix = subprocess.run(ffmpeg_cmd, capture_output=True)
    if mix.returncode != 0 or not wav.exists():
        # Keep the raw synthesis: it is the only copy of the audio.
        sys.exit(f"ffmpeg falló al generar {wav.name}; se conserva {raw}")
    raw.unlink(missing_ok=True)
    mp3 = OUT / f"{name}.mp3"
    enc = subprocess.run(["ffmpeg", "-y", "-i", str(wav), "-b:a", "96k", str(mp3)],
                         capture_output=True)
    if enc.returncode != 0:
        sys.exit(f"ffmpeg falló al codificar el mp3; se conserva {wav}")
    print(f"OK -> {mp3}")


if __name__ == "__main__":
    main()
+153
View File
@@ -0,0 +1,153 @@
<?php
/**
 * Sets published posts back to draft when their title or their slug consists
 * solely of a date in dd-mm-yyyy form.
 *
 * Safe by default: nothing is modified unless the run is confirmed with the
 * --apply argument or with APPLY=1 in the environment. Passing --dry-run
 * forces a dry run even when apply was requested.
 *
 * Usage:
 * docker cp scripts/unpublish_date_slug_posts.php wordpress-web:/tmp/unpublish_date_slug_posts.php
 * docker exec wordpress-web php /tmp/unpublish_date_slug_posts.php --dry-run
 * docker exec wordpress-web php /tmp/unpublish_date_slug_posts.php --apply
 */

// Collect arguments from PHP's own $argv and from an optional $args array
// (presumably injected by a wrapper that includes this file; either may be
// undefined).
$cli_args = array_merge($argv ?? [], $args ?? []);

$apply = in_array('--apply', $cli_args, true) || getenv('APPLY') === '1';
$dry_run = !$apply || in_array('--dry-run', $cli_args, true);

// Default backup table name; the $wpdb branch replaces it with a
// prefix-aware name.
$backup_table = 'wp_fea_date_slug_posts_backup';

// Shared WHERE clause: published posts whose title or slug is only a date.
$where = "
post_type = 'post'
AND post_status = 'publish'
AND (
post_title REGEXP '^[0-9]{2}-[0-9]{2}-[0-9]{4}$'
OR post_name REGEXP '^[0-9]{2}-[0-9]{2}-[0-9]{4}$'
)
";

echo "=== Unpublish date-like migrated posts ===\n";
echo ($dry_run ? '[DRY RUN]' : '[APPLY]') . "\n\n";
// Path 1: WordPress is already loaded (a global $wpdb instance exists), so
// reuse its connection and its table prefix.
if (isset($GLOBALS['wpdb']) && $GLOBALS['wpdb'] instanceof wpdb) {
    global $wpdb;

    $posts_table = $wpdb->posts;
    $backup_table = $wpdb->prefix . 'fea_date_slug_posts_backup';

    $count = (int)$wpdb->get_var("SELECT COUNT(*) FROM $posts_table WHERE $where");
    echo "Matching published posts: $count\n";

    // Show at most 20 of the newest matches as a preview.
    $sample = $wpdb->get_results("
        SELECT ID, post_date, post_title, post_name
        FROM $posts_table
        WHERE $where
        ORDER BY post_date DESC
        LIMIT 20
    ", ARRAY_A);

    // get_results() may yield null on failure; treat that as "no rows".
    foreach ($sample ?: [] as $row) {
        echo sprintf(
            " #%d %s %s (%s)\n",
            $row['ID'],
            $row['post_date'],
            $row['post_title'],
            $row['post_name']
        );
    }

    if ($dry_run) {
        echo "\nNo changes made. Re-run with APPLY=1 to set these posts to draft.\n";
        return;
    }

    // $wpdb->query() returns false on failure instead of throwing, so every
    // step is checked: the destructive UPDATE must never run without a
    // backup in place.
    $created = $wpdb->query("
        CREATE TABLE IF NOT EXISTS $backup_table (
            post_id BIGINT UNSIGNED NOT NULL PRIMARY KEY,
            old_status VARCHAR(20) NOT NULL,
            old_modified DATETIME NOT NULL,
            backed_up_at DATETIME NOT NULL
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
    ");
    if ($created === false) {
        echo "\nERROR: could not create $backup_table: {$wpdb->last_error}\n";
        echo "No posts were modified.\n";
        exit(1);
    }

    // Backup and update are applied together, mirroring the PDO code path.
    $wpdb->query('START TRANSACTION');

    $backed_up = $wpdb->query("
        INSERT IGNORE INTO $backup_table (post_id, old_status, old_modified, backed_up_at)
        SELECT ID, post_status, post_modified, NOW()
        FROM $posts_table
        WHERE $where
    ");
    if ($backed_up === false) {
        $error = $wpdb->last_error;
        $wpdb->query('ROLLBACK');
        echo "\nERROR: backup into $backup_table failed: $error\n";
        echo "No posts were modified.\n";
        exit(1);
    }

    $updated = $wpdb->query("
        UPDATE $posts_table
        SET post_status = 'draft',
            post_modified = NOW(),
            post_modified_gmt = UTC_TIMESTAMP()
        WHERE $where
    ");
    if ($updated === false) {
        $error = $wpdb->last_error;
        $wpdb->query('ROLLBACK');
        echo "\nERROR: update of $posts_table failed: $error\n";
        echo "Transaction rolled back.\n";
        exit(1);
    }

    $wpdb->query('COMMIT');

    // NOTE(review): this is a direct SQL update, so no WordPress post hooks
    // run here; if a persistent object cache is in use it may need flushing
    // - confirm for this deployment.
    echo "\nBacked up rows in $backup_table: $backed_up\n";
    echo "Posts set to draft: $updated\n";
    return;
}
// Path 2: no WordPress runtime available - connect to MySQL directly with
// PDO. Connection settings come from the WORDPRESS_DB_* environment
// variables, each with a built-in fallback value.
$db_host = getenv('WORDPRESS_DB_HOST') ?: 'wordpress-db';
$db_name = getenv('WORDPRESS_DB_NAME') ?: 'wordpress_db';
$db_user = getenv('WORDPRESS_DB_USER') ?: 'wordpress_user';
$db_pass = getenv('WORDPRESS_DB_PASSWORD') ?: 'wordpress_pass';

try {
    $pdo = new PDO("mysql:host=$db_host;dbname=$db_name;charset=utf8mb4", $db_user, $db_pass, [
        PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
    ]);

    $count = (int)$pdo->query("SELECT COUNT(*) FROM wp_posts WHERE $where")->fetchColumn();
    echo "Matching published posts: $count\n";

    // Show at most 20 of the newest matches as a preview.
    $sample = $pdo->query("
        SELECT ID, post_date, post_title, post_name
        FROM wp_posts
        WHERE $where
        ORDER BY post_date DESC
        LIMIT 20
    ")->fetchAll(PDO::FETCH_ASSOC);

    foreach ($sample as $row) {
        echo sprintf(
            " #%d %s %s (%s)\n",
            $row['ID'],
            $row['post_date'],
            $row['post_title'],
            $row['post_name']
        );
    }

    if ($dry_run) {
        echo "\nNo changes made. Re-run with --apply to set these posts to draft.\n";
        exit(0);
    }

    // Created before the transaction starts: MySQL DDL statements commit
    // implicitly, so they cannot be part of it.
    $pdo->exec("
        CREATE TABLE IF NOT EXISTS $backup_table (
            post_id BIGINT UNSIGNED NOT NULL PRIMARY KEY,
            old_status VARCHAR(20) NOT NULL,
            old_modified DATETIME NOT NULL,
            backed_up_at DATETIME NOT NULL
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
    ");

    // Backup and status change are committed together or not at all.
    $pdo->beginTransaction();

    $backed_up = $pdo->exec("
        INSERT IGNORE INTO $backup_table (post_id, old_status, old_modified, backed_up_at)
        SELECT ID, post_status, post_modified, NOW()
        FROM wp_posts
        WHERE $where
    ");

    $updated = $pdo->exec("
        UPDATE wp_posts
        SET post_status = 'draft',
            post_modified = NOW(),
            post_modified_gmt = UTC_TIMESTAMP()
        WHERE $where
    ");

    $pdo->commit();
} catch (PDOException $e) {
    // Catch instead of letting the exception escape: an uncaught
    // PDOException prints a backtrace that can reveal the connection
    // credentials. Only the driver's message is shown.
    $rolled_back = false;
    if (isset($pdo) && $pdo->inTransaction()) {
        $pdo->rollBack();
        $rolled_back = true;
    }
    echo "\nERROR: " . $e->getMessage() . "\n";
    if ($rolled_back) {
        echo "Transaction rolled back.\n";
    }
    exit(1);
}

echo "\nBacked up rows in $backup_table: $backed_up\n";
echo "Posts set to draft: $updated\n";