181 lines
6.8 KiB
Python
181 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Aplica `clasificacion_articulos_regen.csv` a wp_term_relationships.
|
|
|
|
MODO CONSERVADOR (--mode=add): solo AÑADE las cats nuevas que el CSV indique
|
|
y que no estén ya. NO borra cats existentes. Maximiza seguridad — no perdemos
|
|
atribuciones legítimas que el CSV viejo o asignaciones manuales pusieran.
|
|
|
|
MODO ESTRICTO (--mode=replace): para los posts presentes en el CSV, sustituye
|
|
el conjunto de cats {1645,1646,1647,1648,1649,1650} por exactamente las que
|
|
el CSV indique. Borra las que sobren. Posts NO presentes en CSV no se tocan.
|
|
|
|
Recalcula `wp_term_taxonomy.count` al final.
|
|
|
|
Issue: rafa/feadulta#42
|
|
Uso: python3 aplicar_clasificacion_a_bd.py [--csv FILE] [--mode add|replace] [--dry-run]
|
|
"""
|
|
import argparse, csv, subprocess, sys
|
|
from collections import defaultdict
|
|
|
|
try:
|
|
import pymysql
|
|
except ImportError:
|
|
sys.exit('requiere pymysql')
|
|
|
|
# Category name as it appears in the CSV -> WordPress term_id.
CAT_NAME_TO_TERM = {
    'lectura': 1645,
    'comentario_editorial': 1646,
    'comentario': 1647,
    'eucaristia': 1648,
    'multimedia': 1649,
    'articulo': 1650,
    # 'noticia': 1651,  # not implemented
    # 'otro': 1652,     # not implemented
    # 'effa': ?,        # not implemented
}

# The term_ids this script is allowed to add or remove.
MANAGED_TERMS = {*CAT_NAME_TO_TERM.values()}
|
|
|
|
|
|
def get_conn():
    """Open a connection to the WordPress MySQL database running in Docker.

    The host is discovered by asking ``docker inspect`` for the IP address of
    the ``wordpress-mysql`` container. Autocommit is disabled, so callers
    must call ``commit()`` themselves.

    Returns:
        An open ``pymysql`` connection to ``wordpress_db``.

    Raises:
        subprocess.CalledProcessError: if ``docker inspect`` exits non-zero.
        RuntimeError: if the container reports no IP address.
    """
    inspect = subprocess.run(
        ['docker', 'inspect', 'wordpress-mysql', '--format',
         # The trailing space keeps addresses apart when the container is
         # attached to several networks; without it they are glued together
         # into one unusable string.
         '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
        capture_output=True, text=True, check=True)
    addresses = inspect.stdout.split()
    if not addresses:
        # An empty host would make the driver fall back to its default host,
        # i.e. possibly a different database than the one intended.
        raise RuntimeError(
            'El contenedor wordpress-mysql no tiene dirección IP (¿está parado?)')

    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment.
    return pymysql.connect(host=addresses[0], user='wordpress_user',
                           password='wordpress_pass', database='wordpress_db',
                           charset='utf8mb4', autocommit=False)
|
|
|
|
|
|
def get_term_taxonomy_ids(conn, term_ids):
    """Map term_ids to term_taxonomy_ids for taxonomy='category'.

    Args:
        conn: open database connection whose cursors return rows as
            2-tuples (the result is built with ``dict(rows)``).
        term_ids: iterable of term ids to look up.

    Returns:
        dict term_id -> term_taxonomy_id, containing only the ids found in
        ``wp_term_taxonomy``. Empty when ``term_ids`` is empty.
    """
    ids = list(term_ids)
    if not ids:
        # ``IN ()`` is a syntax error in MySQL, and there is nothing to look up.
        return {}

    # One placeholder per id; the driver escapes the bound values.
    placeholders = ','.join(['%s'] * len(ids))
    with conn.cursor() as c:
        c.execute(
            "SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
            f"WHERE taxonomy='category' AND term_id IN ({placeholders})",
            ids,
        )
        return dict(c.fetchall())
|
|
|
|
|
|
def load_csv(path):
    """Read the classification CSV into a mapping post_id -> set of categories.

    Only the ``post_id`` and ``categoria_propuesta`` columns are read. Rows
    with an empty ``post_id``, or with a category that is not a key of
    ``CAT_NAME_TO_TERM``, are skipped.

    Args:
        path: path of the CSV file (UTF-8, with or without BOM).

    Returns:
        defaultdict mapping int post_id -> set of category names.

    Raises:
        ValueError: if a row with a known category has a non-numeric post_id.
    """
    out = defaultdict(set)
    # utf-8-sig drops a leading BOM, which would otherwise become part of the
    # first header name and make every row look as if it had no post_id.
    # newline='' is what the csv module expects for correct quoted-newline
    # handling.
    with open(path, encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Short rows yield None for missing columns; treat them as empty.
            pid = (row.get('post_id') or '').strip()
            cat = (row.get('categoria_propuesta') or '').strip()
            if not pid or cat not in CAT_NAME_TO_TERM:
                continue
            try:
                post_id = int(pid)
            except ValueError as err:
                raise ValueError(
                    f'{path}, línea {reader.line_num}: post_id no numérico: {pid!r}'
                ) from err
            out[post_id].add(cat)
    return out
|
|
|
|
|
|
def current_cats(conn, post_ids, tt_ids):
    """Return the managed term_ids currently attached to each post.

    Args:
        conn: open database connection whose cursors return rows as tuples.
        post_ids: iterable of post ids (``object_id``) to inspect.
        tt_ids: iterable of term_taxonomy_ids to restrict the lookup to.

    Returns:
        Mapping post_id -> set of term_ids. Posts with no matching
        relationship are absent. Empty when either input is empty.
    """
    pids = list(post_ids)
    ttids = list(tt_ids)
    if not pids or not ttids:
        # Either empty list would render ``IN ()``, which MySQL rejects.
        return {}

    pid_marks = ','.join(['%s'] * len(pids))
    tt_marks = ','.join(['%s'] * len(ttids))
    out = defaultdict(set)
    with conn.cursor() as c:
        # Values are bound by the driver rather than formatted into the SQL.
        c.execute(
            f"""
            SELECT tr.object_id, tt.term_id
            FROM wp_term_relationships tr
            JOIN wp_term_taxonomy tt ON tt.term_taxonomy_id=tr.term_taxonomy_id
            WHERE tr.object_id IN ({pid_marks}) AND tt.term_taxonomy_id IN ({tt_marks})
            """,
            pids + ttids,
        )
        for pid, tid in c.fetchall():
            out[pid].add(tid)
    return out
|
|
|
|
|
|
def _plan_changes(desired_by_pid, current_by_pid, term_to_tt, mode):
    """Compute the (object_id, term_taxonomy_id) rows to insert and to delete."""
    to_add = []
    to_del = []
    for pid, desired_names in desired_by_pid.items():
        desired_tids = {CAT_NAME_TO_TERM[n] for n in desired_names}
        current_tids = current_by_pid.get(pid, set())
        # Always add what the CSV asks for and the post does not have yet.
        to_add.extend((pid, term_to_tt[tid]) for tid in desired_tids - current_tids)
        # Only 'replace' removes managed categories the CSV does not list.
        if mode == 'replace':
            to_del.extend((pid, term_to_tt[tid]) for tid in current_tids - desired_tids)
    return to_add, to_del


def _apply_changes(conn, to_add, to_del, in_ttids, chunk_size=1000):
    """Insert/delete relationship rows and refresh term counts; does not commit."""
    with conn.cursor() as c:
        # Both ids of every pair are integers (parsed from the CSV or read
        # from the database), so formatting them into the SQL text is safe.
        for i in range(0, len(to_add), chunk_size):
            vals = ','.join(f'({p},{t})' for p, t in to_add[i:i + chunk_size])
            c.execute(f'INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) VALUES {vals}')
        for i in range(0, len(to_del), chunk_size):
            conds = ' OR '.join(f'(object_id={p} AND term_taxonomy_id={t})'
                                for p, t in to_del[i:i + chunk_size])
            c.execute(f'DELETE FROM wp_term_relationships WHERE {conds}')
        # NOTE(review): this counts every relationship row; WordPress itself
        # appears to count only published posts for post taxonomies. Confirm
        # this is the intended figure.
        c.execute(f"""
            UPDATE wp_term_taxonomy tt
            SET tt.count = (SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id)
            WHERE tt.term_taxonomy_id IN ({in_ttids})
        """)


def main(argv=None):
    """Apply the classification CSV to ``wp_term_relationships``.

    Steps: parse the options, load the CSV, read the managed categories each
    listed post currently has, compute the rows to add (and, in ``replace``
    mode, to remove), then either print a sample (``--dry-run``) or apply the
    changes, recalculate ``wp_term_taxonomy.count`` and commit. Progress is
    reported on stderr.

    Args:
        argv: optional list of command-line arguments; ``None`` makes
            argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: if a managed term_id has no 'category' row in
            ``wp_term_taxonomy``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--csv', default='/tmp/clasif_new.csv')
    ap.add_argument('--mode', choices=['add', 'replace'], default='add')
    ap.add_argument('--dry-run', action='store_true')
    args = ap.parse_args(argv)

    print(f'CSV: {args.csv}', file=sys.stderr)
    print(f'Mode: {args.mode}{" (DRY)" if args.dry_run else ""}', file=sys.stderr)

    desired_by_pid = load_csv(args.csv)
    print(f'Posts en CSV: {len(desired_by_pid)}', file=sys.stderr)

    conn = get_conn()
    try:
        term_to_tt = get_term_taxonomy_ids(conn, MANAGED_TERMS)
        print(f'Term taxonomy ids: {term_to_tt}', file=sys.stderr)
        if len(term_to_tt) != len(CAT_NAME_TO_TERM):
            sys.exit(f'No encuentro todos los term_ids: {set(MANAGED_TERMS) - set(term_to_tt)}')

        # Current managed categories of the posts in the CSV, read in batches
        # to keep each IN (...) list bounded.
        pids = list(desired_by_pid)
        BATCH = 5000
        current_by_pid = {}
        for i in range(0, len(pids), BATCH):
            current_by_pid.update(
                current_cats(conn, pids[i:i + BATCH], term_to_tt.values()))

        to_add, to_del = _plan_changes(desired_by_pid, current_by_pid,
                                       term_to_tt, args.mode)
        print(f'A añadir: {len(to_add)}', file=sys.stderr)
        print(f'A quitar: {len(to_del)}', file=sys.stderr)

        if args.dry_run:
            # Show a small sample and leave the database untouched.
            print('\n--- 5 ejemplos añadir ---', file=sys.stderr)
            for x in to_add[:5]:
                print(' ', x, file=sys.stderr)
            print('\n--- 5 ejemplos quitar ---', file=sys.stderr)
            for x in to_del[:5]:
                print(' ', x, file=sys.stderr)
            return

        in_ttids = ','.join(str(t) for t in term_to_tt.values())
        try:
            _apply_changes(conn, to_add, to_del, in_ttids)
            conn.commit()
        except Exception:
            # Leave the database exactly as it was if any statement fails.
            conn.rollback()
            raise
        print('Commit OK.', file=sys.stderr)

        # Final counts
        with conn.cursor() as c:
            c.execute(f"""
                SELECT t.term_id, t.slug, tt.count FROM wp_term_taxonomy tt
                JOIN wp_terms t USING(term_id)
                WHERE tt.term_taxonomy_id IN ({in_ttids}) ORDER BY t.term_id
            """)
            print('\nCats finales:', file=sys.stderr)
            for row in c.fetchall():
                print(f' {row[0]:5d} {row[1]:30s} {row[2]}', file=sys.stderr)
    finally:
        # Close on every path: success, dry run, sys.exit and errors.
        conn.close()
|
|
|
|
|
|
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|