Añadir mu-plugins y scripts de feadulta
This commit is contained in:
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Aplica `clasificacion_articulos_regen.csv` a wp_term_relationships.
|
||||
|
||||
MODO CONSERVADOR (--mode=add): solo AÑADE las cats nuevas que el CSV indique
|
||||
y que no estén ya. NO borra cats existentes. Maximiza seguridad — no perdemos
|
||||
atribuciones legítimas que el CSV viejo o asignaciones manuales pusieran.
|
||||
|
||||
MODO ESTRICTO (--mode=replace): para los posts presentes en el CSV, sustituye
|
||||
el conjunto de cats {1645,1646,1647,1648,1649,1650} por exactamente las que
|
||||
el CSV indique. Borra las que sobren. Posts NO presentes en CSV no se tocan.
|
||||
|
||||
Recalcula `wp_term_taxonomy.count` al final.
|
||||
|
||||
Issue: rafa/feadulta#42
|
||||
Uso: python3 aplicar_clasificacion_a_bd.py [--csv FILE] [--mode add|replace] [--dry-run]
|
||||
"""
|
||||
import argparse
import csv
import os
import subprocess
import sys
from collections import defaultdict

try:
    import pymysql
except ImportError:
    sys.exit('requiere pymysql')
|
||||
|
||||
# Maps the category name used in the CSV column `categoria_propuesta` to the
# WordPress term_id of the corresponding category.
CAT_NAME_TO_TERM = {
    'lectura': 1645,
    'comentario_editorial': 1646,
    'comentario': 1647,
    'eucaristia': 1648,
    'multimedia': 1649,
    'articulo': 1650,
    # Not implemented yet (CSV rows carrying these names are ignored):
    # 'noticia': 1651,
    # 'otro': 1652,
    # 'effa': ?,
}

# term_ids this script is allowed to add or remove; any other category
# attached to a post is never touched.
MANAGED_TERMS = set(CAT_NAME_TO_TERM.values())
|
||||
|
||||
|
||||
def get_conn():
    """Open a PyMySQL connection to the WordPress database.

    The host is taken from the ``WP_DB_HOST`` environment variable when set;
    otherwise it is the IP address of the ``wordpress-mysql`` Docker
    container, resolved with ``docker inspect``. User, password and database
    default to the values previously hard-coded here and can be overridden
    with ``WP_DB_USER``, ``WP_DB_PASSWORD`` and ``WP_DB_NAME``.

    Returns:
        A ``pymysql`` connection with ``autocommit`` disabled, so the caller
        controls the transaction.

    Raises:
        subprocess.CalledProcessError: if ``docker inspect`` fails.
        SystemExit: if the container reports no IP address.
    """
    host = os.environ.get('WP_DB_HOST')
    if not host:
        inspect = subprocess.run(
            ['docker', 'inspect', 'wordpress-mysql', '--format',
             # The trailing space separates the addresses: without it a
             # container attached to several networks yields all of its IPs
             # glued together into one unusable host string.
             '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'],
            capture_output=True, text=True, check=True)
        addresses = inspect.stdout.split()
        if not addresses:
            sys.exit('No se pudo obtener la IP del contenedor wordpress-mysql')
        host = addresses[0]
    return pymysql.connect(
        host=host,
        user=os.environ.get('WP_DB_USER', 'wordpress_user'),
        password=os.environ.get('WP_DB_PASSWORD', 'wordpress_pass'),
        database=os.environ.get('WP_DB_NAME', 'wordpress_db'),
        charset='utf8mb4',
        autocommit=False,
    )
|
||||
|
||||
|
||||
def get_term_taxonomy_ids(conn, term_ids):
    """Return a dict term_id -> term_taxonomy_id for taxonomy='category'.

    Args:
        conn: open database connection exposing ``cursor()`` as a context
            manager (as PyMySQL connections do).
        term_ids: iterable of integer term ids to look up.

    Returns:
        A dict with one entry per term id found in ``wp_term_taxonomy``; ids
        that do not exist as categories are simply absent. An empty
        ``term_ids`` returns ``{}`` without querying the database.
    """
    term_ids = [int(t) for t in term_ids]
    if not term_ids:
        # "IN ()" is a SQL syntax error, so do not run the query at all.
        return {}
    placeholders = ','.join(['%s'] * len(term_ids))
    with conn.cursor() as c:
        c.execute(
            "SELECT term_id, term_taxonomy_id FROM wp_term_taxonomy "
            f"WHERE taxonomy='category' AND term_id IN ({placeholders})",
            term_ids,
        )
        return dict(c.fetchall())
|
||||
|
||||
|
||||
def load_csv(path, valid_cats=None):
    """Return a dict post_id -> set(category name) read from the CSV.

    Only the columns ``post_id`` and ``categoria_propuesta`` are used. Rows
    with an empty ``post_id`` or with a category that is not in
    ``valid_cats`` are skipped.

    Args:
        path: path of the CSV file (UTF-8, with or without BOM).
        valid_cats: container of accepted category names; defaults to the
            keys of ``CAT_NAME_TO_TERM``.

    Returns:
        A ``defaultdict(set)`` keyed by integer post id.

    Raises:
        ValueError: if a non-empty ``post_id`` is not an integer.
    """
    if valid_cats is None:
        valid_cats = CAT_NAME_TO_TERM
    out = defaultdict(set)
    # utf-8-sig drops a leading BOM; with plain utf-8 the first header would
    # be read as '\ufeffpost_id' and every row would be silently skipped.
    # newline='' is what the csv module expects for files it parses.
    with open(path, encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            # Short rows yield None for missing cells; treat them as empty.
            pid = (row.get('post_id') or '').strip()
            cat = (row.get('categoria_propuesta') or '').strip()
            if pid and cat in valid_cats:
                out[int(pid)].add(cat)
    return out
|
||||
|
||||
|
||||
def current_cats(conn, post_ids, tt_ids):
    """Return, per post, the set of managed term_ids it currently has.

    Args:
        conn: open database connection exposing ``cursor()`` as a context
            manager (as PyMySQL connections do).
        post_ids: iterable of post ids (``object_id``) to inspect.
        tt_ids: iterable of ``term_taxonomy_id`` values to restrict the
            lookup to.

    Returns:
        A mapping post_id -> set(term_id). Posts without any of the given
        taxonomy ids are absent. If either input is empty the result is
        ``{}`` and no query is executed.
    """
    # Materialize first so that iterators/views are handled and emptiness can
    # be tested reliably.
    post_ids = [int(p) for p in post_ids]
    tt_ids = [int(t) for t in tt_ids]
    if not post_ids or not tt_ids:
        # "IN ()" is a SQL syntax error, so do not run the query at all.
        return {}
    pid_marks = ','.join(['%s'] * len(post_ids))
    tt_marks = ','.join(['%s'] * len(tt_ids))
    out = defaultdict(set)
    with conn.cursor() as c:
        c.execute(
            "SELECT tr.object_id, tt.term_id "
            "FROM wp_term_relationships tr "
            "JOIN wp_term_taxonomy tt ON tt.term_taxonomy_id=tr.term_taxonomy_id "
            f"WHERE tr.object_id IN ({pid_marks}) AND tt.term_taxonomy_id IN ({tt_marks})",
            post_ids + tt_ids,
        )
        for pid, tid in c.fetchall():
            out[pid].add(tid)
    return out
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: apply the CSV classification to the database.

    Reads the CSV, compares the desired categories with the ones each post
    currently has, and then either prints a preview (``--dry-run``) or applies
    the inserts/deletes and the count refresh in a single transaction.

    The connection is always closed, and a failure while writing rolls the
    transaction back before the exception propagates.

    Raises:
        SystemExit: if any managed term_id has no row in wp_term_taxonomy.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--csv', default='/tmp/clasif_new.csv')
    ap.add_argument('--mode', choices=['add', 'replace'], default='add')
    ap.add_argument('--dry-run', action='store_true')
    args = ap.parse_args()

    print(f'CSV: {args.csv}', file=sys.stderr)
    print(f'Mode: {args.mode}{" (DRY)" if args.dry_run else ""}', file=sys.stderr)

    desired_by_pid = load_csv(args.csv)
    print(f'Posts en CSV: {len(desired_by_pid)}', file=sys.stderr)

    conn = get_conn()
    try:
        term_to_tt = get_term_taxonomy_ids(conn, MANAGED_TERMS)
        print(f'Term taxonomy ids: {term_to_tt}', file=sys.stderr)
        # Compare against the distinct managed terms, not the number of
        # names: two names mapping to one term must not trip this check.
        missing = set(MANAGED_TERMS) - set(term_to_tt)
        if missing:
            sys.exit(f'No encuentro todos los term_ids: {missing}')
        tt_ids = list(term_to_tt.values())

        # Current categories of the posts listed in the CSV, fetched in
        # batches to keep each IN (...) list bounded.
        pids = list(desired_by_pid)
        BATCH = 5000
        current_by_pid = {}
        for i in range(0, len(pids), BATCH):
            current_by_pid.update(current_cats(conn, pids[i:i + BATCH], tt_ids))

        to_add, to_del = _plan_changes(
            desired_by_pid, current_by_pid, term_to_tt, args.mode)
        print(f'A añadir: {len(to_add)}', file=sys.stderr)
        print(f'A quitar: {len(to_del)}', file=sys.stderr)

        if args.dry_run:
            # Preview only: show a sample and leave the database untouched.
            print('\n--- 5 ejemplos añadir ---', file=sys.stderr)
            for x in to_add[:5]:
                print(' ', x, file=sys.stderr)
            print('\n--- 5 ejemplos quitar ---', file=sys.stderr)
            for x in to_del[:5]:
                print(' ', x, file=sys.stderr)
            return

        try:
            _apply_changes(conn, to_add, to_del, tt_ids)
            conn.commit()
        except Exception:
            # Never leave a half-applied batch pending on the connection.
            conn.rollback()
            raise
        print('Commit OK.', file=sys.stderr)

        _print_final_counts(conn, tt_ids)
    finally:
        conn.close()


def _plan_changes(desired_by_pid, current_by_pid, term_to_tt, mode):
    """Compute the (object_id, term_taxonomy_id) rows to insert and delete.

    Rows to delete are only produced when ``mode`` is ``'replace'``.
    """
    to_add = []
    to_del = []
    for pid, desired_names in desired_by_pid.items():
        desired_tids = {CAT_NAME_TO_TERM[n] for n in desired_names}
        current_tids = current_by_pid.get(pid, set())
        # Add what the CSV asks for and the post does not have yet. Sorting
        # keeps the plan (and the dry-run sample) deterministic.
        to_add.extend(
            (pid, term_to_tt[tid]) for tid in sorted(desired_tids - current_tids))
        if mode == 'replace':
            # Remove managed categories the post has but the CSV does not list.
            to_del.extend(
                (pid, term_to_tt[tid]) for tid in sorted(current_tids - desired_tids))
    return to_add, to_del


def _apply_changes(conn, to_add, to_del, tt_ids, chunk_size=1000):
    """Insert/delete relationship rows and refresh counts, without committing."""
    in_ttids = ','.join(str(t) for t in tt_ids)
    with conn.cursor() as c:
        # Every interpolated value below is an integer (post ids parsed with
        # int(), taxonomy ids read from the database).
        for i in range(0, len(to_add), chunk_size):
            vals = ','.join(f'({p},{t})' for p, t in to_add[i:i + chunk_size])
            # INSERT IGNORE: a relationship that already exists is skipped.
            c.execute(f'INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id) VALUES {vals}')
        for i in range(0, len(to_del), chunk_size):
            conds = ' OR '.join(
                f'(object_id={p} AND term_taxonomy_id={t})'
                for p, t in to_del[i:i + chunk_size])
            c.execute(f'DELETE FROM wp_term_relationships WHERE {conds}')
        # Recompute the stored counts for the managed categories.
        # NOTE(review): this counts every relationship row; WordPress itself
        # appears to count only published posts when it maintains this
        # column - confirm the difference is acceptable.
        c.execute(f"""
            UPDATE wp_term_taxonomy tt
            SET tt.count = (SELECT COUNT(*) FROM wp_term_relationships tr WHERE tr.term_taxonomy_id=tt.term_taxonomy_id)
            WHERE tt.term_taxonomy_id IN ({in_ttids})
        """)


def _print_final_counts(conn, tt_ids):
    """Print term_id, slug and count of each managed category to stderr."""
    in_ttids = ','.join(str(t) for t in tt_ids)
    with conn.cursor() as c:
        c.execute(f"""
            SELECT t.term_id, t.slug, tt.count FROM wp_term_taxonomy tt
            JOIN wp_terms t USING(term_id)
            WHERE tt.term_taxonomy_id IN ({in_ttids}) ORDER BY t.term_id
        """)
        print('\nCats finales:', file=sys.stderr)
        for row in c.fetchall():
            print(f' {row[0]:5d} {row[1]:30s} {row[2]}', file=sys.stderr)
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user