Files
feadulta/scripts/fix_remaining_titles.py

114 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
fix_remaining_titles.py
Fixes posts where the translated title still equals the Spanish original.
Queries DB dynamically, then translates each title via Jan API.
"""
import pymysql
import json
import urllib.request
import time
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor)
LANG_NAMES = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
def translate_title(spanish_title, lang_name):
payload = json.dumps({
"model": JAN_MODEL,
"messages": [
{"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
{"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{spanish_title}"}
],
"temperature": 0.1,
"max_tokens": 120,
}).encode("utf-8")
req = urllib.request.Request(
JAN_URL, data=payload,
headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
method="POST"
)
with urllib.request.urlopen(req, timeout=30) as r:
result = json.loads(r.read())
return result["choices"][0]["message"]["content"].strip().strip('"').strip("'")
def main():
db = pymysql.connect(**DB)
c = db.cursor()
# Find all posts where the title = the Spanish original's title (untranslated)
c.execute("""
SELECT p.ID, t.slug as lang, p.post_title as current_title, p2.post_title as sp_title
FROM wp_posts p
JOIN wp_term_relationships trl ON p.ID=trl.object_id
JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
JOIN wp_terms t ON ttl.term_id=t.term_id
JOIN wp_term_relationships trg ON p.ID=trg.object_id
JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
JOIN wp_posts p2 ON (ttg.description LIKE CONCAT('%i:',p2.ID,';%') OR ttg.description LIKE CONCAT('%i:',p2.ID,'}%'))
JOIN wp_term_relationships trl2 ON p2.ID=trl2.object_id
JOIN wp_term_taxonomy ttl2 ON trl2.term_taxonomy_id=ttl2.term_taxonomy_id AND ttl2.taxonomy='language'
JOIN wp_terms t2 ON ttl2.term_id=t2.term_id AND t2.slug='es'
WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
AND t.slug != 'es'
AND p.post_title = p2.post_title
ORDER BY t.slug, p.ID
""")
rows = c.fetchall()
print(f"Found {len(rows)} posts with untranslated titles\n")
cache = {} # (sp_title, lang) -> translated
done = 0
errors = 0
for row in rows:
post_id = row['ID']
lang = row['lang']
sp_title = row['sp_title']
lang_name = LANG_NAMES.get(lang, lang)
key = (sp_title, lang)
if key not in cache:
try:
t0 = time.time()
translated = translate_title(sp_title, lang_name)
elapsed = time.time() - t0
# Reject if translation = original (model failed)
if translated.upper() == sp_title.upper():
print(f" [{lang}] FAILED (returned same): {sp_title[:50]}")
errors += 1
cache[key] = None
continue
cache[key] = translated
print(f" [{lang}] {sp_title[:40]!r} -> {translated[:40]!r} ({elapsed:.0f}s)")
except Exception as e:
print(f" [{lang}] ERROR: {e}")
errors += 1
cache[key] = None
continue
new_title = cache.get(key)
if not new_title:
continue
c.execute("UPDATE wp_posts SET post_title=%s WHERE ID=%s", (new_title, post_id))
db.commit()
done += 1
print(f" Updated {post_id} [{lang}]: {new_title[:60]}")
db.close()
print(f"\nDone: {done} fixed, {errors} errors/skipped")
if __name__ == "__main__":
main()