114 lines
4.1 KiB
Python
114 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
fix_remaining_titles.py
|
|
|
|
Fixes posts where the translated title still equals the Spanish original.
|
|
Queries DB dynamically, then translates each title via Jan API.
|
|
"""
|
|
|
|
import pymysql
|
|
import json
|
|
import urllib.request
|
|
import time
|
|
|
|
JAN_URL = "http://172.19.128.1:1337/v1/chat/completions"
|
|
JAN_MODEL = "gemma-3-12b-it-Q4_K_M"
|
|
|
|
DB = dict(host='172.18.0.2', port=3306, user='wordpress_user',
|
|
password='wordpress_pass', database='wordpress_db', charset='utf8mb4',
|
|
cursorclass=pymysql.cursors.DictCursor)
|
|
|
|
LANG_NAMES = {"en": "English", "fr": "French", "it": "Italian", "pt": "Portuguese"}
|
|
|
|
|
|
def translate_title(spanish_title, lang_name):
|
|
payload = json.dumps({
|
|
"model": JAN_MODEL,
|
|
"messages": [
|
|
{"role": "system", "content": "You are a translator. Respond ONLY with the translated text, nothing else."},
|
|
{"role": "user", "content": f"Translate from Spanish to {lang_name}, ALL CAPS:\n\n{spanish_title}"}
|
|
],
|
|
"temperature": 0.1,
|
|
"max_tokens": 120,
|
|
}).encode("utf-8")
|
|
req = urllib.request.Request(
|
|
JAN_URL, data=payload,
|
|
headers={"Content-Type": "application/json", "Authorization": "Bearer dummy"},
|
|
method="POST"
|
|
)
|
|
with urllib.request.urlopen(req, timeout=30) as r:
|
|
result = json.loads(r.read())
|
|
return result["choices"][0]["message"]["content"].strip().strip('"').strip("'")
|
|
|
|
|
|
def main():
|
|
db = pymysql.connect(**DB)
|
|
c = db.cursor()
|
|
|
|
# Find all posts where the title = the Spanish original's title (untranslated)
|
|
c.execute("""
|
|
SELECT p.ID, t.slug as lang, p.post_title as current_title, p2.post_title as sp_title
|
|
FROM wp_posts p
|
|
JOIN wp_term_relationships trl ON p.ID=trl.object_id
|
|
JOIN wp_term_taxonomy ttl ON trl.term_taxonomy_id=ttl.term_taxonomy_id AND ttl.taxonomy='language'
|
|
JOIN wp_terms t ON ttl.term_id=t.term_id
|
|
JOIN wp_term_relationships trg ON p.ID=trg.object_id
|
|
JOIN wp_term_taxonomy ttg ON trg.term_taxonomy_id=ttg.term_taxonomy_id AND ttg.taxonomy='post_translations'
|
|
JOIN wp_posts p2 ON (ttg.description LIKE CONCAT('%i:',p2.ID,';%') OR ttg.description LIKE CONCAT('%i:',p2.ID,'}%'))
|
|
JOIN wp_term_relationships trl2 ON p2.ID=trl2.object_id
|
|
JOIN wp_term_taxonomy ttl2 ON trl2.term_taxonomy_id=ttl2.term_taxonomy_id AND ttl2.taxonomy='language'
|
|
JOIN wp_terms t2 ON ttl2.term_id=t2.term_id AND t2.slug='es'
|
|
WHERE p.ID > 42760 AND p.post_type='post' AND p.post_status='publish'
|
|
AND t.slug != 'es'
|
|
AND p.post_title = p2.post_title
|
|
ORDER BY t.slug, p.ID
|
|
""")
|
|
rows = c.fetchall()
|
|
print(f"Found {len(rows)} posts with untranslated titles\n")
|
|
|
|
cache = {} # (sp_title, lang) -> translated
|
|
done = 0
|
|
errors = 0
|
|
|
|
for row in rows:
|
|
post_id = row['ID']
|
|
lang = row['lang']
|
|
sp_title = row['sp_title']
|
|
lang_name = LANG_NAMES.get(lang, lang)
|
|
key = (sp_title, lang)
|
|
|
|
if key not in cache:
|
|
try:
|
|
t0 = time.time()
|
|
translated = translate_title(sp_title, lang_name)
|
|
elapsed = time.time() - t0
|
|
# Reject if translation = original (model failed)
|
|
if translated.upper() == sp_title.upper():
|
|
print(f" [{lang}] FAILED (returned same): {sp_title[:50]}")
|
|
errors += 1
|
|
cache[key] = None
|
|
continue
|
|
cache[key] = translated
|
|
print(f" [{lang}] {sp_title[:40]!r} -> {translated[:40]!r} ({elapsed:.0f}s)")
|
|
except Exception as e:
|
|
print(f" [{lang}] ERROR: {e}")
|
|
errors += 1
|
|
cache[key] = None
|
|
continue
|
|
|
|
new_title = cache.get(key)
|
|
if not new_title:
|
|
continue
|
|
|
|
c.execute("UPDATE wp_posts SET post_title=%s WHERE ID=%s", (new_title, post_id))
|
|
db.commit()
|
|
done += 1
|
|
print(f" Updated {post_id} [{lang}]: {new_title[:60]}")
|
|
|
|
db.close()
|
|
print(f"\nDone: {done} fixed, {errors} errors/skipped")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|