||~', "\n", $html); $t = preg_replace('~<[^>]+>~', ' ', $t); $t = preg_replace('~\[[^\]]+\]~', ' ', $t); $t = html_entity_decode($t, ENT_QUOTES); return $t; }

/**
 * Split HTML into normalized sentences, keeping only those of length >= 40
 * (short sentences produce false positives when comparing translations).
 *
 * The text is first passed through norm_text(), then split after sentence
 * punctuation (. ! ? …) followed by whitespace, or on newlines. Each piece has
 * its whitespace collapsed to single spaces, is trimmed and lowercased.
 *
 * @param string $html Raw post content.
 * @return array<string,int> Map of normalized sentence => length in characters.
 *                           Duplicate sentences collapse into a single entry.
 */
function sentences($html): array
{
    $text = (string) norm_text($html);

    $parts = preg_split('~(?<=[.!?…])\s+|\n+~u', $text);
    if ($parts === false) {
        // preg_split() returns false on failure (e.g. malformed UTF-8 with /u);
        // treat that as "no usable sentences" instead of iterating over false.
        return [];
    }

    $out = [];
    foreach ($parts as $part) {
        // preg_replace() yields null on failure; the cast keeps trim() happy.
        $sentence = mb_strtolower(trim((string) preg_replace('~\s+~u', ' ', $part)));
        $length = mb_strlen($sentence);
        if ($length >= 40) {
            $out[$sentence] = $length;
        }
    }

    return $out;
}

// Post statuses to scan: drafts and published posts when $STATUS is 'any',
// otherwise only the requested status.
$statuses = $STATUS === 'any' ? ['draft', 'publish'] : [$STATUS];

// One %s placeholder per status so $wpdb->prepare() escapes the values instead
// of splicing $STATUS straight into the SQL text.
$placeholders = implode(',', array_fill(0, count($statuses), '%s'));

// IDs of posts assigned to one of the non-Spanish language terms. Table names
// come from $wpdb so the query also works with a non-default table prefix.
$ids = $wpdb->get_col(
    $wpdb->prepare(
        "SELECT p.ID
           FROM {$wpdb->posts} p
           JOIN {$wpdb->term_relationships} tr ON tr.object_id=p.ID
           JOIN {$wpdb->term_taxonomy} tt ON tt.term_taxonomy_id=tr.term_taxonomy_id AND tt.taxonomy='language'
           JOIN {$wpdb->terms} t ON t.term_id=tt.term_id AND t.slug IN ('en','fr','it','pt')
          WHERE p.post_type='post' AND p.post_status IN ($placeholders)
          GROUP BY p.ID",
        $statuses
    )
);

$by_lang = [];   // per-language counters: 'n' = posts compared, 'bad' = posts at/over the threshold
$offenders = []; // rows of [post ID, language, Spanish post ID, ratio, title]

foreach ($ids as $id) {
    $lang = pll_get_post_language($id);
    $es = pll_get_post((int) $id, 'es');
    if (!$es) {
        continue; // no Spanish counterpart to compare against
    }

    $tr_post = get_post($id);
    if (!$tr_post) {
        continue; // post vanished between the query and now
    }
    $tr_s = sentences($tr_post->post_content);
    if (!$tr_s) {
        continue;
    }

    $es_post = get_post($es);
    if (!$es_post) {
        continue; // translation link points at a post that cannot be loaded
    }
    $es_s = sentences($es_post->post_content);
    if (!$es_s) {
        continue;
    }

    // Ratio = characters in sentences that also appear verbatim in the Spanish
    // post, over total characters in the translation's long sentences.
    $total = array_sum($tr_s);
    $match = array_sum(array_intersect_key($tr_s, $es_s));
    $ratio = $total ? $match / $total : 0;

    $by_lang[$lang]['n'] = ($by_lang[$lang]['n'] ?? 0) + 1;
    if ($ratio >= $THRESH) {
        $by_lang[$lang]['bad'] = ($by_lang[$lang]['bad'] ?? 0) + 1;
        $offenders[] = [$id, $lang, $es, round($ratio, 2), $tr_post->post_title];
    }
}

// Worst offenders (highest ratio) first.
usort($offenders, fn($a, $b) => $b[3] <=> $a[3]);

echo "=== Traducciones con fragmentos ES (ratio >= $THRESH, status=$STATUS) ===\n";
foreach ($offenders as $o) {
    printf("#%d [%s] ratio=%.2f es=%d %s\n", $o[0], $o[1], $o[3], $o[2], mb_substr($o[4], 0, 45));
}

echo "\n--- resumen por idioma ---\n";
foreach ($by_lang as $l => $d) {
    printf("%s: %d/%d con fragmentos ES\n", $l, $d['bad'] ?? 0, $d['n']);
}
echo "TOTAL ofensores: " . count($offenders) . "\n";

// Dump the offending IDs (one per line) for the reprocessing step.
$dump_path = '/tmp/untranslated_ids.txt';
if (file_put_contents($dump_path, implode("\n", array_column($offenders, 0))) === false) {
    // Surface the failure: a missing/stale ID file would silently skew the reprocessing run.
    echo "ERROR: no se pudo escribir $dump_path\n";
}