Añadir mu-plugins y scripts de feadulta

This commit is contained in:
2026-06-28 15:10:46 -04:00
parent bce7e42f44
commit b6116b066d
106 changed files with 17600 additions and 2 deletions
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""Locuta una carta/artículo entero de feadulta con la voz clonada (XTTS-v2 + GPU).
Saca el texto del post ES, lo trocea por párrafos, lo locuta con la voz de
referencia (calculando los latents del hablante UNA sola vez), concatena con
pausas y añade comfort noise. Issue #76.
Uso:
tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]
"""
import html
import json
import os
import re
import subprocess
import sys
from pathlib import Path
os.environ.setdefault("COQUI_TOS_AGREED", "1")
import numpy as np # noqa: E402
import soundfile as sf # noqa: E402
import torch # noqa: E402
from TTS.api import TTS # noqa: E402
# Inference device: CUDA when torch reports an available GPU, unless the
# FEA_CPU environment variable holds any non-empty value (even "0" forces CPU).
if torch.cuda.is_available() and not os.environ.get("FEA_CPU"):
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Output directory for the generated audio, located relative to the directory
# above the one that contains this script.
OUT = (
    Path(__file__).resolve().parent.parent
    / "wordpress"
    / "wp-content"
    / "uploads"
    / "tts-samples"
)

# Sample rate in Hz used for the silence padding, the WAV file and ffmpeg.
SR = 24_000

# Name of the docker container the post text is fetched from.
CONTAINER = "wordpress-web"
def _html_to_paragraphs(raw):
    """Turn post HTML into a list of whitespace-normalised text paragraphs."""
    # Keep paragraph boundaries: closing </p>, </h1>-</h6> and <br> tags become
    # newlines before every remaining tag is stripped.
    raw = re.sub(r"(?i)</p>|<br\s*/?>|</h[1-6]>", "\n", raw)
    raw = re.sub(r"<[^>]+>", "", raw)  # strip remaining tags
    raw = re.sub(r"\[[^\]]+\]", "", raw)  # strip [shortcodes]
    # Decode entities last so escaped "<"/"[" in the text are not mistaken
    # for markup by the substitutions above.
    raw = html.unescape(raw)
    collapsed = (re.sub(r"\s+", " ", line).strip() for line in raw.split("\n"))
    # Drop empty lines and single-character leftovers.
    return [para for para in collapsed if len(para) > 1]


def get_post_text(pid):
    """Fetch a post from the WordPress container and split it into paragraphs.

    Runs ``php /tmp/fea_post_io.php get <pid>`` inside the ``CONTAINER`` docker
    container, copies ``/tmp/fea_es.json`` from the container to the same path
    on the host (presumably the file that script writes; confirm against
    fea_post_io.php) and parses it as UTF-8 JSON.

    Args:
        pid: Post id; passed to the PHP script as ``str(pid)``.

    Returns:
        A ``(title, paragraphs)`` tuple: the JSON ``"title"`` value and the
        list of plain-text paragraphs extracted from the JSON ``"content"``.

    Raises:
        subprocess.CalledProcessError: If either docker command fails.
        OSError, json.JSONDecodeError, KeyError: If the copied file cannot be
            read, is not valid JSON, or lacks ``"content"``/``"title"``.
    """
    json_path = "/tmp/fea_es.json"
    subprocess.run(
        ["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php", "get", str(pid)],
        check=True,
        capture_output=True,
    )
    subprocess.run(
        ["docker", "cp", f"{CONTAINER}:{json_path}", json_path],
        check=True,
    )
    # Close the handle deterministically and decode as UTF-8 regardless of the
    # host locale (the text is Spanish and may contain non-ASCII characters).
    with open(json_path, encoding="utf-8") as fh:
        d = json.load(fh)
    paras = _html_to_paragraphs(d["content"])
    return d["title"], paras
def _run_ffmpeg(args):
    """Run ``ffmpeg -y`` with *args*; exit with the tail of its stderr on failure."""
    proc = subprocess.run(
        ["ffmpeg", "-y", *args],
        capture_output=True,
        text=True,
        errors="replace",
    )
    if proc.returncode != 0:
        sys.exit(f"ffmpeg falló (código {proc.returncode}):\n{proc.stderr[-2000:]}")


def main():
    """Synthesise a whole post with the cloned voice and write WAV and MP3 files.

    Command line: ``tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]``.

    Steps: fetch the post paragraphs with ``get_post_text``, load XTTS-v2 on
    ``DEVICE``, compute the speaker conditioning latents once from the voice
    sample, synthesise each paragraph followed by 0.35 s of silence, mix in
    low-level band-limited brown noise with ffmpeg (the "comfort noise"), and
    encode a 96 kbit/s MP3. Output goes to ``OUT/<name>.wav`` and
    ``OUT/<name>.mp3``; ``name`` defaults to ``carta-<post_id>``.

    Exits through ``sys.exit`` with a message on bad arguments, a missing
    voice sample, a post without text, or an ffmpeg failure.
    """
    import time

    if len(sys.argv) < 3:
        sys.exit("uso: tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]")
    try:
        pid = int(sys.argv[1])
    except ValueError:
        sys.exit(f"post_id no válido: {sys.argv[1]!r}")
    spk = sys.argv[2]
    # Fail fast: loading the model takes long, so check the sample first.
    if not Path(spk).is_file():
        sys.exit(f"no existe la muestra de voz: {spk}")
    name = sys.argv[3] if len(sys.argv) > 3 else f"carta-{pid}"

    title, paras = get_post_text(pid)
    print(f"Post #{pid}: «{title}» ({len(paras)} párrafos, {sum(len(p) for p in paras)} car)")
    if not paras:
        # np.concatenate([]) would raise an opaque ValueError further down.
        sys.exit(f"El post #{pid} no tiene texto que locutar")

    # Create the output directory before the expensive synthesis, not after.
    OUT.mkdir(parents=True, exist_ok=True)

    print(f"Cargando XTTS-v2 en {DEVICE}", flush=True)
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(DEVICE)
    model = tts.synthesizer.tts_model
    print("Calculando timbre del hablante (1 vez)…", flush=True)
    # The latents are computed once and reused for every paragraph.
    gpt_cond, spk_emb = model.get_conditioning_latents(audio_path=[spk])

    pause = np.zeros(int(SR * 0.35), dtype=np.float32)  # 0.35 s of silence
    pieces = []
    t0 = time.time()
    for i, para in enumerate(paras, 1):
        out = model.inference(
            para, "es", gpt_cond, spk_emb,
            temperature=0.65, repetition_penalty=5.0, top_k=50, top_p=0.85,
            enable_text_splitting=True,
        )
        pieces.append(np.asarray(out["wav"], dtype=np.float32))
        pieces.append(pause)  # a pause follows every paragraph, the last too
        print(f" párrafo {i}/{len(paras)} ({len(para)} car) ok", flush=True)
    audio = np.concatenate(pieces)
    dt = time.time() - t0
    dur = len(audio) / SR
    speed = dur / dt if dt > 0 else float("inf")  # avoid ZeroDivisionError
    print(f"Síntesis: {dt:.1f}s para {dur:.1f}s de audio (x{speed:.1f} tiempo real) en {DEVICE}")

    raw_path = OUT / f"{name}.raw.wav"
    sf.write(raw_path, audio, SR)
    wav = OUT / f"{name}.wav"
    # Mix the speech with brown noise band-limited to 120-3800 Hz.
    # duration=first makes the output end with the speech; normalize=0 keeps
    # amix from rescaling the inputs.
    _run_ffmpeg([
        "-i", str(raw_path), "-filter_complex",
        f"anoisesrc=color=brown:amplitude=0.004:sample_rate={SR}[n];"
        "[n]highpass=f=120,lowpass=f=3800[nf];"
        "[0:a][nf]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[a]",
        "-map", "[a]", "-ar", str(SR), str(wav),
    ])
    # Reached only when the mix succeeded, so a failed run keeps the raw audio.
    raw_path.unlink(missing_ok=True)
    mp3 = OUT / f"{name}.mp3"
    _run_ffmpeg(["-i", str(wav), "-b:a", "96k", str(mp3)])
    print(f"OK -> {mp3} ({dur:.0f}s de audio)")
# Run the command-line entry point only when executed as a script, so that
# importing this module does not trigger it.
if __name__ == "__main__":
    main()