Añadir mu-plugins y scripts de feadulta
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Locuta una carta/artículo entero de feadulta con la voz clonada (XTTS-v2 + GPU).
|
||||
|
||||
Saca el texto del post ES, lo trocea por párrafos, lo locuta con la voz de
|
||||
referencia (calculando los latents del hablante UNA sola vez), concatena con
|
||||
pausas y añade comfort noise. Issue #76.
|
||||
|
||||
Uso:
|
||||
tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]
|
||||
"""
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
os.environ.setdefault("COQUI_TOS_AGREED", "1")
|
||||
|
||||
import numpy as np # noqa: E402
|
||||
import soundfile as sf # noqa: E402
|
||||
import torch # noqa: E402
|
||||
from TTS.api import TTS # noqa: E402
|
||||
|
||||
# Inference device: fall back to CPU when CUDA is unavailable or when the
# FEA_CPU environment variable is set to any non-empty value.
DEVICE = (
    "cpu"
    if not torch.cuda.is_available() or os.environ.get("FEA_CPU")
    else "cuda"
)

# Output directory for generated audio, resolved relative to the directory
# two levels above this script.
OUT = Path(__file__).resolve().parents[1] / "wordpress/wp-content/uploads/tts-samples"

# Sample rate (Hz) used when building pauses and writing the audio.
SR = 24000

# Name of the Docker container the post text is fetched from.
CONTAINER = "wordpress-web"
|
||||
|
||||
|
||||
def _html_to_paragraphs(raw):
    """Turn post HTML into a list of plain-text paragraphs.

    Closing ``</p>``, ``<br>`` and closing heading tags become line breaks,
    every other tag and every ``[shortcode]`` is dropped, HTML entities are
    decoded, and whitespace inside each line is collapsed to single spaces.
    Lines with fewer than two characters are discarded.
    """
    # Keep paragraph boundaries before stripping the remaining tags.
    text = re.sub(r"(?i)</p>|<br\s*/?>|</h[1-6]>", "\n", raw)
    text = re.sub(r"<[^>]+>", "", text)  # strip tags
    text = re.sub(r"\[[^\]]+\]", "", text)  # strip shortcodes
    # Decode entities only after tag removal so escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being stripped.
    text = html.unescape(text)
    lines = (re.sub(r"\s+", " ", line).strip() for line in text.split("\n"))
    return [line for line in lines if len(line) > 1]


def get_post_text(pid):
    """Fetch a post from the WordPress container and split it into paragraphs.

    Runs ``/tmp/fea_post_io.php get <pid>`` inside ``CONTAINER`` (presumably
    this dumps the post to ``/tmp/fea_es.json`` in the container -- the
    script itself is not visible here), copies that JSON file to the host
    and reads its ``title`` and ``content`` keys.

    Args:
        pid: Post id; passed to the PHP script as a string.

    Returns:
        A ``(title, paragraphs)`` tuple where ``paragraphs`` is a list of
        plain-text strings.

    Raises:
        subprocess.CalledProcessError: If either docker command fails.
    """
    subprocess.run(
        ["docker", "exec", CONTAINER, "php", "/tmp/fea_post_io.php", "get", str(pid)],
        check=True,
        capture_output=True,
    )
    subprocess.run(
        ["docker", "cp", f"{CONTAINER}:/tmp/fea_es.json", "/tmp/fea_es.json"],
        check=True,
    )
    # Close the handle deterministically and decode as UTF-8 (the JSON
    # interchange encoding) instead of relying on the locale default.
    with open("/tmp/fea_es.json", encoding="utf-8") as fh:
        d = json.load(fh)
    return d["title"], _html_to_paragraphs(d["content"])
|
||||
|
||||
|
||||
def _run_ffmpeg(args):
    """Run ``ffmpeg -y`` with *args*; exit with ffmpeg's stderr if it fails."""
    try:
        subprocess.run(["ffmpeg", "-y", *args], check=True, capture_output=True)
    except subprocess.CalledProcessError as err:
        detail = err.stderr.decode(errors="replace").strip()
        sys.exit(f"ffmpeg falló (código {err.returncode}):\n{detail}")


def main():
    """Synthesize a whole post with a cloned voice and write WAV and MP3 files.

    Command line: ``tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]``.

    Steps: fetch the post paragraphs, load XTTS-v2 on ``DEVICE``, compute the
    speaker conditioning latents once from the reference sample, synthesize
    each paragraph followed by a 0.35 s pause, mix the result with filtered
    brown noise through ffmpeg, and encode a 96 kbit/s MP3. Output goes to
    ``OUT`` as ``<name>.wav`` and ``<name>.mp3``.

    Exits with a message (``SystemExit``) on bad arguments, a missing voice
    sample, a post without text, or an ffmpeg failure.
    """
    import time

    if len(sys.argv) < 3:
        sys.exit("uso: tts_carta.py <post_id> <muestra_voz.wav> [nombre_salida]")
    try:
        pid = int(sys.argv[1])
    except ValueError:
        sys.exit(f"post_id no numérico: {sys.argv[1]!r}")
    spk = sys.argv[2]
    name = sys.argv[3] if len(sys.argv) > 3 else f"carta-{pid}"

    # Fail fast on a missing reference sample, before the slow model load.
    if not Path(spk).is_file():
        sys.exit(f"no existe la muestra de voz: {spk}")

    title, paras = get_post_text(pid)
    print(f"Post #{pid}: «{title}» ({len(paras)} párrafos, {sum(len(p) for p in paras)} car)")
    # np.concatenate raises on an empty list; stop with a clear message.
    if not paras:
        sys.exit(f"Post #{pid}: sin texto que locutar")

    print(f"Cargando XTTS-v2 en {DEVICE}…", flush=True)
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(DEVICE)
    model = tts.synthesizer.tts_model
    print("Calculando timbre del hablante (1 vez)…", flush=True)
    # Computed once and reused for every paragraph.
    gpt_cond, spk_emb = model.get_conditioning_latents(audio_path=[spk])

    pause = np.zeros(int(SR * 0.35), dtype=np.float32)
    pieces = []
    t0 = time.perf_counter()
    for i, para in enumerate(paras, 1):
        out = model.inference(
            para, "es", gpt_cond, spk_emb,
            temperature=0.65, repetition_penalty=5.0, top_k=50, top_p=0.85,
            enable_text_splitting=True,
        )
        pieces.append(np.asarray(out["wav"], dtype=np.float32))
        pieces.append(pause)
        print(f" párrafo {i}/{len(paras)} ({len(para)} car) ok", flush=True)
    audio = np.concatenate(pieces)
    dt = time.perf_counter() - t0
    dur = len(audio) / SR
    print(f"Síntesis: {dt:.1f}s para {dur:.1f}s de audio (x{dur/dt:.1f} tiempo real) en {DEVICE}")

    # The output directory may not exist on a fresh checkout.
    OUT.mkdir(parents=True, exist_ok=True)
    raw = OUT / f"{name}.raw.wav"
    sf.write(raw, audio, SR)
    wav = OUT / f"{name}.wav"
    # Mix the speech with band-limited brown noise; duration=first keeps the
    # speech length, normalize=0 leaves the speech level untouched.
    _run_ffmpeg([
        "-i", str(raw), "-filter_complex",
        f"anoisesrc=color=brown:amplitude=0.004:sample_rate={SR}[n];"
        "[n]highpass=f=120,lowpass=f=3800[nf];"
        "[0:a][nf]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[a]",
        "-map", "[a]", "-ar", str(SR), str(wav),
    ])
    # Reached only when the mix succeeded, so a failed ffmpeg run never
    # discards the synthesized audio.
    raw.unlink(missing_ok=True)
    mp3 = OUT / f"{name}.mp3"
    _run_ffmpeg(["-i", str(wav), "-b:a", "96k", str(mp3)])
    print(f"OK -> {mp3} ({dur:.0f}s de audio)")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user