fix: PCM streaming — missing Response import + wrong tuple unpacking
- Add Response to the flask imports (its absence caused a NameError on every PCM request)
- Unpack the (audio, sr, timing) tuple yielded by generate_custom_voice_streaming (previously each yielded tuple was bound to a single variable, so a 3-element object was passed to np.clip)
- Move elapsed/chunk logging inside the generator so it fires after the stream ends
- PCM streaming now working: 12c test → 2.3s audio in 1.8s, 3 chunks
This commit is contained in:
@@ -27,7 +27,7 @@ if os.getenv("AOTRITON", "0") == "1":
|
||||
import io, time, logging, subprocess, tempfile
|
||||
import torch, soundfile as sf
|
||||
import numpy as np
|
||||
from flask import Flask, request, jsonify, abort, send_file, stream_with_context
|
||||
from flask import Flask, request, jsonify, abort, send_file, stream_with_context, Response
|
||||
from flask_cors import CORS
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
@@ -216,32 +216,33 @@ def speech():
|
||||
|
||||
# Handle PCM streaming
|
||||
if fmt == "pcm" and USE_GRAPHS:
|
||||
log.info("Starting PCM streaming synthesis")
|
||||
log.info("Streaming PCM | %d chars | voice=%s speaker=%s",
|
||||
len(text), voice, info["speaker"])
|
||||
|
||||
def generate_pcm():
|
||||
t0 = time.monotonic()
|
||||
try:
|
||||
chunks = 0
|
||||
def generator():
|
||||
nonlocal chunks
|
||||
for audio_chunk in tts.generate_custom_voice_streaming(
|
||||
try:
|
||||
for audio, sr, timing in tts.generate_custom_voice_streaming(
|
||||
text=text,
|
||||
language=info["language"],
|
||||
speaker=info["speaker"],
|
||||
instruct=info["instruct"] or None,
|
||||
max_new_tokens=max(60, int(len(text) * 2.5))
|
||||
max_new_tokens=max(60, int(len(text) * 2.5)),
|
||||
):
|
||||
chunks += 1
|
||||
# Convert float32 numpy array to int16 PCM
|
||||
pcm_chunk = (np.clip(audio_chunk, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
|
||||
yield pcm_chunk
|
||||
|
||||
elapsed = time.monotonic() - t0
|
||||
log.info("PCM streaming completed in %.1fs with %d chunks", elapsed, chunks)
|
||||
return Response(stream_with_context(generator()), mimetype="audio/pcm", headers={"Cache-Control": "no-cache"})
|
||||
pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
|
||||
yield pcm.tobytes()
|
||||
except Exception as exc:
|
||||
log.exception("PCM streaming failed")
|
||||
abort(500, description=str(exc))
|
||||
elif fmt == "pcm":
|
||||
log.warning("PCM streaming requires HIP_GRAPHS=1 to be enabled. Returning MP3 instead.")
|
||||
log.exception("PCM stream error after %d chunks", chunks)
|
||||
return
|
||||
log.info("PCM stream done: %d chunks in %.1fs", chunks, time.monotonic() - t0)
|
||||
|
||||
return Response(
|
||||
stream_with_context(generate_pcm()),
|
||||
mimetype="audio/pcm",
|
||||
headers={"Cache-Control": "no-cache"},
|
||||
)
|
||||
# Fall through to regular MP3 path below
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user