Files
qwen3-tts-ra/setup_qwen3_readaloud.sh
pi-bot-01 d3ca5ab0b2 feat: Qwen3-TTS proxy with HIP graph + CPU decoder optimisations
- OpenAI-compatible Flask proxy (POST /audio/speech, GET /models)
- faster-qwen3-tts HIP graph acceleration: GPU LLM at 1.78x RTF
- CPU speech tokenizer decoder: bypasses MIOpen ConvDirectNaiveConvFwd,
  eliminates 4-40s per-request decode overhead
- attn_implementation=sdpa for transformer attention
- AOTRITON env var toggle (off=short sentences, on=long-form/novel chapters)
- HIP_GRAPHS env var toggle (default on)
- Startup warmup with HIP graph capture (~5s)
- CORS support for browser extension requests
- RTF: 0.9-1.5x on AMD RX 7900 XTX (gfx1100, ROCm 6.3)

Performance vs baseline (CPU-only, ~3 min/sentence):
  12c: 3.2s | 44c: 2.7s | 115c: 6.6s
2026-03-25 21:18:42 -07:00

288 lines
10 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
set -o errexit -o nounset -o pipefail
# -----------------------------------------------------------------
# Configuration - adjust only when the defaults do not fit
# -----------------------------------------------------------------
# Base directory for per-user artefacts; /home/oc is used when HOME is unset or empty.
HOME_DIR=${HOME:-/home/oc}
# Interpreter versions probed for the virtualenv: preferred first, then the fallback.
# Both must already be installed on the system to be picked up.
PYTHON_VERSION='3.12'
FALLBACK_PYTHON_VERSION='3.10'
# Location of the virtualenv (created under ${HOME_DIR}).
VENV_DIR="$HOME_DIR/qwen3tts-venv"
# Model to serve; the 0.6B CustomVoice model was chosen because it is quick to download.
QWEN_MODEL='Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice'
# TCP ports: Gradio demo and Flask proxy respectively.
DEMO_PORT=8000
PROXY_PORT=5000
# The script must be launched from the directory that contains Qwen3-TTS and read-aloud.
PROJECT_ROOT=$(pwd)
PROXY_DIR="$PROJECT_ROOT/qwen3-proxy"
SYSTEMD_USER_DIR="$HOME_DIR/.config/systemd/user"
# -----------------------------------------------------------------
# Helper functions for pretty output
# -----------------------------------------------------------------
# Print an informational message (all arguments joined by spaces) to stdout,
# prefixed with a green "[INFO]" tag. The message is passed through '%s' so
# backslashes in it stay literal instead of being interpreted as escapes.
info() { printf '\e[32m[INFO]\e[0m %s\n' "$*"; }
# Print an error message (all arguments joined by spaces) to stderr, prefixed
# with a red "[ERROR]" tag. The message is passed through '%s' so backslashes
# in it stay literal instead of being interpreted as escapes.
error() { printf '\e[31m[ERROR]\e[0m %s\n' "$*" >&2; }
# Print a warning message (all arguments joined by spaces) to stderr, prefixed
# with a yellow "[WARNING]" tag. The message is passed through '%s' so
# backslashes in it stay literal instead of being interpreted as escapes.
warning() { printf '\e[33m[WARNING]\e[0m %s\n' "$*" >&2; }
# -----------------------------------------------------------------
# 0) Helper: ensure we have a recent Python interpreter
# ---------------------------------------------------------
#######################################
# Pick the Python interpreter used to build the virtualenv.
# Probes, in order: python${PYTHON_VERSION}, python${FALLBACK_PYTHON_VERSION},
# then plain python3.
# Globals:   PYTHON_VERSION (read), FALLBACK_PYTHON_VERSION (read)
# Outputs:   the chosen command name on stdout; warnings/errors via the
#            warning/error helpers
# Returns:   0 when an interpreter was found; otherwise exits with status 1
#            (the caller runs this in a command substitution)
#######################################
detect_python() {
  local preferred="python${PYTHON_VERSION}"
  local fallback="python${FALLBACK_PYTHON_VERSION}"

  # Preferred version: no warning needed.
  if command -v "${preferred}" >/dev/null 2>&1; then
    printf '%s\n' "${preferred}"
    return 0
  fi

  # Fallback version: usable, but tell the user.
  if command -v "${fallback}" >/dev/null 2>&1; then
    warning "Python ${PYTHON_VERSION} not found, using ${FALLBACK_PYTHON_VERSION} as fallback"
    printf '%s\n' "${fallback}"
    return 0
  fi

  # Last resort: whatever python3 happens to be.
  if command -v python3 >/dev/null 2>&1; then
    warning "No specific Python version found, using python3 (may not be compatible)"
    printf '%s\n' "python3"
    return 0
  fi

  error "No Python interpreter found. Please install Python 3.10 or higher."
  exit 1
}
# Resolve the interpreter once; a failure inside detect_python aborts the
# script here because errexit is active.
PYTHON_BIN=$(detect_python)
# -----------------------------------------------------------------
# 1) Create (or reuse) a virtualenv and install the Python deps
# -----------------------------------------------------------------
# Reuse is keyed on bin/activate rather than on the directory itself: a
# previous run whose "python -m venv" failed part-way can leave the directory
# behind without an activate script, and sourcing it below would then fail.
if [[ ! -f "${VENV_DIR}/bin/activate" ]]; then
  info "Creating virtualenv at ${VENV_DIR}"
  if ! "${PYTHON_BIN}" -m venv "${VENV_DIR}"; then
    error "Failed to create virtual environment. Check Python installation and permissions."
    exit 1
  fi
else
  info "Virtualenv already exists reusing."
fi
# Activate the env for the remainder of the script
# shellcheck disable=SC1091 -- the activate script only exists at run time
source "${VENV_DIR}/bin/activate"
# Upgrade pip (helps with binary wheels)
info "Upgrading pip…"
if ! pip install -U pip setuptools wheel; then
  error "Failed to upgrade pip"
  exit 1
fi
# Install qwen-tts, or upgrade it when it is already present in the venv.
if pip show qwen-tts >/dev/null 2>&1; then
  info "qwen-tts already installed, upgrading"
  if ! pip install -U qwen-tts; then
    error "Failed to upgrade qwen-tts"
    exit 1
  fi
else
  info "Installing qwen-tts (Python wrapper)…"
  if ! pip install qwen-tts; then
    error "Failed to install qwen-tts"
    exit 1
  fi
fi
# -----------------------------------------------------------------
# 2) Prepare the Flask proxy source tree
# ---------------------------------------------------------
# Ensure the proxy directory exists, then record the proxy's Python
# dependencies (one package per line) in its requirements.txt.
mkdir -p "${PROXY_DIR}"
printf '%s\n' flask requests > "${PROXY_DIR}/requirements.txt"
# Create app.py for the proxy. The here-doc delimiter is quoted so the Python
# source is written verbatim (no shell expansion).
cat > "${PROXY_DIR}/app.py" <<'PY'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Flask proxy for the ReadAloud extension.

It implements the two endpoints that ReadAloud expects and forwards the
actual synthesis request to a locally running Qwen3-TTS Gradio demo.
"""
import base64
import io
import os

import requests
from flask import Flask, abort, jsonify, request, send_file

app = Flask(__name__)

# --------------------------------------------------------------
# Configuration via environment variables (defaults shown)
# --------------------------------------------------------------
GRADIO_URL = os.getenv("GRADIO_URL", "http://127.0.0.1:8000")

# Map the internal speaker token that the Gradio demo expects to the voice
# entry shown in the extension. Extend this dict if you want more voices.
SPEAKERS = {
    "Vivian": {"voice_name": "Qwen3 Vivian", "lang": "zh-CN"},
    "Ryan": {"voice_name": "Qwen3 Ryan", "lang": "en-US"},
    # Add other speakers from the Qwen3-TTS README if desired
}


@app.route("/read-aloud/list-voices/premium")
def list_voices():
    """Return the known voices as a JSON array of {"voice_name", "lang"} objects."""
    return jsonify(list(SPEAKERS.values()))


@app.route("/read-aloud/speak/<lang>/<voice_name>")
def speak(lang, voice_name):
    """Synthesize the text given in query parameter 'q' and return it as WAV audio.

    Responds 400 when 'q' is missing, 404 for an unknown voice, 502 when the
    Gradio server is unreachable or answers with a non-200 status, and 500
    when its response cannot be decoded.
    """
    text = request.args.get("q", "")
    if not text:
        abort(400, "missing query parameter 'q'")

    # Find the internal speaker token (case-insensitive match on voice_name)
    wanted = voice_name.lower()
    internal = next(
        (key for key, val in SPEAKERS.items() if val["voice_name"].lower() == wanted),
        None,
    )
    if internal is None:
        abort(404, f"voice '{voice_name}' not known to proxy")

    # Build the payload for the Gradio API; the demo expects:
    #   [text, language, speaker, instruct]
    payload = {"data": [text, lang, internal, ""]}
    try:
        r = requests.post(f"{GRADIO_URL}/api/predict", json=payload, timeout=120)
    except Exception as exc:
        abort(502, f"cannot reach Gradio server: {exc}")

    if r.status_code != 200:
        # Always answer 502 here: abort() raises LookupError for status codes
        # it has no exception class for, so the upstream code must not be
        # passed through verbatim.
        abort(502, f"Gradio error (HTTP {r.status_code}): {r.text}")

    try:
        # Gradio returns something like
        #   [{"name": "audio.wav", "data": "data:audio/wav;base64,..."}]
        data = r.json()["data"][0]["data"]
        # Strip a possible data-URL prefix, then decode the base64 payload.
        b64 = data.split(",", 1)[1] if data.startswith("data:") else data
        wav_bytes = base64.b64decode(b64)
    except Exception:
        abort(500, "unexpected Gradio response format")

    return send_file(
        io.BytesIO(wav_bytes),
        mimetype="audio/wav",
        as_attachment=False,
        download_name="speech.wav",
    )


if __name__ == "__main__":
    # Flask's built-in dev server is fine for a local user service
    app.run(host="0.0.0.0", port=int(os.getenv("PROXY_PORT", "5000")), debug=False)
PY
# Pull the proxy's requirements into the active virtualenv; stop the
# installer with an error message when pip reports a failure.
info "Installing Flask proxy dependencies…"
pip install -r "${PROXY_DIR}/requirements.txt" || {
  error "Failed to install Flask proxy dependencies"
  exit 1
}
# -----------------------------------------------------------------
# 3) Write the systemd user unit files (they will activate the venv)
# ---------------------------------------------------------
mkdir -p "${SYSTEMD_USER_DIR}"
# ---- qwen3-tts-demo.service ---------------------------------------
# The here-doc delimiter is deliberately UNQUOTED: the venv path, model name
# and port configured in this script are expanded now and baked into the unit.
# With a quoted delimiter the unit would contain literal "${...}" references
# that are never defined inside the service.
# A doubled backslash at end of line emits the single trailing backslash that
# systemd treats as a line continuation.
# The start-rate-limit settings live in [Unit], where systemd.unit documents them.
cat > "${SYSTEMD_USER_DIR}/qwen3-tts-demo.service" <<EOF
[Unit]
Description=Qwen3TTS Gradio demo (CustomVoice model)
After=network-online.target
Wants=network-online.target
StartLimitBurst=5
StartLimitIntervalSec=60
[Service]
# Activate the virtualenv created by the install script, then run the demo
ExecStart=/bin/bash -c '\\
source "${VENV_DIR}/bin/activate" && \\
qwen-tts-demo "${QWEN_MODEL}" \\
--ip 0.0.0.0 \\
--port ${DEMO_PORT} \\
--no-ssl-verify \\
--share false'
ExecStop=/usr/bin/pkill -f "qwen-tts-demo"
Restart=on-failure
RestartSec=5
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=default.target
EOF
# ---- qwen3-tts-proxy.service ---------------------------------------
# The here-doc delimiter is deliberately UNQUOTED so the ports and paths
# configured in this script are expanded now and baked into the unit.
# Escaping used below:
#   \\      -> one trailing backslash (systemd line continuation)
#   \$\$i   -> "$$i" in the unit, which systemd hands to bash as "$i"
# ExecStartPre= is placed in [Service] (it is not a [Unit] setting) and the
# start-rate-limit settings in [Unit], as documented by systemd.
# app.py takes its port from the PROXY_PORT environment variable and parses
# no command-line arguments, so none are passed.
cat > "${SYSTEMD_USER_DIR}/qwen3-tts-proxy.service" <<EOF
[Unit]
Description=Flask proxy translating ReadAloud API → Qwen3TTS Gradio demo
After=qwen3-tts-demo.service
Requires=qwen3-tts-demo.service
StartLimitBurst=5
StartLimitIntervalSec=60
[Service]
Environment=PROXY_PORT=${PROXY_PORT}
Environment=GRADIO_URL=http://127.0.0.1:${DEMO_PORT}
# Wait up to ~1 minute for the Gradio demo to become reachable before starting.
ExecStartPre=/bin/bash -c '\\
for i in {1..30}; do \\
if curl -s http://127.0.0.1:${DEMO_PORT}/ >/dev/null 2>&1; then exit 0; fi; \\
echo "Waiting for Qwen3TTS demo … (\$\$i)"; sleep 2; \\
done; \\
echo "Qwen3TTS demo never became reachable aborting proxy start." >&2; exit 1'
ExecStart=/bin/bash -c '\\
source "${VENV_DIR}/bin/activate" && \\
cd "${PROXY_DIR}" && \\
python app.py'
ExecStop=/usr/bin/pkill -f "python.*app.py"
Restart=on-failure
RestartSec=5
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=default.target
EOF
# -----------------------------------------------------------------
# 4) Reload systemd, enable and start the services
# ---------------------------------------------------------
# Without systemctl there is nothing to enable: print the manual commands.
# Otherwise reload the user manager (a failed reload is only a warning) and
# enable + start both services, aborting on the first one that fails.
if ! command -v systemctl >/dev/null 2>&1; then
  warning "systemctl not found. Services not enabled/started automatically. Please enable manually:"
  warning " systemctl --user enable --now qwen3-tts-demo.service"
  warning " systemctl --user enable --now qwen3-tts-proxy.service"
else
  info "Reloading user systemd daemon…"
  if ! systemctl --user daemon-reload; then
    warning "Failed to reload systemd daemon"
  fi

  info "Enabling & starting the Qwen3TTS demo service…"
  systemctl --user enable --now qwen3-tts-demo.service || {
    error "Failed to enable/start Qwen3-TTS demo service"
    exit 1
  }

  info "Enabling & starting the Flask proxy service…"
  systemctl --user enable --now qwen3-tts-proxy.service || {
    error "Failed to enable/start Flask proxy service"
    exit 1
  }
fi
# -----------------------------------------------------------------
# 5) Final status report & next steps for the extension
# ---------------------------------------------------------
# Closing summary: how to check the two services and which URL to enter in
# the ReadAloud extension. Each entry is emitted as its own info line.
for report_line in \
  "Both services should now be active. Verify with:" \
  " systemctl --user status qwen3-tts-demo.service" \
  " systemctl --user status qwen3-tts-proxy.service" \
  "When configuring the ReadAloud extension, set the service URL to:" \
  " http://127.0.0.1:${PROXY_PORT}" \
  "Setup finished. Enjoy Qwen3TTS in ReadAloud!"; do
  info "${report_line}"
done
unset report_line