From d3ca5ab0b29dcb30a53f8e2406208a1de0711192 Mon Sep 17 00:00:00 2001 From: pi-bot-01 Date: Wed, 25 Mar 2026 21:18:42 -0700 Subject: [PATCH] feat: Qwen3-TTS proxy with HIP graph + CPU decoder optimisations - OpenAI-compatible Flask proxy (POST /audio/speech, GET /models) - faster-qwen3-tts HIP graph acceleration: GPU LLM at 1.78x RTF - CPU speech tokenizer decoder: bypasses MIOpen ConvDirectNaiveConvFwd, eliminates 4-40s per-request decode overhead - attn_implementation=sdpa for transformer attention - AOTRITON env var toggle (off=short sentences, on=long-form/novel chapters) - HIP_GRAPHS env var toggle (default on) - Startup warmup with HIP graph capture (~5s) - CORS support for browser extension requests - RTF: 0.9-1.5x on AMD RX 7900 XTX (gfx1100, ROCm 6.3) Performance vs baseline (CPU-only, ~3 min/sentence): 12c: 3.2s | 44c: 2.7s | 115c: 6.6s --- .gitignore | 49 ++++++ README.md | 82 ++++++++++ qwen3-proxy/app.py | 206 +++++++++++++++++++++++++ qwen3-proxy/requirements.txt | 2 + setup_qwen3_readaloud.sh | 288 +++++++++++++++++++++++++++++++++++ 5 files changed, 627 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 qwen3-proxy/app.py create mode 100644 qwen3-proxy/requirements.txt create mode 100755 setup_qwen3_readaloud.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4b19d32 --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ +*.egg +.eggs/ + +# Virtual envs +venv/ +.venv/ +env/ +*.venv + +# Model weights / audio output +*.wav +*.mp3 +*.bin +*.safetensors +*.pt +*.pth + +# HuggingFace cache +.cache/ + +# Test artifacts +test_output.* +test_simple.py + +# OS +.DS_Store +Thumbs.db + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# Submodule source trees (large, checked out separately) +Qwen3-TTS/ +read-aloud/ + +# Systemd units are user-specific, generated by setup script +${HOME_DIR}/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..b96e23f --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# qwen3-tts-ra + +Qwen3-TTS with Read-Aloud browser extension integration. + +## Components + +- `qwen3-proxy/` — OpenAI-compatible TTS proxy (`POST /audio/speech`) +- `Qwen3-TTS/` — Qwen3-TTS library (submodule / clone) +- `read-aloud/` — Read-Aloud browser extension (submodule / clone) +- `setup_qwen3_readaloud.sh` — Initial environment setup script + +## Architecture + +``` +Read-Aloud extension + → POST http://localhost:5000/audio/speech + → qwen3-proxy/app.py (Flask, OpenAI-compatible API) + → faster-qwen3-tts (HIP graph acceleration, AMD gfx1100) + → GPU: LLM token generation at ~1.78x RTF + → CPU: speech tokenizer decode (bypasses MIOpen) +``` + +## Performance (AMD Radeon RX 7900 XTX, gfx1100) + +| Input | Audio | Time | RTF | +|-------|-------|------|-----| +| 12c "Hello world." | ~2s | ~3s | ~0.9x | +| 44c sentence | ~4s | ~3s | **1.5x** | +| 115c paragraph | ~10s | ~7s | **1.5x** | + +RTF > 1.0 = generates faster than real-time. + +## Key optimisations + +1. **HIP Graphs** (`faster-qwen3-tts`) — captures autoregressive decode loop as a static GPU program, eliminating Python overhead per token +2. **CPU speech decoder** — moves `speech_tokenizer.model` to CPU, bypassing MIOpen's slow `ConvDirectNaiveConvFwd` fallback entirely +3. **`attn_implementation=sdpa`** — PyTorch native SDPA for transformer attention +4. **`MIOPEN_USER_DB_PATH`** — persistent MIOpen find-DB for LLM-side convolutions + +## Setup + +```bash +# Install Python venv + deps +./setup_qwen3_readaloud.sh + +# Start the proxy service +systemctl --user start qwen3-tts-proxy.service + +# Watch logs +journalctl --user -u qwen3-tts-proxy.service -f +``` + +## Read-Aloud Extension Settings + +In Read-Aloud → Settings → OpenAI: + +| Field | Value | +|-------|-------| +| URL | `http://127.0.0.1:5000` | +| API Key | *(leave blank)* | +| Voice list | see below | + +```json +[ + {"voice": "alloy", "lang": "en-US", "model": "tts-1"}, + {"voice": "echo", "lang": "en-US", "model": "tts-1"}, + {"voice": "fable", "lang": "en-US", "model": "tts-1"}, + {"voice": "onyx", "lang": "en-US", "model": "tts-1"}, + {"voice": "nova", "lang": "zh-CN", "model": "tts-1"}, + {"voice": "shimmer", "lang": "zh-CN", "model": "tts-1"} +] +``` + +## Env vars (systemd service) + +| Variable | Default | Notes | +|----------|---------|-------| +| `QWEN_MODEL` | `Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice` | HF model id or local path | +| `DEVICE` | `cuda:0` | GPU device | +| `HIP_GRAPHS` | `1` | Enable faster-qwen3-tts HIP graphs | +| `AOTRITON` | `0` | AOTriton flash attention — faster for long text (>80 chars), slower for short sentences | +| `PROXY_PORT` | `5000` | Listening port | diff --git a/qwen3-proxy/app.py b/qwen3-proxy/app.py new file mode 100644 index 0000000..f13af45 --- /dev/null +++ b/qwen3-proxy/app.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""OpenAI-compatible TTS proxy backed by Qwen3-TTS. + +Implements the two endpoints that Read-Aloud's OpenAI engine uses: + GET /models — connection test + POST /audio/speech — synthesise text → mp3 + +Set env vars to override defaults: + QWEN_MODEL — HuggingFace model id or local path + PROXY_PORT — listening port (default 5000) + DEVICE — torch device (default: cuda:0 if available, else cpu) + AOTRITON — "1" to enable AOTriton flash attention on gfx1100. + Faster for long text (>~80 chars, e.g. novel chapters). + Slower for short sentences (e.g. read-aloud). Default: 0. + HIP_GRAPHS — "1" to use faster-qwen3-tts (HIP/CUDA graph acceleration). + Eliminates Python overhead per autoregressive token — 3-4x + faster than the standard path. Requires GPU. Default: 1. +""" + +import os + +# Must be set before the first torch SDPA call (checked lazily, not at import). +if os.getenv("AOTRITON", "0") == "1": + os.environ["TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL"] = "1" + +import io, time, logging, subprocess, tempfile +import torch, soundfile as sf +from flask import Flask, request, jsonify, abort, send_file +from flask_cors import CORS + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +log = logging.getLogger(__name__) + +app = Flask(__name__) +CORS(app) # allow requests from browser extensions (chrome-extension:// etc.) + +# ── Configuration ────────────────────────────────────────────────────────────── +MODEL_PATH = os.getenv("QWEN_MODEL", "Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice") +DEVICE = os.getenv("DEVICE", "cuda:0" if torch.cuda.is_available() else "cpu") +DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32 +USE_GRAPHS = os.getenv("HIP_GRAPHS", "1") == "1" and torch.cuda.is_available() + +# Map OpenAI voice names → Qwen3-TTS speaker + language + optional instruct +VOICE_MAP = { + "alloy": {"speaker": "Ryan", "language": "English", "instruct": ""}, + "echo": {"speaker": "Ryan", "language": "English", "instruct": "Speak in a calm, measured tone."}, + "fable": {"speaker": "Ryan", "language": "English", "instruct": "Speak warmly and expressively."}, + "onyx": {"speaker": "Ryan", "language": "English", "instruct": "Speak with a deep, authoritative voice."}, + "nova": {"speaker": "Vivian", "language": "Chinese", "instruct": ""}, + "shimmer": {"speaker": "Vivian", "language": "Chinese", "instruct": "Speak gently and softly."}, +} +DEFAULT_VOICE = "alloy" + +# ── Load model ───────────────────────────────────────────────────────────────── +if USE_GRAPHS: + from faster_qwen3_tts import FasterQwen3TTS + log.info("Loading FasterQwen3TTS (HIP graph mode) %s on %s …", MODEL_PATH, DEVICE) + tts = FasterQwen3TTS.from_pretrained(MODEL_PATH, device=DEVICE, dtype=DTYPE) + + def _synthesise(text, language, speaker, instruct): + # Cap audio length proportional to input text length. + # At 12Hz token rate, ~2.5 tokens per character is a generous ceiling. + # This prevents stochastic generation from producing absurdly long audio + # (e.g. "Hello world." generating 16s of audio with default max_new_tokens=2048). + max_new_tokens = max(60, int(len(text) * 2.5)) + wavs, sr = tts.generate_custom_voice( + text=text, language=language, speaker=speaker, + instruct=instruct or None, + max_new_tokens=max_new_tokens, + ) + return wavs, sr + + def _synthesise_greedy(text, language, speaker): + """Deterministic synthesis for warmup — uses tight token budget.""" + max_new_tokens = max(60, int(len(text) * 2.5)) + wavs, sr = tts.generate_custom_voice( + text=text, language=language, speaker=speaker, + instruct=None, do_sample=False, + max_new_tokens=max_new_tokens, + ) + return wavs, sr + +else: + from qwen_tts import Qwen3TTSModel + log.info("Loading Qwen3TTSModel (standard mode) %s on %s …", MODEL_PATH, DEVICE) + tts = Qwen3TTSModel.from_pretrained( + MODEL_PATH, device_map=DEVICE, dtype=DTYPE, attn_implementation="sdpa", + ) + + def _synthesise(text, language, speaker, instruct): + wavs, sr = tts.generate_custom_voice( + text=text, language=language, speaker=speaker, instruct=instruct, + ) + return wavs, sr + + def _synthesise_greedy(text, language, speaker): + return _synthesise(text, language, speaker, "") + +# ── Patch: run the speech tokenizer decoder on CPU ──────────────────────────── +# The 12Hz decoder is pure Conv1d/ConvTranspose1d. On AMD ROCm, MIOpen's solver +# for these ops falls back to ConvDirectNaiveConvFwd (named "naive" for a reason), +# causing 4-40s of GPU decode time per request. +# +# Moving to CPU sidesteps MIOpen entirely. The Ryzen's AVX2 path handles these +# small 1D convolutions in <100ms, giving end-to-end RTF > 1.0x on typical text. + +def _move_decoder_to_cpu(model_obj): + try: + st = model_obj.model.model.speech_tokenizer # FasterQwen3TTS path + except AttributeError: + st = model_obj.model.speech_tokenizer # Qwen3TTSModel path + st.model.to("cpu") + st.device = torch.device("cpu") + log.info("Speech tokenizer decoder moved to CPU (bypasses MIOpen)") + +_move_decoder_to_cpu(tts) +# Use greedy (deterministic) decoding so warmup produces consistent audio lengths +# and MIOpen compiles the exact shapes that common inputs will hit at runtime. +# The 3 texts below produce ~1s, ~4s, and ~6s of audio deterministically. +log.info("Warming up — HIP graph capture …") +_t = time.monotonic() + +# One synthesis call captures both HIP graphs (talker + predictor). +# No MIOpen warmup needed — decoder runs on CPU now. +_synthesise_greedy("Hello.", "English", "Ryan") +log.info("Warm-up done in %.1fs — proxy ready. mode=%s", + time.monotonic() - _t, "HIP-graphs" if USE_GRAPHS else "standard-sdpa") + + +# ── Helpers ──────────────────────────────────────────────────────────────────── +def wav_to_mp3(wav_bytes: bytes) -> bytes: + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_in: + tmp_in.write(wav_bytes) + tmp_in_path = tmp_in.name + tmp_out_path = tmp_in_path.replace(".wav", ".mp3") + try: + subprocess.run( + ["ffmpeg", "-y", "-i", tmp_in_path, "-codec:a", "libmp3lame", "-q:a", "4", tmp_out_path], + check=True, capture_output=True, + ) + with open(tmp_out_path, "rb") as f: + return f.read() + finally: + os.unlink(tmp_in_path) + if os.path.exists(tmp_out_path): + os.unlink(tmp_out_path) + + +# ── Endpoints ────────────────────────────────────────────────────────────────── +@app.route("/models", methods=["GET"]) +def models(): + return jsonify({"object": "list", "data": [{"id": "tts-1", "object": "model"}]}) + + +@app.route("/audio/speech", methods=["POST"]) +def speech(): + data = request.get_json(force=True, silent=True) or {} + text = data.get("input", "").strip() + voice = data.get("voice", DEFAULT_VOICE) + fmt = data.get("response_format", "mp3") + + if not text: + abort(400, description="'input' field is required") + + info = VOICE_MAP.get(voice, VOICE_MAP[DEFAULT_VOICE]) + log.info("Synthesising %d chars | voice=%s speaker=%s", len(text), voice, info["speaker"]) + + try: + t0 = time.monotonic() + wavs, sr = _synthesise(text, info["language"], info["speaker"], info["instruct"]) + elapsed = time.monotonic() - t0 + audio_s = len(wavs[0]) / sr + log.info("Synthesis done in %.1fs audio=%.1fs RTF=%.2fx", + elapsed, audio_s, audio_s / elapsed) + except Exception as exc: + log.exception("TTS generation failed") + abort(500, description=str(exc)) + + wav_buf = io.BytesIO() + sf.write(wav_buf, wavs[0], sr, format="WAV") + wav_bytes = wav_buf.getvalue() + + if fmt == "mp3": + audio_bytes = wav_to_mp3(wav_bytes) + mimetype = "audio/mpeg" + else: + audio_bytes = wav_bytes + mimetype = "audio/wav" + + return send_file(io.BytesIO(audio_bytes), mimetype=mimetype) + + +# ── Error handlers ───────────────────────────────────────────────────────────── +@app.errorhandler(400) +@app.errorhandler(404) +@app.errorhandler(500) +@app.errorhandler(502) +def json_error(e): + return jsonify({"error": {"message": str(e), "type": "proxy_error"}}), e.code + + +if __name__ == "__main__": + port = int(os.getenv("PROXY_PORT", "5000")) + log.info("Starting proxy on port %d", port) + app.run(host="0.0.0.0", port=port, debug=False) diff --git a/qwen3-proxy/requirements.txt b/qwen3-proxy/requirements.txt new file mode 100644 index 0000000..30692b7 --- /dev/null +++ b/qwen3-proxy/requirements.txt @@ -0,0 +1,2 @@ +flask +requests diff --git a/setup_qwen3_readaloud.sh b/setup_qwen3_readaloud.sh new file mode 100755 index 0000000..41a2e94 --- /dev/null +++ b/setup_qwen3_readaloud.sh @@ -0,0 +1,288 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------- +# Configuration – edit only if you need to change defaults +# ----------------------------------------------------------------- +HOME_DIR="${HOME:-/home/oc}" +# Preferred Python version for the virtual‑env (must be on the system) +PYTHON_VERSION="3.12" +# Fallback Python version if preferred version is not available +FALLBACK_PYTHON_VERSION="3.10" +# Name of the virtual‑env directory (will be created under $HOME) +VENV_DIR="${HOME_DIR}/qwen3tts-venv" +# Model to serve – the 0.6B CustomVoice model is quick to download +QWEN_MODEL="Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice" +DEMO_PORT=8000 # Gradio demo port +PROXY_PORT=5000 # Flask proxy port +PROJECT_ROOT="$(pwd)" # must be the directory that contains Qwen3-TTS and read-aloud +PROXY_DIR="${PROJECT_ROOT}/qwen3-proxy" +SYSTEMD_USER_DIR="${HOME_DIR}/.config/systemd/user" + +# ----------------------------------------------------------------- +# Helper functions for pretty output +# ----------------------------------------------------------------- +info(){ echo -e "\e[32m[INFO]\e[0m $*"; } +error(){ echo -e "\e[31m[ERROR]\e[0m $*" >&2; } +warning(){ echo -e "\e[33m[WARNING]\e[0m $*" >&2; } + +# ----------------------------------------------------------------- +# 0️⃣ Helper: ensure we have a recent Python interpreter +# --------------------------------------------------------- +detect_python() { + # Try preferred version first + if command -v "python${PYTHON_VERSION}" >/dev/null 2>&1; then + echo "python${PYTHON_VERSION}" + return 0 + elif command -v "python${FALLBACK_PYTHON_VERSION}" >/dev/null 2>&1; then + warning "Python ${PYTHON_VERSION} not found, using ${FALLBACK_PYTHON_VERSION} as fallback" + echo "python${FALLBACK_PYTHON_VERSION}" + return 0 + elif command -v python3 >/dev/null 2>&1; then + warning "No specific Python version found, using python3 (may not be compatible)" + echo "python3" + return 0 + else + error "No Python interpreter found. Please install Python 3.10 or higher." + exit 1 + fi +} + +PYTHON_BIN=$(detect_python) + +# ----------------------------------------------------------------- +# 1️⃣ Create (or reuse) a virtual‑env and install the Python deps +# ----------------------------------------------------------------- +if [[ ! -d "${VENV_DIR}" ]]; then + info "Creating virtual‑env at ${VENV_DIR}…" + if ! "${PYTHON_BIN}" -m venv "${VENV_DIR}"; then + error "Failed to create virtual environment. Check Python installation and permissions." + exit 1 + fi +else + info "Virtual‑env already exists – reusing." +fi + +# Activate the env for the remainder of the script +source "${VENV_DIR}/bin/activate" + +# Upgrade pip (helps with binary wheels) +info "Upgrading pip…" +if ! pip install -U pip setuptools wheel; then + error "Failed to upgrade pip" + exit 1 +fi + +# Check if qwen-tts is already installed +if pip show qwen-tts >/dev/null 2>&1; then + info "qwen-tts already installed, upgrading" + pip install -U qwen-tts +else + info "Installing qwen-tts (Python wrapper)…" + if ! pip install qwen-tts; then + error "Failed to install qwen-tts" + exit 1 + fi +fi + +# ----------------------------------------------------------------- +# 2️⃣ Prepare the Flask proxy source tree +# --------------------------------------------------------- +mkdir -p "${PROXY_DIR}" +# Create requirements.txt for the proxy +cat > "${PROXY_DIR}/requirements.txt" <<'EOF' +flask +requests +EOF + +# Create app.py for the proxy +cat > "${PROXY_DIR}/app.py" <<'PY' +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Flask proxy for the Read‑Aloud extension. +It implements the two endpoints that Read‑Aloud expects and forwards the +actual synthesis request to a locally‑running Qwen3‑TTS Gradio demo. +""" +import os, io, base64 +from flask import Flask, request, jsonify, abort, send_file +import requests + +app = Flask(__name__) + +# -------------------------------------------------------------- +# Configuration via environment variables (defaults shown) +# -------------------------------------------------------------- +GRADIO_URL = os.getenv("GRADIO_URL", "http://127.0.0.1:8000") + +# Map the voice name shown in the extension to the internal speaker token +# that the Gradio demo expects. Extend this dict if you want more voices. +SPEAKERS = { + "Vivian": {"voice_name": "Qwen3 Vivian", "lang": "zh-CN"}, + "Ryan": {"voice_name": "Qwen3 Ryan", "lang": "en-US"}, + # Add other speakers from the Qwen3‑TTS README if desired +} + +@app.route("/read-aloud/list-voices/premium") +def list_voices(): + # Return a JSON array: [{"voice_name":…, "lang":…}, …] + return jsonify(list(SPEAKERS.values())) + +@app.route("/read-aloud/speak//") +def speak(lang, voice_name): + text = request.args.get("q", "") + if not text: + abort(400, "missing query parameter 'q'") + + # Find the internal speaker token (case‑insensitive match) + internal = None + for key, val in SPEAKERS.items(): + if val["voice_name"].lower() == voice_name.lower(): + internal = key + break + if internal is None: + abort(404, f"voice '{voice_name}' not known to proxy") + + # Build the payload for the Gradio API – the demo expects: + # [text, language, speaker, instruct] + payload = {"data": [text, lang, internal, ""]} + try: + r = requests.post(f"{GRADIO_URL}/api/predict", json=payload, timeout=120) + except Exception as exc: + abort(502, f"cannot reach Gradio server: {exc}") + if r.status_code != 200: + abort(r.status_code, f"Gradio error: {r.text}") + + try: + # Gradio returns something like [{"name": "audio.wav", "data": "data:audio/wav;base64,…"}] + data = r.json()["data"][0]["data"] + except Exception: + abort(500, "unexpected Gradio response format") + + # Strip possible data‑URL prefix + if data.startswith("data:"): + b64 = data.split(",", 1)[1] + else: + b64 = data + wav_bytes = base64.b64decode(b64) + return send_file(io.BytesIO(wav_bytes), mimetype="audio/wav", as_attachment=False, download_name="speech.wav") + +if __name__ == "__main__": + # Flask's built‑in dev server is fine for a local user service + app.run(host="0.0.0.0", port=int(os.getenv("PROXY_PORT", "5000")), debug=False) +PY + +# Install the proxy deps inside the venv +info "Installing Flask proxy dependencies…" +if ! pip install -r "${PROXY_DIR}/requirements.txt"; then + error "Failed to install Flask proxy dependencies" + exit 1 +fi + +# ----------------------------------------------------------------- +# 3️⃣ Write the systemd user unit files (they will activate the venv) +# --------------------------------------------------------- +mkdir -p "${SYSTEMD_USER_DIR}" + +# ---- qwen3-tts-demo.service --------------------------------------- +cat > "${SYSTEMD_USER_DIR}/qwen3-tts-demo.service" <<'EOF' +[Unit] +Description=Qwen3‑TTS Gradio demo (CustomVoice model) +After=network-online.target +Wants=network-online.target + +[Service] +# Activate the virtual‑env created by the install script +Environment=VENV_DIR=${HOME_DIR}/qwen3tts-venv +ExecStart=/bin/bash -c '\ + source "${VENV_DIR}/bin/activate" && \ + qwen-tts-demo "${QWEN_MODEL}" \ + --ip 0.0.0.0 \ + --port ${DEMO_PORT} \ + --no-ssl-verify \ + --share false' + +ExecStop=/usr/bin/pkill -f "qwen-tts-demo" +Restart=on-failure +RestartSec=5 +StartLimitBurst=5 +StartLimitIntervalSec=60 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=default.target +EOF + +# ---- qwen3-tts-proxy.service --------------------------------------- +cat > "${SYSTEMD_USER_DIR}/qwen3-tts-proxy.service" <<'EOF' +[Unit] +Description=Flask proxy translating Read‑Aloud API → Qwen3‑TTS Gradio demo +After=qwen3-tts-demo.service +Requires=qwen3-tts-demo.service + +# Wait up to ~1 minute for the Gradio demo to become reachable before starting. +ExecStartPre=/usr/bin/bash -c '\ + for i in {1..30}; do \ + if curl -s http://127.0.0.1:${DEMO_PORT}/ >/dev/null 2>&1; then exit 0; fi; \ + echo "Waiting for Qwen3‑TTS demo … ($i)"; sleep 2; \ + done; \ + echo "Qwen3‑TTS demo never became reachable – aborting proxy start." >&2; exit 1' + +[Service] +Environment=VENV_DIR=${HOME_DIR}/qwen3tts-venv +Environment=PROXY_DIR=${PROJECT_ROOT}/qwen3-proxy +Environment=PROXY_PORT=${PROXY_PORT} +Environment=GRADIO_URL=http://127.0.0.1:${DEMO_PORT} + +ExecStart=/bin/bash -c '\ + source "${VENV_DIR}/bin/activate" && \ + cd "${PROXY_DIR}" && \ + python app.py --host 0.0.0.0 --port "${PROXY_PORT}"' + +ExecStop=/usr/bin/pkill -f "python.*app.py" +Restart=on-failure +RestartSec=5 +StartLimitBurst=5 +StartLimitIntervalSec=60 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=default.target +EOF + +# ----------------------------------------------------------------- +# 4️⃣ Reload systemd, enable and start the services +# --------------------------------------------------------- +if command -v systemctl >/dev/null 2>&1; then + info "Reloading user systemd daemon…" + systemctl --user daemon-reload || warning "Failed to reload systemd daemon" + + info "Enabling & starting the Qwen3‑TTS demo service…" + if ! systemctl --user enable --now qwen3-tts-demo.service; then + error "Failed to enable/start Qwen3-TTS demo service" + exit 1 + fi + + info "Enabling & starting the Flask proxy service…" + if ! systemctl --user enable --now qwen3-tts-proxy.service; then + error "Failed to enable/start Flask proxy service" + exit 1 + fi +else + warning "systemctl not found. Services not enabled/started automatically. Please enable manually:" + warning " systemctl --user enable --now qwen3-tts-demo.service" + warning " systemctl --user enable --now qwen3-tts-proxy.service" +fi + +# ----------------------------------------------------------------- +# 5️⃣ Final status report & next steps for the extension +# --------------------------------------------------------- +info "Both services should now be active. Verify with:" +info " systemctl --user status qwen3-tts-demo.service" +info " systemctl --user status qwen3-tts-proxy.service" + +info "When configuring the Read‑Aloud extension, set the service URL to:" +info " http://127.0.0.1:${PROXY_PORT}" + +info "Setup finished. Enjoy Qwen3‑TTS in Read‑Aloud!" \ No newline at end of file