From 19d2721ff2e829970e0491b3c61d8ea72869fa88 Mon Sep 17 00:00:00 2001
From: Seppe De Loore
Date: Mon, 6 Apr 2026 07:36:22 +0200
Subject: [PATCH] [add] Load voice parameters from .env file

Configure Piper TTS synthesis via environment variables (speaker_id,
length_scale, noise_scale, noise_w_scale, volume) loaded from .env with
python-dotenv. Includes .env.example as reference template.

Empty or unset values fall back to the model defaults. Values that are
not valid numbers raise a ValueError naming the offending variable, and
an explicit VOICE_VOLUME of 0.0 is honoured instead of being replaced
by the 1.0 default.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .env.example       | 19 +++++++++++++++++++
 .gitignore         |  1 +
 h2g2/engine/tts.py | 20 ++++++++++++++++++--
 h2g2/main.py       | 52 ++++++++++++++++++++++++++++++++++++++++++++++++----
 pyproject.toml     |  1 +
 5 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..7cffd05
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,19 @@
+# Piper TTS voice configuration
+# Keep comments on their own lines so that an empty value really is empty.
+
+# Path to Piper .onnx voice model (relative to project root)
+VOICE_MODEL=en_GB-alan-medium.onnx
+# Speaker ID for multi-speaker models (integer, leave empty for default)
+VOICE_SPEAKER_ID=
+# Speech speed: >1.0 = slower, <1.0 = faster (leave empty for model default)
+VOICE_LENGTH_SCALE=
+# Phoneme variability: 0.0 = robotic, 1.0 = expressive (leave empty for model default)
+VOICE_NOISE_SCALE=
+# Phoneme width variability (leave empty for model default)
+VOICE_NOISE_W_SCALE=
+# Playback volume multiplier (default: 1.0)
+VOICE_VOLUME=1.0
+
+# Vosk STT configuration
+# Vosk model directory name (relative to project root)
+STT_MODEL=vosk-model-small-en-us-0.15
diff --git a/.gitignore b/.gitignore
index a6a2876..eeb1594 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ __pycache__/
 save.qzl
 vosk-model-*/
 .wolf
+.env
diff --git a/h2g2/engine/tts.py b/h2g2/engine/tts.py
index 9ad4b8f..3df5262 100644
--- a/h2g2/engine/tts.py
+++ b/h2g2/engine/tts.py
@@ -9,13 +9,29 @@ import wave
 from pathlib import Path
 
 from piper import PiperVoice
+from piper.config import SynthesisConfig
 
 
 class TTS:
     """Speaks text using Piper TTS and aplay for playback."""
 
-    def __init__(self, model_path: str | Path) -> None:
+    def __init__(
+        self,
+        model_path: str | Path,
+        speaker_id: int | None = None,
+        length_scale: float | None = None,
+        noise_scale: float | None = None,
+        noise_w_scale: float | None = None,
+        volume: float = 1.0,
+    ) -> None:
         self._voice = PiperVoice.load(str(model_path))
+        self._syn_config = SynthesisConfig(
+            speaker_id=speaker_id,
+            length_scale=length_scale,
+            noise_scale=noise_scale,
+            noise_w_scale=noise_w_scale,
+            volume=volume,
+        )
         self._thread: threading.Thread | None = None
         self._last_wav: bytes | None = None
 
@@ -39,7 +55,7 @@ class TTS:
     def _synthesize(self, text: str) -> bytes:
         buf = io.BytesIO()
         with wave.open(buf, "wb") as wav_file:
-            self._voice.synthesize_wav(text, wav_file)
+            self._voice.synthesize_wav(text, wav_file, syn_config=self._syn_config)
         return buf.getvalue()
 
     def _play(self, wav_data: bytes) -> None:
diff --git a/h2g2/main.py b/h2g2/main.py
index 606bf45..2dc62c7 100644
--- a/h2g2/main.py
+++ b/h2g2/main.py
@@ -2,8 +2,11 @@
 """The Hitchhiker's Guide to the Galaxy — Python text adventure engine."""
 
 import argparse
+import os
 from pathlib import Path
 
+from dotenv import load_dotenv
+
 from h2g2.engine.clock import Clock
 from h2g2.engine.loop import GameLoop
 from h2g2.engine.output import Output
@@ -17,9 +20,41 @@ import h2g2.engine.verbs  # noqa: F401
 # Import content modules
 from h2g2.content import globals_content, earth, vogon, heart, unearth, dark
 
-# Default model locations (project root)
-_DEFAULT_VOICE = Path(__file__).resolve().parent.parent / "en_GB-alan-medium.onnx"
-_DEFAULT_STT_MODEL = Path(__file__).resolve().parent.parent / "vosk-model-small-en-us-0.15"
+# Load .env from project root
+_PROJECT_ROOT = Path(__file__).resolve().parent.parent
+load_dotenv(_PROJECT_ROOT / ".env")
+
+# Default model locations (from env or project root)
+_DEFAULT_VOICE = _PROJECT_ROOT / os.getenv("VOICE_MODEL", "en_GB-alan-medium.onnx")
+_DEFAULT_STT_MODEL = _PROJECT_ROOT / os.getenv("STT_MODEL", "vosk-model-small-en-us-0.15")
+
+
+def _env_float(key: str) -> float | None:
+    """Read an env var as float, returning None if empty/unset.
+
+    Raises ValueError naming the variable if the value is not a number.
+    """
+    val = os.getenv(key, "").strip()
+    if not val:
+        return None
+    try:
+        return float(val)
+    except ValueError:
+        raise ValueError(f"{key} must be a number, got {val!r}") from None
+
+
+def _env_int(key: str) -> int | None:
+    """Read an env var as int, returning None if empty/unset.
+
+    Raises ValueError naming the variable if the value is not an integer.
+    """
+    val = os.getenv(key, "").strip()
+    if not val:
+        return None
+    try:
+        return int(val)
+    except ValueError:
+        raise ValueError(f"{key} must be an integer, got {val!r}") from None
 
 
 def main() -> None:
@@ -116,7 +151,16 @@ def main() -> None:
     tts = None
     if args.audio:
         from h2g2.engine.tts import TTS
-        tts = TTS(args.voice)
+        # Fall back to 1.0 only when unset; `or` would also discard 0.0.
+        env_volume = _env_float("VOICE_VOLUME")
+        tts = TTS(
+            args.voice,
+            speaker_id=_env_int("VOICE_SPEAKER_ID"),
+            length_scale=_env_float("VOICE_LENGTH_SCALE"),
+            noise_scale=_env_float("VOICE_NOISE_SCALE"),
+            noise_w_scale=_env_float("VOICE_NOISE_W_SCALE"),
+            volume=1.0 if env_volume is None else env_volume,
+        )
 
     # Initialize STT if requested
     stt = None
diff --git a/pyproject.toml b/pyproject.toml
index 0331de9..6e99f00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ dependencies = [
     "piper-tts>=1.2.0",
     "vosk>=0.3.45",
     "pyaudio>=0.2.14",
+    "python-dotenv>=1.0.0",
 ]
 
 [build-system]