VoiceAPI / src /config.py
Harshil748's picture
Initial HF Spaces deployment - downloads models at runtime
ecde958
"""
Configuration for SYSPIN Multi-lingual TTS System
"""
from dataclasses import dataclass
from typing import Dict, Optional
import os
# Base path for models: a "models" directory that sits next to the directory
# containing this file (i.e. <parent of this file's directory>/models).
_THIS_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(os.path.dirname(_THIS_DIR), "models")
@dataclass
class LanguageConfig:
    """Configuration for one voice entry: language identity plus model file names."""

    # Human-readable language name, e.g. "Hindi".
    name: str
    # Short language code, e.g. "hi" or "bho".
    code: str
    # Model repository id, e.g. "SYSPIN/tts_vits_coquiai_HindiMale"
    # (presumably a Hugging Face Hub repo id — confirm against the downloader).
    hf_model_id: str
    # File name of the model weights/checkpoint (".pt" or ".pth" in this file).
    model_filename: str
    # File name of the companion characters file; defaults to "chars.txt".
    chars_filename: str = "chars.txt"
    # Output sample rate in Hz; defaults to 22050.
    sample_rate: int = 22050
# Registry of all available voices, keyed by "<language code>_<gender>"
# (the Gujarati entry uses the suffix "_mms" instead of a gender).
# All entries are SYSPIN models except Gujarati, which uses facebook/mms-tts-guj.
# JIT traced format (.pt + chars.txt): Hindi, Bengali, Marathi, Telugu, Kannada, etc.
# Coqui TTS checkpoints (.pth + config.json): Bhojpuri
LANGUAGE_CONFIGS: Dict[str, LanguageConfig] = {
    # Hindi
    "hi_male": LanguageConfig(
        name="Hindi",
        code="hi",
        hf_model_id="SYSPIN/tts_vits_coquiai_HindiMale",
        model_filename="hi_male_vits_30hrs.pt",
    ),
    "hi_female": LanguageConfig(
        name="Hindi",
        code="hi",
        hf_model_id="SYSPIN/tts_vits_coquiai_HindiFemale",
        model_filename="hi_female_vits_30hrs.pt",
    ),
    # Bengali
    "bn_male": LanguageConfig(
        name="Bengali",
        code="bn",
        hf_model_id="SYSPIN/tts_vits_coquiai_BengaliMale",
        model_filename="bn_male_vits_30hrs.pt",
    ),
    "bn_female": LanguageConfig(
        name="Bengali",
        code="bn",
        hf_model_id="SYSPIN/tts_vits_coquiai_BengaliFemale",
        model_filename="bn_female_vits_30hrs.pt",
    ),
    # Marathi
    "mr_male": LanguageConfig(
        name="Marathi",
        code="mr",
        hf_model_id="SYSPIN/tts_vits_coquiai_MarathiMale",
        model_filename="mr_male_vits_30hrs.pt",
    ),
    "mr_female": LanguageConfig(
        name="Marathi",
        code="mr",
        hf_model_id="SYSPIN/tts_vits_coquiai_MarathiFemale",
        model_filename="mr_female_vits_30hrs.pt",
    ),
    # Telugu
    "te_male": LanguageConfig(
        name="Telugu",
        code="te",
        hf_model_id="SYSPIN/tts_vits_coquiai_TeluguMale",
        model_filename="te_male_vits_30hrs.pt",
    ),
    "te_female": LanguageConfig(
        name="Telugu",
        code="te",
        hf_model_id="SYSPIN/tts_vits_coquiai_TeluguFemale",
        model_filename="te_female_vits_30hrs.pt",
    ),
    # Kannada
    "kn_male": LanguageConfig(
        name="Kannada",
        code="kn",
        hf_model_id="SYSPIN/tts_vits_coquiai_KannadaMale",
        model_filename="kn_male_vits_30hrs.pt",
    ),
    "kn_female": LanguageConfig(
        name="Kannada",
        code="kn",
        hf_model_id="SYSPIN/tts_vits_coquiai_KannadaFemale",
        model_filename="kn_female_vits_30hrs.pt",
    ),
    # Bhojpuri (Coqui TTS checkpoint format)
    # NOTE(review): these two entries keep the default chars_filename
    # ("chars.txt") although this format is described above as using
    # config.json — confirm the loader does not rely on chars_filename here.
    "bho_male": LanguageConfig(
        name="Bhojpuri",
        code="bho",
        hf_model_id="SYSPIN/tts_vits_coquiai_BhojpuriMale",
        model_filename="checkpoint_200000.pth",
    ),
    "bho_female": LanguageConfig(
        name="Bhojpuri",
        code="bho",
        hf_model_id="SYSPIN/tts_vits_coquiai_BhojpuriFemale",
        model_filename="checkpoint_340000.pth",
    ),
    # Chhattisgarhi (ISO 639-3: hne)
    "hne_male": LanguageConfig(
        name="Chhattisgarhi",
        code="hne",
        hf_model_id="SYSPIN/tts_vits_coquiai_ChhattisgarhiMale",
        model_filename="ch_male_vits_30hrs.pt",
    ),
    "hne_female": LanguageConfig(
        name="Chhattisgarhi",
        code="hne",
        hf_model_id="SYSPIN/tts_vits_coquiai_ChhattisgarhiFemale",
        model_filename="ch_female_vits_30hrs.pt",
    ),
    # Maithili (ISO 639-3: mai)
    "mai_male": LanguageConfig(
        name="Maithili",
        code="mai",
        hf_model_id="SYSPIN/tts_vits_coquiai_MaithiliMale",
        model_filename="mt_male_vits_30hrs.pt",
    ),
    "mai_female": LanguageConfig(
        name="Maithili",
        code="mai",
        hf_model_id="SYSPIN/tts_vits_coquiai_MaithiliFemale",
        model_filename="mt_female_vits_30hrs.pt",
    ),
    # Magahi (ISO 639-3: mag)
    "mag_male": LanguageConfig(
        name="Magahi",
        code="mag",
        hf_model_id="SYSPIN/tts_vits_coquiai_MagahiMale",
        model_filename="mg_male_vits_30hrs.pt",
    ),
    "mag_female": LanguageConfig(
        name="Magahi",
        code="mag",
        hf_model_id="SYSPIN/tts_vits_coquiai_MagahiFemale",
        model_filename="mg_female_vits_30hrs.pt",
    ),
    # English
    "en_male": LanguageConfig(
        name="English",
        code="en",
        hf_model_id="SYSPIN/tts_vits_coquiai_EnglishMale",
        model_filename="en_male_vits_30hrs.pt",
    ),
    "en_female": LanguageConfig(
        name="English",
        code="en",
        hf_model_id="SYSPIN/tts_vits_coquiai_EnglishFemale",
        model_filename="en_female_vits_30hrs.pt",
    ),
    # Gujarati - Using Facebook MMS model (1100+ languages)
    # The key has no gender component, and this is the only entry that
    # overrides the default sample rate.
    # NOTE(review): chars_filename is left at its "chars.txt" default —
    # confirm whether the MMS loading path uses it.
    "gu_mms": LanguageConfig(
        name="Gujarati",
        code="gu",
        hf_model_id="facebook/mms-tts-guj",
        model_filename="mms_guj.pt",
        sample_rate=16000,  # MMS uses 16kHz
    ),
}
# Style presets for prosody control.
# Each preset name maps to a dict with "speed", "pitch" and "energy" values;
# "default" sets all three to 1.0 (values are presumably multipliers relative
# to 1.0 — confirm against the synthesis code).
STYLE_PRESETS = {
    preset_name: {"speed": speed, "pitch": pitch, "energy": energy}
    for preset_name, (speed, pitch, energy) in (
        # name,     (speed, pitch, energy)
        ("default", (1.0, 1.0, 1.0)),
        ("slow", (0.75, 1.0, 1.0)),
        ("fast", (1.25, 1.0, 1.0)),
        ("soft", (0.9, 0.95, 0.7)),
        ("loud", (1.0, 1.05, 1.3)),
        ("happy", (1.1, 1.1, 1.2)),
        ("sad", (0.85, 0.9, 0.8)),
        ("calm", (0.9, 0.95, 0.85)),
        ("excited", (1.2, 1.15, 1.3)),
    )
}
def get_available_languages() -> Dict[str, str]:
    """Return a mapping of language code to language name.

    Several voices in ``LANGUAGE_CONFIGS`` can share one code; the name of
    the first entry encountered for a code is the one that is kept.
    """
    languages: Dict[str, str] = {}
    for voice in LANGUAGE_CONFIGS.values():
        # setdefault only inserts when the code is new, so the first name wins.
        languages.setdefault(voice.code, voice.name)
    return languages
def get_available_voices() -> Dict[str, Dict]:
    """Return metadata for every voice in ``LANGUAGE_CONFIGS``.

    Returns:
        A dict keyed by voice key (e.g. ``"hi_female"``). Each value is a
        dict with ``"name"``, ``"code"`` and ``"gender"``, where gender is
        ``"female"``, ``"male"`` or ``"neutral"`` as derived from the key.
    """

    def _gender_of(voice_key: str) -> str:
        # "female" contains "male" as a substring, so it must be tested
        # first; otherwise every female voice is reported as "male".
        if "female" in voice_key:
            return "female"
        if "male" in voice_key:
            return "male"
        return "neutral"

    return {
        key: {
            "name": config.name,
            "code": config.code,
            "gender": _gender_of(key),
        }
        for key, config in LANGUAGE_CONFIGS.items()
    }
def get_style_presets() -> Dict[str, Dict]:
    """Return the available style presets.

    Returns:
        The module-level ``STYLE_PRESETS`` dict itself (not a copy), so any
        mutation by the caller affects the shared presets.
    """
    return STYLE_PRESETS