# NihalGazi's picture
# Update app.py
# 424d566 verified
import gradio as gr
import requests
import random
import tempfile
import os
import base64
import time
import threading
from datetime import datetime, timedelta, timezone
# --- Configuration ---
# Chat-completions endpoint that every generation request is POSTed to.
API_URL = "https://gen.pollinations.ai/v1/chat/completions"

# Key read from the environment; used for requests where the visitor
# does not supply a key of their own (Free Tier).
SYSTEM_API_KEY = os.environ.get("POLLINATIONS_TOKEN")

# Voice names offered in the UI dropdown.
VOICES = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
    "coral",
    "verse",
    "ballad",
    "ash",
    "sage",
    "amuch",
    "dan",
]

# --- Rate Limiting Globals ---
# Longest prompt, in characters, accepted from Free Tier users.
MAX_CHAR_LIMIT = 200
# Minimum gap, in seconds, enforced between consecutive Free Tier generations.
COOLDOWN_SECONDS = 35
# time.time() stamp of the last successful Free Tier generation (0 = none yet).
LAST_REQUEST_TIME = 0

# Serialises Free Tier requests so only one of them runs at a time.
free_tier_lock = threading.Lock()
def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """
    Request speech audio for ``prompt`` from the chat-completions endpoint.

    Sends a single POST to ``API_URL`` asking the "openai-audio" model for an
    MP3 rendering of ``prompt`` in the given ``voice``. ``emotion`` is embedded
    in the system message to steer the delivery.

    Args:
        prompt: Text to be spoken; sent as the user message.
        voice: Voice name placed in the payload's ``audio.voice`` field.
        emotion: Free-form style description interpolated into the system message.
        seed: Value sent as the payload's ``seed`` field.
        api_key: Bearer token; when falsy, no ``Authorization`` header is sent.

    Returns:
        The bytes obtained by base64-decoding the response's
        ``choices[0].message.audio.data`` field.

    Raises:
        gr.Error: On a network failure, a non-200 status code, a body that is
            not valid JSON, or a body without decodable audio data.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {"Content-Type": "application/json"}
    # Inject Key if provided
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Your "Secret Sauce" Prompt Engineering
    system_instruction = (
        "Only repeat what I say. "
        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ],
        "seed": seed,
    }

    # Only the HTTP call itself is treated as a "network" failure.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {str(e)}") from e

    status = response.status_code
    if status != 200:
        if status in (402, 429):
            raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
        if status == 401:
            raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
        raise gr.Error(f"API Error {status}: {response.text}")

    # Every JSON decode error raised by requests (old and new versions) is a
    # ValueError subclass, so this one clause covers them all.
    try:
        data = response.json()
    except ValueError as e:
        raise gr.Error("API returned a response that is not valid JSON.") from e

    # A missing key, an empty ``choices`` list, or a null intermediate object
    # all mean the same thing to the user: no audio came back.
    try:
        audio_b64 = data["choices"][0]["message"]["audio"]["data"]
    except (KeyError, IndexError, TypeError) as e:
        raise gr.Error("API returned empty audio data.") from e

    try:
        return base64.b64decode(audio_b64)
    except (TypeError, ValueError) as e:
        # binascii.Error (bad padding) is a ValueError; None/non-str is a TypeError.
        raise gr.Error("API returned audio data that could not be decoded.") from e
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
    """
    Validate the form values, generate speech, and save it to a temp file.

    A request whose user-supplied key is longer than 5 characters is treated
    as paid and calls the API immediately. Every other request is serialised
    through ``free_tier_lock`` and is delayed until at least
    ``COOLDOWN_SECONDS`` have passed since the previous successful free
    generation.

    Args:
        prompt: Text to convert to speech.
        voice: Voice name forwarded to ``generate_audio_api``.
        emotion: Style description forwarded to ``generate_audio_api``.
        use_random_seed: When true, a random 32-bit seed is drawn and
            ``specific_seed`` is ignored.
        specific_seed: Seed to use when ``use_random_seed`` is false.
        api_key_input: Optional user-supplied API key (may be blank).

    Returns:
        ``(path, status)`` where ``path`` is the saved ``.mp3`` temp file, or
        ``(None, error_text)`` when generation or saving failed.

    Raises:
        gr.Error: If the prompt is blank, if a free-tier prompt exceeds
            ``MAX_CHAR_LIMIT``, or if a specific seed is requested but missing.
    """
    global LAST_REQUEST_TIME

    # 1. User Status Check
    # Defensive: tolerate None for a text field so .strip() cannot crash.
    user_provided_key = (api_key_input or "").strip()
    # Use Client Key if present, otherwise fall back to System Key
    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
    # NOTE(review): a 1-5 character key is still sent as the bearer token but
    # is rate-limited as Free Tier - confirm whether that is intended.
    is_paid_user = len(user_provided_key) > 5

    # 2. Validation
    prompt = prompt or ""
    if not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    # RULE: Character Limit applies ONLY to Free Tier
    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

    # An empty seed field would otherwise surface as a raw int(None) TypeError
    # message in the status box.
    if not use_random_seed and specific_seed is None:
        raise gr.Error('Please enter a seed or enable "Use Random Seed".')

    # 3. Execution Logic (The Traffic Split)
    try:
        seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)

        if is_paid_user:
            # === FAST LANE (Paid) ===
            # Bypasses the lock entirely. No waiting.
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"
        else:
            # === SLOW LANE (Free) ===
            # Only one free request may hold the lock at a time.
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                # Once we hold the lock, honour the cooldown timer.
                elapsed_time = time.time() - LAST_REQUEST_TIME
                if elapsed_time < COOLDOWN_SECONDS:
                    wait_time = COOLDOWN_SECONDS - elapsed_time
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                # Generate inside the lock so nobody else steals the slot
                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

                # Only a successful generation resets the timer.
                LAST_REQUEST_TIME = time.time()
            status_msg = "Generated! (Free Tier 🐢)"

        # 4. Save and Return
        # delete=False: the path is handed back to the caller, so the file
        # must outlive this function.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
        return temp_audio_file.name, status_msg

    except Exception as e:
        # Deliberate catch-all: failures are reported in the status output
        # instead of being raised.
        return None, str(e)
def toggle_seed_input(use_random_seed):
    """Show the seed field only when random seeding is off, resetting it to 12345."""
    show_seed_field = not use_random_seed
    return gr.update(visible=show_seed_field, value=12345)
# --- RESTORED ORIGINAL UI ---
# Intro text shown under the title. The cooldown figure is taken from
# COOLDOWN_SECONDS so the page always states the delay the code enforces.
_INTRO_MARKDOWN = f"""
Enter text, choose a voice and emotion, and generate audio.
**🚀 Skip the Queue & Run Locally:**
To avoid the **{COOLDOWN_SECONDS}s cooldown** and generate instantly:
1. **Duplicate this Space** to run it privately.
2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.
**Commercial Use:**
For a **Lifetime Commercial Use License**, please refer to our terms here:
[LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)
**Like & follow** for more AI projects:
• Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
• X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
• Discord: nihal_gazi_io
"""

with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)",
                placeholder="sk_...",
                type="password",
                info=f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown and generate instantly.",
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3,
            )
            emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            # Hidden until "Use Random Seed" is unticked (see toggle_seed_input).
            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        # Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
        concurrency_limit=20,
    )

    # Each row supplies values in the same order as ``inputs`` below.
    gr.Examples(
        examples=[
            ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
            ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
            ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
            ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
        ],
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False,
    )

if __name__ == "__main__":
    app.launch()