import base64
import os
import random
import tempfile
import threading
import time
from datetime import datetime, timedelta, timezone

import gradio as gr
import requests

# --- Configuration ---
API_URL = "https://gen.pollinations.ai/v1/chat/completions"

# Default system key (used for the Free Tier when the user supplies no key)
SYSTEM_API_KEY = os.getenv("POLLINATIONS_TOKEN")

# Voices offered in the dropdown
VOICES = [
    "alloy", "echo", "fable", "onyx", "nova", "shimmer",
    "coral", "verse", "ballad", "ash", "sage", "amuch", "dan",
]

# --- Rate Limiting Globals ---
MAX_CHAR_LIMIT = 200    # Limit input characters for Free Tier
COOLDOWN_SECONDS = 35   # Minimum gap between two Free Tier generations
LAST_REQUEST_TIME = 0   # Tracks the last successful FREE generation time

# THE TRAFFIC CONTROLLER: serializes all Free Tier requests
free_tier_lock = threading.Lock()


def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """Request speech audio from the chat-completions endpoint at API_URL.

    Args:
        prompt: Text to be spoken; sent as the user message.
        voice: Voice name placed in the payload's ``audio.voice`` field.
        emotion: Free-form style description embedded in the system message.
        seed: Value sent as the payload's ``seed`` field.
        api_key: Bearer token; the Authorization header is omitted when falsy.

    Returns:
        The decoded MP3 bytes taken from the first choice in the response.

    Raises:
        gr.Error: On a non-200 status, a network failure, a non-JSON body,
            or a response that carries no audio payload.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {"Content-Type": "application/json"}
    # Inject key if provided
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Prompt engineering: make the model read the text verbatim in the
    # requested emotion instead of answering it.
    system_instruction = (
        "Only repeat what I say. "
        f'Now say with proper emphasis in a "{emotion}" emotion this statement.'
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ],
        "seed": seed,
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        if response.status_code != 200:
            if response.status_code in (402, 429):
                raise gr.Error(
                    "⚠️ Server Limit: The Free Tier is busy. "
                    "Please wait or use a Premium Key."
                )
            if response.status_code == 401:
                raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
            raise gr.Error(f"API Error {response.status_code}: {response.text}")

        data = response.json()
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {str(e)}") from e
    except ValueError as e:
        # Body was not JSON and the installed requests raised a plain ValueError.
        raise gr.Error("API returned a non-JSON response.") from e

    try:
        audio_b64 = data["choices"][0]["message"]["audio"]["data"]
        return base64.b64decode(audio_b64)
    except (KeyError, IndexError, TypeError) as e:
        # Missing key, empty `choices` list, or a null where a dict/string
        # was expected all mean there is no usable audio in the response.
        raise gr.Error("API returned empty audio data.") from e


def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool,
                       specific_seed: int, api_key_input: str):
    """Gradio handler: validate the inputs, generate audio, and save it to disk.

    Requests carrying a user key longer than 5 characters go straight to the
    API. All other requests are serialized through ``free_tier_lock`` and
    spaced at least ``COOLDOWN_SECONDS`` apart.

    Args:
        prompt: Text to convert to speech.
        voice: Voice name passed through to ``generate_audio_api``.
        emotion: Emotion/style description passed through unchanged.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed used when ``use_random_seed`` is false.
        api_key_input: Optional user API key; may be empty or None.

    Returns:
        ``(path_to_mp3, status_message)`` on success, or
        ``(None, error_message)`` if generation or saving failed.

    Raises:
        gr.Error: If the prompt is empty, or exceeds ``MAX_CHAR_LIMIT`` for a
            Free Tier request.
    """
    global LAST_REQUEST_TIME

    # 1. User status check (tolerate None from programmatic callers)
    user_provided_key = (api_key_input or "").strip()
    # Use client key if present, otherwise fall back to the system key
    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
    is_paid_user = len(user_provided_key) > 5

    # 2. Validation
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    # RULE: character limit applies ONLY to Free Tier
    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(
            f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length."
        )

    # 3. Execution logic (the traffic split)
    try:
        seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)

        if is_paid_user:
            # === FAST LANE (Paid) ===
            # Bypasses the lock entirely. No waiting.
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"
        else:
            # === SLOW LANE (Free) ===
            # Only one free request at a time may hold the lock.
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                # Once we hold the lock, honour the cooldown
                elapsed_time = time.time() - LAST_REQUEST_TIME
                if elapsed_time < COOLDOWN_SECONDS:
                    wait_time = COOLDOWN_SECONDS - elapsed_time
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                # Generate inside the lock so nobody else steals the slot
                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

                # Reset the global timer (only reached on success)
                LAST_REQUEST_TIME = time.time()
                status_msg = "Generated! (Free Tier 🐢)"

        # 4. Save and return; delete=False keeps the file for Gradio to serve
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            return temp_audio_file.name, status_msg

    except Exception as e:
        # Report the failure in the status box rather than failing the event.
        # NOTE(review): gr.Error appears to stringify as repr(message), which
        # would show stray quotes - confirm; prefer .message when it exists.
        return None, getattr(e, "message", None) or str(e)


def toggle_seed_input(use_random_seed):
    """Show the seed field only when random seeding is off; resets it to 12345."""
    return gr.update(visible=not use_random_seed, value=12345)


# Intro text; the cooldown figure is taken from COOLDOWN_SECONDS so the UI
# cannot drift from the enforced value.
INTRO_MARKDOWN = f"""
Enter text, choose a voice and emotion, and generate audio.

**🚀 Skip the Queue & Run Locally:**
To avoid the **{COOLDOWN_SECONDS}s cooldown** and generate instantly:
1. **Duplicate this Space** to run it privately.
2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.

**Commercial Use:**
For a **Lifetime Commercial Use License**, please refer to our terms here:
[LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)

**Like & follow** for more AI projects:
• Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
• X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
• Discord: nihal_gazi_io
"""

# --- RESTORED ORIGINAL UI ---
with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)",
                placeholder="sk_...",
                type="password",
                info=(
                    f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown "
                    "and generate instantly."
                ),
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3,
            )
            emotion_input = gr.Textbox(
                label="Emotion Style",
                placeholder="e.g., happy, sad, excited, calm...",
            )
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")

        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    random_seed_checkbox.change(
        fn=toggle_seed_input,
        inputs=[random_seed_checkbox],
        outputs=[seed_input],
    )

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[prompt_input, voice_dropdown, emotion_input,
                random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        # Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
        concurrency_limit=20,
    )

    gr.Examples(
        examples=[
            ["Hello there! This is a test of the text-to-speech system.",
             "alloy", "neutral", False, 12345, ""],
            ["Surely *you* wouldn't want *that*. [laughs]",
             "shimmer", "sarcastic and mocking", True, 12345, ""],
            ["[sobbing] I am feeling... [sighs] a bit down today [cry]",
             "onyx", "sad and depressed, with stammering", True, 662437, ""],
            ["This technology is absolutely amazing!",
             "nova", "excited and joyful", True, 12345, ""],
        ],
        inputs=[prompt_input, voice_dropdown, emotion_input,
                random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False,
    )

if __name__ == "__main__":
    app.launch()