Spaces:

NihalGazi
/

Text-To-Speech-Unlimited

Running

File size: 8,704 Bytes

e3b7f9d
 
 
 
 
6dbeeea
 
 
 
e3b7f9d
6dbeeea
 
 
 
b0938c1
 
e3b7f9d
667cf53
 
e3b7f9d
 
6dbeeea
 
424d566
6dbeeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b7f9d
6dbeeea
 
 
 
 
 
 
 
 
 
e3b7f9d
 
6dbeeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b7f9d
 
6dbeeea
b0938c1
e3b7f9d
6dbeeea
 
 
 
 
 
 
 
 
 
 
e3b7f9d
6dbeeea
 
 
 
e3b7f9d
6dbeeea
e3b7f9d
6dbeeea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b7f9d
 
6dbeeea
e3b7f9d
 
6dbeeea
b0938c1
e3b7f9d
 
b0938c1
e3b7f9d
6dbeeea
 
e3b7f9d
b0938c1
e3b7f9d
6dbeeea
 
2ecff3b
6dbeeea
 
 
 
 
2ecff3b
6dbeeea
 
 
2ecff3b
6dbeeea
9c33716
 
6dbeeea
 
e3b7f9d
 
 
 
6dbeeea
 
 
 
 
 
 
 
 
 
 
e3b7f9d
b0938c1
e3b7f9d
 
b0938c1
e3b7f9d
 
 
 
b0938c1
 
 
6dbeeea
e3b7f9d
 
 
6dbeeea
4afd147
6dbeeea
 
e3b7f9d
 
 
 
6dbeeea
 
916a5a3
6dbeeea
e3b7f9d
6dbeeea
b0938c1
 
 
e3b7f9d
 
 
6dbeeea

import gradio as gr
import requests
import random
import tempfile
import os
import base64
import time
import threading
from datetime import datetime, timedelta, timezone

# --- Configuration ---
# Chat-completions endpoint that every generation request is POSTed to.
API_URL = "https://gen.pollinations.ai/v1/chat/completions"
# Default system key (used for the Free Tier); None when POLLINATIONS_TOKEN is unset.
SYSTEM_API_KEY = os.getenv("POLLINATIONS_TOKEN")

# VOICES
# Voice names offered in the UI dropdown and forwarded to the API unchanged.
VOICES = [
    "alloy", "echo", "fable", "onyx", "nova", "shimmer",
    "coral", "verse", "ballad", "ash", "sage", "amuch", "dan"
]

# --- Rate Limiting Globals ---
MAX_CHAR_LIMIT = 200        # Limit input characters for Free Tier
# NOTE(review): this comment previously read "50s wait" although the enforced
# value is 35 seconds -- confirm which cooldown is actually intended.
COOLDOWN_SECONDS = 35       # Minimum gap (seconds) between two Free Tier generations
LAST_REQUEST_TIME = 0       # time.time() of the last successful FREE generation (0 = none yet)

# THE TRAFFIC CONTROLLER
# Held by a Free Tier request for its whole wait + generate cycle, so free
# requests are processed strictly one at a time.
free_tier_lock = threading.Lock() 

def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """
    Core function that hits the Pollinations/OpenAI API.

    Sends one chat-completion request asking the ``openai-audio`` model to
    speak ``prompt`` and returns the decoded audio bytes.

    Args:
        prompt: Text to be spoken; sent as the user message.
        voice: Voice name placed in the ``audio`` section of the payload.
        emotion: Free-form style description embedded in the system message.
        seed: Seed value forwarded in the payload.
        api_key: Bearer token. When falsy, no Authorization header is sent.

    Returns:
        The base64-decoded audio data of the first choice in the response.

    Raises:
        gr.Error: On a network failure, a non-200 status code, a body that is
            not JSON, or a body without a decodable audio payload.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {
        "Content-Type": "application/json",
    }
    # Inject Key if provided
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Prompt engineering: instruct the model to repeat the text verbatim in the
    # requested emotional style instead of answering it.
    system_instruction = (
        "Only repeat what I say. "
        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt}
        ],
        "seed": seed
    }

    # Only the HTTP call itself can fail with a transport-level error, so keep
    # the try body down to that single statement.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {str(e)}") from e

    status = response.status_code
    if status != 200:
        if status in (402, 429):
            raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
        if status == 401:
            raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
        raise gr.Error(f"API Error {status}: {response.text}")

    # A 200 response is not guaranteed to carry JSON (e.g. a proxy error page).
    try:
        data = response.json()
    except ValueError as e:
        raise gr.Error("API returned a non-JSON response.") from e

    # Missing keys, an empty ``choices`` list, or a null ``message``/``audio``
    # all mean the same thing to the user: no audio came back.
    try:
        audio_b64 = data['choices'][0]['message']['audio']['data']
    except (KeyError, IndexError, TypeError) as e:
        raise gr.Error("API returned empty audio data.") from e

    # b64decode raises TypeError for None and binascii.Error (a ValueError)
    # for malformed input.
    try:
        return base64.b64decode(audio_b64)
    except (TypeError, ValueError) as e:
        raise gr.Error("API returned audio data that could not be decoded.") from e


def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
    """
    Gradio click handler: validate the request, route it, and save the audio.

    A request counts as "paid" when the user typed a key longer than five
    characters; it then uses that key and skips the queue. Every other request
    uses ``SYSTEM_API_KEY``, is subject to ``MAX_CHAR_LIMIT``, and is
    serialized through ``free_tier_lock`` with at least ``COOLDOWN_SECONDS``
    between successful generations.

    Args:
        prompt: Text to convert to speech.
        voice: Voice name forwarded to ``generate_audio_api``.
        emotion: Emotion/style description forwarded to ``generate_audio_api``.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed used when ``use_random_seed`` is false.
        api_key_input: Optional user-supplied API key.

    Returns:
        ``(path_to_temp_mp3, status_message)`` on success, or
        ``(None, error_text)`` when anything fails during generation or saving.

    Raises:
        gr.Error: If the prompt is empty, or exceeds ``MAX_CHAR_LIMIT`` on the
            Free Tier. (Failures after validation are returned as status text
            instead of being raised.)
    """
    global LAST_REQUEST_TIME

    # 1. User Status Check
    # Tolerate None so a missing field cannot crash on .strip().
    user_provided_key = (api_key_input or "").strip()
    is_paid_user = len(user_provided_key) > 5
    # Only a key that qualifies as "paid" is sent upstream. A too-short key is
    # treated as Free Tier and therefore must use the system key, otherwise the
    # free lane would send a token that is guaranteed to be rejected.
    active_key = user_provided_key if is_paid_user else SYSTEM_API_KEY

    # 2. Validation
    prompt = prompt or ""
    if not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    # RULE: Character Limit applies ONLY to Free Tier
    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

    # 3. Execution Logic (The Traffic Split)
    try:
        if use_random_seed:
            seed = random.randint(0, 2**32 - 1)
        else:
            if specific_seed is None:
                # An emptied Number field arrives as None; give a readable
                # status instead of the raw int() TypeError text.
                raise ValueError("Please enter a seed or enable 'Use Random Seed'.")
            seed = int(specific_seed)

        if is_paid_user:
            # === FAST LANE (Paid) ===
            # Bypasses the lock entirely. No waiting.
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"

        else:
            # === SLOW LANE (Free) ===
            # Free requests queue on the lock and are served one at a time.
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                # Once we hold the lock, honour the cooldown since the last
                # successful free generation.
                elapsed_time = time.time() - LAST_REQUEST_TIME

                if elapsed_time < COOLDOWN_SECONDS:
                    wait_time = COOLDOWN_SECONDS - elapsed_time
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                # Generate inside the lock so nobody else steals the slot
                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

                # Reset the global timer (only reached when generation succeeded)
                LAST_REQUEST_TIME = time.time()
                status_msg = "Generated! (Free Tier 🐢)"

        # 4. Save and Return
        # delete=False: the file must outlive this function so Gradio can serve it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            return temp_audio_file.name, status_msg

    except Exception as e:
        # UI boundary: surface the failure in the status box rather than as a
        # traceback, but keep a trace in the server log.
        print(f"LOG: Generation failed: {e!r}")
        return None, str(e)


def toggle_seed_input(use_random_seed):
    """Show the seed field only when random seeding is off, resetting it to 12345."""
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)


# --- RESTORED ORIGINAL UI ---
# Builds the whole Gradio interface: intro text, input widgets, outputs, the
# event wiring, and clickable examples.
with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    # The cooldown shown to users is derived from COOLDOWN_SECONDS so the text
    # cannot drift away from the value that is actually enforced.
    gr.Markdown(
        f"""
        Enter text, choose a voice and emotion, and generate audio.
        
        **🚀 Skip the Queue & Run Locally:**
        To avoid the **{COOLDOWN_SECONDS}s cooldown** and generate instantly:
        1. **Duplicate this Space** to run it privately.
        2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
        3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.
        
        **Commercial Use:**
        For a **Lifetime Commercial Use License**, please refer to our terms here:
        [LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)
        
        **Like & follow** for more AI projects:
        • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/) 
        • X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
        • Discord: nihal_gazi_io
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)", 
                placeholder="sk_...", 
                type="password",
                info=f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown and generate instantly."
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)", 
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3
            )
            emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            # Hidden by default; toggle_seed_input reveals it when random seeding is off.
            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        # Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
        concurrency_limit=20
    )

    # Each example row follows the same order as `inputs` below:
    # prompt, voice, emotion, use-random-seed flag, seed, API key.
    gr.Examples(
        examples=[
            ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
            ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
            ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
            ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
        ],
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False, 
    )

if __name__ == "__main__":
    app.launch()