# Gradio text-to-speech app backed by the Pollinations chat-completions API.
import gradio as gr
import requests
import random
import tempfile
import os
import base64
import time
import threading
from datetime import datetime, timedelta, timezone
# --- Configuration ---
# Chat-completions endpoint that every generation request is posted to.
API_URL = "https://gen.pollinations.ai/v1/chat/completions"

# System-wide key read from the environment; used when the user supplies none.
SYSTEM_API_KEY = os.environ.get("POLLINATIONS_TOKEN")

# Voice names offered in the UI dropdown.
VOICES = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
    "coral",
    "verse",
    "ballad",
    "ash",
    "sage",
    "amuch",
    "dan",
]

# --- Rate limiting (free tier) ---
MAX_CHAR_LIMIT = 200  # longest prompt accepted from a free-tier request
COOLDOWN_SECONDS = 35  # minimum gap, in seconds, between free-tier generations
LAST_REQUEST_TIME = 0  # time.time() of the last successful free-tier generation

# Serializes free-tier requests so the cooldown is enforced one request at a time.
free_tier_lock = threading.Lock()
def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """Request speech audio for ``prompt`` and return the decoded MP3 bytes.

    Posts a single chat-completions request to ``API_URL`` using the
    ``openai-audio`` model. The system message instructs the model to repeat
    the user's text in the requested emotion.

    Args:
        prompt: Text to be spoken; sent as the user message.
        voice: Voice name placed in the request's ``audio.voice`` field.
        emotion: Free-form emotion description embedded in the system message.
        seed: Value sent as the request's ``seed`` field.
        api_key: Bearer token; when falsy, no ``Authorization`` header is sent.

    Returns:
        The base64-decoded audio payload taken from the first choice.

    Raises:
        gr.Error: On a non-200 response, on a transport-level failure, or
            when the response body does not contain decodable audio data.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {
        "Content-Type": "application/json",
    }
    # Only authenticate when a key is available.
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Prompt wrapper that asks the model to read the text back verbatim.
    system_instruction = (
        "Only repeat what I say. "
        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ],
        "seed": seed,
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

        if response.status_code != 200:
            if response.status_code in (402, 429):
                raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
            if response.status_code == 401:
                raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
            raise gr.Error(f"API Error {response.status_code}: {response.text}")

        data = response.json()
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {e}") from e

    try:
        audio_b64 = data["choices"][0]["message"]["audio"]["data"]
        return base64.b64decode(audio_b64)
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # KeyError: a field is missing; IndexError: "choices" is empty;
        # TypeError: a level is null / not subscriptable; ValueError: the
        # payload is not valid base64. All mean "no usable audio came back".
        raise gr.Error("API returned empty audio data.") from e
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
    """Validate a UI request, generate speech, and write it to a temporary MP3.

    A request counts as "paid" when the user-supplied key is longer than five
    characters. Paid requests call the API immediately. Free requests are
    serialized through ``free_tier_lock`` and spaced at least
    ``COOLDOWN_SECONDS`` apart, measured from ``LAST_REQUEST_TIME``.

    Args:
        prompt: Text to speak. Must be non-blank; free requests are limited
            to ``MAX_CHAR_LIMIT`` characters.
        voice: Voice name forwarded to ``generate_audio_api``.
        emotion: Emotion description forwarded to ``generate_audio_api``.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed to use when ``use_random_seed`` is false. If it
            is ``None`` a random seed is drawn instead.
        api_key_input: Optional user key; ``None`` is treated as empty.

    Returns:
        ``(path, status)`` where ``path`` is the generated ``.mp3`` file and
        ``status`` a success message, or ``(None, error_text)`` when
        generation fails.

    Raises:
        gr.Error: If the prompt is blank or exceeds the free-tier limit.
    """
    global LAST_REQUEST_TIME

    # Defensive: treat a missing (None) text value as an empty string so the
    # validation below reports it instead of crashing on ``.strip()``.
    prompt = prompt or ""
    user_provided_key = (api_key_input or "").strip()

    # 1. User status: use the client key if present, else the system key.
    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
    is_paid_user = len(user_provided_key) > 5

    # 2. Validation
    if not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    # The character limit applies only to the free tier.
    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

    # 3. Execution (paid and free requests take different paths)
    try:
        # An empty seed field falls back to a random seed rather than
        # failing on int(None).
        if use_random_seed or specific_seed is None:
            seed = random.randint(0, 2**32 - 1)
        else:
            seed = int(specific_seed)

        if is_paid_user:
            # Paid path: no lock, no cooldown.
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"
        else:
            # Free path: one request at a time, spaced by the cooldown.
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                elapsed_time = time.time() - LAST_REQUEST_TIME
                if elapsed_time < COOLDOWN_SECONDS:
                    wait_time = COOLDOWN_SECONDS - elapsed_time
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                # Generate while still holding the lock so no other free
                # request can run in the same slot.
                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

                # Only a successful generation restarts the cooldown.
                LAST_REQUEST_TIME = time.time()
            status_msg = "Generated! (Free Tier 🐢)"

        # 4. Save and return. delete=False keeps the file on disk after the
        # handle is closed so the returned path stays readable.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
        return temp_audio_file.name, status_msg

    except Exception as e:
        # Best-effort boundary: report any failure in the status output
        # instead of raising.
        return None, str(e)
def toggle_seed_input(use_random_seed):
    """Return an update that shows the seed field only when random seeding is off.

    The field's value is reset to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)
# --- RESTORED ORIGINAL UI ---
# Builds the Gradio page: intro text, input controls, outputs, event wiring
# and clickable examples. The cooldown shown to users is taken from
# COOLDOWN_SECONDS so the text cannot drift from the enforced value.
with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    gr.Markdown(
        f"""
Enter text, choose a voice and emotion, and generate audio.
**🚀 Skip the Queue & Run Locally:**
To avoid the **{COOLDOWN_SECONDS}s cooldown** and generate instantly:
1. **Duplicate this Space** to run it privately.
2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.
**Commercial Use:**
For a **Lifetime Commercial Use License**, please refer to our terms here:
[LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)
**Like & follow** for more AI projects:
• Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
• X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
• Discord: nihal_gazi_io
"""
    )

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)",
                placeholder="sk_...",
                type="password",
                info=f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown and generate instantly.",
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3,
            )
            emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            # Hidden until the checkbox above is unticked.
            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        # Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
        concurrency_limit=20,
    )

    gr.Examples(
        examples=[
            ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
            ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
            ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
            ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
        ],
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False,
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()