|
|
import gradio as gr |
|
|
import requests |
|
|
import random |
|
|
import tempfile |
|
|
import os |
|
|
import base64 |
|
|
import time |
|
|
import threading |
|
|
from datetime import datetime, timedelta, timezone |
|
|
|
|
|
|
|
|
# Chat-completions endpoint that generate_audio_api() posts to.
API_URL = "https://gen.pollinations.ai/v1/chat/completions"

# Fallback key used when the visitor does not type one in; None when the
# POLLINATIONS_TOKEN environment variable is not set.
SYSTEM_API_KEY = os.environ.get("POLLINATIONS_TOKEN")

# Voice names offered in the "Voice" dropdown.
VOICES = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
    "coral",
    "verse",
    "ballad",
    "ash",
    "sage",
    "amuch",
    "dan",
]

# Free-tier throttling knobs.
MAX_CHAR_LIMIT = 200    # longest prompt accepted without a user-supplied key
COOLDOWN_SECONDS = 35   # minimum spacing between free-tier API calls
LAST_REQUEST_TIME = 0   # time.time() stamp kept by text_to_speech_app; 0 = none yet

# Serialises free-tier requests so the cooldown is applied one caller at a time.
free_tier_lock = threading.Lock()
|
|
|
|
|
def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """
    Core function that hits the Pollinations/OpenAI API.

    Posts a chat-completions request for the ``openai-audio`` model and
    returns the decoded MP3 payload from the first choice.

    Args:
        prompt: Text to be spoken; sent as the user message.
        voice: Voice name placed in the request's ``audio.voice`` field.
        emotion: Free-form emotion description embedded in the system message.
        seed: Seed value forwarded to the API.
        api_key: Bearer token; when empty/None no Authorization header is sent.

    Returns:
        The raw audio bytes (base64-decoded from the response).

    Raises:
        gr.Error: On network failure, any non-200 status, a non-JSON body,
            or a response that carries no decodable audio data.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    system_instruction = (
        "Only repeat what I say. "
        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ],
        "seed": seed,
    }

    # Only the HTTP call itself can raise a transport-level error, so keep
    # the try body down to that one statement.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {str(e)}") from e

    status = response.status_code
    if status != 200:
        if status in (402, 429):
            raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
        if status == 401:
            raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
        raise gr.Error(f"API Error {status}: {response.text}")

    # A 200 with a non-JSON body is a malformed response, not a network
    # problem; every JSON decode error requests can raise is a ValueError.
    try:
        data = response.json()
    except ValueError as e:
        raise gr.Error("API returned a response that is not valid JSON.") from e

    # The payload may be missing keys (KeyError), have an empty "choices"
    # list (IndexError), contain null where an object/string is expected
    # (TypeError), or hold invalid base64 (ValueError). All of these mean
    # "no usable audio".
    try:
        audio_b64 = data["choices"][0]["message"]["audio"]["data"]
        return base64.b64decode(audio_b64)
    except (KeyError, IndexError, TypeError, ValueError) as e:
        raise gr.Error("API returned empty audio data.") from e
|
|
|
|
|
|
|
|
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
    """
    Gradio handler: validate the request, throttle free-tier users, and
    return the generated audio as a temporary MP3 file.

    A caller who supplies a key longer than 5 characters is treated as a
    premium user: no length limit, no cooldown. Everyone else shares
    ``free_tier_lock`` and is spaced ``COOLDOWN_SECONDS`` apart.

    Args:
        prompt: Text to synthesise.
        voice: Voice name passed through to ``generate_audio_api``.
        emotion: Emotion description passed through to ``generate_audio_api``.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed used when ``use_random_seed`` is false; a random
            seed is drawn instead if this is None.
        api_key_input: Optional user key; None is treated like an empty string.

    Returns:
        ``(path_to_mp3, status_message)`` on success, or
        ``(None, error_text)`` if generation fails.

    Raises:
        gr.Error: If the prompt is empty, or exceeds ``MAX_CHAR_LIMIT`` for a
            free-tier request.
    """
    global LAST_REQUEST_TIME

    # Treat a missing (None) key like an empty one instead of crashing on .strip().
    user_provided_key = (api_key_input or "").strip()

    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
    # NOTE(review): a 1-5 character key is still sent to the API but gets
    # free-tier treatment - confirm that this is intended.
    is_paid_user = len(user_provided_key) > 5

    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

    try:
        # An emptied seed field arrives as None; fall back to a random seed
        # rather than failing inside int().
        if use_random_seed or specific_seed is None:
            seed = random.randint(0, 2**32 - 1)
        else:
            seed = int(specific_seed)

        if is_paid_user:
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"
        else:
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                elapsed_time = time.time() - LAST_REQUEST_TIME

                if elapsed_time < COOLDOWN_SECONDS:
                    # Clamp so a backwards wall-clock jump can never make
                    # the wait longer than one full cooldown.
                    wait_time = min(COOLDOWN_SECONDS, COOLDOWN_SECONDS - elapsed_time)
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                try:
                    audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
                finally:
                    # Stamp the attempt even when it fails; otherwise the
                    # next queued request would skip the cooldown right
                    # after an error such as a rate-limit response.
                    LAST_REQUEST_TIME = time.time()

            status_msg = "Generated! (Free Tier 🐢)"

        # delete=False: the file must outlive this function so the path can
        # be handed to the audio output component.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
        return temp_audio_file.name, status_msg

    except Exception as e:
        # Deliberate best-effort: show the failure in the status box.
        return None, str(e)
|
|
|
|
|
|
|
|
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed box changes.

    The field is shown only while random seeding is switched off, and its
    value is set back to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)
|
|
|
|
|
|
|
|
|
|
|
# UI definition. The cooldown shown to users is interpolated from
# COOLDOWN_SECONDS so the text cannot drift from the value that is enforced.
with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    gr.Markdown(
        f"""
        Enter text, choose a voice and emotion, and generate audio.

        **🚀 Skip the Queue & Run Locally:**
        To avoid the **{COOLDOWN_SECONDS}s cooldown** and generate instantly:
        1. **Duplicate this Space** to run it privately.
        2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
        3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.

        **Commercial Use:**
        For a **Lifetime Commercial Use License**, please refer to our terms here:
        [LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)

        **Like & follow** for more AI projects:
        • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
        • X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
        • Discord: nihal_gazi_io
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)",
                placeholder="sk_...",
                type="password",
                info=f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown and generate instantly.",
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3,
            )
            emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            # Hidden initially because the random-seed box starts checked.
            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # Show or hide the seed field whenever the checkbox is toggled.
    random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        # Free-tier calls still run one at a time because the handler takes
        # free_tier_lock.
        concurrency_limit=20,
    )

    # Each example row follows the order of `inputs` below.
    gr.Examples(
        examples=[
            ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
            ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
            ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
            ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
        ],
        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False,
    )


if __name__ == "__main__":
    app.launch()