Spaces:

NihalGazi
/

Text-To-Speech-Unlimited

Running

App Files Files Community

Text-To-Speech-Unlimited / app.py

NihalGazi

Update app.py

424d566 verified 1 day ago

raw

history blame contribute delete

8.7 kB

	import gradio as gr
	import requests
	import random
	import tempfile
	import os
	import base64
	import time
	import threading
	from datetime import datetime, timedelta, timezone

	# --- Configuration ---
	# Chat-completions endpoint that every generation request is POSTed to.
	API_URL = "https://gen.pollinations.ai/v1/chat/completions"
	# Default system key (Used for Free Tier). Read from the POLLINATIONS_TOKEN
	# environment variable, so it is None when that variable is not set.
	SYSTEM_API_KEY = os.getenv("POLLINATIONS_TOKEN")

	# VOICES
	# Voice names offered in the UI dropdown and forwarded in the request payload.
	VOICES = [
	"alloy", "echo", "fable", "onyx", "nova", "shimmer",
	"coral", "verse", "ballad", "ash", "sage", "amuch", "dan"
	]

	# --- Rate Limiting Globals ---
	MAX_CHAR_LIMIT = 200 # Limit input characters for Free Tier
	COOLDOWN_SECONDS = 35 # Minimum gap, in seconds, enforced between Free Tier generations
	LAST_REQUEST_TIME = 0 # Tracks the last successful FREE generation time (time.time() value; 0 = none yet)

	# THE TRAFFIC CONTROLLER
	# Free-tier requests hold this lock while they wait out the cooldown and
	# generate, so they are processed one at a time.
	free_tier_lock = threading.Lock()

	def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
	    """Request speech audio from the chat-completions endpoint at ``API_URL``.

	    Args:
	        prompt: Text sent as the user message (the words to be spoken).
	        voice: Voice name placed in the payload's ``audio.voice`` field.
	        emotion: Free-form style description interpolated into the system message.
	        seed: Integer forwarded as the payload's ``seed`` field.
	        api_key: Bearer token; when falsy, no ``Authorization`` header is sent.

	    Returns:
	        The base64-decoded bytes of ``choices[0].message.audio.data``.

	    Raises:
	        gr.Error: On a transport failure, a non-200 status code, or a
	            response body that does not contain decodable audio data.
	    """
	    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

	    headers = {"Content-Type": "application/json"}
	    # Inject the key only if one was provided.
	    if api_key:
	        headers["Authorization"] = f"Bearer {api_key}"

	    # The system message tells the model to repeat the user text verbatim,
	    # spoken in the requested emotion.
	    system_instruction = (
	        "Only repeat what I say. "
	        f'Now say with proper emphasis in a "{emotion}" emotion this statement.'
	    )

	    payload = {
	        "model": "openai-audio",
	        "modalities": ["text", "audio"],
	        "audio": {"voice": voice, "format": "mp3"},
	        "messages": [
	            {"role": "system", "content": system_instruction},
	            {"role": "user", "content": prompt},
	        ],
	        "seed": seed,
	    }

	    # Keep the try body down to the one call that can raise a transport error.
	    try:
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
	    except requests.exceptions.RequestException as e:
	        raise gr.Error(f"Network error: {str(e)}") from e

	    status = response.status_code
	    if status in (402, 429):
	        raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
	    if status == 401:
	        raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
	    if status != 200:
	        raise gr.Error(f"API Error {status}: {response.text}")

	    try:
	        data = response.json()
	        audio_b64 = data["choices"][0]["message"]["audio"]["data"]
	        return base64.b64decode(audio_b64)
	    except (KeyError, IndexError, TypeError, ValueError) as e:
	        # KeyError: a field is missing; IndexError: "choices" is empty;
	        # TypeError: a field is null / not subscriptable;
	        # ValueError: body is not valid JSON or the data is not valid base64.
	        raise gr.Error("API returned empty audio data.") from e


	def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
	    """Validate a UI request, route it by tier, and save the resulting MP3.

	    Requests whose key is longer than 5 characters go straight to the API.
	    All other requests are serialized through ``free_tier_lock`` and spaced
	    at least ``COOLDOWN_SECONDS`` after the previous successful free
	    generation.

	    Args:
	        prompt: Text to speak; must not be blank. Limited to
	            ``MAX_CHAR_LIMIT`` characters for free-tier requests.
	        voice: Voice name forwarded to ``generate_audio_api``.
	        emotion: Emotion/style description forwarded to ``generate_audio_api``.
	        use_random_seed: When true, a random 32-bit seed is drawn.
	        specific_seed: Seed used when ``use_random_seed`` is false.
	        api_key_input: Optional user-supplied API key.

	    Returns:
	        ``(path_to_mp3, status_message)`` on success, or
	        ``(None, error_text)`` when generation or saving fails.

	    Raises:
	        gr.Error: If the prompt is blank, exceeds the free-tier character
	            limit, or no seed is supplied while random seeding is off.
	    """
	    global LAST_REQUEST_TIME

	    # Treat missing values as empty text so .strip() cannot fail on None.
	    prompt = prompt or ""
	    user_provided_key = (api_key_input or "").strip()

	    # 1. User Status Check
	    # Use the client key if present, otherwise fall back to the system key.
	    # NOTE(review): a 1-5 character key is still sent as the Bearer token
	    # even though the request is throttled as free tier - confirm intended.
	    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
	    is_paid_user = len(user_provided_key) > 5

	    # 2. Validation
	    if not prompt.strip():
	        raise gr.Error("Prompt cannot be empty.")

	    # RULE: the character limit applies ONLY to the free tier.
	    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
	        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

	    # Reject an empty seed field up front instead of letting int(None)
	    # surface a raw TypeError message in the status box.
	    if not use_random_seed and specific_seed is None:
	        raise gr.Error("Seed cannot be empty when 'Use Random Seed' is off.")

	    # 3. Execution Logic (The Traffic Split)
	    try:
	        seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)

	        if is_paid_user:
	            # === FAST LANE (Paid) === bypasses the lock entirely.
	            print("LOG: Premium Request Processing...")
	            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
	            status_msg = "Generated! (Premium Speed ⚡)"
	        else:
	            # === SLOW LANE (Free) === one request at a time.
	            print("LOG: Free Request - Attempting to acquire lock...")
	            with free_tier_lock:
	                # With the lock held, wait out the rest of the cooldown.
	                elapsed_time = time.time() - LAST_REQUEST_TIME
	                if elapsed_time < COOLDOWN_SECONDS:
	                    wait_time = COOLDOWN_SECONDS - elapsed_time
	                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
	                    time.sleep(wait_time)

	                # Generate inside the lock so no other free request takes the slot.
	                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

	                # Only a successful generation resets the timer; a failed
	                # call raises before this line is reached.
	                LAST_REQUEST_TIME = time.time()
	                status_msg = "Generated! (Free Tier 🐢)"

	        # 4. Save and Return
	        # delete=False keeps the file on disk after the handle closes, so
	        # the returned path stays readable.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
	            temp_audio_file.write(audio_bytes)
	        return temp_audio_file.name, status_msg

	    except Exception as e:
	        # Generation and save failures are reported through the status
	        # output rather than raised.
	        return None, str(e)


	def toggle_seed_input(use_random_seed):
	    """Build the update for the seed field when the random-seed box changes.

	    The field is shown only while random seeding is off, and its value is
	    reset to 12345 on every toggle.
	    """
	    show_seed_field = not use_random_seed
	    return gr.update(visible=show_seed_field, value=12345)


	# --- RESTORED ORIGINAL UI ---
	# Layout: intro text, an input row (text fields on the left, seed controls
	# on the right), the generate button, and an output row. The cooldown shown
	# to users is derived from COOLDOWN_SECONDS so the text cannot drift from
	# the value that is actually enforced.
	with gr.Blocks() as app:
	    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
	    gr.Markdown(
	        f"""
	        Enter text, choose a voice and emotion, and generate audio.

	        🚀 Skip the Queue & Run Locally:
	        To avoid the {COOLDOWN_SECONDS}s cooldown and generate instantly:
	        1. Duplicate this Space to run it privately.
	        2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
	        3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.

	        Commercial Use:
	        For a Lifetime Commercial Use License, please refer to our terms here:
	        [LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)

	        Like & follow for more AI projects:
	        • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
	        • X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
	        • Discord: nihal_gazi_io
	        """
	    )

	    with gr.Row():
	        with gr.Column(scale=2):
	            api_key_input = gr.Textbox(
	                label="Pollinations API Key (Optional)",
	                placeholder="sk_...",
	                type="password",
	                info=f"Enter your key to REMOVE the {COOLDOWN_SECONDS}s cooldown and generate instantly.",
	            )
	            prompt_input = gr.Textbox(
	                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
	                placeholder="Enter the text you want to convert to speech...",
	                max_lines=3,
	            )
	            emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
	            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
	        with gr.Column(scale=1):
	            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
	            # Hidden by default; toggle_seed_input shows it when random seeding is off.
	            seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

	    submit_button = gr.Button("Generate Audio", variant="primary")

	    with gr.Row():
	        audio_output = gr.Audio(label="Generated Audio", type="filepath")
	        status_output = gr.Textbox(label="Status")

	    random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

	    submit_button.click(
	        fn=text_to_speech_app,
	        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
	        outputs=[audio_output, status_output],
	        # Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
	        concurrency_limit=20,
	    )

	    # Each example row follows the same order as the inputs list below.
	    gr.Examples(
	        examples=[
	            ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
	            ["Surely you wouldn't want that. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
	            ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
	            ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
	        ],
	        inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
	        outputs=[audio_output, status_output],
	        fn=text_to_speech_app,
	        cache_examples=False,
	    )

	if __name__ == "__main__":
	    app.launch()