# textai-v2/api/endpoints.py
"""
API Endpoints
All return JSON strings for consistency.
Can be used by Gradio UI or external apps.
"""
import json
from typing import Optional
from core.models import get_model_service
from core.sessions import get_session_service
from core.state import get_state
from core.logger import logger
from core.config import VERSION
def _json_response(success: bool, data: Optional[dict] = None, error: Optional[str] = None) -> str:
"""Standard JSON response format"""
response = {"success": success}
if data:
response.update(data)
if error:
response["error"] = error
return json.dumps(response, indent=2)
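# Example output (illustrative):
#   _json_response(True, {"count": 0}) yields
#     {
#       "success": true,
#       "count": 0
#     }
#   _json_response(False, error="Model not found") yields
#     {
#       "success": false,
#       "error": "Model not found"
#     }
# Note: falsy data (None or an empty dict) is omitted from the response entirely.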
# ══════════════════════════════════════════════════════════════════════════════
# MODEL APIs
# ══════════════════════════════════════════════════════════════════════════════
def api_list_models() -> str:
"""List all installed models"""
service = get_model_service()
models = service.get_installed_models()
loaded_id = get_state().get_loaded_model_id()
return _json_response(True, {
"models": models,
"count": len(models),
"loaded_model_id": loaded_id
})
def api_get_model(model_id: str) -> str:
"""Get specific model details"""
model = get_state().get_model_by_id(model_id)
if model:
return _json_response(True, {"model": model})
return _json_response(False, error="Model not found")
def api_load_model(model_id: str) -> str:
"""Load a model"""
service = get_model_service()
result = service.load_model(model_id)
    return json.dumps(result, indent=2)
def api_unload_model() -> str:
"""Unload current model"""
service = get_model_service()
service.unload_model()
return _json_response(True, {"message": "Model unloaded"})
def api_delete_model(model_id: str) -> str:
"""Delete an installed model"""
service = get_model_service()
result = service.delete_model(model_id)
    return json.dumps(result, indent=2)
def api_search_models(query: str = "", max_params: float = 7.0, limit: int = 20) -> str:
"""Search HuggingFace for models"""
service = get_model_service()
results, status = service.search_hf_models(query, max_params, limit)
return _json_response(True, {
"results": results,
"count": len(results),
"status": status
})
def api_get_model_files(repo_id: str) -> str:
"""Get available files for a HF model"""
service = get_model_service()
files = service.get_hf_model_files(repo_id)
return _json_response(True, {
"repo_id": repo_id,
"files": files,
"count": len(files)
})
def api_download_model(repo_id: str, filename: str) -> str:
"""Download a model from HuggingFace"""
service = get_model_service()
result = service.download_model(repo_id, filename)
    return json.dumps(result, indent=2)
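# Illustrative end-to-end model workflow. The repo id, the GGUF filename, and
# any result fields other than "success" are assumptions about the service
# layer, not guarantees:
#
#   hits = json.loads(api_search_models("llama", max_params=7.0, limit=5))
#   files = json.loads(api_get_model_files("TheBloke/Llama-2-7B-GGUF"))
#   api_download_model("TheBloke/Llama-2-7B-GGUF", "llama-2-7b.Q4_K_M.gguf")
#   api_load_model(json.loads(api_list_models())["models"][0]["id"])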
# ══════════════════════════════════════════════════════════════════════════════
# SESSION APIs
# ══════════════════════════════════════════════════════════════════════════════
def api_list_sessions() -> str:
"""List all sessions"""
service = get_session_service()
sessions = service.get_all_sessions()
return _json_response(True, {
"sessions": sessions,
"count": len(sessions),
"active_session_id": get_state().get_active_session_id()
})
def api_get_session(session_id: str) -> str:
"""Get session with messages"""
service = get_session_service()
session = service.get_session(session_id)
if session:
return _json_response(True, {"session": session})
return _json_response(False, error="Session not found")
def api_create_session(
title: str = "",
session_type: str = "chat",
system_prompt: str = ""
) -> str:
"""Create new session"""
service = get_session_service()
session = service.create_session(title, session_type, system_prompt)
return _json_response(True, {
"session_id": session["id"],
"title": session["title"]
})
def api_delete_session(session_id: str) -> str:
    """Delete a session"""
    service = get_session_service()
    result = service.delete_session(session_id)
    if result:
        return _json_response(True, {"message": "Deleted"})
    return _json_response(False, error="Session not found")
def api_rename_session(session_id: str, new_title: str) -> str:
    """Rename a session"""
    service = get_session_service()
    result = service.rename_session(session_id, new_title)
    if result:
        return _json_response(True, {"title": new_title})
    return _json_response(False, error="Session not found")
def api_clear_session(session_id: str) -> str:
    """Clear session messages"""
    service = get_session_service()
    result = service.clear_session(session_id)
    if result:
        return _json_response(True, {"message": "Cleared"})
    return _json_response(False, error="Session not found")
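# Illustrative session lifecycle using only the endpoints above (values are
# hypothetical):
#
#   created = json.loads(api_create_session("Demo", "chat", "You are concise."))
#   sid = created["session_id"]
#   api_rename_session(sid, "Renamed demo")
#   api_clear_session(sid)
#   api_delete_session(sid)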
# ══════════════════════════════════════════════════════════════════════════════
# CHAT / INFERENCE APIs
# ══════════════════════════════════════════════════════════════════════════════
def api_chat(
session_id: str,
message: str,
max_tokens: int = 512,
temperature: float = 0.7
) -> str:
"""Send chat message and get response"""
model_service = get_model_service()
session_service = get_session_service()
# Check model loaded
if not model_service.is_model_loaded():
return _json_response(False, error="No model loaded")
# Get session
session = session_service.get_session(session_id)
if not session:
return _json_response(False, error="Session not found")
# Add user message
session_service.add_message(session_id, "user", message)
# Build messages for model
messages = []
if session.get("system_prompt"):
messages.append({"role": "system", "content": session["system_prompt"]})
for msg in session_service.get_messages(session_id):
messages.append({"role": msg["role"], "content": msg["content"]})
# Generate response
response = model_service.generate(messages, max_tokens, temperature)
# Add assistant response
session_service.add_message(session_id, "assistant", response)
return _json_response(True, {
"response": response,
"session_id": session_id
})
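# Example turn (requires a loaded model and an existing session; values are
# hypothetical):
#
#   reply = json.loads(api_chat(sid, "Hello!", max_tokens=256, temperature=0.5))
#   if reply["success"]:
#       print(reply["response"])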
def api_inference(
prompt: str = "",
messages: str = "[]",
system_prompt: str = "",
max_tokens: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
context: str = "",
lora_scale: float = 1.0
) -> str:
"""
Universal inference endpoint.
Can use direct prompt or message list.
Supports system prompt, context injection, LoRA scaling (future).
"""
model_service = get_model_service()
if not model_service.is_model_loaded():
return _json_response(False, error="No model loaded")
try:
# Parse messages if provided
msg_list = json.loads(messages) if messages and messages != "[]" else []
# Build full message list
full_messages = []
# System prompt
if system_prompt:
full_messages.append({"role": "system", "content": system_prompt})
# Context injection
if context:
full_messages.append({"role": "system", "content": f"Context:\n{context}"})
# Conversation messages
full_messages.extend(msg_list)
# Direct prompt
if prompt:
full_messages.append({"role": "user", "content": prompt})
if not full_messages:
return _json_response(False, error="No prompt or messages provided")
# Generate
response = model_service.generate(full_messages, max_tokens, temperature, top_p)
loaded_model = model_service.get_loaded_model()
return _json_response(True, {
"response": response,
"model_id": loaded_model["id"] if loaded_model else None,
"config": {
"max_tokens": max_tokens,
"temperature": temperature,
"top_p": top_p,
"lora_scale": lora_scale
}
})
except Exception as e:
logger.error("API", f"Inference error: {e}")
return _json_response(False, error=str(e))
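# Example calls (illustrative). Note that `messages` is a JSON-encoded string,
# not a Python list:
#
#   api_inference(prompt="Summarize retrieval-augmented generation in one line.")
#
#   history = json.dumps([{"role": "user", "content": "Who wrote it?"}])
#   api_inference(messages=history, context="Document: 'Dune' by Frank Herbert")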
def api_chat_with_config(
session_id: str,
message: str,
max_tokens: int = 512,
temperature: float = 0.7,
top_p: float = 0.9,
system_prompt_override: str = "",
context: str = "",
lora_scale: float = 1.0
) -> str:
"""
Chat with full configuration options.
Supports: custom inference params, system prompt override, context injection.
"""
model_service = get_model_service()
session_service = get_session_service()
if not model_service.is_model_loaded():
return _json_response(False, error="No model loaded")
session = session_service.get_session(session_id)
if not session:
return _json_response(False, error="Session not found")
# Add user message
session_service.add_message(session_id, "user", message)
# Build messages
messages = []
# System prompt (override or session default)
sys_prompt = system_prompt_override or session.get("system_prompt", "")
if sys_prompt:
messages.append({"role": "system", "content": sys_prompt})
# Context injection
if context:
messages.append({"role": "system", "content": f"Context:\n{context}"})
# Conversation history
for msg in session_service.get_messages(session_id):
messages.append({"role": msg["role"], "content": msg["content"]})
# Generate
response = model_service.generate(messages, max_tokens, temperature, top_p)
# Add response
session_service.add_message(session_id, "assistant", response)
return _json_response(True, {
"response": response,
"session_id": session_id,
"config_used": {
"max_tokens": max_tokens,
"temperature": temperature,
"top_p": top_p,
"lora_scale": lora_scale,
"context_provided": bool(context)
}
})
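# Example (hypothetical values): override the session's system prompt and
# inject retrieved context for a single turn:
#
#   api_chat_with_config(
#       sid,
#       "What does the report say about growth?",
#       system_prompt_override="Answer only from the provided context.",
#       context="The quarterly report shows 12% growth.",
#   )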
# ══════════════════════════════════════════════════════════════════════════════
# SYSTEM APIs
# ══════════════════════════════════════════════════════════════════════════════
def api_get_status() -> str:
"""Get current system status"""
model_service = get_model_service()
state = get_state()
loaded = model_service.get_loaded_model()
return _json_response(True, {
"version": VERSION,
"model_loaded": model_service.is_model_loaded(),
"loaded_model": loaded["name"] if loaded else None,
"installed_models_count": len(state.get_installed_models()),
"sessions_count": len(state.get_sessions()),
"active_session_id": state.get_active_session_id()
})
def api_get_backends() -> str:
"""Check which inference backends are available"""
from core.models import _get_llama_cpp, _get_transformers
return _json_response(True, {
"backends": {
"gguf": _get_llama_cpp() is not None,
"transformers": _get_transformers() is not None
}
})
def api_health() -> str:
"""Health check endpoint"""
return _json_response(True, {
"status": "healthy",
"version": VERSION
})
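if __name__ == "__main__":
    # Minimal smoke test, a sketch assuming the core services initialize
    # without a model or any sessions present.
    print(api_health())
    print(api_get_status())
    print(api_list_models())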