""" API Endpoints All return JSON strings for consistency. Can be used by Gradio UI or external apps. """ import json from typing import Optional from core.models import get_model_service from core.sessions import get_session_service from core.state import get_state from core.logger import logger from core.config import VERSION def _json_response(success: bool, data: dict = None, error: str = None) -> str: """Standard JSON response format""" response = {"success": success} if data: response.update(data) if error: response["error"] = error return json.dumps(response, indent=2) # ══════════════════════════════════════════════════════════════════════════════ # MODEL APIs # ══════════════════════════════════════════════════════════════════════════════ def api_list_models() -> str: """List all installed models""" service = get_model_service() models = service.get_installed_models() loaded_id = get_state().get_loaded_model_id() return _json_response(True, { "models": models, "count": len(models), "loaded_model_id": loaded_id }) def api_get_model(model_id: str) -> str: """Get specific model details""" model = get_state().get_model_by_id(model_id) if model: return _json_response(True, {"model": model}) return _json_response(False, error="Model not found") def api_load_model(model_id: str) -> str: """Load a model""" service = get_model_service() result = service.load_model(model_id) return json.dumps(result) def api_unload_model() -> str: """Unload current model""" service = get_model_service() service.unload_model() return _json_response(True, {"message": "Model unloaded"}) def api_delete_model(model_id: str) -> str: """Delete an installed model""" service = get_model_service() result = service.delete_model(model_id) return json.dumps(result) def api_search_models(query: str = "", max_params: float = 7.0, limit: int = 20) -> str: """Search HuggingFace for models""" service = get_model_service() results, status = service.search_hf_models(query, max_params, limit) return _json_response(True, { "results": results, "count": len(results), "status": status }) def api_get_model_files(repo_id: str) -> str: """Get available files for a HF model""" service = get_model_service() files = service.get_hf_model_files(repo_id) return _json_response(True, { "repo_id": repo_id, "files": files, "count": len(files) }) def api_download_model(repo_id: str, filename: str) -> str: """Download a model from HuggingFace""" service = get_model_service() result = service.download_model(repo_id, filename) return json.dumps(result) # ══════════════════════════════════════════════════════════════════════════════ # SESSION APIs # ══════════════════════════════════════════════════════════════════════════════ def api_list_sessions() -> str: """List all sessions""" service = get_session_service() sessions = service.get_all_sessions() return _json_response(True, { "sessions": sessions, "count": len(sessions), "active_session_id": get_state().get_active_session_id() }) def api_get_session(session_id: str) -> str: """Get session with messages""" service = get_session_service() session = service.get_session(session_id) if session: return _json_response(True, {"session": session}) return _json_response(False, error="Session not found") def api_create_session( title: str = "", session_type: str = "chat", system_prompt: str = "" ) -> str: """Create new session""" service = get_session_service() session = service.create_session(title, session_type, system_prompt) return _json_response(True, { "session_id": session["id"], "title": 
session["title"] }) def api_delete_session(session_id: str) -> str: """Delete a session""" service = get_session_service() result = service.delete_session(session_id) return _json_response(result, {"message": "Deleted" if result else "Not found"}) def api_rename_session(session_id: str, new_title: str) -> str: """Rename a session""" service = get_session_service() result = service.rename_session(session_id, new_title) return _json_response(result, {"title": new_title}) def api_clear_session(session_id: str) -> str: """Clear session messages""" service = get_session_service() result = service.clear_session(session_id) return _json_response(result, {"message": "Cleared"}) # ══════════════════════════════════════════════════════════════════════════════ # CHAT / INFERENCE APIs # ══════════════════════════════════════════════════════════════════════════════ def api_chat( session_id: str, message: str, max_tokens: int = 512, temperature: float = 0.7 ) -> str: """Send chat message and get response""" model_service = get_model_service() session_service = get_session_service() # Check model loaded if not model_service.is_model_loaded(): return _json_response(False, error="No model loaded") # Get session session = session_service.get_session(session_id) if not session: return _json_response(False, error="Session not found") # Add user message session_service.add_message(session_id, "user", message) # Build messages for model messages = [] if session.get("system_prompt"): messages.append({"role": "system", "content": session["system_prompt"]}) for msg in session_service.get_messages(session_id): messages.append({"role": msg["role"], "content": msg["content"]}) # Generate response response = model_service.generate(messages, max_tokens, temperature) # Add assistant response session_service.add_message(session_id, "assistant", response) return _json_response(True, { "response": response, "session_id": session_id }) def api_inference( prompt: str = "", messages: str = "[]", system_prompt: str = "", max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9, context: str = "", lora_scale: float = 1.0 ) -> str: """ Universal inference endpoint. Can use direct prompt or message list. Supports system prompt, context injection, LoRA scaling (future). 
""" model_service = get_model_service() if not model_service.is_model_loaded(): return _json_response(False, error="No model loaded") try: # Parse messages if provided msg_list = json.loads(messages) if messages and messages != "[]" else [] # Build full message list full_messages = [] # System prompt if system_prompt: full_messages.append({"role": "system", "content": system_prompt}) # Context injection if context: full_messages.append({"role": "system", "content": f"Context:\n{context}"}) # Conversation messages full_messages.extend(msg_list) # Direct prompt if prompt: full_messages.append({"role": "user", "content": prompt}) if not full_messages: return _json_response(False, error="No prompt or messages provided") # Generate response = model_service.generate(full_messages, max_tokens, temperature, top_p) loaded_model = model_service.get_loaded_model() return _json_response(True, { "response": response, "model_id": loaded_model["id"] if loaded_model else None, "config": { "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "lora_scale": lora_scale } }) except Exception as e: logger.error("API", f"Inference error: {e}") return _json_response(False, error=str(e)) def api_chat_with_config( session_id: str, message: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9, system_prompt_override: str = "", context: str = "", lora_scale: float = 1.0 ) -> str: """ Chat with full configuration options. Supports: custom inference params, system prompt override, context injection. """ model_service = get_model_service() session_service = get_session_service() if not model_service.is_model_loaded(): return _json_response(False, error="No model loaded") session = session_service.get_session(session_id) if not session: return _json_response(False, error="Session not found") # Add user message session_service.add_message(session_id, "user", message) # Build messages messages = [] # System prompt (override or session default) sys_prompt = system_prompt_override or session.get("system_prompt", "") if sys_prompt: messages.append({"role": "system", "content": sys_prompt}) # Context injection if context: messages.append({"role": "system", "content": f"Context:\n{context}"}) # Conversation history for msg in session_service.get_messages(session_id): messages.append({"role": msg["role"], "content": msg["content"]}) # Generate response = model_service.generate(messages, max_tokens, temperature, top_p) # Add response session_service.add_message(session_id, "assistant", response) return _json_response(True, { "response": response, "session_id": session_id, "config_used": { "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "lora_scale": lora_scale, "context_provided": bool(context) } }) # ══════════════════════════════════════════════════════════════════════════════ # SYSTEM APIs # ══════════════════════════════════════════════════════════════════════════════ def api_get_status() -> str: """Get current system status""" model_service = get_model_service() state = get_state() loaded = model_service.get_loaded_model() return _json_response(True, { "version": VERSION, "model_loaded": model_service.is_model_loaded(), "loaded_model": loaded["name"] if loaded else None, "installed_models_count": len(state.get_installed_models()), "sessions_count": len(state.get_sessions()), "active_session_id": state.get_active_session_id() }) def api_get_backends() -> str: """Check which inference backends are available""" from core.models import _get_llama_cpp, _get_transformers 
def api_get_backends() -> str:
    """Check which inference backends are available"""
    from core.models import _get_llama_cpp, _get_transformers
    return _json_response(True, {
        "backends": {
            "gguf": _get_llama_cpp() is not None,
            "transformers": _get_transformers() is not None
        }
    })


def api_health() -> str:
    """Health check endpoint"""
    return _json_response(True, {
        "status": "healthy",
        "version": VERSION
    })
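
# Example: a full round trip through this module.
# A hedged sketch that assumes "my-model-id" names an installed model and that
# every step succeeds; all functions used here are defined above.
#
#     json.loads(api_load_model("my-model-id"))
#     sess = json.loads(api_create_session("Demo chat"))
#     reply = json.loads(api_chat(sess["session_id"], "Hello!", max_tokens=256))
#     print(reply["response"])
#     json.loads(api_unload_model())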