# ChunDe's picture
# feat: Add MCP compatibility with smart collaboration thumbnails
# 65493c3
# ----------------------------------------------
# Thumbnail Crafter MCP Server with Browser Automation
# Uses Playwright to interact with the React app
# ----------------------------------------------
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from playwright.async_api import async_playwright, Browser, Page
import json
import asyncio
import base64
from typing import Dict, Any, AsyncGenerator, Optional
from pathlib import Path
import os
# FastAPI application object; the route decorators further down register on it.
app = FastAPI(
    title="Thumbnail Crafter MCP Server",
    description="AI-callable thumbnail generation using real React app",
    version="2.0.0",
)

# CORS is left fully open: any origin, any method, any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared Playwright browser handle; None until a browser has been launched.
browser: Optional[Browser] = None

# Base URL of the React app that the browser automation drives.
# Overridable through the APP_URL environment variable.
APP_URL = os.environ.get("APP_URL", "http://localhost:7860")
# ----------------------------------------------
# Browser Management
# ----------------------------------------------
# Playwright driver that owns ``browser``. It is kept so that it can be
# stopped again; dropping the handle would leave the driver process running.
_playwright: Any = None

# Serialises (re)launches. Created lazily so the lock is instantiated while
# the server's event loop is already running.
_browser_lock: Optional[asyncio.Lock] = None


async def get_browser() -> Browser:
    """Return the shared headless Chromium instance, launching it if needed.

    A browser is launched on first use and launched again whenever the cached
    one has been closed or has lost its connection (for example after a
    Chromium crash), so a dead browser does not poison every later request.

    Returns:
        The module-wide Playwright ``Browser``.

    Raises:
        Exception: whatever Playwright raises when the driver or the browser
            cannot be started. The driver is stopped again in that case.
    """
    global browser, _playwright, _browser_lock

    if browser is not None and browser.is_connected():
        return browser

    if _browser_lock is None:
        _browser_lock = asyncio.Lock()

    async with _browser_lock:
        # Another task may have finished the relaunch while we were waiting.
        if browser is not None and browser.is_connected():
            return browser

        # Dispose of whatever is left of a dead browser/driver pair.
        await close_browser()

        driver = await async_playwright().start()
        try:
            launched = await driver.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
        except Exception:
            # Do not leave an orphaned driver behind when the launch fails.
            await driver.stop()
            raise

        _playwright = driver
        browser = launched
        return browser


async def close_browser():
    """Close the shared browser and stop the Playwright driver, if present.

    The module-level handles are cleared before anything is awaited, so the
    module never keeps pointing at a half-closed browser even if closing
    raises.
    """
    global browser, _playwright

    stale_browser, stale_driver = browser, _playwright
    browser = None
    _playwright = None

    try:
        if stale_browser is not None:
            await stale_browser.close()
    finally:
        if stale_driver is not None:
            await stale_driver.stop()
# ----------------------------------------------
# Real Layout and Huggy Data
# ----------------------------------------------
# Layout catalogue returned by the get_available_layouts tool.
# Each row below is (id, display name, description).
AVAILABLE_LAYOUTS = [
    {"id": layout_id, "name": display_name, "description": summary}
    for layout_id, display_name, summary in (
        (
            "seriousCollab",
            "Serious Collab",
            "Professional collaboration layout with HF logo and partner logo placeholder",
        ),
        (
            "funCollab",
            "Fun Collab",
            "Playful collaboration layout with Huggy mascots and title text",
        ),
        (
            "sandwich",
            "Sandwich",
            "Title and subtitle with central Huggy character",
        ),
        (
            "academiaHub",
            "Academia Hub",
            "Academic-themed layout with 'Welcome Academia Hub' text",
        ),
        (
            "impactTitle",
            "Impact Title",
            "Bold impact-style title with subtitle and HF logo",
        ),
    )
]
# Sample of the available Huggys (the full list could be fetched from the
# dataset instead). Each row below is (id, display name).
# NOTE(review): the id "acedemic-huggy" is reproduced exactly as found;
# confirm the spelling against the asset it refers to before changing it.
AVAILABLE_HUGGYS = [
    {"id": huggy_id, "name": display_name}
    for huggy_id, display_name in (
        ("acedemic-huggy", "Academic Huggy"),
        ("dragon-huggy", "Dragon Huggy"),
        ("game-jam-huggy", "Game Jam Huggy"),
        ("huggy-chef", "Huggy Chef"),
        # ... more can be added
    )
]
# ----------------------------------------------
# Tool Implementations
# ----------------------------------------------
# Canvas sizes that can be selected, mapped to the label text that is clicked
# in the app's size picker.
_CANVAS_SIZE_LABELS = {
    (1200, 675): "1200×675",
    (1200, 627): "1200×627",
    (1160, 580): "1160×580",
}

# Size the canvas is assumed to have right after the app loads; no size
# selection is performed for it.
_DEFAULT_CANVAS_SIZE = (1200, 675)

# The snippets below run inside the page. Request-supplied values are handed
# over through page.evaluate()'s ``arg`` parameter and are never spliced into
# the script text, so they cannot alter the script itself.
_JS_OPEN_SIZE_PICKER = """
() => {
    const sizeButton = document.querySelector('[data-testid="canvas-size-button"]');
    if (sizeButton) sizeButton.click();
}
"""

_JS_SELECT_LAYOUT = """
async (layoutId) => {
    // Find and click the layout button
    const layoutButton = Array.from(document.querySelectorAll('button'))
        .find(btn => btn.textContent.includes('Layout'));
    if (layoutButton) layoutButton.click();
    await new Promise(resolve => setTimeout(resolve, 500));
    // Find and click the specific layout
    const layoutOption = Array.from(document.querySelectorAll('[data-layout-id]'))
        .find(el => el.getAttribute('data-layout-id') === layoutId);
    if (layoutOption) layoutOption.click();
}
"""

_JS_SET_BACKGROUND = """
(color) => {
    const bgColorInput = document.querySelector('input[type="color"]');
    if (bgColorInput) {
        bgColorInput.value = color;
        bgColorInput.dispatchEvent(new Event('input', { bubbles: true }));
        bgColorInput.dispatchEvent(new Event('change', { bubbles: true }));
    }
}
"""

_JS_CLICK_EXPORT = """
() => {
    const exportButton = Array.from(document.querySelectorAll('button'))
        .find(btn => btn.textContent.toLowerCase().includes('export'));
    if (exportButton) exportButton.click();
}
"""


async def generate_thumbnail_with_browser(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate thumbnail using real React app via Playwright.

    Opens a fresh page on the shared browser, loads APP_URL, selects the
    canvas size and layout, sets the background colour, clicks the export
    button and finally screenshots the first <canvas> element.

    Args:
        inputs: Dictionary containing:
            - layout_id (str): Layout to use (default "impactTitle")
            - title (str): Main title text (default "Untitled"). Currently
              only echoed back in the result; it is not written to the canvas.
            - subtitle (str): Accepted but not applied yet.
            - bg_color (str): Background color hex (default "#1a1a1a")
            - huggy_ids (list): Accepted but not applied yet.
            - width (int): Canvas width (1200 or 1160)
            - height (int): Canvas height (675, 627, or 580)
            A width/height pair that is not one of 1200x675, 1200x627 or
            1160x580 falls back to 1200x675, and the result reports the
            size that was actually used.

    Returns:
        On success: a dict with ``success=True``, the PNG as a base64 data
        URL under ``image``, plus ``width``, ``height``, ``format``,
        ``size_bytes``, ``layout_used`` and ``title``.
        On failure: a dict with ``success=False``, ``error`` and ``message``.
        Exceptions are reported this way rather than raised.
    """
    page = None
    try:
        shared_browser = await get_browser()
        page = await shared_browser.new_page(viewport={"width": 1920, "height": 1080})

        # Load the React app
        await page.goto(APP_URL, wait_until="networkidle")
        await asyncio.sleep(2)  # Wait for app to fully load

        # Extract parameters
        layout_id = inputs.get("layout_id", "impactTitle")
        title = inputs.get("title", "Untitled")
        bg_color = inputs.get("bg_color", "#1a1a1a")
        width = inputs.get("width", 1200)
        height = inputs.get("height", 675)

        # Unsupported sizes render at the default size, so report that size
        # instead of echoing dimensions that were never applied.
        if (width, height) not in _CANVAS_SIZE_LABELS:
            width, height = _DEFAULT_CANVAS_SIZE

        # Step 1: Set canvas size if needed
        if (width, height) != _DEFAULT_CANVAS_SIZE:
            await page.evaluate(_JS_OPEN_SIZE_PICKER)
            await asyncio.sleep(0.5)
            await page.click(f"text={_CANVAS_SIZE_LABELS[(width, height)]}")
            await asyncio.sleep(0.5)

        # Step 2: Load the layout
        await page.evaluate(_JS_SELECT_LAYOUT, str(layout_id))
        await asyncio.sleep(1)

        # Step 3: Set background color
        await page.evaluate(_JS_SET_BACKGROUND, str(bg_color))
        await asyncio.sleep(0.5)

        # Step 4: Update text content
        # TODO: not implemented. Title/subtitle are not written to the canvas;
        # doing so needs either simulated text editing or an API exposed by
        # the React app.

        # Step 5: Export the canvas
        await page.evaluate(_JS_CLICK_EXPORT)
        await asyncio.sleep(1)

        # Step 6: Capture the canvas as image
        canvas_element = await page.query_selector("canvas")
        if not canvas_element:
            raise RuntimeError("Canvas element not found")

        screenshot_bytes = await canvas_element.screenshot(type="png")
        img_base64 = base64.b64encode(screenshot_bytes).decode("utf-8")

        return {
            "success": True,
            "image": f"data:image/png;base64,{img_base64}",
            "width": width,
            "height": height,
            "format": "PNG",
            "size_bytes": len(screenshot_bytes),
            "layout_used": layout_id,
            "title": title,
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "message": "Failed to generate thumbnail with browser automation",
        }
    finally:
        # Always release the page; previously a failure left it open.
        if page is not None:
            try:
                await page.close()
            except Exception:
                # Best-effort cleanup: a failing close must not replace the
                # result (or the error) being returned above.
                pass
def get_available_layouts_tool(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """List every entry of AVAILABLE_LAYOUTS together with the entry count.

    Args:
        inputs: Tool arguments; not read by this tool.

    Returns:
        A dict with ``success`` (always True), ``layouts`` and ``count``.
    """
    layouts = AVAILABLE_LAYOUTS
    response: Dict[str, Any] = {"success": True}
    response["layouts"] = layouts
    response["count"] = len(layouts)
    return response
def get_available_huggys_tool(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """List the sample Huggy mascots held in AVAILABLE_HUGGYS.

    Args:
        inputs: Tool arguments; not read by this tool.

    Returns:
        A dict with ``success`` (always True), ``huggys``, ``count`` (size of
        the sample list), a fixed ``total_available`` figure and a ``note``
        pointing at the dataset.
    """
    huggys = AVAILABLE_HUGGYS
    response: Dict[str, Any] = {"success": True}
    response["huggys"] = huggys
    response["count"] = len(huggys)
    # Hard-coded figure; it is not derived from AVAILABLE_HUGGYS.
    response["total_available"] = 44
    response["note"] = "Full list available at https://huggingface.co/datasets/Chunte/Huggy"
    return response
# ----------------------------------------------
# MCP Protocol Implementation
# ----------------------------------------------
async def mcp_response(request: Request) -> AsyncGenerator[str, None]:
    """MCP server entry point following HuggingChat protocol.

    Reads a JSON body of the form ``{"name": <tool>, "arguments": {...}}``,
    runs the named tool and streams the outcome as two newline-terminated
    JSON documents: first ``{"output": true, "data": <result>}``, then the
    terminator ``{"output": false}``.

    Failures never propagate to the caller. An unknown tool name, or any
    exception while parsing or dispatching, is reported inside ``data`` with
    ``"success": false``.

    Args:
        request: Incoming request whose body carries the tool call.

    Yields:
        The data line followed by the terminator line.
    """
    try:
        payload = await request.json()
        if not isinstance(payload, dict):
            raise ValueError("Request body must be a JSON object")

        tool_name = payload.get("name")
        # A missing key and an explicit null both mean "no arguments".
        arguments = payload.get("arguments")
        if arguments is None:
            arguments = {}

        # Route to appropriate tool
        if tool_name == "generate_thumbnail":
            result = await generate_thumbnail_with_browser(arguments)
        elif tool_name == "get_available_layouts":
            result = get_available_layouts_tool(arguments)
        elif tool_name == "get_available_huggys":
            result = get_available_huggys_tool(arguments)
        else:
            # Carries the same "success" flag as every other failure shape.
            result = {"success": False, "error": f"Unknown tool: {tool_name}"}

        data_line = json.dumps({"output": True, "data": result})
    except Exception as e:
        data_line = json.dumps(
            {
                "output": True,
                "data": {
                    "success": False,
                    "error": str(e),
                    "message": "MCP request processing failed",
                },
            }
        )

    # Stream the response: one data line, then the end-of-stream marker.
    yield data_line + "\n"
    yield json.dumps({"output": False}) + "\n"
@app.post("/tools")
async def tools_endpoint(request: Request):
    """Handle POST /tools by streaming back whatever mcp_response yields."""
    body_stream = mcp_response(request)
    return StreamingResponse(body_stream, media_type="application/json")
@app.get("/api/info")
async def api_info():
    """Describe this server: name, version, mode, endpoint paths and tool names."""
    endpoint_paths = {
        "tools": "/tools",
        "schema": "/tools.json",
        "info": "/api/info",
    }
    tool_names = [
        "generate_thumbnail",
        "get_available_layouts",
        "get_available_huggys",
    ]
    return {
        "name": "Thumbnail Crafter MCP Server (Browser Mode)",
        "version": "2.0.0",
        "status": "active",
        "mode": "browser_automation",
        "endpoints": endpoint_paths,
        "available_tools": tool_names,
    }
@app.get("/health")
async def health_check():
    """Report a fixed "healthy" status payload; no checks are performed."""
    status_payload = {
        "status": "healthy",
        "service": "thumbnail-crafter-mcp",
        "mode": "browser",
    }
    return status_payload
# ----------------------------------------------
# Lifecycle Events
# ----------------------------------------------
@app.on_event("startup")
async def startup_event():
    """Launch the shared browser when the application starts.

    Progress is printed before and after the launch.
    """
    print("Initializing browser...")
    _ = await get_browser()  # only the launch side effect matters here
    print("Browser ready!")
@app.on_event("shutdown")
async def shutdown_event():
    """Release the shared browser when the application shuts down."""
    print("Closing browser...")
    await close_browser()
# ----------------------------------------------
# Static File Serving
# ----------------------------------------------
# Directory holding the built React frontend (output of `npm run build`).
static_dir = Path("dist")

if static_dir.exists():
    # StaticFiles refuses to be constructed for a missing directory, so only
    # mount /assets when the build actually produced an assets folder.
    _assets_dir = static_dir / "assets"
    if _assets_dir.is_dir():
        app.mount("/assets", StaticFiles(directory=_assets_dir), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend"""
        index_file = static_dir / "index.html"
        if index_file.exists():
            return FileResponse(index_file)
        return {"message": "Frontend not built"}

    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        """Serve a built file when one matches, otherwise the SPA entry page.

        Resolution order:
          1. Paths in the API namespace (``api/...``, ``tools``, ``health``)
             that no real route handled get a 404 JSON body.
          2. A path that resolves outside ``static_dir`` (for example through
             ``..`` segments) gets a 404 and is never read from disk.
          3. An existing file inside ``static_dir`` is returned as-is. This
             includes files such as ``tools.json``, which merely share a
             prefix with an API route.
          4. Anything else gets ``index.html`` so client-side routing works,
             or a 404 JSON body when the frontend has no index page.
        """
        # Imported locally: the module's import block does not provide it.
        from fastapi.responses import JSONResponse

        # Match whole path segments so that "tools.json" is not mistaken for
        # the "/tools" endpoint.
        is_api_path = full_path in ("tools", "health") or full_path.startswith(
            ("api/", "tools/", "health/")
        )
        if is_api_path:
            return JSONResponse(status_code=404, content={"error": "Not found"})

        # full_path comes straight from the URL: resolve it and require the
        # result to stay inside static_dir before touching the file system.
        try:
            candidate = (static_dir / full_path).resolve()
            candidate.relative_to(static_dir.resolve())
        except (ValueError, OSError):
            return JSONResponse(status_code=404, content={"error": "Not found"})

        if candidate.is_file():
            return FileResponse(candidate)

        index_file = static_dir / "index.html"
        if index_file.exists():
            return FileResponse(index_file)
        return JSONResponse(status_code=404, content={"error": "File not found"})

else:

    @app.get("/")
    async def root():
        """Explain that the React frontend has not been built yet."""
        return {
            "message": "Thumbnail Crafter MCP Server (Browser Mode)",
            "note": "React frontend not found. Build with 'npm run build'",
            "api_info": "/api/info",
        }
if __name__ == "__main__":
    # Imported here, so uvicorn is only needed when this file is run as a script.
    import uvicorn

    # Listen on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")