import base64
import hashlib
import os
from typing import Any, Dict, List, Optional

import gradio as gr
import requests

GH = "https://api.github.com"
TOKEN = os.getenv("GITHUB_TOKEN")
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")


def _hdr() -> Dict[str, str]:
    # Send the Authorization header only when a token is configured; public repos
    # still work without one, just with a lower rate limit.
    headers = {"Accept": "application/vnd.github+json"}
    if TOKEN:
        headers["Authorization"] = f"Bearer {TOKEN}"
    return headers


def _sha256(b: bytes) -> str:
    return hashlib.sha256(b).hexdigest()


def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch README content from the repository for context."""
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}", headers=_hdr())
        r.raise_for_status()
        j = r.json()
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        return f"Error fetching README: {e}"


def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technologies from the rules directory."""
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1", headers=_hdr())
        r.raise_for_status()
        technologies = []
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                # Extract the technology name from the directory structure
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    # Clean up the directory name to extract the technology
                    tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
                    if tech_name not in technologies:
                        technologies.append(tech_name)
        return sorted(technologies)
    except Exception as e:
        return [f"Error: {e}"]


def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies."""
    # Common technology mappings, used as a fallback when there is no direct or partial match.
    tech_mappings = {
        "python": ["python", "django", "fastapi", "flask"],
        "javascript": ["javascript", "js", "node", "react", "vue", "angular"],
        "typescript": ["typescript", "ts", "react", "angular", "nextjs"],
        "react": ["react", "nextjs", "typescript"],
        "vue": ["vue", "vuejs", "nuxt"],
        "node": ["node", "nodejs", "javascript"],
        "postgres": ["postgres", "postgresql", "database"],
        "fastapi": ["fastapi", "python", "api"],
        "nextjs": ["nextjs", "next", "react", "typescript"],
    }

    matches = {}
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched_techs = []
        for available in available_techs:
            available_lower = available.lower()
            # Direct match
            if requested_lower == available_lower:
                matched_techs.append(available)
                continue
            # Partial match (either name contains the other)
            if requested_lower in available_lower or available_lower in requested_lower:
                matched_techs.append(available)
                continue
            # Fallback: check whether the requested tech maps to the available tech
            if requested_lower in tech_mappings:
                for mapped_tech in tech_mappings[requested_lower]:
                    if mapped_tech in available_lower:
                        matched_techs.append(available)
                        break
        matches[requested] = matched_techs
    return matches


def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata."""
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1", headers=_hdr())
        r.raise_for_status()
        rules = []
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    tech_dir = path_parts[1]
                    tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
                    if not tech_key or tech_key.lower() in tech_name.lower():
                        rules.append({
                            "tech_key": tech_name,
                            "directory": tech_dir,
                            "path": item["path"],
                            "repo": RULES_REPO,
                            "commit_sha": ref,
                            "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}",
                        })
        return rules
    except Exception as e:
        return [{"error": str(e)}]


def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory."""
    ref = ref or DEFAULT_REF
    try:
        # Get the files in the specific rule directory
        r = requests.get(f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}", headers=_hdr())
        r.raise_for_status()
        files = r.json()
        if not isinstance(files, list):
            files = [files]

        # Look for .cursorrules or .md files
        rule_file = None
        for file in files:
            if file["name"].endswith((".cursorrules", ".md")):
                rule_file = file
                break

        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}

        # Fetch the file content
        content_r = requests.get(rule_file["download_url"])
        content_r.raise_for_status()

        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),
            "url": rule_file["html_url"],
        }
    except Exception as e:
        return {"error": str(e)}


def fetch_rule(tech_key: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch a rule, falling back to semantic matching."""
    ref = ref or DEFAULT_REF

    # First try a direct match
    rules = list_rules(tech_key=tech_key, ref=ref)
    if rules and "error" not in rules[0]:
        return fetch_rule_content(rules[0]["directory"], ref)

    # If there is no direct match, try semantic matching
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies([tech_key], available_techs)
    if tech_key in matches and matches[tech_key]:
        # Take the first match and rebuild its directory name from the cleaned tech name
        best_match = matches[tech_key][0]
        tech_directory = best_match.replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(tech_directory, ref)

    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}


def get_guidelines_for_stack(tech_stack: List[str], ref: Optional[str] = None) -> Dict[str, Any]:
    """Get coding guidelines for multiple technologies in a stack."""
    ref = ref or DEFAULT_REF
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)

    guidelines = {}
    for requested_tech, matched_techs in matches.items():
        guidelines[requested_tech] = []
        for matched_tech in matched_techs[:3]:  # Limit to the top 3 matches
            tech_directory = matched_tech.replace(" ", "-") + "-cursorrules-prompt-file"
            rule_content = fetch_rule_content(tech_directory, ref)
            if "error" not in rule_content:
                guidelines[requested_tech].append(rule_content)

    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref,
    }


def analyze_project_stack(framework_list: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines."""
    ref = ref or DEFAULT_REF

    # Parse the framework list (comma-separated or newline-separated)
    techs = []
    for line in framework_list.replace(",", "\n").split("\n"):
        tech = line.strip()
        if tech:
            techs.append(tech)

    if not techs:
        return {"error": "No technologies found in the provided list"}

    # Get the README for context
    readme_content = get_readme_content(ref)

    # Get guidelines for the entire stack
    stack_guidelines = get_guidelines_for_stack(techs, ref)

    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": (readme_content[:1000] + "...") if len(readme_content) > 1000 else readme_content,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {len([g for g in stack_guidelines['guidelines'].values() if g])} out of {len(techs)} requested technologies",
    }


# Gradio interface
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")

    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output,
        )

    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack",
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3,
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output,
        )

    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output,
        )

    # Register MCP API endpoints
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
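
# Example (sketch, not part of the server): the functions above can also be exercised
# directly, without the UI, to smoke-test GitHub access before launching. This assumes
# network access and that GITHUB_TOKEN is set (or that RULES_REPO is publicly readable):
#
#     print(extract_available_technologies("main")[:10])
#     print(fetch_rule("fastapi").get("filename"))
#     print(analyze_project_stack("python, react, postgres").get("summary"))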