Add LLM endpoints, web frontend, and rate limiting config
- Added OpenAI-compatible LLM endpoints to API backend
- Introduced web frontend with Jinja2 templates and static assets
- Implemented API proxy routes in web service
- Added sample db.json data for items, users, orders, reviews, categories, llm_requests
- Updated ADC and Helm configs for separate AI and standard rate limiting
- Upgraded FastAPI, Uvicorn, and added httpx, Jinja2, python-multipart dependencies
- Added API configuration modal and client-side JS for web app
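As a rough usage sketch (not part of this commit), the new /llm/chat endpoint added in api/main.py can be exercised with httpx; the host, port, and prompt below are assumptions based on the Uvicorn settings at the end of the diff:

# Hypothetical client for the new /llm/chat endpoint; the base URL, port,
# and prompt values are assumptions, not defined anywhere in this commit.
import httpx

def ask_llm(prompt: str, base_url: str = "http://localhost:8001") -> dict:
    # POST a prompt and return the parsed LLMResponse payload
    resp = httpx.post(
        f"{base_url}/llm/chat",
        json={"prompt": prompt, "max_tokens": 100, "temperature": 0.5},
        timeout=60.0,
    )
    resp.raise_for_status()
    return resp.json()  # keys: response, tokens_used, model, timestamp

if __name__ == "__main__":
    print(ask_llm("Name three classic platformers."))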
api/main.py (90 lines changed)
@@ -1,8 +1,15 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
 from typing import List, Optional
+from pydantic import BaseModel
 import uvicorn
 from datetime import datetime
+from fastapi import FastAPI, HTTPException
+import os
+import httpx
+
+# OpenAI API configuration
+OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "http://localhost/api")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key")
+DEFAULT_MODEL = os.getenv("DEFAULT_LLM_MODEL", "your-model-id")
+
 app = FastAPI(
     title="API Demo Application",
@@ -119,5 +126,84 @@ async def create_user(user: User):
     users_db.append(user_dict)
     return user_dict
 
+# LLM endpoints
+class LLMRequest(BaseModel):
+    prompt: str
+    max_tokens: Optional[int] = 150
+    temperature: Optional[float] = 0.7
+    model: Optional[str] = DEFAULT_MODEL
+
+class LLMResponse(BaseModel):
+    response: str
+    tokens_used: int
+    model: str
+    timestamp: str
+
+@app.post("/llm/chat", response_model=LLMResponse, tags=["LLM"])
+async def llm_chat(request: LLMRequest):
+    """
+    LLM Chat endpoint - connects to OpenAI-compatible API (Open WebUI)
+    This endpoint is rate limited by AI token usage via API7 Gateway
+    """
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{OPENAI_API_BASE}/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json"
+                },
+                json={
+                    "model": request.model,
+                    "messages": [
+                        {"role": "user", "content": request.prompt}
+                    ],
+                    "max_tokens": request.max_tokens,
+                    "temperature": request.temperature
+                },
+                timeout=30.0
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Extract response and token usage
+            llm_response = data["choices"][0]["message"]["content"]
+            tokens_used = data.get("usage", {}).get("total_tokens", 0)
+
+            return LLMResponse(
+                response=llm_response,
+                tokens_used=tokens_used,
+                model=request.model,
+                timestamp=datetime.now().isoformat()
+            )
+    except httpx.HTTPStatusError as e:
+        raise HTTPException(status_code=e.response.status_code, detail=f"OpenAI API error: {e.response.text}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"LLM service error: {str(e)}")
+
+@app.get("/llm/models", tags=["LLM"])
+async def list_llm_models():
+    """List available LLM models"""
+    return {
+        "models": [
+            {"id": "videogame-expert", "name": "Videogame Expert", "max_tokens": 4096, "provider": "Open WebUI"}
+        ],
+        "default_model": DEFAULT_MODEL,
+        "timestamp": datetime.now().isoformat()
+    }
+
+@app.get("/llm/health", tags=["LLM"])
+async def llm_health():
+    """LLM service health check"""
+    return {
+        "status": "healthy",
+        "service": "llm-api",
+        "provider": "Open WebUI",
+        "endpoint": OPENAI_API_BASE,
+        "default_model": DEFAULT_MODEL,
+        "rate_limit": "ai-rate-limiting enabled (100 tokens/60s)",
+        "timestamp": datetime.now().isoformat()
+    }
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8001)
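The llm_chat docstring and the /llm/health payload above note that AI-token rate limiting (100 tokens per 60 s) is enforced upstream by the API7 Gateway rather than by this code. A minimal, hypothetical client-side backoff sketch for a rate-limited response follows; the gateway URL, the 429 status handling, the Retry-After header, and the retry policy are all assumptions for illustration only:

# Hypothetical retry helper for gateway-enforced AI rate limits.
# The endpoint URL, 429 handling, and backoff values are assumptions.
import time
import httpx

def chat_with_backoff(prompt: str, url: str = "http://localhost:8001/llm/chat", retries: int = 3) -> dict:
    for attempt in range(retries):
        resp = httpx.post(url, json={"prompt": prompt}, timeout=60.0)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp.json()
        # Honor Retry-After if the gateway provides it, else back off linearly
        wait = float(resp.headers.get("Retry-After", 5 * (attempt + 1)))
        time.sleep(wait)
    raise RuntimeError("rate limit not lifted after retries")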