Add LLM endpoints, web frontend, and rate limiting config
Some checks failed
Helm Chart Build / lint-only (push) Has been skipped
Helm Chart Build / build-helm (push) Successful in 9s
Build and Deploy / build-api (push) Successful in 33s
Build and Deploy / build-web (push) Failing after 41s

- Added OpenAI-compatible LLM endpoints to API backend
- Introduced web frontend with Jinja2 templates and static assets
- Implemented API proxy routes in web service (see the sketch below)
- Added sample db.json data for items, users, orders, reviews, categories, llm_requests
- Updated ADC and Helm configs for separate AI and standard rate limiting
- Upgraded FastAPI, Uvicorn, and added httpx, Jinja2, python-multipart dependencies
- Added API configuration modal and client-side JS for web app
d.viti
2025-10-07 17:29:12 +02:00
parent 78baa5ad21
commit ed660dce5a
16 changed files with 1551 additions and 138 deletions
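
For context on the "API proxy routes in web service" bullet (that file is not shown in this excerpt), here is a minimal sketch of what such a route might look like. The route path, the API_BASE_URL variable, and the app name are assumptions for illustration, not the commit's actual code:

# Hypothetical sketch of a web-service proxy route (not the commit's actual code).
# Assumes the API backend address is supplied via an API_BASE_URL environment variable.
import os

import httpx
from fastapi import FastAPI, Request, Response

web_app = FastAPI(title="Web Frontend")
API_BASE_URL = os.getenv("API_BASE_URL", "http://api:8001")  # assumed default

@web_app.api_route("/api/{path:path}", methods=["GET", "POST"])
async def proxy_api(path: str, request: Request) -> Response:
    """Forward the incoming request to the API backend and relay its response."""
    async with httpx.AsyncClient() as client:
        upstream = await client.request(
            request.method,
            f"{API_BASE_URL}/{path}",
            content=await request.body(),
            headers={"Content-Type": request.headers.get("content-type", "application/json")},
            timeout=30.0,
        )
    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        media_type=upstream.headers.get("content-type"),
    )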


@@ -1,8 +1,15 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
import uvicorn
from datetime import datetime
import os
import httpx

# OpenAI API configuration
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "http://localhost/api")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key")
DEFAULT_MODEL = os.getenv("DEFAULT_LLM_MODEL", "your-model-id")

app = FastAPI(
    title="API Demo Application",
@@ -119,5 +126,84 @@ async def create_user(user: User):
    users_db.append(user_dict)
    return user_dict


# LLM endpoints
class LLMRequest(BaseModel):
    prompt: str
    max_tokens: Optional[int] = 150
    temperature: Optional[float] = 0.7
    model: Optional[str] = DEFAULT_MODEL


class LLMResponse(BaseModel):
    response: str
    tokens_used: int
    model: str
    timestamp: str


@app.post("/llm/chat", response_model=LLMResponse, tags=["LLM"])
async def llm_chat(request: LLMRequest):
    """
    LLM Chat endpoint - connects to OpenAI-compatible API (Open WebUI).
    This endpoint is rate limited by AI token usage via API7 Gateway.
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{OPENAI_API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": request.model,
                    "messages": [
                        {"role": "user", "content": request.prompt}
                    ],
                    "max_tokens": request.max_tokens,
                    "temperature": request.temperature
                },
                timeout=30.0
            )
            response.raise_for_status()
            data = response.json()

            # Extract response and token usage
            llm_response = data["choices"][0]["message"]["content"]
            tokens_used = data.get("usage", {}).get("total_tokens", 0)

            return LLMResponse(
                response=llm_response,
                tokens_used=tokens_used,
                model=request.model,
                timestamp=datetime.now().isoformat()
            )
    except httpx.HTTPStatusError as e:
        raise HTTPException(status_code=e.response.status_code, detail=f"OpenAI API error: {e.response.text}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"LLM service error: {str(e)}")


@app.get("/llm/models", tags=["LLM"])
async def list_llm_models():
    """List available LLM models"""
    return {
        "models": [
            {"id": "videogame-expert", "name": "Videogame Expert", "max_tokens": 4096, "provider": "Open WebUI"}
        ],
        "default_model": DEFAULT_MODEL,
        "timestamp": datetime.now().isoformat()
    }


@app.get("/llm/health", tags=["LLM"])
async def llm_health():
    """LLM service health check"""
    return {
        "status": "healthy",
        "service": "llm-api",
        "provider": "Open WebUI",
        "endpoint": OPENAI_API_BASE,
        "default_model": DEFAULT_MODEL,
        "rate_limit": "ai-rate-limiting enabled (100 tokens/60s)",
        "timestamp": datetime.now().isoformat()
    }


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8001)
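
For reference, a quick client-side sketch of exercising the new /llm/chat endpoint. The base URL (taken from the uvicorn.run call above) and the prompt text are assumptions for illustration:

# Hypothetical client call against the new /llm/chat endpoint.
# Assumes the API backend is reachable at http://localhost:8001.
import httpx

payload = {"prompt": "Name three classic platformer games.", "max_tokens": 100}
resp = httpx.post("http://localhost:8001/llm/chat", json=payload, timeout=60.0)
resp.raise_for_status()
data = resp.json()
print(data["model"], data["tokens_used"])
print(data["response"])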