Add LLM endpoints, web frontend, and rate limiting config
- Added OpenAI-compatible LLM endpoints to API backend
- Introduced web frontend with Jinja2 templates and static assets
- Implemented API proxy routes in web service
- Added sample db.json data for items, users, orders, reviews, categories, llm_requests
- Updated ADC and Helm configs for separate AI and standard rate limiting
- Upgraded FastAPI, Uvicorn, and added httpx, Jinja2, python-multipart dependencies
- Added API configuration modal and client-side JS for web app
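As a rough usage sketch (not part of this commit), the new /llm/chat endpoint added in api/main.py can be exercised with httpx; the host, port, and prompt below are assumptions based on the Uvicorn settings at the end of the diff:

# Hypothetical client for the new /llm/chat endpoint; the base URL, port,
# and prompt values are assumptions, not defined anywhere in this commit.
import httpx

def ask_llm(prompt: str, base_url: str = "http://localhost:8001") -> dict:
    # POST a prompt and return the parsed LLMResponse payload
    resp = httpx.post(
        f"{base_url}/llm/chat",
        json={"prompt": prompt, "max_tokens": 100, "temperature": 0.5},
        timeout=60.0,
    )
    resp.raise_for_status()
    return resp.json()  # keys: response, tokens_used, model, timestamp

if __name__ == "__main__":
    print(ask_llm("Name three classic platformers."))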
api/main.py (90 lines changed)
@@ -1,8 +1,15 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
 from typing import List, Optional
+from pydantic import BaseModel
 import uvicorn
 from datetime import datetime
+from fastapi import FastAPI, HTTPException
+import os
+import httpx
+
+# OpenAI API configuration
+OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "http://localhost/api")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key")
+DEFAULT_MODEL = os.getenv("DEFAULT_LLM_MODEL", "your-model-id")
+
 app = FastAPI(
     title="API Demo Application",
@@ -119,5 +126,84 @@ async def create_user(user: User):
     users_db.append(user_dict)
     return user_dict
 
+# LLM endpoints
+class LLMRequest(BaseModel):
+    prompt: str
+    max_tokens: Optional[int] = 150
+    temperature: Optional[float] = 0.7
+    model: Optional[str] = DEFAULT_MODEL
+
+class LLMResponse(BaseModel):
+    response: str
+    tokens_used: int
+    model: str
+    timestamp: str
+
+@app.post("/llm/chat", response_model=LLMResponse, tags=["LLM"])
+async def llm_chat(request: LLMRequest):
+    """
+    LLM Chat endpoint - connects to OpenAI-compatible API (Open WebUI)
+    This endpoint is rate limited by AI token usage via API7 Gateway
+    """
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{OPENAI_API_BASE}/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json"
+                },
+                json={
+                    "model": request.model,
+                    "messages": [
+                        {"role": "user", "content": request.prompt}
+                    ],
+                    "max_tokens": request.max_tokens,
+                    "temperature": request.temperature
+                },
+                timeout=30.0
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Extract response and token usage
+            llm_response = data["choices"][0]["message"]["content"]
+            tokens_used = data.get("usage", {}).get("total_tokens", 0)
+
+            return LLMResponse(
+                response=llm_response,
+                tokens_used=tokens_used,
+                model=request.model,
+                timestamp=datetime.now().isoformat()
+            )
+    except httpx.HTTPStatusError as e:
+        raise HTTPException(status_code=e.response.status_code, detail=f"OpenAI API error: {e.response.text}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"LLM service error: {str(e)}")
+
+@app.get("/llm/models", tags=["LLM"])
+async def list_llm_models():
+    """List available LLM models"""
+    return {
+        "models": [
+            {"id": "videogame-expert", "name": "Videogame Expert", "max_tokens": 4096, "provider": "Open WebUI"}
+        ],
+        "default_model": DEFAULT_MODEL,
+        "timestamp": datetime.now().isoformat()
+    }
+
+@app.get("/llm/health", tags=["LLM"])
+async def llm_health():
+    """LLM service health check"""
+    return {
+        "status": "healthy",
+        "service": "llm-api",
+        "provider": "Open WebUI",
+        "endpoint": OPENAI_API_BASE,
+        "default_model": DEFAULT_MODEL,
+        "rate_limit": "ai-rate-limiting enabled (100 tokens/60s)",
+        "timestamp": datetime.now().isoformat()
+    }
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8001)
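The llm_chat docstring and the /llm/health payload above note that AI-token rate limiting (100 tokens per 60 s) is enforced upstream by the API7 Gateway rather than by this code. A minimal, hypothetical client-side backoff sketch for a rate-limited response follows; the gateway URL, the 429 status handling, the Retry-After header, and the retry policy are all assumptions for illustration only:

# Hypothetical retry helper for gateway-enforced AI rate limits.
# The endpoint URL, 429 handling, and backoff values are assumptions.
import time
import httpx

def chat_with_backoff(prompt: str, url: str = "http://localhost:8001/llm/chat", retries: int = 3) -> dict:
    for attempt in range(retries):
        resp = httpx.post(url, json={"prompt": prompt}, timeout=60.0)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp.json()
        # Honor Retry-After if the gateway provides it, else back off linearly
        wait = float(resp.headers.get("Retry-After", 5 * (attempt + 1)))
        time.sleep(wait)
    raise RuntimeError("rate limit not lifted after retries")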