Files
llm-automation-docs-and-rem…/api/main.py
LLM Automation System 1ba5ce851d Initial commit: LLM Automation Docs & Remediation Engine v2.0
Features:
- Automated datacenter documentation generation
- MCP integration for device connectivity
- Auto-remediation engine with safety checks
- Multi-factor reliability scoring (0-100%)
- Human feedback learning loop
- Pattern recognition and continuous improvement
- Agentic chat support with AI
- API for ticket resolution
- React frontend with Material-UI
- CI/CD pipelines (GitLab + Gitea)
- Docker & Kubernetes deployment
- Complete documentation and guides

v2.0 Highlights:
- Auto-remediation with write operations (disabled by default)
- Reliability calculator with 4-factor scoring
- Human feedback system for continuous learning
- Pattern-based progressive automation
- Approval workflow for critical actions
- Full audit trail and rollback capability
2025-10-17 23:47:28 +00:00

409 lines
12 KiB
Python

"""
FastAPI Application - Datacenter Documentation Server
Serve la documentazione compilata con MkDocs e fornisce API REST
"""
from fastapi import FastAPI, HTTPException, Request, Query
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
from datetime import datetime
import os
import json
import markdown
from pathlib import Path
import logging
# Logging configuration: timestamped records, INFO level, process-wide
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# FastAPI application instance. The interactive API docs live under /api/*
# so the /docs path stays free for the MkDocs static site mounted at the
# bottom of this file.
app = FastAPI(
    title="Datacenter Documentation API",
    description="API REST per accedere alla documentazione del datacenter. Ottimizzata per lettura umana e LLM.",
    version="1.0.0",
    docs_url="/api/docs",
    redoc_url="/api/redoc",
    openapi_url="/api/openapi.json"
)
# Middleware
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # overly permissive (and browsers reject credentialed requests against a
    # literal "*") — confirm whether an explicit origin whitelist is intended.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Gzip-compress responses larger than ~1 KB (markdown payloads can be big)
app.add_middleware(GZipMiddleware, minimum_size=1000)
# Filesystem layout (container paths)
DOCS_DIR = Path("/app/site")               # MkDocs-compiled static site (mounted at /docs)
MARKDOWN_DIR = Path("/app/docs/sections")  # raw markdown sources served by the API
# Models
class DocumentMetadata(BaseModel):
    """Metadata describing a single documentation section file."""
    id: str              # section identifier (markdown file stem)
    title: str           # first level-1 heading found in the file
    section: str         # section group: portion of the id before the first '_'
    last_updated: str    # date string from the file, or an ISO timestamp fallback
    size_bytes: int      # file size on disk
    token_estimate: int  # rough LLM token estimate (~4 chars per token)
    url: str             # human-facing page in the MkDocs site
    api_url: str         # REST endpoint returning this section's content
class DocumentContent(BaseModel):
    """Full content of a document in the requested representation."""
    metadata: DocumentMetadata
    content: str  # body text in the representation named by `format`
    format: str   # one of: markdown | html | json
class SectionSummary(BaseModel):
    """Compact per-section overview intended for LLM consumption."""
    section_id: str         # markdown file stem
    title: str              # first level-1 heading (or prettified file stem)
    key_points: List[str]   # first few level-2 headings
    subsections: List[str]  # all level-2 headings
    last_updated: str       # date string from the file, or "now" fallback
class SearchResult(BaseModel):
    """Single full-text search hit."""
    section: str            # markdown file stem containing the match
    title: str              # first line of the matching file (raw, may include '# ')
    excerpt: str            # truncated context around the matching line
    url: str                # MkDocs page for the section
    relevance_score: float  # simple score: query occurrences / matching line length
# Utility functions
def estimate_tokens(text: str, chars_per_token: int = 4) -> int:
    """Roughly estimate the number of LLM tokens in *text*.

    Uses the common ~4-characters-per-token heuristic by default; the ratio
    is now a parameter so callers can tune it for models with a different
    tokenization density (backward-compatible generalization).

    Args:
        text: Input string (may be empty).
        chars_per_token: Average characters per token; must be positive.

    Returns:
        Estimated token count (floor division, so short strings may yield 0).
    """
    return len(text) // chars_per_token
def get_markdown_files() -> List[Path]:
    """Return every *.md file under MARKDOWN_DIR, or an empty list if the
    directory does not exist."""
    if not MARKDOWN_DIR.exists():
        return []
    return list(MARKDOWN_DIR.glob("*.md"))
def parse_markdown_metadata(file_path: Path) -> Dict[str, Any]:
    """Extract lightweight metadata from a markdown section file.

    Scans for a level-1 heading (title) and an explicit
    '**Ultimo Aggiornamento**:' marker (last-updated date) without doing a
    full markdown parse.

    Args:
        file_path: Path to an existing UTF-8 markdown file.

    Returns:
        Dict with keys 'title', 'last_updated', 'size' (bytes) and
        'tokens' (rough ~4-chars-per-token estimate).
    """
    content = file_path.read_text(encoding='utf-8')
    lines = content.split('\n')

    # Title: first level-1 heading. Strip only the leading '# ' marker —
    # the previous str.replace('# ', '') also removed legitimate '# '
    # sequences occurring inside the title text itself.
    title = "Unknown"
    for line in lines:
        if line.startswith('# '):
            title = line[2:].strip()
            break

    # Last-updated: explicit marker if present and not the template
    # placeholder; otherwise fall back to "now".
    last_updated = datetime.now().isoformat()
    for line in lines:
        if '**Ultimo Aggiornamento**:' in line:
            date_str = line.split(':', 1)[1].strip()
            if date_str != '[DATA_AGGIORNAMENTO]':
                last_updated = date_str
            break

    return {
        'title': title,
        'last_updated': last_updated,
        'size': file_path.stat().st_size,
        # Same heuristic as estimate_tokens(): ~4 characters per token.
        'tokens': len(content) // 4
    }
# Routes
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a tiny HTML page that immediately redirects to the MkDocs site at /docs/."""
    # Client-side meta-refresh redirect (with a fallback link) rather than an HTTP 3xx.
    return """
<!DOCTYPE html>
<html>
<head>
<title>Datacenter Documentation</title>
<meta http-equiv="refresh" content="0; url=/docs/">
</head>
<body>
<p>Reindirizzamento alla <a href="/docs/">documentazione</a>...</p>
</body>
</html>
"""
@app.get("/health")
async def health_check():
    """Liveness probe: report service identity, version and current server time."""
    now = datetime.now().isoformat()
    return {
        "status": "healthy",
        "timestamp": now,
        "service": "datacenter-docs",
        "version": "1.0.0",
    }
@app.get("/api/v1/sections", response_model=List[DocumentMetadata])
async def list_sections():
    """
    List every available documentation section, sorted by id.

    Returns metadata only (no content) — intended for discovery by LLM clients.
    """
    entries = []
    for md_file in get_markdown_files():
        meta = parse_markdown_metadata(md_file)
        sid = md_file.stem
        entries.append(DocumentMetadata(
            id=sid,
            title=meta['title'],
            # Section group is the id prefix before the first underscore
            section=sid.split('_')[0],
            last_updated=meta['last_updated'],
            size_bytes=meta['size'],
            token_estimate=meta['tokens'],
            url=f"/docs/sections/{sid}/",
            api_url=f"/api/v1/sections/{sid}",
        ))
    entries.sort(key=lambda e: e.id)
    return entries
@app.get("/api/v1/sections/{section_id}", response_model=DocumentContent)
async def get_section(
    section_id: str,
    format: str = Query("markdown", regex="^(markdown|html|json)$")
):
    """
    Return the content of one section in the requested representation.

    Formats:
    - markdown: raw markdown (best for LLMs)
    - html: rendered HTML (for browsers)
    - json: structured per-subsection content (for parsing)

    Raises:
        HTTPException 404: unknown or malformed section_id.
    """
    # Security: section_id is interpolated into a filesystem path — reject
    # anything that could escape MARKDOWN_DIR (e.g. '../../etc/passwd').
    if '/' in section_id or '\\' in section_id or '..' in section_id:
        raise HTTPException(status_code=404, detail=f"Sezione {section_id} non trovata")
    file_path = MARKDOWN_DIR / f"{section_id}.md"
    if not file_path.exists():
        raise HTTPException(status_code=404, detail=f"Sezione {section_id} non trovata")
    content = file_path.read_text(encoding='utf-8')
    metadata = parse_markdown_metadata(file_path)
    # Convert to the requested representation (raw markdown needs no work)
    if format == "html":
        content = markdown.markdown(content, extensions=['tables', 'fenced_code'])
    elif format == "json":
        # Split on level-2 headings; chunk 0 is the document header and is skipped
        structured = {
            "title": metadata['title'],
            "sections": []
        }
        for chunk in content.split('\n## ')[1:]:
            parts = chunk.split('\n', 1)
            if len(parts) == 2:
                structured["sections"].append({
                    "heading": parts[0],
                    "content": parts[1]
                })
        content = json.dumps(structured, indent=2, ensure_ascii=False)
    doc_metadata = DocumentMetadata(
        id=section_id,
        title=metadata['title'],
        section=section_id.split('_')[0],
        last_updated=metadata['last_updated'],
        size_bytes=metadata['size'],
        token_estimate=metadata['tokens'],
        url=f"/docs/sections/{section_id}/",
        api_url=f"/api/v1/sections/{section_id}"
    )
    return DocumentContent(
        metadata=doc_metadata,
        content=content,
        format=format
    )
@app.get("/api/v1/summary", response_model=List[SectionSummary])
async def get_summary():
    """
    Per-section summaries of all documentation — optimized for LLMs.

    Gives a quick overview (title, key headings, last-updated) without
    returning full section content.
    """
    summaries = []
    for file_path in get_markdown_files():
        content = file_path.read_text(encoding='utf-8')
        lines = content.split('\n')

        # Title: first level-1 heading, else a prettified file stem
        title = file_path.stem.replace('_', ' ').title()
        for line in lines:
            if line.startswith('# '):
                title = line.replace('# ', '').strip()
                break

        # Key points = first 5 level-2 headings; subsections = all of them
        key_points = []
        subsections = []
        for line in lines:
            if line.startswith('## '):
                heading = line.replace('## ', '').strip()
                subsections.append(heading)
                if len(key_points) < 5:
                    key_points.append(heading)

        # Last-updated marker. str.partition instead of split(':')[1] so a
        # marker line that happens to contain no colon no longer raises
        # IndexError (the marker checked here, unlike the one in
        # parse_markdown_metadata, does not include the colon).
        last_updated = datetime.now().isoformat()
        for line in lines:
            if '**Ultimo Aggiornamento**' in line:
                _, sep, date_str = line.partition(':')
                if sep:
                    last_updated = date_str.strip()
                break

        summaries.append(SectionSummary(
            section_id=file_path.stem,
            title=title,
            key_points=key_points,
            subsections=subsections,
            last_updated=last_updated
        ))
    return summaries
@app.get("/api/v1/search")
async def search_documentation(
    q: str = Query(..., min_length=3),
    limit: int = Query(10, ge=1, le=50)
):
    """
    Full-text search across the documentation.

    Case-insensitive substring match, line by line, over every markdown file.
    Collection stops as soon as `limit` hits are gathered, so results are
    biased toward files yielded first by get_markdown_files(); the gathered
    hits are then sorted by relevance score, best first.
    """
    results = []
    markdown_files = get_markdown_files()
    query = q.lower()
    for file_path in markdown_files:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        lines = content.split('\n')
        # Scan each line for the query substring
        for i, line in enumerate(lines):
            if query in line.lower():
                # Excerpt: two lines of context on either side of the hit
                start = max(0, i - 2)
                end = min(len(lines), i + 3)
                excerpt = ' '.join(lines[start:end])
                # Simplified relevance: query occurrences divided by line
                # length (favors short, dense matching lines)
                relevance = line.lower().count(query) / len(line) if line else 0
                results.append(SearchResult(
                    section=file_path.stem,
                    title=lines[0] if lines else '',
                    excerpt=excerpt[:200] + '...',
                    url=f"/docs/sections/{file_path.stem}/",
                    relevance_score=relevance
                ))
                # Stop both loops once enough hits are collected
                if len(results) >= limit:
                    break
        if len(results) >= limit:
            break
    # Sort the collected hits by relevance
    results.sort(key=lambda x: x.relevance_score, reverse=True)
    return results[:limit]
@app.get("/api/v1/stats")
async def get_statistics():
    """
    Aggregate statistics over all documentation sections.
    """
    per_section = []
    total_size = 0
    total_tokens = 0
    for md_file in get_markdown_files():
        meta = parse_markdown_metadata(md_file)
        total_size += meta['size']
        total_tokens += meta['tokens']
        per_section.append({
            'id': md_file.stem,
            'title': meta['title'],
            'size': meta['size'],
            'tokens': meta['tokens'],
        })
    return {
        "total_sections": len(per_section),
        "total_size_bytes": total_size,
        "total_size_mb": round(total_size / 1024 / 1024, 2),
        "total_tokens_estimate": total_tokens,
        "sections": per_section,
        "generated_at": datetime.now().isoformat()
    }
@app.get("/api/v1/llm-optimized/{section_id}")
async def get_llm_optimized_content(section_id: str):
    """
    Section content optimized for LLM consumption.

    Strips blank lines to reduce token usage and wraps the content with
    explicit metadata and usage instructions.

    Raises:
        HTTPException 404: unknown or malformed section_id.
    """
    # Security: section_id is interpolated into a filesystem path — reject
    # anything that could escape MARKDOWN_DIR (e.g. '../../etc/passwd').
    if '/' in section_id or '\\' in section_id or '..' in section_id:
        raise HTTPException(status_code=404, detail=f"Sezione {section_id} non trovata")
    file_path = MARKDOWN_DIR / f"{section_id}.md"
    if not file_path.exists():
        raise HTTPException(status_code=404, detail=f"Sezione {section_id} non trovata")
    content = file_path.read_text(encoding='utf-8')
    # Drop blank/whitespace-only lines: cheapest token reduction that keeps
    # the markdown structure intact.
    cleaned = '\n'.join(line for line in content.split('\n') if line.strip())
    metadata = parse_markdown_metadata(file_path)
    return {
        "section_id": section_id,
        "title": metadata['title'],
        "last_updated": metadata['last_updated'],
        "token_count": metadata['tokens'],
        "content": cleaned,
        "format": "cleaned_markdown",
        "llm_instructions": {
            "purpose": "Datacenter infrastructure documentation",
            "structure": "Hierarchical markdown with tables and code blocks",
            "usage": "Reference for infrastructure queries and analysis"
        }
    }
# Mount the MkDocs-compiled static site under /docs (only when it was built;
# the API endpoints above keep working even if the site directory is absent)
if DOCS_DIR.exists():
    app.mount("/docs", StaticFiles(directory=str(DOCS_DIR), html=True), name="docs")
    logger.info(f"Mounted documentation from {DOCS_DIR}")
# Run a development server when the module is executed directly
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)