diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 891eca7..0000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,158 +0,0 @@
-services:
-  # MongoDB database
-  mongodb:
-    image: mongo:7.0
-    environment:
-      MONGO_INITDB_ROOT_USERNAME: ${MONGO_ROOT_USER:-admin}
-      MONGO_INITDB_ROOT_PASSWORD: ${MONGO_ROOT_PASSWORD}
-      MONGO_INITDB_DATABASE: datacenter_docs
-    volumes:
-      - mongodb_data:/data/db
-      - mongodb_config:/data/configdb
-    networks:
-      - backend
-    healthcheck:
-      test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-    command: ["--auth"]
-
-  # Redis cache
-  redis:
-    image: redis:7-alpine
-    command: redis-server --requirepass ${REDIS_PASSWORD}
-    volumes:
-      - redis_data:/data
-    networks:
-      - backend
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 3s
-      retries: 5
-
-  # API Service
-  api:
-    build:
-      context: .
-      dockerfile: deploy/docker/Dockerfile.api
-    ports:
-      - "8000:8000"
-    env_file:
-      - .env
-    environment:
-      MONGODB_URL: mongodb://${MONGO_ROOT_USER:-admin}:${MONGO_ROOT_PASSWORD}@mongodb:27017
-      MONGODB_DATABASE: datacenter_docs
-      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
-      MCP_SERVER_URL: ${MCP_SERVER_URL}
-      MCP_API_KEY: ${MCP_API_KEY}
-      CORS_ORIGINS: ${CORS_ORIGINS:-*}
-    volumes:
-      - ./output:/app/output
-      - ./data:/app/data
-      - ./logs:/app/logs
-    networks:
-      - frontend
-      - backend
-    depends_on:
-      mongodb:
-        condition: service_healthy
-      redis:
-        condition: service_healthy
-    restart: unless-stopped
-
-  # Chat Service
-  chat:
-    build:
-      context: .
-      dockerfile: deploy/docker/Dockerfile.chat
-    ports:
-      - "8001:8001"
-    env_file:
-      - .env
-    environment:
-      MONGODB_URL: mongodb://${MONGO_ROOT_USER:-admin}:${MONGO_ROOT_PASSWORD}@mongodb:27017
-      MONGODB_DATABASE: datacenter_docs
-      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
-      MCP_SERVER_URL: ${MCP_SERVER_URL}
-      MCP_API_KEY: ${MCP_API_KEY}
-    volumes:
-      - ./output:/app/output
-      - ./data:/app/data
-      - ./logs:/app/logs
-    networks:
-      - frontend
-      - backend
-    depends_on:
-      mongodb:
-        condition: service_healthy
-      redis:
-        condition: service_healthy
-    restart: unless-stopped
-
-  # Celery Worker
-  worker:
-    build:
-      context: .
-      dockerfile: deploy/docker/Dockerfile.worker
-    env_file:
-      - .env
-    environment:
-      MONGODB_URL: mongodb://${MONGO_ROOT_USER:-admin}:${MONGO_ROOT_PASSWORD}@mongodb:27017
-      MONGODB_DATABASE: datacenter_docs
-      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
-      MCP_SERVER_URL: ${MCP_SERVER_URL}
-      MCP_API_KEY: ${MCP_API_KEY}
-    volumes:
-      - ./output:/app/output
-      - ./data:/app/data
-      - ./logs:/app/logs
-    networks:
-      - backend
-    depends_on:
-      mongodb:
-        condition: service_healthy
-      redis:
-        condition: service_healthy
-    restart: unless-stopped
-    deploy:
-      replicas: 2
-
-  # Flower - Celery monitoring
-  flower:
-    image: mher/flower:2.0
-    command: celery --broker=redis://:${REDIS_PASSWORD}@redis:6379/0 flower --port=5555
-    ports:
-      - "5555:5555"
-    environment:
-      CELERY_BROKER_URL: redis://:${REDIS_PASSWORD}@redis:6379/0
-    networks:
-      - frontend
-      - backend
-    depends_on:
-      - redis
-    restart: unless-stopped
-
-  # Frontend
-  frontend:
-    build:
-      context: .
-      dockerfile: deploy/docker/Dockerfile.frontend
-    ports:
-      - "80:80"
-    networks:
-      - frontend
-    depends_on:
-      - api
-      - chat
-    restart: unless-stopped
-
-volumes:
-  mongodb_config:
-  mongodb_data:
-  redis_data:
-
-networks:
-  frontend:
-  backend:
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
deleted file mode 100644
index 0661a65..0000000
--- a/docker-entrypoint.sh
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/bin/bash
-set -e
-
-# Function to wait for a service to become reachable
-wait_for_service() {
-    local host=$1
-    local port=$2
-    local max_attempts=30
-    local attempt=0
-
-    echo "Waiting for $host:$port..."
-    while ! nc -z "$host" "$port" 2>/dev/null; do
-        attempt=$((attempt + 1))
-        if [ $attempt -ge $max_attempts ]; then
-            echo "ERROR: Service $host:$port not available after $max_attempts attempts"
-            return 1
-        fi
-        echo "Attempt $attempt/$max_attempts..."
-        sleep 2
-    done
-    echo "$host:$port is available!"
-}
-
-# Function to start the FastAPI server
-start_api_server() {
-    echo "Starting FastAPI documentation server on port 8000..."
-    cd /app
-    exec uvicorn api.main:app \
-        --host 0.0.0.0 \
-        --port 8000 \
-        --log-level info \
-        --access-log \
-        --use-colors
-}
-
-# Function to start the MCP server
-start_mcp_server() {
-    echo "Starting MCP server on port 8001..."
-    cd /app
-    exec uvicorn mcp-server.server:mcp_app \
-        --host 0.0.0.0 \
-        --port 8001 \
-        --log-level info \
-        --access-log
-}
-
-# Function to start both servers
-start_all_servers() {
-    echo "Starting all servers..."
-
-    # Start MCP server in background
-    uvicorn mcp-server.server:mcp_app \
-        --host 0.0.0.0 \
-        --port 8001 \
-        --log-level info &
-
-    MCP_PID=$!
-    echo "MCP server started with PID $MCP_PID"
-
-    # Start API server, also in background
-    uvicorn api.main:app \
-        --host 0.0.0.0 \
-        --port 8000 \
-        --log-level info \
-        --access-log &
-
-    API_PID=$!
-    echo "API server started with PID $API_PID"
-
-    # Wait for both processes
-    wait $MCP_PID $API_PID
-}
-
-# Verify that the documentation site has been built
-if [ ! -d "/app/site" ]; then
-    echo "WARNING: Documentation site not found at /app/site"
-    echo "Documentation will not be served until built."
-fi
-
-# Main execution
-case "$1" in
-    server|api)
-        start_api_server
-        ;;
-    mcp)
-        start_mcp_server
-        ;;
-    all)
-        start_all_servers
-        ;;
-    bash)
-        exec /bin/bash
-        ;;
-    *)
-        echo "Usage: $0 {server|mcp|all|bash}"
-        echo "  server - Start FastAPI documentation server (port 8000)"
-        echo "  mcp    - Start MCP server (port 8001)"
-        echo "  all    - Start both servers"
-        echo "  bash   - Start bash shell"
-        exit 1
-        ;;
-esac
diff --git a/pyproject.toml b/pyproject.toml
index 1a1f5e6..89e8767 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -89,6 +89,7 @@ ruff = "^0.8.0"
 mypy = "^1.13.0"
 pre-commit = "^4.0.0"
 ipython = "^8.30.0"
+bandit = "^1.8.6"
 
 [tool.poetry.scripts]
 datacenter-docs = "datacenter_docs.cli:app"
diff --git a/quick-deploy.sh b/quick-deploy.sh
deleted file mode 100644
index f342c5d..0000000
--- a/quick-deploy.sh
+++ /dev/null
@@ -1,291 +0,0 @@
-#!/bin/bash
-# Quick Deploy Script for Datacenter Documentation System
-# Usage: ./quick-deploy.sh [local|docker|kubernetes]
-
-set -e
-
-COLOR_GREEN='\033[0;32m'
-COLOR_BLUE='\033[0;34m'
-COLOR_RED='\033[0;31m'
-COLOR_YELLOW='\033[1;33m'
-COLOR_NC='\033[0m'
-
-print_info() {
-    echo -e "${COLOR_BLUE}[INFO]${COLOR_NC} $1"
-}
-
-print_success() {
-    echo -e "${COLOR_GREEN}[SUCCESS]${COLOR_NC} $1"
-}
-
-print_error() {
-    echo -e "${COLOR_RED}[ERROR]${COLOR_NC} $1"
-}
-
-print_warning() {
-    echo -e "${COLOR_YELLOW}[WARNING]${COLOR_NC} $1"
-}
-
-print_header() {
-    echo ""
-    echo -e "${COLOR_GREEN}================================================${COLOR_NC}"
-    echo -e "${COLOR_GREEN}$1${COLOR_NC}"
-    echo -e "${COLOR_GREEN}================================================${COLOR_NC}"
-    echo ""
-}
-
-check_requirements() {
-    print_header "Checking Requirements"
-
-    local missing_deps=0
-
-    # Check Python
-    if command -v python3 &> /dev/null; then
-        PYTHON_VERSION=$(python3 --version | cut -d' ' -f2)
-        print_success "Python: $PYTHON_VERSION"
-    else
-        print_error "Python 3.10+ required"
-        missing_deps=1
-    fi
-
-    # Check Poetry
-    if command -v poetry &> /dev/null; then
-        POETRY_VERSION=$(poetry --version | cut -d' ' -f3)
-        print_success "Poetry: $POETRY_VERSION"
-    else
-        print_warning "Poetry not found. Installing..."
-        curl -sSL https://install.python-poetry.org | python3 -
-        export PATH="$HOME/.local/bin:$PATH"
-    fi
-
-    # Check Docker (if docker mode)
-    if [[ "$1" == "docker" || "$1" == "kubernetes" ]]; then
-        if command -v docker &> /dev/null; then
-            DOCKER_VERSION=$(docker --version | cut -d' ' -f3)
-            print_success "Docker: $DOCKER_VERSION"
-        else
-            print_error "Docker required for docker/kubernetes deployment"
-            missing_deps=1
-        fi
-    fi
-
-    # Check kubectl (if kubernetes mode)
-    if [[ "$1" == "kubernetes" ]]; then
-        if command -v kubectl &> /dev/null; then
-            KUBECTL_VERSION=$(kubectl version --client --short 2>/dev/null)
-            print_success "kubectl: $KUBECTL_VERSION"
-        else
-            print_error "kubectl required for kubernetes deployment"
-            missing_deps=1
-        fi
-    fi
-
-    if [[ $missing_deps -eq 1 ]]; then
-        print_error "Missing required dependencies. Please install them first."
-        exit 1
-    fi
-}
-
-setup_environment() {
-    print_header "Setting Up Environment"
-
-    if [[ ! -f .env ]]; then
-        print_info "Creating .env from template..."
-        cp .env.example .env
-
-        print_warning "Please edit .env file with your credentials:"
-        echo "  - MCP_SERVER_URL"
-        echo "  - MCP_API_KEY"
-        echo "  - ANTHROPIC_API_KEY"
-        echo "  - Database passwords"
-        echo ""
-        read -p "Press Enter after editing .env file..."
-    else
-        print_success ".env file already exists"
-    fi
-}
-
-deploy_local() {
-    print_header "Deploying Locally (Development Mode)"
-
-    # Install dependencies
-    print_info "Installing Python dependencies..."
-    poetry install
-
-    # Start dependencies with Docker
-    print_info "Starting MongoDB and Redis..."
-    docker-compose up -d mongodb redis
-
-    # Wait for services
-    print_info "Waiting for services to be ready..."
-    sleep 10
-
-    # Run migrations
-    print_info "Running database migrations..."
-    poetry run echo "MongoDB - no migrations needed"
-
-    # Index documentation
-    print_info "Indexing documentation..."
-    if [[ -d ./output ]]; then
-        poetry run python -m datacenter_docs.cli index-docs --path ./output
-    else
-        print_warning "No documentation found in ./output, skipping indexing"
-    fi
-
-    print_success "Local deployment complete!"
-    echo ""
-    print_info "Start services:"
-    echo "  API:    poetry run uvicorn datacenter_docs.api.main:app --reload"
-    echo "  Chat:   poetry run python -m datacenter_docs.chat.server"
-    echo "  Worker: poetry run celery -A datacenter_docs.workers.celery_app worker --loglevel=info"
-}
-
-deploy_docker() {
-    print_header "Deploying with Docker Compose"
-
-    # Build and start all services
-    print_info "Building Docker images..."
-    docker-compose build
-
-    print_info "Starting all services..."
-    docker-compose up -d
-
-    # Wait for services
-    print_info "Waiting for services to be ready..."
-    sleep 30
-
-    # Check health
-    print_info "Checking API health..."
-    for i in {1..10}; do
-        if curl -f http://localhost:8000/health &> /dev/null; then
-            print_success "API is healthy!"
-            break
-        fi
-        if [[ $i -eq 10 ]]; then
-            print_error "API failed to start. Check logs: docker-compose logs api"
-            exit 1
-        fi
-        sleep 3
-    done
-
-    # Run migrations
-    print_info "Running database migrations..."
-    docker-compose exec -T api poetry run echo "MongoDB - no migrations needed"
-
-    print_success "Docker deployment complete!"
-    echo ""
-    print_info "Services available at:"
-    echo "  API:      http://localhost:8000/api/docs"
-    echo "  Chat:     http://localhost:8001"
-    echo "  Frontend: http://localhost"
-    echo "  Flower:   http://localhost:5555"
-    echo ""
-    print_info "View logs: docker-compose logs -f"
-}
-
-deploy_kubernetes() {
-    print_header "Deploying to Kubernetes"
-
-    # Check if namespace exists
-    if kubectl get namespace datacenter-docs &> /dev/null; then
-        print_info "Namespace datacenter-docs already exists"
-    else
-        print_info "Creating namespace..."
-        kubectl apply -f deploy/kubernetes/namespace.yaml
-    fi
-
-    # Check if secrets exist
-    if kubectl get secret datacenter-secrets -n datacenter-docs &> /dev/null; then
-        print_info "Secrets already exist"
-    else
-        print_warning "Creating secrets..."
-        print_info "You need to provide:"
-        read -p "  Database URL: " DB_URL
-        read -s -p "  Redis URL: " REDIS_URL
-        echo ""
-        read -s -p "  MCP API Key: " MCP_KEY
-        echo ""
-        read -s -p "  Anthropic API Key: " ANTHROPIC_KEY
-        echo ""
-
-        kubectl create secret generic datacenter-secrets \
-            --from-literal=database-url="$DB_URL" \
-            --from-literal=redis-url="$REDIS_URL" \
-            --from-literal=mcp-api-key="$MCP_KEY" \
-            --from-literal=anthropic-api-key="$ANTHROPIC_KEY" \
-            -n datacenter-docs
-    fi
-
-    # Apply manifests
-    print_info "Applying Kubernetes manifests..."
-    kubectl apply -f deploy/kubernetes/deployment.yaml
-    kubectl apply -f deploy/kubernetes/service.yaml
-    kubectl apply -f deploy/kubernetes/ingress.yaml
-
-    # Wait for deployment
-    print_info "Waiting for deployments to be ready..."
-    kubectl rollout status deployment/api -n datacenter-docs --timeout=5m
-    kubectl rollout status deployment/chat -n datacenter-docs --timeout=5m
-    kubectl rollout status deployment/worker -n datacenter-docs --timeout=5m
-
-    print_success "Kubernetes deployment complete!"
-    echo ""
-    print_info "Check status:"
-    echo "  kubectl get pods -n datacenter-docs"
-    echo "  kubectl logs -n datacenter-docs deployment/api"
-}
-
-show_usage() {
-    echo "Usage: $0 [local|docker|kubernetes]"
-    echo ""
-    echo "Deployment modes:"
-    echo "  local      - Local development with Poetry (recommended for dev)"
-    echo "  docker     - Docker Compose (recommended for testing/staging)"
-    echo "  kubernetes - Kubernetes cluster (recommended for production)"
-    echo ""
-    echo "Examples:"
-    echo "  $0 local       # Deploy locally for development"
-    echo "  $0 docker      # Deploy with Docker Compose"
-    echo "  $0 kubernetes  # Deploy to Kubernetes"
-}
-
-# Main script
-if [[ $# -eq 0 ]]; then
-    show_usage
-    exit 1
-fi
-
-MODE=$1
-
-case $MODE in
-    local)
-        check_requirements local
-        setup_environment
-        deploy_local
-        ;;
-    docker)
-        check_requirements docker
-        setup_environment
-        deploy_docker
-        ;;
-    kubernetes)
-        check_requirements kubernetes
-        deploy_kubernetes
-        ;;
-    *)
-        print_error "Unknown deployment mode: $MODE"
-        show_usage
-        exit 1
-        ;;
-esac
-
-print_header "Deployment Complete! šŸš€"
-print_success "System is ready to use"
-echo ""
-print_info "Next steps:"
-echo "  1. Test API: curl http://localhost:8000/health"
-echo "  2. Access documentation: http://localhost:8000/api/docs"
-echo "  3. Start using the chat interface"
-echo "  4. Submit test tickets via API"
-echo ""
-print_info "For support: automation-team@company.local"
diff --git a/src/datacenter_docs/api/main.py b/src/datacenter_docs/api/main.py
index 50e65e2..f577a60 100644
--- a/src/datacenter_docs/api/main.py
+++ b/src/datacenter_docs/api/main.py
@@ -348,6 +348,7 @@ async def trigger_proxmox_documentation() -> Dict[str, Any]:
     Returns job_id for status monitoring
     """
     from celery import Celery
+
     from datacenter_docs.utils.config import get_settings
 
     try:
@@ -388,6 +389,7 @@ async def get_job_status(job_id: str) -> Dict[str, Any]:
     Returns current status and progress information
     """
     from celery.result import AsyncResult
+
     from datacenter_docs.workers.celery_app import celery_app
 
     try:
@@ -441,24 +443,25 @@ async def list_documentation_files() -> Dict[str, Any]:
 
         for file_path in category_dir.glob("*.md"):
             stat = file_path.stat()
-            files.append({
-                "filename": file_path.name,
-                "size": stat.st_size,
-                "modified": stat.st_mtime,
-                "path": f"{category}/{file_path.name}"
-            })
+            files.append(
+                {
+                    "filename": file_path.name,
+                    "size": stat.st_size,
+                    "modified": stat.st_mtime,
+                    "path": f"{category}/{file_path.name}",
+                }
+            )
 
         if files:
-            files_by_category[category] = sorted(files, key=lambda x: x["modified"], reverse=True)
+            files_by_category[category] = sorted(
+                files, key=lambda x: x["modified"], reverse=True
+            )
 
     total_files = sum(len(files) for files in files_by_category.values())
 
     return {
-        "categories": [
-            {"name": cat, "files": files}
-            for cat, files in files_by_category.items()
-        ],
-        "total": total_files
+        "categories": [{"name": cat, "files": files} for cat, files in files_by_category.items()],
+        "total": total_files,
     }
 
@@ -495,7 +498,7 @@ async def get_documentation_content(category: str, filename: str) -> Dict[str, Any]:
             "category": category,
             "content": content,
             "size": stat.st_size,
-            "modified": stat.st_mtime
+            "modified": stat.st_mtime,
         }
 
     except Exception as e:
logger.error(f"Failed to read file {file_path}: {e}") @@ -513,38 +516,21 @@ async def get_job_logs(job_id: str, tail: int = 100) -> Dict[str, Any]: try: # Get worker logs and filter for job_id - cmd = [ - "docker", - "logs", - "datacenter-docs-worker-dev", - "--tail", - str(tail) - ] + cmd = ["docker", "logs", "datacenter-docs-worker-dev", "--tail", str(tail)] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=5 - ) + result = subprocess.run(cmd, capture_output=True, text=True, timeout=5) all_logs = result.stdout + result.stderr # Filter logs containing the job_id job_logs = [ - line - for line in all_logs.split("\n") - if job_id in line or "ForkPoolWorker" in line + line for line in all_logs.split("\n") if job_id in line or "ForkPoolWorker" in line ] # Take last 50 relevant lines relevant_logs = job_logs[-50:] if len(job_logs) > 50 else job_logs - return { - "job_id": job_id, - "logs": relevant_logs, - "total_lines": len(relevant_logs) - } + return {"job_id": job_id, "logs": relevant_logs, "total_lines": len(relevant_logs)} except subprocess.TimeoutExpired: raise HTTPException(status_code=504, detail="Timeout getting logs") diff --git a/src/datacenter_docs/chat/agent.py b/src/datacenter_docs/chat/agent.py index e6b0fb6..d563c58 100644 --- a/src/datacenter_docs/chat/agent.py +++ b/src/datacenter_docs/chat/agent.py @@ -8,10 +8,10 @@ from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain_community.vectorstores import Chroma from langchain.schema import Document from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.vectorstores import Chroma from ..mcp.client import MCPClient from ..utils.llm_client import LLMClient @@ -48,7 +48,9 @@ class DocumentationAgent: self.vector_store: Optional[Chroma] = None try: - self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") + self.embeddings = HuggingFaceEmbeddings( + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) self._load_vector_store() logger.info("Vector store initialized successfully") except Exception as e: @@ -136,7 +138,9 @@ class DocumentationAgent: logger.info(f"šŸ’¾ Adding {len(documents)} chunks to vector store...") self.vector_store.add_documents(documents) self.vector_store.persist() - logger.info(f"āœ… Indexed {files_processed} files ({len(documents)} chunks) from documentation") + logger.info( + f"āœ… Indexed {files_processed} files ({len(documents)} chunks) from documentation" + ) else: logger.warning("āš ļø No documents to index") @@ -191,7 +195,9 @@ class DocumentationAgent: "last_updated": doc.metadata.get("indexed_at", ""), } ) - logger.info(f" āœ“ Section: {doc.metadata.get('section')} (relevance: {relevance_score*100:.1f}%, distance: {score:.3f})") + logger.info( + f" āœ“ Section: {doc.metadata.get('section')} (relevance: {relevance_score*100:.1f}%, distance: {score:.3f})" + ) return formatted_results @@ -342,7 +348,7 @@ Respond in JSON format: "de": "German", "pt": "Portuguese", "zh": "Chinese", - "ja": "Japanese" + "ja": "Japanese", } language_instruction = "" diff --git a/src/datacenter_docs/chat/main.py b/src/datacenter_docs/chat/main.py index 1eea08d..66c9966 100644 --- a/src/datacenter_docs/chat/main.py +++ b/src/datacenter_docs/chat/main.py @@ -2,7 +2,6 @@ Chat server with Socket.IO support for real-time communication. 
""" -import asyncio import logging from pathlib import Path from typing import Any, Dict @@ -36,9 +35,7 @@ async def index_documentation_if_needed() -> None: try: # Create temporary agent for indexing temp_agent = DocumentationAgent( - mcp_client=None, - llm_client=None, - vector_store_path=str(vector_store_path) + mcp_client=None, llm_client=None, vector_store_path=str(vector_store_path) ) # Index documentation @@ -77,6 +74,7 @@ async def initialize_agent() -> None: logger.warning(f"Failed to initialize Documentation Agent: {e}") agent = None + # Create Socket.IO server # Using async_mode="asgi" for FastAPI integration # python-socketio 5.x automatically supports Engine.IO v4 for socket.io-client 4.x compatibility @@ -202,7 +200,9 @@ async def chat(sid: str, data: Dict[str, Any]) -> None: ) response = { - "message": ai_response.get("message", "I apologize, I couldn't generate a response."), + "message": ai_response.get( + "message", "I apologize, I couldn't generate a response." + ), "type": "bot", "timestamp": data.get("timestamp"), "related_docs": ai_response.get("related_docs", []), diff --git a/src/datacenter_docs/collectors/proxmox_collector.py b/src/datacenter_docs/collectors/proxmox_collector.py index 7416e88..3a318ce 100644 --- a/src/datacenter_docs/collectors/proxmox_collector.py +++ b/src/datacenter_docs/collectors/proxmox_collector.py @@ -50,9 +50,7 @@ class ProxmoxCollector(BaseCollector): # Check if we have real Proxmox credentials configured if not settings.PROXMOX_HOST or settings.PROXMOX_HOST == "proxmox.example.com": - self.logger.warning( - "Proxmox host not configured, using mock data for development" - ) + self.logger.warning("Proxmox host not configured, using mock data for development") self.connected = True return True @@ -312,10 +310,14 @@ class ProxmoxCollector(BaseCollector): nodes = self.proxmox_client.nodes.get() if nodes: node_name = nodes[0]["node"] # Use first node for storage info - status = self.proxmox_client.nodes(node_name).storage(storage_id).status.get() + status = ( + self.proxmox_client.nodes(node_name).storage(storage_id).status.get() + ) store.update(status) except Exception as e: - self.logger.warning(f"Failed to get detailed info for storage {storage_id}: {e}") + self.logger.warning( + f"Failed to get detailed info for storage {storage_id}: {e}" + ) self.logger.info(f"Collected {len(storage)} storage pools") return storage @@ -354,7 +356,9 @@ class ProxmoxCollector(BaseCollector): self.logger.warning(f"Failed to get networks from node {node_name}: {e}") continue - self.logger.info(f"Collected {len(networks)} network interfaces from {len(nodes)} nodes") + self.logger.info( + f"Collected {len(networks)} network interfaces from {len(nodes)} nodes" + ) return networks except Exception as e: diff --git a/src/datacenter_docs/generators/template_generator.py b/src/datacenter_docs/generators/template_generator.py index 1a4652b..3eafca0 100644 --- a/src/datacenter_docs/generators/template_generator.py +++ b/src/datacenter_docs/generators/template_generator.py @@ -88,9 +88,7 @@ class TemplateBasedGenerator(BaseGenerator): template_path: Path to YAML template file """ self.template = DocumentationTemplate(Path(template_path)) - super().__init__( - name=self.template.collector, section=f"{self.template.collector}_docs" - ) + super().__init__(name=self.template.collector, section=f"{self.template.collector}_docs") async def generate(self, data: Dict[str, Any]) -> str: """ @@ -104,9 +102,7 @@ class TemplateBasedGenerator(BaseGenerator): Returns: Combined 
documentation (all sections) """ - self.logger.info( - f"Generating documentation for {self.template.name} using template" - ) + self.logger.info(f"Generating documentation for {self.template.name} using template") # Validate data matches template collector collector_name = data.get("metadata", {}).get("collector", "") @@ -140,7 +136,6 @@ class TemplateBasedGenerator(BaseGenerator): Returns: Generated section content in Markdown """ - section_id = section_def.get("id", "unknown") section_title = section_def.get("title", "Untitled Section") data_requirements = section_def.get("data_requirements", []) prompt_template = section_def.get("prompt_template", "") @@ -549,13 +544,15 @@ Guidelines: except Exception as e: self.logger.error(f"Failed to generate section for {loop_over} item {idx}: {e}") - results.append({ - "section_id": f"{section_id}_item_{idx}", - "success": False, - "error": str(e), - "item_index": idx, - "total_items": total_items, - }) + results.append( + { + "section_id": f"{section_id}_item_{idx}", + "success": False, + "error": str(e), + "item_index": idx, + "total_items": total_items, + } + ) return results diff --git a/src/datacenter_docs/utils/llm_client.py b/src/datacenter_docs/utils/llm_client.py index 520a573..e26b64f 100644 --- a/src/datacenter_docs/utils/llm_client.py +++ b/src/datacenter_docs/utils/llm_client.py @@ -84,9 +84,7 @@ class LLMClient: # Increased timeout to 120s for documentation generation (large prompts) http_client = httpx.AsyncClient(verify=False, timeout=120.0) self.client = AsyncOpenAI( - base_url=self.base_url, - api_key=self.api_key, - http_client=http_client + base_url=self.base_url, api_key=self.api_key, http_client=http_client ) logger.info(f"Initialized LLM client: base_url={self.base_url}, model={self.model}") diff --git a/src/datacenter_docs/workers/documentation_tasks.py b/src/datacenter_docs/workers/documentation_tasks.py index e46f099..d87618f 100644 --- a/src/datacenter_docs/workers/documentation_tasks.py +++ b/src/datacenter_docs/workers/documentation_tasks.py @@ -8,7 +8,7 @@ from infrastructure systems (Proxmox, VMware, Kubernetes, etc.) 
 import logging
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, Dict
 
 from celery import group
 
@@ -18,9 +18,7 @@ logger = logging.getLogger(__name__)
 
 
 @celery_app.task(name="collect_and_generate_docs", bind=True)
-def collect_and_generate_docs(
-    self, collector_name: str, template_path: str
-) -> Dict[str, Any]:
+def collect_and_generate_docs(self, collector_name: str, template_path: str) -> Dict[str, Any]:
     """
     Collect data from infrastructure and generate documentation
 
@@ -80,9 +78,7 @@ def collect_and_generate_docs(
     return result
 
 
-async def _async_collect_and_generate(
-    collector_name: str, template_path: str
-) -> Dict[str, Any]:
+async def _async_collect_and_generate(collector_name: str, template_path: str) -> Dict[str, Any]:
     """
     Async implementation of collect and generate workflow
 
@@ -93,8 +89,8 @@ async def _async_collect_and_generate(
     Returns:
         Generation result
     """
-    from datacenter_docs.generators.template_generator import TemplateBasedGenerator
     from datacenter_docs.chat.agent import DocumentationAgent
+    from datacenter_docs.generators.template_generator import TemplateBasedGenerator
 
     # Import appropriate collector
     collector = await _get_collector(collector_name)
@@ -190,8 +186,8 @@ def generate_proxmox_docs(self) -> Dict[str, Any]:
 
     # Update task state
     self.update_state(
-        state='PROGRESS',
-        meta={'current': 0, 'total': 6, 'status': 'Starting Proxmox documentation generation...'}
+        state="PROGRESS",
+        meta={"current": 0, "total": 6, "status": "Starting Proxmox documentation generation..."},
     )
 
     template_path = "templates/documentation/proxmox.yaml"
@@ -237,10 +233,7 @@ def generate_proxmox_docs(self) -> Dict[str, Any]:
         result["completed_at"] = datetime.now().isoformat()
 
         logger.error(f"[{task_id}] Proxmox documentation generation failed: {e}", exc_info=True)
-        self.update_state(
-            state='FAILURE',
-            meta={'error': str(e), 'status': f'Failed: {str(e)}'}
-        )
+        self.update_state(state="FAILURE", meta={"error": str(e), "status": f"Failed: {str(e)}"})
 
         raise
 
@@ -267,10 +260,7 @@ def generate_all_docs() -> Dict[str, Any]:
 
     # Create parallel tasks
     task_group = group(
-        [
-            collect_and_generate_docs.s(system["collector"], system["template"])
-            for system in systems
-        ]
+        [collect_and_generate_docs.s(system["collector"], system["template"]) for system in systems]
     )
 
     # Execute group
@@ -377,11 +367,8 @@ def full_docs_pipeline() -> Dict[str, Any]:
     """
     logger.info("Starting full documentation pipeline")
 
-    # Step 1: Generate all documentation
-    generate_result = generate_all_docs()
-
-    # Step 2: Wait a bit for generation to complete, then index
-    # (In production, this would use Celery chains/chords for better coordination)
+    # Step 1: Generate all documentation, then index
+    # Use Celery chain for proper task coordination
     from celery import chain
 
     pipeline = chain(
diff --git a/test_workflow.py b/test_workflow.py
deleted file mode 100644
index 0cc0696..0000000
--- a/test_workflow.py
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/usr/bin/env python3
-"""
-End-to-End Workflow Test Script
-
-Tests the complete documentation generation workflow:
-1. VMware Collector (with mock data)
-2. Infrastructure Generator (with mock LLM)
-3. MongoDB storage
-4. API retrieval
-
-This script validates the system architecture without requiring:
-- Real VMware infrastructure
-- Real LLM API credentials
-"""
-
-import asyncio
-import logging
-from datetime import datetime
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-
-async def test_collector():
-    """Test VMware collector with mock data"""
-    logger.info("=" * 70)
-    logger.info("TEST 1: VMware Collector")
-    logger.info("=" * 70)
-
-    from datacenter_docs.collectors.vmware_collector import VMwareCollector
-
-    collector = VMwareCollector()
-
-    logger.info(f"Collector name: {collector.name}")
-    logger.info("Running collector.run()...")
-
-    result = await collector.run()
-
-    logger.info(f"Collection result: {result['success']}")
-    if result['success']:
-        data = result['data']
-        logger.info(f"āœ… Data collected successfully!")
-        logger.info(f"  - VMs: {len(data.get('data', {}).get('vms', []))}")
-        logger.info(f"  - Hosts: {len(data.get('data', {}).get('hosts', []))}")
-        logger.info(f"  - Clusters: {len(data.get('data', {}).get('clusters', []))}")
-        logger.info(
-            f"  - Datastores: {len(data.get('data', {}).get('datastores', []))}"
-        )
-        logger.info(f"  - Networks: {len(data.get('data', {}).get('networks', []))}")
-        return result
-    else:
-        logger.error(f"āŒ Collection failed: {result.get('error')}")
-        return None
-
-
-async def test_generator_structure():
-    """Test generator structure (without LLM call)"""
-    logger.info("\n" + "=" * 70)
-    logger.info("TEST 2: Infrastructure Generator Structure")
-    logger.info("=" * 70)
-
-    from datacenter_docs.generators.infrastructure_generator import (
-        InfrastructureGenerator,
-    )
-
-    generator = InfrastructureGenerator()
-
-    logger.info(f"Generator name: {generator.name}")
-    logger.info(f"Generator section: {generator.section}")
-    logger.info(f"Generator LLM client configured: {generator.llm is not None}")
-
-    # Test data formatting
-    sample_data = {
-        'metadata': {'collector': 'vmware', 'collected_at': datetime.now().isoformat()},
-        'data': {
-            'statistics': {'total_vms': 10, 'powered_on_vms': 8},
-            'vms': [{'name': 'test-vm-01', 'power_state': 'poweredOn'}],
-        },
-    }
-
-    summary = generator._format_data_summary(sample_data['data'])
-    logger.info(f"āœ… Data summary formatted ({len(summary)} chars)")
-    logger.info(f"  Summary preview: {summary[:200]}...")
-
-    return generator
-
-
-async def test_database_connection():
-    """Test MongoDB connection and storage"""
-    logger.info("\n" + "=" * 70)
-    logger.info("TEST 3: Database Connection")
-    logger.info("=" * 70)
-
-    from beanie import init_beanie
-    from motor.motor_asyncio import AsyncIOMotorClient
-
-    from datacenter_docs.api.models import (
-        AuditLog,
-        AutoRemediationPolicy,
-        ChatSession,
-        DocumentationSection,
-        RemediationApproval,
-        RemediationLog,
-        SystemMetric,
-        Ticket,
-        TicketFeedback,
-        TicketPattern,
-    )
-    from datacenter_docs.utils.config import get_settings
-
-    settings = get_settings()
-
-    try:
-        logger.info(f"Connecting to MongoDB: {settings.MONGODB_URL}")
-        client = AsyncIOMotorClient(settings.MONGODB_URL)
-        database = client[settings.MONGODB_DATABASE]
-
-        # Test connection
-        await database.command('ping')
-        logger.info("āœ… MongoDB connection successful!")
-
-        # Initialize Beanie
-        await init_beanie(
-            database=database,
-            document_models=[
-                Ticket,
-                TicketFeedback,
-                RemediationLog,
-                RemediationApproval,
-                AutoRemediationPolicy,
-                TicketPattern,
-                DocumentationSection,
-                ChatSession,
-                SystemMetric,
-                AuditLog,
-            ],
-        )
-        logger.info("āœ… Beanie ORM initialized!")
-
-        # Test creating a document
-        test_section = DocumentationSection(
-            section_id="test_section_" + datetime.now().strftime("%Y%m%d_%H%M%S"),
-            name="Test Section",
-            description="This is a test section for validation",
-        )
-        await test_section.insert()
-        logger.info(f"āœ… Test document created: {test_section.section_id}")
-
-        # Count documents
-        count = await DocumentationSection.count()
-        logger.info(f"  Total DocumentationSection records: {count}")
-
-        return True
-
-    except Exception as e:
-        logger.error(f"āŒ Database test failed: {e}", exc_info=True)
-        return False
-
-
-async def test_full_workflow_mock():
-    """Test full workflow with mock data (no LLM call)"""
-    logger.info("\n" + "=" * 70)
-    logger.info("TEST 4: Full Workflow (Mock)")
-    logger.info("=" * 70)
-
-    try:
-        # Step 1: Collect data
-        logger.info("Step 1: Collecting VMware data...")
-        collector_result = await test_collector()
-
-        if not collector_result or not collector_result['success']:
-            logger.error("āŒ Collector test failed, aborting workflow test")
-            return False
-
-        # Step 2: Test generator structure
-        logger.info("\nStep 2: Testing generator structure...")
-        generator = await test_generator_structure()
-
-        # Step 3: Test database
-        logger.info("\nStep 3: Testing database connection...")
-        db_ok = await test_database_connection()
-
-        if not db_ok:
-            logger.error("āŒ Database test failed, aborting workflow test")
-            return False
-
-        logger.info("\n" + "=" * 70)
-        logger.info("āœ… WORKFLOW TEST PASSED (Mock)")
-        logger.info("=" * 70)
-        logger.info("Components validated:")
-        logger.info("  āœ… VMware Collector (mock data)")
-        logger.info("  āœ… Infrastructure Generator (structure)")
-        logger.info("  āœ… MongoDB connection & storage")
-        logger.info("  āœ… Beanie ORM models")
-        logger.info("\nTo test with real LLM:")
-        logger.info("  1. Configure LLM API key in .env")
-        logger.info("  2. Run: poetry run datacenter-docs generate vmware")
-        return True
-
-    except Exception as e:
-        logger.error(f"āŒ Workflow test failed: {e}", exc_info=True)
-        return False
-
-
-async def main():
-    """Main test entry point"""
-    logger.info("šŸš€ Starting End-to-End Workflow Test")
-    logger.info("=" * 70)
-
-    try:
-        success = await test_full_workflow_mock()
-
-        if success:
-            logger.info("\nšŸŽ‰ All tests passed!")
-            return 0
-        else:
-            logger.error("\nāŒ Some tests failed")
-            return 1
-
-    except Exception as e:
-        logger.error(f"\nšŸ’„ Test execution failed: {e}", exc_info=True)
-        return 1
-
-
-if __name__ == "__main__":
-    exit_code = asyncio.run(main())
-    exit(exit_code)
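
Note: the full_docs_pipeline() hunk above replaces a sleep-based handoff between generation and indexing with a Celery chain, so indexing starts only after generation succeeds. A minimal sketch of that pattern, assuming a Redis broker and an index_documentation task (the broker URL and the indexing task name are illustrative, not taken from this diff):

    from celery import Celery, chain

    app = Celery("docs", broker="redis://localhost:6379/0")  # assumed broker URL

    @app.task
    def generate_all_docs() -> str:
        # stand-in for the real generation step; returns the output path
        return "output/"

    @app.task
    def index_documentation(output_path: str) -> str:
        # receives the previous task's return value automatically
        return f"indexed {output_path}"

    # chain() links the tasks so index_documentation runs only after
    # generate_all_docs completes, replacing the old fixed sleep.
    pipeline = chain(generate_all_docs.s(), index_documentation.s())
    result = pipeline.apply_async()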
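Similarly, the chat/main.py hunk keeps the comment explaining that the Socket.IO server uses async_mode="asgi" so python-socketio 5.x can be mounted next to FastAPI, with Engine.IO v4 compatibility for socket.io-client 4.x. A minimal sketch of that wiring (the event name and payload shape are illustrative):

    import socketio
    from fastapi import FastAPI

    fastapi_app = FastAPI()

    # async_mode="asgi" produces an ASGI-compatible server that can wrap
    # (or sit beside) another ASGI app such as FastAPI.
    sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*")
    app = socketio.ASGIApp(sio, other_asgi_app=fastapi_app)

    @sio.event
    async def chat(sid, data):
        # echo a bot-style response back to the originating client
        await sio.emit("response", {"message": "ack", "type": "bot"}, to=sid)

Run with, e.g., uvicorn module:app; Socket.IO traffic is handled by sio, and all other requests fall through to the FastAPI app.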