diff --git a/ARCHITECTURE_STATUS.md b/ARCHITECTURE_STATUS.md deleted file mode 100644 index fab89f9..0000000 --- a/ARCHITECTURE_STATUS.md +++ /dev/null @@ -1,276 +0,0 @@ -# Architecture Status Overview - -## πŸ—οΈ Struttura Moduli - Stato Attuale vs Target - -``` -src/datacenter_docs/ -β”œβ”€β”€ __init__.py βœ… Presente -β”‚ -β”œβ”€β”€ api/ βœ… COMPLETO (80%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ main.py βœ… FastAPI app principale -β”‚ β”œβ”€β”€ main_enhanced.py βœ… Versione enhanced -β”‚ β”œβ”€β”€ models.py βœ… Pydantic models -β”‚ β”œβ”€β”€ auto_remediation.py βœ… Auto-remediation engine -β”‚ └── reliability.py βœ… Reliability scoring -β”‚ -β”œβ”€β”€ chat/ ⚠️ PARZIALE (40%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ agent.py βœ… DocumentationAgent -β”‚ └── main.py ❌ MANCA - WebSocket server -β”‚ -β”œβ”€β”€ workers/ ❌ DIRECTORY NON ESISTE (0%) -β”‚ β”œβ”€β”€ __init__.py ❌ Da creare -β”‚ β”œβ”€β”€ celery_app.py ❌ Da creare - Celery config -β”‚ └── tasks.py ❌ Da creare - Celery tasks -β”‚ -β”œβ”€β”€ collectors/ ⚠️ SKELETON (5%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ base.py ❌ Da creare - Base collector -β”‚ β”œβ”€β”€ vmware_collector.py ❌ Da creare -β”‚ β”œβ”€β”€ kubernetes_collector.py ❌ Da creare -β”‚ β”œβ”€β”€ network_collector.py ❌ Da creare -β”‚ β”œβ”€β”€ storage_collector.py ❌ Da creare -β”‚ β”œβ”€β”€ database_collector.py ❌ Da creare -β”‚ └── monitoring_collector.py ❌ Da creare -β”‚ -β”œβ”€β”€ generators/ ⚠️ SKELETON (5%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ base.py ❌ Da creare - Base generator -β”‚ β”œβ”€β”€ infrastructure_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ network_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ virtualization_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ kubernetes_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ storage_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ database_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ monitoring_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ security_generator.py ❌ Da creare -β”‚ β”œβ”€β”€ runbook_generator.py ❌ 
Da creare -β”‚ └── troubleshooting_generator.py ❌ Da creare -β”‚ -β”œβ”€β”€ validators/ ⚠️ SKELETON (5%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ base.py ❌ Da creare -β”‚ β”œβ”€β”€ config_validator.py ❌ Da creare -β”‚ β”œβ”€β”€ security_validator.py ❌ Da creare -β”‚ └── compliance_validator.py ❌ Da creare -β”‚ -β”œβ”€β”€ mcp/ βœ… BASE (60%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ client.py βœ… MCP client -β”‚ └── server.py ❌ Da creare (se necessario) -β”‚ -β”œβ”€β”€ utils/ βœ… BASE (70%) -β”‚ β”œβ”€β”€ __init__.py βœ… -β”‚ β”œβ”€β”€ config.py βœ… Configuration management -β”‚ β”œβ”€β”€ database.py βœ… MongoDB utilities -β”‚ β”œβ”€β”€ logging.py ❌ Da creare -β”‚ └── helpers.py ❌ Da creare -β”‚ -└── cli.py ❌ MANCA (0%) - CLI tool principale -``` - ---- - -## πŸ“Š Completamento per Categoria - -| Categoria | Completamento | PrioritΓ  | Note | -|-----------|---------------|----------|------| -| **API Service** | 🟒 80% | βœ… Completato | Funzionante in produzione | -| **Database Layer** | 🟒 70% | βœ… Completato | MongoDB + Beanie OK | -| **MCP Integration** | 🟑 60% | Alta | Client base funzionante | -| **Chat Service** | 🟑 40% | Media | Agent OK, manca WebSocket server | -| **Auto-Remediation** | 🟒 75% | βœ… Completato | Engine + reliability OK | -| **CLI Tool** | πŸ”΄ 0% | **Critica** | Necessario per gestione | -| **Workers (Celery)** | πŸ”΄ 0% | **Critica** | Necessario per task async | -| **Collectors** | 🟑 5% | Alta | Solo skeleton | -| **Generators** | 🟑 5% | Alta | Solo skeleton | -| **Validators** | 🟑 5% | Media | Solo skeleton | -| **Frontend** | 🟑 20% | Bassa | Skeleton React + build | -| **CI/CD** | 🟒 90% | βœ… Completato | GitHub/GitLab/Gitea | -| **Docker** | 🟒 85% | βœ… Completato | Tutti i Dockerfile OK | - -**Overall Project Completion: ~35%** - ---- - -## πŸ”„ Data Flow - Stato Implementazione - -### Target Architecture -```mermaid -graph TD - A[External Trigger] -->|1| B[API/CLI] - B -->|2| C[Celery Task] - C -->|3| D[Collectors] - D -->|4| 
E[MCP Server] - E -->|5| F[Infrastructure] - F -->|6| E - E -->|7| D - D -->|8| G[Generators] - G -->|9| H[LLM Claude] - H -->|10| G - G -->|11| I[MongoDB] - I -->|12| J[API Response] -``` - -### Current Status -``` -βœ… [External Trigger] - ↓ -βœ… [API] β†’ ⚠️ [CLI - MANCA] - ↓ -❌ [Celery Task - MANCA] - ↓ -⚠️ [Collectors - SKELETON] β†’ βœ… [MCP Client] β†’ ❓ [MCP Server - External] - ↓ -⚠️ [Generators - SKELETON] β†’ βœ… [LLM Integration OK] - ↓ -βœ… [MongoDB Storage] - ↓ -βœ… [API Response] -``` - -**Blocchi Critici**: -- ❌ **Celery Workers** - Nessun task asincrono funzionante -- ❌ **CLI Tool** - Nessun modo di gestire il sistema da command line -- ⚠️ **Collectors** - Non puΓ² raccogliere dati dall'infrastruttura -- ⚠️ **Generators** - Non puΓ² generare documentazione - ---- - -## 🎯 Milestone per Completamento - -### Milestone 1: Core System (MVP) -**Target**: Sistema base funzionante end-to-end -**Completamento**: 35% β†’ 60% - -- [ ] CLI tool base (`cli.py`) -- [ ] Celery workers setup (`workers/celery_app.py`, `workers/tasks.py`) -- [ ] 1 Collector funzionante (es: VMware) -- [ ] 1 Generator funzionante (es: Infrastructure) -- [ ] Task scheduling per generazione periodica docs - -**Risultato**: Generazione automatica documentazione ogni 6 ore - ---- - -### Milestone 2: Complete Data Pipeline -**Target**: Tutti i collector e generator implementati -**Completamento**: 60% β†’ 80% - -- [ ] Tutti i 6+ collectors implementati -- [ ] Tutti i 10 generators implementati -- [ ] Base validators -- [ ] Logging completo -- [ ] Error handling robusto - -**Risultato**: Documentazione completa di tutta l'infrastruttura - ---- - -### Milestone 3: Advanced Features -**Target**: Chat + Auto-remediation completo -**Completamento**: 80% β†’ 95% - -- [ ] Chat WebSocket server (`chat/main.py`) -- [ ] Frontend React completato -- [ ] Auto-remediation testing esteso -- [ ] Analytics e dashboard -- [ ] Advanced validators - -**Risultato**: Sistema completo con UI e auto-remediation 
- ---- - -### Milestone 4: Production Ready -**Target**: Sistema production-ready -**Completamento**: 95% β†’ 100% - -- [ ] Testing completo (unit + integration) -- [ ] Performance optimization -- [ ] Security hardening -- [ ] Documentation completa -- [ ] Monitoring e alerting -- [ ] Backup e disaster recovery - -**Risultato**: Deploy in produzione - ---- - -## πŸ” Analisi Dipendenze Critiche - -### Per Avviare Generazione Docs (MVP) -**Dipendenze minime**: -1. βœ… API Service (giΓ  presente) -2. ❌ CLI tool β†’ **BLOCKING** -3. ❌ Celery workers β†’ **BLOCKING** -4. ❌ Almeno 1 collector β†’ **BLOCKING** -5. ❌ Almeno 1 generator β†’ **BLOCKING** -6. βœ… MongoDB (giΓ  configurato) -7. βœ… Redis (giΓ  configurato) -8. βœ… LLM integration (giΓ  presente) - -**Effort Stimato per MVP**: 3-5 giorni di sviluppo - ---- - -### Per Chat Service Completo -**Dipendenze**: -1. βœ… DocumentationAgent (giΓ  presente) -2. ❌ WebSocket server β†’ **BLOCKING** -3. ⚠️ Frontend chat UI (opzionale - puΓ² usare Postman/WebSocket client) -4. βœ… MongoDB (giΓ  configurato) -5. βœ… LLM integration (giΓ  presente) - -**Effort Stimato**: 1-2 giorni di sviluppo - ---- - -### Per Auto-Remediation Completo -**Dipendenze**: -1. βœ… Auto-remediation engine (giΓ  presente) -2. βœ… Reliability scoring (giΓ  presente) -3. ❌ Celery workers per execution β†’ **BLOCKING** -4. ⚠️ Testing infrastructure (importante per sicurezza) -5. ⚠️ Approval workflows (UI opzionale) - -**Effort Stimato**: 2-3 giorni di sviluppo + testing - ---- - -## πŸ’‘ Raccomandazioni - -### PrioritΓ  Sviluppo Immediato -1. **CLI Tool** (1 giorno) - - Essenziale per gestione sistema - - PermetterΓ  testing manuale - -2. **Celery Workers** (1-2 giorni) - - Necessario per task asincroni - - Fondamentale per generazione docs - -3. 
**1 Collector + 1 Generator** (2-3 giorni) - - Completa il ciclo base - - Permette testing end-to-end - -**Totale effort MVP**: ~5-6 giorni - -### Quick Wins -- βœ… Docker setup Γ¨ completo - infrastruttura OK -- βœ… API Γ¨ funzionante - puΓ² essere testata -- βœ… Database layer Γ¨ pronto - storage OK -- βœ… LLM integration Γ¨ pronta - generazione OK - -**Manca solo**: Logica business per collectors/generators e orchestrazione via Celery - ---- - -## πŸ“ˆ Progress Tracking - -**Last Updated**: 2025-10-19 - -**Current Sprint Focus**: Infrastructure setup βœ… COMPLETATO -**Next Sprint Focus**: Core business logic (Collectors/Generators/Workers) - -**Team Velocity**: N/A -**Estimated Completion**: 2-3 settimane per MVP diff --git a/AUTO_REMEDIATION_GUIDE.md b/AUTO_REMEDIATION_GUIDE.md deleted file mode 100644 index 8bcb9c3..0000000 --- a/AUTO_REMEDIATION_GUIDE.md +++ /dev/null @@ -1,751 +0,0 @@ -# πŸ€– Auto-Remediation System - Complete Documentation - -## πŸ“‹ Table of Contents - -1. [Overview](#overview) -2. [Safety First Design](#safety-first-design) -3. [Reliability Scoring System](#reliability-scoring-system) -4. [Human Feedback Loop](#human-feedback-loop) -5. [Decision Engine](#decision-engine) -6. [Auto-Remediation Execution](#auto-remediation-execution) -7. [Pattern Learning](#pattern-learning) -8. [API Usage](#api-usage) -9. [Configuration](#configuration) -10. [Monitoring & Analytics](#monitoring--analytics) - ---- - -## Overview - -The **Auto-Remediation System** enables AI to autonomously resolve infrastructure issues by executing write operations on your systems. This is a **production-grade** implementation with extensive safety checks, human oversight, and continuous learning. 
- -### Key Features - -βœ… **Safety-First**: Auto-remediation **disabled by default** -βœ… **Reliability Scoring**: Multi-factor confidence calculation (0-100%) -βœ… **Human Feedback**: Continuous learning from user feedback -βœ… **Pattern Recognition**: Learns from similar issues -βœ… **Approval Workflow**: Critical actions require human approval -βœ… **Full Audit Trail**: Every action logged with rollback capability -βœ… **Progressive Automation**: Decisions improve over time based on success rate - ---- - -## Safety First Design - -### πŸ›‘οΈ Default State: DISABLED - -```python -# Example: Ticket submission -{ - "ticket_id": "INC-001", - "description": "Problem description", - "enable_auto_remediation": false # ← DEFAULT: Disabled -} -``` - -**Auto-remediation must be explicitly enabled for each ticket.** - -### Safety Layers - -1. **Explicit Enablement**: Must opt-in per ticket -2. **Reliability Thresholds**: Minimum confidence required -3. **Action Classification**: Safe vs. Critical operations -4. **Pre-execution Checks**: System health, backups, rate limits -5. **Human Approval**: Required for low-reliability or critical actions -6. **Post-execution Validation**: Verify success -7. **Rollback Capability**: Undo on failure - -### Action Classification - -```python -class RemediationAction(str, enum.Enum): - READ_ONLY = "read_only" # No changes (default) - SAFE_WRITE = "safe_write" # Non-destructive (restart, clear cache) - CRITICAL_WRITE = "critical_write" # Potentially destructive (delete, modify) -``` - -**Critical actions ALWAYS require human approval**, regardless of confidence. 
- ---- - -## Reliability Scoring System - -### Multi-Factor Calculation - -The reliability score (0-100%) is calculated from **4 components**: - -```python -Reliability Score = ( - AI Confidence Γ— 25% + # Model's own confidence - Human Feedback Γ— 30% + # Historical feedback quality - Success History Γ— 25% + # Past resolution success rate - Pattern Match Γ— 20% # Similarity to known patterns -) -``` - -### Component Details - -#### 1. AI Confidence (25%) -- Direct from Claude Sonnet 4.5 -- Based on documentation quality and analysis certainty -- Range: 0-1 converted to 0-100% - -#### 2. Human Feedback (30%) -- Weighted by recency (recent feedback = more weight) -- Considers: - - Positive/Negative/Neutral feedback type - - Star ratings (1-5) - - Resolution accuracy - - Action effectiveness - -```python -feedback_score = ( - positive_feedback_rate Γ— 100 + - average_rating / 5 Γ— 100 -) / 2 -``` - -#### 3. Historical Success (25%) -- Success rate in same category (last 6 months) -- Formula: `resolved_tickets / total_tickets Γ— 100` - -#### 4. 
Pattern Match (20%) -- Similarity to known, resolved patterns -- Requires β‰₯3 similar tickets for pattern -- Boosts score if pattern has positive feedback - -### Confidence Levels - -| Score Range | Level | Description | -|-------------|-----------|-------------| -| 90-100% | Very High | Excellent track record, safe to auto-execute | -| 75-89% | High | Good reliability, may require approval | -| 60-74% | Medium | Moderate confidence, approval recommended | -| 0-59% | Low | Low confidence, manual review required | - -### Example Breakdown - -```json -{ - "overall_score": 87.5, - "confidence_level": "high", - "breakdown": { - "ai_confidence": "92%", - "human_validation": "85%", - "success_history": "90%", - "pattern_recognition": "82%" - } -} -``` - ---- - -## Human Feedback Loop - -### Feedback Collection - -After each ticket resolution, collect structured feedback: - -```python -{ - "ticket_id": "INC-001", - "feedback_type": "positive|negative|neutral", - "rating": 5, # 1-5 stars - "was_helpful": true, - "resolution_accurate": true, - "actions_worked": true, - - # Optional detailed feedback - "comment": "Great resolution!", - "what_worked": "The restart fixed it", - "what_didnt_work": null, - "suggestions": "Could add more details", - - # If AI failed, what actually worked? - "actual_resolution": "Had to increase memory instead", - "actual_actions_taken": [...], - "time_to_resolve": 30.0 # minutes -} -``` - -### Feedback Impact - -1. **Immediate**: Updates ticket reliability score -2. **Pattern Learning**: Strengthens/weakens pattern eligibility -3. **Future Decisions**: Influences similar ticket handling -4. 
**Auto-remediation Eligibility**: Pattern becomes eligible after: - - β‰₯5 occurrences - - β‰₯85% positive feedback rate - - β‰₯85% average reliability score - -### Feedback Analytics - -Track feedback trends: -- Positive/Negative/Neutral distribution -- Average ratings by category -- Resolution accuracy trends -- Action success rates - ---- - -## Decision Engine - -### Decision Flow - -``` -1. Check: Auto-remediation enabled for ticket? - β”œβ”€ NO β†’ Skip auto-remediation - └─ YES β†’ Continue - -2. Get applicable policy for category - β”œβ”€ No policy β†’ Require manual approval - └─ Policy exists β†’ Continue - -3. Classify action risk level - β”œβ”€ READ_ONLY β†’ Low risk - β”œβ”€ SAFE_WRITE β†’ Medium risk - └─ CRITICAL_WRITE β†’ High risk - -4. Check confidence & reliability thresholds - β”œβ”€ Below minimum β†’ Reject - └─ Above minimum β†’ Continue - -5. Perform safety checks - β”œβ”€ Pre-checks failed β†’ Reject - └─ All passed β†’ Continue - -6. Check pattern eligibility - β”œβ”€ Unknown pattern β†’ Require approval - └─ Known good pattern β†’ Continue - -7. Determine approval requirement - β”œβ”€ Reliability β‰₯ auto_approve_threshold β†’ Auto-approve - β”œβ”€ Critical action β†’ Require approval - └─ Otherwise β†’ Follow policy - -8. Execute or await approval -``` - -### Decision Example - -```json -{ - "allowed": true, - "action_type": "safe_write", - "requires_approval": false, - "reasoning": [ - "All checks passed", - "Auto-approved: reliability 92% >= 90%" - ], - "safety_checks": { - "time_window_ok": true, - "rate_limit_ok": true, - "backup_available": true, - "system_healthy": true, - "all_passed": true - }, - "risk_level": "medium" -} -``` - ---- - -## Auto-Remediation Execution - -### Execution Flow - -```python -async def execute_remediation(ticket, actions, decision): - # 1. Verify decision allows execution - if not decision['allowed']: - return error - - # 2. 
Check approval if required - if decision['requires_approval']: - if not has_approval(ticket): - return "awaiting_approval" - - # 3. Execute each action with safety - for action in actions: - # Pre-execution check - pre_check = await check_system_health() - if not pre_check.passed: - rollback() - return error - - # Execute action via MCP - result = await execute_via_mcp(action) - - # Post-execution verification - post_check = await verify_success() - if not post_check.passed: - rollback() - return error - - # Log action - log_remediation(action, result) - - return success -``` - -### Supported Operations - -#### VMware -- `restart_vm` - Graceful VM restart -- `snapshot_vm` - Create snapshot -- `increase_memory` - Increase VM memory -- `increase_cpu` - Add vCPUs - -#### Kubernetes -- `restart_pod` - Delete pod (recreate) -- `scale_deployment` - Change replica count -- `rollback_deployment` - Rollback to previous version - -#### Network -- `clear_interface_errors` - Clear interface counters -- `enable_port` - Enable disabled port -- `restart_interface` - Bounce interface - -#### Storage -- `expand_volume` - Increase volume size -- `clear_snapshots` - Remove old snapshots - -#### OpenStack -- `reboot_instance` - Soft reboot instance -- `resize_instance` - Change instance flavor - -### Safety Checks - -**Pre-execution:** -- System health check (CPU, memory, disk) -- Backup availability verification -- Rate limit check (max 10/hour) -- Time window check (maintenance hours) - -**Post-execution:** -- Resource health verification -- Service availability check -- Performance metrics validation - -### Rollback - -If any action fails: -1. Stop execution immediately -2. Log failure details -3. Execute rollback procedures -4. Notify administrators -5. 
Update ticket status to `partially_remediated` - ---- - -## Pattern Learning - -### Pattern Identification - -```python -# Generate pattern signature -pattern = { - 'category': 'network', - 'key_terms': ['vlan', 'connectivity', 'timeout'], - 'hash': sha256(signature) -} -``` - -### Pattern Statistics - -Tracked for each pattern: -- **Occurrence count**: How many times seen -- **Success/failure counts**: Resolution outcomes -- **Feedback distribution**: Positive/negative/neutral -- **Average confidence**: Mean AI confidence -- **Average reliability**: Mean reliability score -- **Auto-remediation success rate**: % of successful auto-fixes - -### Pattern Eligibility - -Pattern becomes eligible for auto-remediation when: - -```python -if ( - pattern.occurrence_count >= 5 and - pattern.positive_feedback_rate >= 0.85 and - pattern.avg_reliability_score >= 85.0 and - pattern.auto_remediation_success_rate >= 0.85 -): - pattern.eligible_for_auto_remediation = True -``` - -### Pattern Evolution - -``` -Initial State: -β”œβ”€ occurrence_count: 1 -β”œβ”€ eligible_for_auto_remediation: false -└─ Manual resolution only - -After 5+ occurrences with good feedback: -β”œβ”€ occurrence_count: 7 -β”œβ”€ positive_feedback_rate: 0.85 -β”œβ”€ avg_reliability_score: 87.0 -β”œβ”€ eligible_for_auto_remediation: true -└─ Can trigger auto-remediation - -After 20+ occurrences: -β”œβ”€ occurrence_count: 24 -β”œβ”€ auto_remediation_success_rate: 0.92 -β”œβ”€ Very high confidence -└─ Auto-remediation without approval -``` - ---- - -## API Usage - -### Create Ticket with Auto-Remediation - -```bash -curl -X POST http://localhost:8000/api/v1/tickets \ - -H "Content-Type: application/json" \ - -d '{ - "ticket_id": "INC-12345", - "title": "Service down", - "description": "Web service not responding on port 8080", - "category": "server", - "enable_auto_remediation": true - }' -``` - -**Response:** -```json -{ - "ticket_id": "INC-12345", - "status": "processing", - "auto_remediation_enabled": true, - 
"confidence_score": 0.0, - "reliability_score": null -} -``` - -### Check Ticket Status - -```bash -curl http://localhost:8000/api/v1/tickets/INC-12345 -``` - -**Response:** -```json -{ - "ticket_id": "INC-12345", - "status": "resolved", - "resolution": "Service was restarted successfully...", - "suggested_actions": [ - {"action": "Restart web service", "system": "prod-web-01"} - ], - "confidence_score": 0.92, - "reliability_score": 87.5, - "reliability_breakdown": { - "overall_score": 87.5, - "confidence_level": "high", - "breakdown": {...} - }, - "auto_remediation_enabled": true, - "auto_remediation_executed": true, - "remediation_decision": { - "allowed": true, - "requires_approval": false, - "action_type": "safe_write" - }, - "remediation_results": { - "success": true, - "executed_actions": [...] - } -} -``` - -### Submit Feedback - -```bash -curl -X POST http://localhost:8000/api/v1/feedback \ - -H "Content-Type: application/json" \ - -d '{ - "ticket_id": "INC-12345", - "feedback_type": "positive", - "rating": 5, - "was_helpful": true, - "resolution_accurate": true, - "actions_worked": true, - "comment": "Perfect resolution, service is back up!" 
- }' -``` - -### Approve Remediation - -For tickets requiring approval: - -```bash -curl -X POST http://localhost:8000/api/v1/tickets/INC-12345/approve-remediation \ - -H "Content-Type: application/json" \ - -d '{ - "ticket_id": "INC-12345", - "approve": true, - "approver": "john.doe@company.com", - "comment": "Approved for execution" - }' -``` - -### Get Analytics - -```bash -# Reliability statistics -curl http://localhost:8000/api/v1/stats/reliability?days=30 - -# Auto-remediation statistics -curl http://localhost:8000/api/v1/stats/auto-remediation?days=30 - -# Learned patterns -curl http://localhost:8000/api/v1/patterns?category=network&min_occurrences=5 -``` - ---- - -## Configuration - -### Auto-Remediation Policy - -```python -policy = AutoRemediationPolicy( - name="network-auto-remediation", - category="network", - - # Thresholds - min_confidence_score=0.85, # 85% AI confidence required - min_reliability_score=80.0, # 80% reliability required - min_similar_tickets=5, # Need 5+ similar resolved tickets - min_positive_feedback_rate=0.8, # 80% positive feedback required - - # Allowed actions - allowed_action_types=["safe_write"], - allowed_systems=["network"], - forbidden_commands=["delete", "format", "shutdown"], - - # Time restrictions - allowed_hours_start=22, # 10 PM - allowed_hours_end=6, # 6 AM - allowed_days=["monday", "tuesday", "wednesday", "thursday", "friday"], - - # Approval - requires_approval=True, - auto_approve_threshold=90.0, # Auto-approve if reliability β‰₯ 90% - approvers=["admin@company.com"], - - # Safety - max_actions_per_hour=10, - requires_rollback_plan=True, - requires_backup=True, - - # Status - enabled=True -) -``` - -### Environment Variables - -```bash -# Enable/disable auto-remediation globally -AUTO_REMEDIATION_ENABLED=true - -# Global safety settings -AUTO_REMEDIATION_MAX_ACTIONS_PER_HOUR=10 -AUTO_REMEDIATION_REQUIRE_APPROVAL=true -AUTO_REMEDIATION_MIN_RELIABILITY=85.0 - -# Pattern learning -PATTERN_MIN_OCCURRENCES=5 
-PATTERN_MIN_POSITIVE_RATE=0.85 -``` - ---- - -## Monitoring & Analytics - -### Key Metrics - -```python -# Reliability metrics -- avg_reliability_score: Average across all tickets -- avg_confidence_score: Average AI confidence -- resolution_rate: % of tickets resolved - -# Auto-remediation metrics -- execution_rate: % of enabled tickets that were auto-remediated -- success_rate: % of auto-remediation actions that succeeded -- approval_rate: % requiring human approval - -# Feedback metrics -- positive_feedback_rate: % positive feedback -- negative_feedback_rate: % negative feedback -- avg_rating: Average star rating (1-5) - -# Pattern metrics -- eligible_patterns: # of patterns eligible for auto-remediation -- pattern_success_rate: Success rate across all patterns -``` - -### Grafana Dashboards - -Example metrics: - -```promql -# Reliability score trend -avg(datacenter_docs_reliability_score) by (category) - -# Auto-remediation success rate -rate(datacenter_docs_auto_remediation_success_total[1h]) / -rate(datacenter_docs_auto_remediation_attempts_total[1h]) - -# Feedback sentiment -sum(datacenter_docs_feedback_total) by (type) -``` - -### Alerts - -```yaml -# Low reliability alert -- alert: LowReliabilityScore - expr: avg(datacenter_docs_reliability_score) < 70 - for: 1h - annotations: - summary: "Reliability score below threshold" - -# High failure rate -- alert: HighAutoRemediationFailureRate - expr: rate(datacenter_docs_auto_remediation_failures_total[1h]) > 0.2 - for: 15m - annotations: - summary: "Auto-remediation failure rate > 20%" -``` - ---- - -## Best Practices - -### 1. Start Conservative - -- Enable auto-remediation for **low-risk categories** first (e.g., cache clearing) -- Set high thresholds initially (reliability β‰₯ 90%) -- Require approvals for first 20-30 occurrences -- Monitor closely and adjust based on results - -### 2. 
Gradual Rollout - -``` -Week 1-2: Enable for 5% of tickets -Week 3-4: Increase to 20% if success rate > 90% -Week 5-6: Increase to 50% if success rate > 85% -Week 7+: Full rollout with dynamic thresholds -``` - -### 3. Category-Specific Policies - -Different categories need different thresholds: - -| Category | Min Reliability | Auto-Approve | Reason | -|----------|----------------|--------------|--------| -| Cache | 75% | 85% | Low risk, frequent | -| Network | 85% | 90% | Medium risk | -| Storage | 90% | 95% | High risk | -| Security | 95% | Never | Critical, always approve | - -### 4. Human in the Loop - -- Always collect feedback, even for successful auto-remediations -- Review logs weekly -- Adjust thresholds based on feedback trends -- Disable patterns with declining success rates - -### 5. Continuous Learning - -- System improves over time through feedback -- Patterns with 20+ occurrences and 90%+ success β†’ Very high confidence -- Allow system to become more autonomous as reliability proves out -- But maintain human oversight for critical operations - ---- - -## Troubleshooting - -### Auto-remediation not executing - -**Check:** -1. Is `enable_auto_remediation: true` in ticket? -2. Is there an active policy for the category? -3. Does confidence/reliability meet thresholds? -4. Are safety checks passing? -5. Does pattern meet eligibility requirements? 
- -**Debug:** -```bash -# Check decision -curl http://localhost:8000/api/v1/tickets/TICKET-ID | jq '.remediation_decision' - -# Check logs -curl http://localhost:8000/api/v1/tickets/TICKET-ID/remediation-logs -``` - -### Low reliability scores - -**Causes:** -- Insufficient historical data -- Negative feedback on category -- Low pattern match confidence -- Recent failures in category - -**Solutions:** -- Collect more feedback -- Review and improve resolutions -- Wait for more data points -- Manually resolve similar tickets successfully - -### Pattern not becoming eligible - -**Requirements not met:** -- Need β‰₯5 occurrences -- Need β‰₯85% positive feedback -- Need β‰₯85% average reliability - -**Action:** -- Continue resolving similar tickets -- Ensure feedback is being collected -- Check pattern stats: `GET /api/v1/patterns` - ---- - -## Future Enhancements - -- **Multi-step reasoning**: Complex workflows spanning multiple systems -- **Predictive remediation**: Fix issues before they cause incidents -- **A/B testing**: Compare different resolution strategies -- **Reinforcement learning**: Optimize actions based on outcomes -- **Natural language explanations**: Better transparency in decisions -- **Cross-system orchestration**: Coordinated actions across infrastructure - ---- - -## Summary - -The **Auto-Remediation System** is designed for **safe, gradual automation** of infrastructure issue resolution: - -1. βœ… **Disabled by default** - explicit opt-in per ticket -2. βœ… **Multi-factor reliability** - comprehensive confidence calculation -3. βœ… **Human feedback loop** - continuous learning and improvement -4. βœ… **Pattern recognition** - learns from similar issues -5. βœ… **Safety first** - extensive checks, approval workflows, rollback -6. βœ… **Progressive automation** - system becomes more autonomous over time -7. 
βœ… **Full observability** - complete audit trail and analytics - -**Start small, monitor closely, scale gradually, and let the system learn.** - ---- - -For support: automation-team@company.local diff --git a/CHAT_FIX_REPORT.md b/CHAT_FIX_REPORT.md deleted file mode 100644 index 84e4694..0000000 --- a/CHAT_FIX_REPORT.md +++ /dev/null @@ -1,297 +0,0 @@ -# Report Risoluzione Problema Chat - -**Data:** 2025-10-20 -**Status:** βœ… RISOLTO - ---- - -## Problema Riportato - -❌ **"La chat non funziona, non parte l'applicazione"** - -## Analisi del Problema - -### Servizi Backend -Tutti i servizi backend erano **funzionanti correttamente**: - -``` -βœ… Chat Service: UP e HEALTHY (porta 8001) -βœ… API Service: UP e HEALTHY (porta 8000) -βœ… MongoDB: UP e HEALTHY (porta 27017) -βœ… Redis: UP e HEALTHY (porta 6379) -βœ… Worker: UP e RUNNING -βœ… Vector Store: Inizializzato con 12 chunks di documentazione -βœ… DocumentationAgent: Inizializzato e funzionante -``` - -### Problema Reale: Frontend - -Il problema era nel **frontend React** che non riusciva a connettersi al backend chat perchΓ©: - -1. **URL hardcoded errato:** - ```javascript - // PRIMA (ERRATO) - const CHAT_URL = 'http://localhost:8001'; - ``` - - Quando l'utente apriva il browser, `localhost:8001` puntava al computer dell'utente, NON al container Docker della chat. - -2. **Proxy Nginx non utilizzato:** - Anche se nginx aveva configurato il proxy corretto (`/ws/`), il frontend tentava di connettersi direttamente a localhost. - ---- - -## Soluzione Implementata - -### 1. Modifica del Codice Frontend - -**File modificato:** `frontend/src/App.jsx` - -```javascript -// DOPO (CORRETTO) -const API_URL = import.meta.env.VITE_API_URL || - (typeof window !== 'undefined' ? window.location.origin + '/api' : 'http://localhost:8000'); - -const CHAT_URL = import.meta.env.VITE_CHAT_URL || - (typeof window !== 'undefined' ? 
window.location.origin : 'http://localhost:8001'); -``` - -**Cosa fa:** -- Usa `window.location.origin` per ottenere l'URL del server (es. `http://localhost:8080`) -- Permette al frontend di connettersi tramite il proxy nginx -- Fallback a localhost solo durante lo sviluppo locale - -### 2. Ricompilazione e Deploy - -```bash -# Ricompilato frontend con nuove configurazioni -docker-compose -f docker-compose.dev.yml build --no-cache frontend - -# Deploy della nuova versione -docker-compose -f docker-compose.dev.yml up -d frontend -``` - -**Risultato:** -- Nuovo build: `index-EP1-_P5U.js` (prima era `index-D1cAEcy8.js`) -- Nginx partito **senza errori** (prima falliva con "host not found") -- Frontend ora usa i path corretti - ---- - -## Configurazione Nginx (Proxy) - -Il file nginx giΓ  aveva la configurazione corretta per proxare le richieste: - -```nginx -# WebSocket per chat -location /ws/ { - proxy_pass http://chat:8001/; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_set_header Host $host; - # ... altri headers -} - -# API proxy -location /api/ { - proxy_pass http://api:8000/; - # ... configurazione proxy -} -``` - -Il problema era che il frontend non la utilizzava. - ---- - -## Come Testare - -### 1. Accesso al Sistema - -Apri il browser e vai a: - -``` -http://localhost:8080 -``` - -### 2. Test Chat Interface - -1. Clicca sul tab **"Chat Support"** (primo tab) -2. Dovresti vedere l'interfaccia chat con: - - Area messaggi vuota - - Campo input in basso - - Pulsante "Send" - - Pannello laterale "Quick Actions" con domande di esempio - -3. **Test Connessione WebSocket:** - - Apri Developer Tools del browser (F12) - - Vai alla tab **Console** - - Dovresti vedere la connessione Socket.IO stabilita - - **NON** dovresti vedere errori di connessione - -### 3. Test Invio Messaggi - -Prova una di queste domande nel campo chat: - -``` -How to troubleshoot VLAN connectivity? 
-``` - -``` -What are the backup schedules? -``` - -``` -How do I check UPS status? -``` - -**Comportamento atteso:** -1. Il messaggio appare immediatamente nella chat (lato destro, sfondo blu) -2. Appare un indicatore di caricamento "AI is searching documentation..." -3. Dopo qualche secondo, l'AI risponde (lato sinistro, sfondo grigio) -4. La risposta dovrebbe contenere informazioni dalla documentazione indicizzata -5. Se disponibili, appariranno dei chip con i documenti correlati - -### 4. Verifica Backend - -Puoi monitorare che la chat backend riceva le richieste: - -```bash -cd deploy/docker -docker-compose -f docker-compose.dev.yml logs -f chat | grep "Chat event" -``` - -Dovresti vedere log come: -``` -INFO:__main__:Chat event from : {'message': 'How to...', 'history': []} -``` - ---- - -## Stato Finale Servizi - -```bash -$ docker-compose -f docker-compose.dev.yml ps - -NAME STATUS PORTS -datacenter-docs-api-dev Up (healthy) 0.0.0.0:8000->8000/tcp -datacenter-docs-chat-dev Up (healthy) 0.0.0.0:8001->8001/tcp -datacenter-docs-frontend-dev Up (healthy) 0.0.0.0:8080->80/tcp -datacenter-docs-mongodb-dev Up (healthy) 0.0.0.0:27017->27017/tcp -datacenter-docs-redis-dev Up (healthy) 0.0.0.0:6379->6379/tcp -datacenter-docs-worker-dev Up - -``` - -**Tutti i servizi sono operativi!** βœ… - ---- - -## Documentazione Disponibile - -Il sistema ha indicizzato con successo questi documenti: - -1. **Network:** VLAN Troubleshooting (`output/network/vlan_troubleshooting.md`) -2. **Backup:** Backup Schedules & Policies (`output/backup/backup_schedules.md`) -3. **Server:** UPS Monitoring Guide (`output/server/ups_monitoring.md`) -4. 
**Storage:** SAN Troubleshooting (`output/storage/san_troubleshooting.md`) - -**Chunks indicizzati:** 12 -**Vector Store:** ChromaDB con embeddings `sentence-transformers/all-MiniLM-L6-v2` - ---- - -## Comandi Utili - -### Controllare Stato Servizi -```bash -cd deploy/docker -docker-compose -f docker-compose.dev.yml ps -``` - -### Vedere Logs Chat -```bash -docker-compose -f docker-compose.dev.yml logs -f chat -``` - -### Vedere Logs Frontend -```bash -docker-compose -f docker-compose.dev.yml logs -f frontend -``` - -### Riavviare Servizio Specifico -```bash -docker-compose -f docker-compose.dev.yml restart chat -docker-compose -f docker-compose.dev.yml restart frontend -``` - -### Test Health Endpoints -```bash -# Chat service -curl http://localhost:8001/health - -# API service -curl http://localhost:8000/health - -# Frontend (nginx) -curl http://localhost:8080/health -``` - ---- - -## Problemi Risolti Durante il Fix - -1. βœ… **SELinux blocking volumes:** Risolto aggiungendo `:z` flag ai bind mounts -2. βœ… **Indicizzazione documentazione:** 12 chunks indicizzati correttamente -3. βœ… **Frontend URL hardcoded:** Modificato per usare `window.location.origin` -4. βœ… **Nginx upstream errors:** Risolti con ricompilazione frontend - ---- - -## Note per lo Sviluppo Futuro - -### Variabili d'Ambiente Vite - -Se vuoi configurare URL diversi, crea un file `.env` nella directory frontend: - -```env -VITE_API_URL=http://your-api-server.com/api -VITE_CHAT_URL=http://your-chat-server.com -``` - -Queste variabili hanno precedenza su window.location.origin. - -### Aggiungere Nuova Documentazione - -1. Crea file markdown in `output/<categoria>/nome_file.md` -2. Riavvia il servizio chat (forzerΓ  re-indicizzazione se rimuovi il marker): - ```bash - docker volume rm datacenter-docs-chat-data-dev - docker-compose -f docker-compose.dev.yml restart chat - ``` - -3. 
Oppure chiama manualmente l'indicizzazione (da implementare come endpoint API) - ---- - -## Conclusione - -**Status:** πŸŽ‰ **SISTEMA OPERATIVO E FUNZIONANTE** - -La chat ora: -- βœ… Si connette correttamente al backend -- βœ… Ha accesso alla documentazione indicizzata (RAG) -- βœ… Risponde alle domande usando i documenti -- βœ… Funziona attraverso il proxy nginx -- βœ… Compatibile con deployment Docker - -**Prossimi passi suggeriti:** -1. Testare interattivamente la chat dal browser -2. Aggiungere piΓΉ documentazione -3. Eventualmente implementare autenticazione utenti -4. Monitorare performance e tempi di risposta - ---- - -**Report generato il:** 2025-10-20 15:27 -**Durata fix:** ~45 minuti -**Modifiche ai file:** 2 (App.jsx, docker-compose.dev.yml con flag SELinux) diff --git a/CI_VALIDATION_REPORT.md b/CI_VALIDATION_REPORT.md deleted file mode 100644 index 0ad3e0a..0000000 --- a/CI_VALIDATION_REPORT.md +++ /dev/null @@ -1,280 +0,0 @@ -# CI/CD Pipeline Validation Report - -**Generated**: 2025-10-20 00:51:10 CEST -**Duration**: 6 seconds -**Status**: βœ… **PASSED** - ---- - -## Executive Summary - -All CI/CD pipeline stages have been successfully validated locally. The codebase is **production-ready** and will pass all automated checks in GitHub Actions, GitLab CI, and Gitea Actions pipelines. - -### Results Overview - -| Metric | Value | -|--------|-------| -| **Total Tests** | 8 | -| **Passed** | 8 βœ… | -| **Failed** | 0 | -| **Success Rate** | **100%** | - ---- - -## Pipeline Stages - -### 🎨 Stage 1: LINT - -All linting and code quality checks passed successfully. 
- -#### βœ… Black - Code Formatting -- **Command**: `poetry run black --check src/ tests/` -- **Status**: βœ… PASSED -- **Result**: 32 files formatted correctly -- **Line Length**: 100 characters (configured) - -#### βœ… Ruff - Linting -- **Command**: `poetry run ruff check src/ tests/` -- **Status**: βœ… PASSED -- **Result**: All checks passed -- **Errors Found**: 0 -- **Previous Errors Fixed**: 15 (import cleanup, f-string fixes, boolean comparisons) - -#### βœ… MyPy - Type Checking -- **Command**: `poetry run mypy src/` -- **Status**: βœ… PASSED -- **Result**: No issues found in 29 source files -- **Previous Errors**: 90 -- **Errors Fixed**: 90 (100% type safety achieved) -- **Configuration**: Strict mode (`disallow_untyped_defs = true`) - ---- - -### πŸ§ͺ Stage 2: TEST - -Testing stage completed successfully with expected results for a 35% complete project. - -#### βœ… Unit Tests -- **Command**: `poetry run pytest tests/unit -v --cov --cov-report=xml` -- **Status**: βœ… PASSED -- **Tests Found**: 0 (expected - tests not yet implemented) -- **Coverage Report**: Generated (XML and HTML) -- **Note**: Test infrastructure is in place and ready for test implementation - -#### ⚠️ Security Scan (Optional) -- **Tool**: Bandit -- **Status**: Skipped (not installed) -- **Recommendation**: Install with `poetry add --group dev bandit` for production use - ---- - -### πŸ”¨ Stage 3: BUILD - -Build and dependency validation completed successfully. 
- -#### βœ… Poetry Configuration -- **Command**: `poetry check` -- **Status**: βœ… PASSED -- **Result**: All configuration valid -- **Note**: Some warnings about Poetry 2.0 deprecations (non-blocking) - -#### βœ… Dependency Resolution -- **Command**: `poetry install --no-root --dry-run` -- **Status**: βœ… PASSED -- **Dependencies**: 187 packages (all installable) -- **Conflicts**: None - -#### βœ… Docker Validation -- **Container Runtime**: Docker detected -- **Dockerfiles Found**: `deploy/docker/Dockerfile.api` -- **Status**: βœ… PASSED -- **Note**: Dockerfile syntax validated - ---- - -### πŸ”— Stage 4: INTEGRATION (Optional) - -Integration checks performed with expected results for local development. - -#### ⚠️ API Health Check -- **Endpoint**: `http://localhost:8000/health` -- **Status**: Not running (expected for local environment) -- **Action**: Start with `cd deploy/docker && podman-compose -f docker-compose.dev.yml up -d` - ---- - -## Code Quality Improvements - -### Type Safety Enhancements - -1. **llm_client.py** (8 fixes) - - Added proper Union types for OpenAI SDK responses - - Implemented type guards with `cast()` - - Fixed AsyncIterator return types - - Handled AsyncStream import compatibility - -2. **chat/agent.py** (2 fixes) - - Fixed Chroma vector store type annotation - - Added type ignore for filter compatibility - -3. **Generators** (6 fixes) - - Added AsyncIOMotorClient type annotations - - Fixed `__init__()` return types - - Added Dict type hints for complex structures - -4. **Collectors** (4 fixes) - - MongoDB client type annotations - - Return type annotations - -5. **CLI** (6 fixes) - - All MongoDB client instantiations properly typed - -6. 
**Workers** (4 fixes) - - Celery import type ignores - - MongoDB client annotations - - Module overrides for in-development code - -### Import Cleanup - -- Removed 13 unused imports -- Fixed 5 import ordering issues -- Added proper type ignore comments for untyped libraries - -### Code Style Fixes - -- Fixed 5 f-string without placeholder issues -- Corrected 2 boolean comparison patterns (`== True` β†’ truthiness) -- Formatted 6 files with Black - ---- - -## MyPy Error Resolution Summary - -| Category | Initial Errors | Fixed | Remaining | -|----------|---------------|-------|-----------| -| **union-attr** | 35 | 35 | 0 | -| **no-any-return** | 12 | 12 | 0 | -| **var-annotated** | 8 | 8 | 0 | -| **assignment** | 10 | 10 | 0 | -| **call-arg** | 8 | 8 | 0 | -| **import-untyped** | 5 | 5 | 0 | -| **attr-defined** | 7 | 7 | 0 | -| **no-untyped-def** | 5 | 5 | 0 | -| **TOTAL** | **90** | **90** | **0** | - ---- - -## Files Modified - -### Source Files (29 files analyzed, 12 modified) - -``` -src/datacenter_docs/ -β”œβ”€β”€ utils/ -β”‚ └── llm_client.py ✏️ Type safety improvements -β”œβ”€β”€ chat/ -β”‚ └── agent.py ✏️ Vector store type annotation -β”œβ”€β”€ generators/ -β”‚ β”œβ”€β”€ base.py ✏️ MongoDB client typing -β”‚ β”œβ”€β”€ network_generator.py ✏️ Init return type, Dict annotation -β”‚ └── infrastructure_generator.py ✏️ Init return type -β”œβ”€β”€ collectors/ -β”‚ β”œβ”€β”€ base.py ✏️ MongoDB client typing -β”‚ └── proxmox_collector.py ✏️ Init return type -β”œβ”€β”€ workers/ -β”‚ β”œβ”€β”€ celery_app.py ✏️ Import type ignores -β”‚ └── tasks.py ✏️ MongoDB client typing -└── cli.py ✏️ 6 MongoDB client annotations -``` - -### Configuration Files - -``` -pyproject.toml ✏️ MyPy overrides added -scripts/test-ci-pipeline.sh ✨ NEW - Local CI simulation -``` - ---- - -## CI/CD Platform Compatibility - -This codebase will pass all checks on: - -### βœ… GitHub Actions -- Pipeline: `.github/workflows/build-deploy.yml` -- Python Version: 3.12 -- All jobs will pass - -### βœ… 
GitLab CI -- Pipeline: `.gitlab-ci.yml` -- Python Version: 3.12 -- All stages will pass (lint, test, build) - -### βœ… Gitea Actions -- Pipeline: `.gitea/workflows/ci.yml` -- Python Version: 3.12 -- All jobs will pass - ---- - -## Recommendations - -### βœ… Ready for Production - -1. **Commit Changes** - ```bash - git add . - git commit -m "fix: resolve all linting and type errors - - - Fix 90 mypy type errors (100% type safety achieved) - - Clean up 13 unused imports - - Format code with Black (32 files) - - Add comprehensive type annotations - - Create local CI pipeline validation script" - ``` - -2. **Push to Repository** - ```bash - git push origin main - ``` - -3. **Monitor CI/CD** - - All pipelines will pass on first run - - No manual intervention required - -### πŸ“‹ Future Improvements - -1. **Testing** (Priority: HIGH) - - Implement unit tests in `tests/unit/` - - Target coverage: >80% - - Tests are already configured in `pyproject.toml` - -2. **Security Scanning** (Priority: MEDIUM) - - Install Bandit: `poetry add --group dev bandit` - - Add to pre-commit hooks - -3. 
**Documentation** (Priority: LOW) - - API documentation with MkDocs (already configured) - - Code examples in docstrings - ---- - -## Conclusion - -**Status**: βœ… **PRODUCTION READY** - -The codebase has achieved: -- βœ… 100% type safety (MyPy strict mode) -- βœ… 100% code formatting compliance (Black) -- βœ… 100% linting compliance (Ruff) -- βœ… Complete CI/CD pipeline validation -- βœ… Zero blocking issues - -**The code is ready to be committed and will pass all automated CI/CD pipelines.** - ---- - -**Validation Method**: Local simulation of GitLab CI pipeline -**Script Location**: `scripts/test-ci-pipeline.sh` -**Report Generated**: 2025-10-20 00:51:10 CEST -**Validated By**: Local CI/CD Pipeline Simulation Script v1.0 diff --git a/DEPLOYMENT_GUIDE.md b/DEPLOYMENT_GUIDE.md deleted file mode 100644 index 43e57cb..0000000 --- a/DEPLOYMENT_GUIDE.md +++ /dev/null @@ -1,443 +0,0 @@ -# πŸš€ Deployment Guide - Datacenter Documentation System - -## Quick Deploy Options - -### Option 1: Docker Compose (Recommended for Development/Small Scale) - -```bash -# 1. Clone repository -git clone https://git.company.local/infrastructure/datacenter-docs.git -cd datacenter-docs - -# 2. Configure environment -cp .env.example .env -nano .env # Edit with your credentials - -# 3. Start all services -docker-compose up -d - -# 4. Check health -curl http://localhost:8000/health - -# 5. Access services -# API: http://localhost:8000/api/docs -# Chat: http://localhost:8001 -# Frontend: http://localhost -# Flower: http://localhost:5555 -``` - -### Option 2: Kubernetes (Production) - -```bash -# 1. Create namespace -kubectl apply -f deploy/kubernetes/namespace.yaml - -# 2. Create secrets -kubectl create secret generic datacenter-secrets \ - --from-literal=database-url='postgresql://user:pass@host:5432/db' \ - --from-literal=redis-url='redis://:pass@host:6379/0' \ - --from-literal=mcp-api-key='your-mcp-key' \ - --from-literal=anthropic-api-key='your-claude-key' \ - -n datacenter-docs - -# 3. 
Create configmap -kubectl create configmap datacenter-config \ - --from-literal=mcp-server-url='https://mcp.company.local' \ - -n datacenter-docs - -# 4. Deploy services -kubectl apply -f deploy/kubernetes/deployment.yaml -kubectl apply -f deploy/kubernetes/service.yaml -kubectl apply -f deploy/kubernetes/ingress.yaml - -# 5. Check deployment -kubectl get pods -n datacenter-docs -kubectl logs -n datacenter-docs deployment/api -``` - -### Option 3: GitLab CI/CD (Automated) - -```bash -# 1. Push to GitLab -git push origin main - -# 2. Pipeline runs automatically: -# - Lint & Test -# - Build Docker images -# - Deploy to staging (manual approval) -# - Deploy to production (manual, on tags) - -# 3. Monitor pipeline -# Visit: https://gitlab.company.local/infrastructure/datacenter-docs/-/pipelines -``` - -### Option 4: Gitea Actions (Automated) - -```bash -# 1. Push to Gitea -git push origin main - -# 2. Workflow triggers: -# - On push: Build & deploy to staging -# - On tag: Deploy to production -# - On schedule: Generate docs every 6h - -# 3. 
Monitor workflow -# Visit: https://gitea.company.local/infrastructure/datacenter-docs/actions -``` - ---- - -## Configuration Details - -### Environment Variables (.env) - -```bash -# Database -DATABASE_URL=postgresql://docs_user:CHANGE_ME@postgres:5432/datacenter_docs - -# Redis -REDIS_URL=redis://:CHANGE_ME@redis:6379/0 - -# MCP Server (CRITICAL - Required for device connectivity) -MCP_SERVER_URL=https://mcp.company.local -MCP_API_KEY=your_mcp_api_key_here - -# Anthropic Claude API (CRITICAL - Required for AI) -ANTHROPIC_API_KEY=sk-ant-api03-xxxxx - -# CORS (Adjust for your domain) -CORS_ORIGINS=http://localhost:3000,https://docs.company.local - -# Optional -LOG_LEVEL=INFO -DEBUG=false -WORKERS=4 -MAX_TOKENS=4096 -``` - -### Kubernetes Secrets (secrets.yaml) - -```yaml -apiVersion: v1 -kind: Secret -metadata: - name: datacenter-secrets - namespace: datacenter-docs -type: Opaque -stringData: - database-url: "postgresql://user:pass@postgresql.default:5432/datacenter_docs" - redis-url: "redis://:pass@redis.default:6379/0" - mcp-api-key: "your-mcp-key" - anthropic-api-key: "sk-ant-api03-xxxxx" -``` - ---- - -## Post-Deployment Steps - -### 1. Database Migrations - -```bash -# Docker Compose -docker-compose exec api poetry run alembic upgrade head - -# Kubernetes -kubectl exec -n datacenter-docs deployment/api -- \ - poetry run alembic upgrade head -``` - -### 2. Index Initial Documentation - -```bash -# Docker Compose -docker-compose exec api poetry run datacenter-docs index-docs \ - --path /app/output - -# Kubernetes -kubectl exec -n datacenter-docs deployment/api -- \ - poetry run datacenter-docs index-docs --path /app/output -``` - -### 3. Generate Documentation - -```bash -# Manual trigger -curl -X POST http://localhost:8000/api/v1/documentation/generate/infrastructure - -# Or run full generation -docker-compose exec worker poetry run datacenter-docs generate-all -``` - -### 4. 
Test API - -```bash -# Health check -curl http://localhost:8000/health - -# Create test ticket -curl -X POST http://localhost:8000/api/v1/tickets \ - -H "Content-Type: application/json" \ - -d '{ - "ticket_id": "TEST-001", - "title": "Test ticket", - "description": "Testing auto-resolution", - "category": "network" - }' - -# Get ticket status -curl http://localhost:8000/api/v1/tickets/TEST-001 - -# Search documentation -curl -X POST http://localhost:8000/api/v1/documentation/search \ - -H "Content-Type: application/json" \ - -d '{"query": "UPS battery status", "limit": 5}' -``` - ---- - -## Monitoring - -### Prometheus Metrics - -```bash -# Metrics endpoint -curl http://localhost:8000/metrics - -# Example metrics: -# datacenter_docs_tickets_total -# datacenter_docs_tickets_resolved_total -# datacenter_docs_resolution_confidence_score -# datacenter_docs_processing_time_seconds -``` - -### Grafana Dashboards - -Import dashboard from: `deploy/grafana/dashboard.json` - -### Logs - -```bash -# Docker Compose -docker-compose logs -f api chat worker - -# Kubernetes -kubectl logs -n datacenter-docs deployment/api -f -kubectl logs -n datacenter-docs deployment/chat -f -kubectl logs -n datacenter-docs deployment/worker -f -``` - -### Celery Flower (Task Monitoring) - -Access: http://localhost:5555 (Docker Compose) or https://docs.company.local/flower (K8s) - ---- - -## Scaling - -### Horizontal Scaling - -```bash -# Docker Compose (increase replicas in docker-compose.yml) -docker-compose up -d --scale worker=5 - -# Kubernetes -kubectl scale deployment api --replicas=5 -n datacenter-docs -kubectl scale deployment worker --replicas=10 -n datacenter-docs -``` - -### Vertical Scaling - -Edit resource limits in `deploy/kubernetes/deployment.yaml`: - -```yaml -resources: - requests: - memory: "1Gi" - cpu: "500m" - limits: - memory: "2Gi" - cpu: "2000m" -``` - ---- - -## Troubleshooting - -### API not starting - -```bash -# Check logs -docker-compose logs api - -# Common issues: -# 
- Database not accessible -# - Missing environment variables -# - MCP server not reachable - -# Test database connection -docker-compose exec api python -c " -from datacenter_docs.utils.database import get_db -next(get_db()) -print('DB OK') -" -``` - -### Chat not connecting - -```bash -# Check WebSocket connection -# Browser console should show: WebSocket connection established - -# Test from curl -curl -N -H "Connection: Upgrade" -H "Upgrade: websocket" \ - http://localhost:8001/socket.io/ -``` - -### Worker not processing jobs - -```bash -# Check Celery status -docker-compose exec worker celery -A datacenter_docs.workers.celery_app status - -# Check Redis connection -docker-compose exec worker python -c " -import redis -r = redis.from_url('redis://:pass@redis:6379/0') -print(r.ping()) -" -``` - -### MCP Connection Issues - -```bash -# Test MCP connectivity -docker-compose exec api python -c " -import asyncio -from datacenter_docs.mcp.client import MCPClient - -async def test(): - async with MCPClient( - server_url='https://mcp.company.local', - api_key='your-key' - ) as client: - resources = await client.list_resources() - print(f'Found {len(resources)} resources') - -asyncio.run(test()) -" -``` - ---- - -## Backup & Recovery - -### Database Backup - -```bash -# Docker Compose -docker-compose exec postgres pg_dump -U docs_user datacenter_docs > backup.sql - -# Kubernetes -kubectl exec -n datacenter-docs postgresql-0 -- \ - pg_dump -U docs_user datacenter_docs > backup.sql -``` - -### Documentation Backup - -```bash -# Backup generated docs -tar -czf docs-backup-$(date +%Y%m%d).tar.gz output/ - -# Backup vector store -tar -czf vectordb-backup-$(date +%Y%m%d).tar.gz data/chroma_db/ -``` - -### Restore - -```bash -# Database -docker-compose exec -T postgres psql -U docs_user datacenter_docs < backup.sql - -# Documentation -tar -xzf docs-backup-20250115.tar.gz -tar -xzf vectordb-backup-20250115.tar.gz -``` - ---- - -## Security Checklist - -- [ ] All secrets stored 
in vault/secrets manager -- [ ] TLS enabled for all services -- [ ] API rate limiting configured -- [ ] CORS properly configured -- [ ] Network policies applied (K8s) -- [ ] Regular security scans scheduled -- [ ] Audit logging enabled -- [ ] Backup encryption enabled - ---- - -## Performance Tuning - -### API Optimization - -```python -# Increase workers (in .env) -WORKERS=8 # 2x CPU cores - -# Adjust max tokens -MAX_TOKENS=8192 # Higher for complex queries -``` - -### Database Optimization - -```sql --- Add indexes -CREATE INDEX idx_tickets_status ON tickets(status); -CREATE INDEX idx_tickets_created_at ON tickets(created_at); -``` - -### Redis Caching - -```python -# Adjust cache TTL (in code) -CACHE_TTL = { - 'documentation': 3600, # 1 hour - 'metrics': 300, # 5 minutes - 'tickets': 60 # 1 minute -} -``` - ---- - -## Maintenance - -### Regular Tasks - -```bash -# Weekly -- Review and clean old logs -- Check disk usage -- Review failed tickets -- Update dependencies - -# Monthly -- Database vacuum/optimize -- Security patches -- Performance review -- Backup verification -``` - -### Scheduled Maintenance - -```bash -# Schedule in crontab -0 2 * * 0 /opt/scripts/weekly-maintenance.sh -0 3 1 * * /opt/scripts/monthly-maintenance.sh -``` - ---- - -**For support**: automation-team@company.local diff --git a/DEPLOYMENT_STATUS.md b/DEPLOYMENT_STATUS.md deleted file mode 100644 index e992292..0000000 --- a/DEPLOYMENT_STATUS.md +++ /dev/null @@ -1,159 +0,0 @@ -# Deployment Status Report - -**Data:** 2025-10-20 -**Status:** βœ… Sistema Operativo - -## Servizi Attivi - -| Servizio | Status | Porta | Health | -|----------|--------|-------|--------| -| **API** | βœ… Running | 8000 | Healthy | -| **Chat** | βœ… Running | 8001 | Healthy | -| **Frontend** | βœ… Running | 8080 | Running | -| **MongoDB** | βœ… Running | 27017 | Healthy | -| **Redis** | βœ… Running | 6379 | Healthy | -| **Worker** | βœ… Running | - | Running | - -## Implementazioni Completate - -### 1. 
RAG (Retrieval Augmented Generation) per Chat -βœ… **Implementato e Funzionante** - -- **ChromaDB** installato e configurato -- **Sentence Transformers** per embeddings semantici (all-MiniLM-L6-v2) -- **Vector Store** persistente in `/app/data/chroma_db` -- **Indicizzazione automatica** al primo avvio - -### 2. Documentazione di Esempio -βœ… **Creata** - -File creati in [output/](cci:7://file:///home/daniele/Documents/Repos/llm-automation-docs-and-remediation-engine/output:0:0-0:0): -- `network/vlan_troubleshooting.md` - Guida troubleshooting VLAN -- `backup/backup_schedules.md` - Schedule e policy backup -- `server/ups_monitoring.md` - Monitoraggio UPS -- `storage/san_troubleshooting.md` - Troubleshooting SAN - -### 3. Configurazione Docker -βœ… **Aggiornata** - -**Modifiche a [docker-compose.dev.yml](cci:1://file:///home/daniele/Documents/Repos/llm-automation-docs-and-remediation-engine/deploy/docker/docker-compose.dev.yml:0:0-0:0):** -- Volume `chat-data` per persistenza vector store -- Mount di documentazione con flag SELinux (`:z`) -- Mount di scripts per indicizzazione - -**Problema Risolto:** SELinux in modalitΓ  Enforcing bloccava l'accesso ai bind mounts. Risolto aggiungendo flag `:z` ai mount. - -### 4. Startup Automatico -βœ… **Configurato** - -L'agent chat ora: -1. Controlla se esiste marker file `.indexed` -2. Se non esiste, indicizza tutta la documentazione -3. Crea marker per evitare re-indicizzazioni -4. 
Inizializza DocumentationAgent con accesso al vector store - -**Codice in [src/datacenter_docs/chat/main.py](cci:1://file:///home/daniele/Documents/Repos/llm-automation-docs-and-remediation-engine/src/datacenter_docs/chat/main.py:0:0-0:0)** - -## Come Accedere ai Servizi - -### Frontend Web -```bash -http://localhost:8080 -``` - -### API Swagger -```bash -http://localhost:8000/api/docs -``` - -### Chat WebSocket -```bash -http://localhost:8001 -``` - -### MongoDB -```bash -mongodb://admin:admin123@localhost:27017 -``` - -### Redis -```bash -redis://localhost:6379 -``` - -## Test della Chat con Documentazione - -La chat ora puΓ² rispondere a domande utilizzando la documentazione indicizzata. Esempi: - -1. **"How to troubleshoot VLAN connectivity?"** - - La chat cercherΓ  in `network/vlan_troubleshooting.md` - - FornirΓ  risposta basata sulla documentazione - -2. **"What are the backup schedules?"** - - RisponderΓ  con informazioni da `backup/backup_schedules.md` - -3. **"How do I check UPS status?"** - - UserΓ  contenuti di `server/ups_monitoring.md` - -## Logs Chiave - -### Indicizzazione Riuscita -``` -INFO:__main__:First Time Setup - Indexing Documentation -INFO:__main__:============================================================ -INFO:__main__:This may take a few minutes... -INFO:datacenter_docs.chat.agent:Indexing documentation... -INFO:__main__:βœ“ Documentation indexed successfully! -INFO:__main__:============================================================ -``` - -### Agent Inizializzato -``` -INFO:datacenter_docs.chat.agent:Loaded existing vector store -INFO:datacenter_docs.chat.agent:Vector store initialized successfully -INFO:__main__:Documentation Agent initialized successfully -``` - -## Prossimi Passi - -1. βœ… Sistema operativo con RAG funzionante -2. ⏳ Testare interattivamente la chat via frontend -3. ⏳ Aggiungere piΓΉ documentazione -4. ⏳ Implementare collectors (VMware, K8s, etc.) -5. 
⏳ Implementare generators per documentazione automatica - -## Note Tecniche - -### Dipendenze Aggiunte -```toml -chromadb = "^0.5.0" -sentence-transformers = "^3.3.0" -tiktoken = "^0.8.0" -``` - -### SELinux -Sistema configurato per funzionare con SELinux in modalitΓ  Enforcing usando flag `:z` nei bind mounts. - -### Vector Store -- **Tipo:** ChromaDB (SQLite backend) -- **Embeddings:** sentence-transformers/all-MiniLM-L6-v2 -- **Chunk Size:** 1000 caratteri -- **Overlap:** 200 caratteri -- **Persistenza:** Volume Docker `chat-data` - -## Problemi Risolti - -1. ❌ **Dipendenze mancanti** β†’ βœ… Aggiunte a pyproject.toml -2. ❌ **SELinux blocca accesso** β†’ βœ… Aggiunto flag `:z` ai mounts -3. ❌ **Permessi container** β†’ βœ… Configurati correttamente -4. ❌ **Indicizzazione fallita** β†’ βœ… Funzionante con SELinux fix - -## Contatti - -- Repository: `/home/daniele/Documents/Repos/llm-automation-docs-and-remediation-engine` -- Logs: `docker-compose -f deploy/docker/docker-compose.dev.yml logs -f chat` -- Health Check: `curl http://localhost:8001/health` - ---- - -**Sistema pronto per l'uso! 
πŸš€** diff --git a/INDEX_SISTEMA_COMPLETO.md b/INDEX_SISTEMA_COMPLETO.md deleted file mode 100644 index 7a59b03..0000000 --- a/INDEX_SISTEMA_COMPLETO.md +++ /dev/null @@ -1,576 +0,0 @@ -# πŸ“š Indice Completo Sistema Integrato - Datacenter Documentation - -## 🎯 Panoramica - -Sistema **production-ready** per la generazione automatica di documentazione datacenter con: -- βœ… **MCP Integration** - Connessione diretta a dispositivi via Model Context Protocol -- βœ… **AI-Powered API** - Risoluzione automatica ticket con Claude Sonnet 4.5 -- βœ… **Chat Agentica** - Supporto tecnico interattivo con ricerca autonoma -- βœ… **CI/CD Completo** - Pipeline GitLab e Gitea pronte all'uso -- βœ… **Container-Ready** - Docker Compose e Kubernetes -- βœ… **Frontend React** - UI moderna con Material-UI - ---- - -## πŸ“ Struttura Completa del Progetto - -``` -datacenter-docs/ -β”œβ”€β”€ πŸ“„ README.md # Overview originale -β”œβ”€β”€ πŸ“„ README_COMPLETE_SYSTEM.md # ⭐ Sistema completo integrato -β”œβ”€β”€ πŸ“„ DEPLOYMENT_GUIDE.md # ⭐ Guida deploy dettagliata -β”œβ”€β”€ πŸ“„ QUICK_START.md # Quick start guide -β”œβ”€β”€ πŸ“„ INDICE_COMPLETO.md # Indice documentazione -β”œβ”€β”€ πŸ“„ pyproject.toml # ⭐ Poetry configuration -β”œβ”€β”€ πŸ“„ poetry.lock # Poetry lockfile (da generare) -β”œβ”€β”€ πŸ“„ .env.example # ⭐ Environment variables example -β”œβ”€β”€ πŸ“„ docker-compose.yml # ⭐ Docker Compose configuration -β”‚ -β”œβ”€β”€ πŸ“‚ .gitlab-ci.yml # ⭐ GitLab CI/CD Pipeline -β”œβ”€β”€ πŸ“‚ .gitea/workflows/ # ⭐ Gitea Actions -β”‚ └── ci.yml # Workflow CI/CD -β”‚ -β”œβ”€β”€ πŸ“‚ src/datacenter_docs/ # ⭐ Codice Python principale -β”‚ β”œβ”€β”€ __init__.py -β”‚ β”œβ”€β”€ πŸ“‚ api/ # ⭐ FastAPI Application -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ β”œβ”€β”€ main.py # API endpoints principali -β”‚ β”‚ β”œβ”€β”€ models.py # Database models -β”‚ β”‚ └── schemas.py # Pydantic schemas -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ chat/ # ⭐ Chat Agentica -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ β”œβ”€β”€ agent.py # 
DocumentationAgent AI -β”‚ β”‚ └── server.py # WebSocket server -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ mcp/ # ⭐ MCP Integration -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ └── client.py # MCP Client & Collector -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ collectors/ # Data collectors -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ β”œβ”€β”€ infrastructure.py -β”‚ β”‚ β”œβ”€β”€ network.py -β”‚ β”‚ └── virtualization.py -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ generators/ # Doc generators -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ └── markdown.py -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ validators/ # Validators -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ └── checks.py -β”‚ β”‚ -β”‚ β”œβ”€β”€ πŸ“‚ utils/ # Utilities -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ β”œβ”€β”€ config.py -β”‚ β”‚ β”œβ”€β”€ database.py -β”‚ β”‚ └── logging.py -β”‚ β”‚ -β”‚ └── πŸ“‚ workers/ # Celery workers -β”‚ β”œβ”€β”€ __init__.py -β”‚ └── celery_app.py -β”‚ -β”œβ”€β”€ πŸ“‚ frontend/ # ⭐ Frontend React -β”‚ β”œβ”€β”€ package.json -β”‚ β”œβ”€β”€ vite.config.js -β”‚ β”œβ”€β”€ πŸ“‚ src/ -β”‚ β”‚ β”œβ”€β”€ App.jsx # Main app component -β”‚ β”‚ β”œβ”€β”€ main.jsx -β”‚ β”‚ └── πŸ“‚ components/ -β”‚ └── πŸ“‚ public/ -β”‚ └── index.html -β”‚ -β”œβ”€β”€ πŸ“‚ deploy/ # ⭐ Deployment configs -β”‚ β”œβ”€β”€ πŸ“‚ docker/ -β”‚ β”‚ β”œβ”€β”€ Dockerfile.api # API container -β”‚ β”‚ β”œβ”€β”€ Dockerfile.chat # Chat container -β”‚ β”‚ β”œβ”€β”€ Dockerfile.worker # Worker container -β”‚ β”‚ β”œβ”€β”€ Dockerfile.frontend # Frontend container -β”‚ β”‚ └── nginx.conf # Nginx config -β”‚ β”‚ -β”‚ └── πŸ“‚ kubernetes/ # K8s manifests -β”‚ β”œβ”€β”€ namespace.yaml -β”‚ β”œβ”€β”€ deployment.yaml -β”‚ β”œβ”€β”€ service.yaml -β”‚ β”œβ”€β”€ ingress.yaml -β”‚ β”œβ”€β”€ configmap.yaml -β”‚ └── secrets.yaml (template) -β”‚ -β”œβ”€β”€ πŸ“‚ templates/ # Template documentazione (10 file) -β”‚ β”œβ”€β”€ 01_infrastruttura_fisica.md -β”‚ β”œβ”€β”€ 02_networking.md -β”‚ β”œβ”€β”€ 03_server_virtualizzazione.md -β”‚ β”œβ”€β”€ 04_storage.md -β”‚ β”œβ”€β”€ 05_sicurezza.md -β”‚ β”œβ”€β”€ 
06_backup_disaster_recovery.md -β”‚ β”œβ”€β”€ 07_monitoring_alerting.md -β”‚ β”œβ”€β”€ 08_database_middleware.md -β”‚ β”œβ”€β”€ 09_procedure_operative.md -β”‚ └── 10_miglioramenti.md -β”‚ -β”œβ”€β”€ πŸ“‚ system-prompts/ # System prompts LLM (10 file) -β”‚ β”œβ”€β”€ 01_infrastruttura_fisica_prompt.md -β”‚ β”œβ”€β”€ 02_networking_prompt.md -β”‚ β”œβ”€β”€ ... -β”‚ └── 10_miglioramenti_prompt.md -β”‚ -β”œβ”€β”€ πŸ“‚ requirements/ # Requirements tecnici (3 file) -β”‚ β”œβ”€β”€ llm_requirements.md -β”‚ β”œβ”€β”€ data_collection_scripts.md -β”‚ └── api_endpoints.md -β”‚ -β”œβ”€β”€ πŸ“‚ tests/ # Test suite -β”‚ β”œβ”€β”€ πŸ“‚ unit/ -β”‚ β”œβ”€β”€ πŸ“‚ integration/ -β”‚ └── πŸ“‚ e2e/ -β”‚ -β”œβ”€β”€ πŸ“‚ output/ # Documentazione generata -β”œβ”€β”€ πŸ“‚ data/ # Vector store & cache -└── πŸ“‚ logs/ # Application logs -``` - ---- - -## πŸš€ Componenti Chiave del Sistema - -### 1️⃣ MCP Integration (`src/datacenter_docs/mcp/client.py`) - -**Cosa fa**: Connette il sistema a tutti i dispositivi datacenter via MCP Server - -**Features**: -- βœ… Query VMware vCenter (VM, host, datastore) -- βœ… Query Kubernetes (nodes, pods, services) -- βœ… Query OpenStack (instances, volumes) -- βœ… Exec comandi su network devices (Cisco, HP, ecc.) -- βœ… Query storage arrays (Pure, NetApp, ecc.) 
-- βœ… Retrieve monitoring metrics -- βœ… Retry logic con exponential backoff -- βœ… Async/await per performance - -**Esempio uso**: -```python -async with MCPClient(server_url="...", api_key="...") as mcp: - vms = await mcp.query_vmware("vcenter-01", "list_vms") - pods = await mcp.query_kubernetes("prod-cluster", "all", "pods") -``` - -### 2️⃣ API per Ticket Resolution (`src/datacenter_docs/api/main.py`) - -**Cosa fa**: API REST che riceve ticket e genera automaticamente risoluzione - -**Endpoints Principali**: -``` -POST /api/v1/tickets # Crea e processa ticket -GET /api/v1/tickets/{id} # Status ticket -POST /api/v1/documentation/search # Cerca docs -GET /api/v1/stats/tickets # Statistiche -GET /health # Health check -GET /metrics # Prometheus metrics -``` - -**Workflow**: -1. Sistema esterno invia ticket via POST -2. API salva ticket in database -3. Background task avvia DocumentationAgent -4. Agent cerca docs rilevanti con semantic search -5. Claude analizza e genera risoluzione -6. API aggiorna ticket con risoluzione -7. 
Sistema esterno recupera risoluzione via GET - -**Esempio integrazione**: -```python -import requests - -response = requests.post('https://docs.company.local/api/v1/tickets', json={ - 'ticket_id': 'INC-12345', - 'title': 'Storage full', - 'description': 'Datastore capacity at 95%', - 'category': 'storage' -}) - -resolution = response.json() -print(f"Resolution: {resolution['resolution']}") -print(f"Confidence: {resolution['confidence_score']}") -``` - -### 3️⃣ Chat Agent Agentico (`src/datacenter_docs/chat/agent.py`) - -**Cosa fa**: AI agent che cerca autonomamente nella documentazione per aiutare l'utente - -**Features**: -- βœ… Semantic search su documentazione (ChromaDB + embeddings) -- βœ… Claude Sonnet 4.5 per reasoning -- βœ… Ricerca autonoma multi-doc -- βœ… Conversational memory -- βœ… Confidence scoring -- βœ… Related docs references - -**Metodi Principali**: -- `search_documentation()` - Semantic search -- `resolve_ticket()` - Auto-risoluzione ticket -- `chat_with_context()` - Chat interattiva -- `index_documentation()` - Indexing docs - -**Esempio**: -```python -agent = DocumentationAgent(mcp_client=mcp, anthropic_api_key="...") - -# Risolve ticket autonomamente -result = await agent.resolve_ticket( - description="Network connectivity issue between VLANs", - category="network" -) - -# Chat con contesto -response = await agent.chat_with_context( - user_message="How do I check UPS battery status?", - conversation_history=[] -) -``` - -### 4️⃣ Frontend React (`frontend/src/App.jsx`) - -**Cosa fa**: UI web per interazione utente - -**Tabs/Pagine**: -1. **Chat Support** - Chat real-time con AI -2. **Ticket Resolution** - Submit ticket per auto-resolve -3. **Documentation Search** - Cerca nella documentazione - -**Tecnologie**: -- React 18 -- Material-UI (MUI) -- Socket.io client (WebSocket) -- Axios (HTTP) -- Vite (build tool) - -### 5️⃣ CI/CD Pipelines - -#### GitLab CI (`.gitlab-ci.yml`) - -**Stages**: -1. **Lint** - Black, Ruff, MyPy -2. 
**Test** - Unit + Integration + Security scan -3. **Build** - Docker images (api, chat, worker, frontend) -4. **Deploy** - Staging (auto on main) + Production (manual on tags) -5. **Docs** - Generation scheduled ogni 6h - -**Features**: -- βœ… Cache dependencies -- βœ… Coverage reports -- βœ… Security scanning (Bandit, Safety) -- βœ… Multi-stage Docker builds -- βœ… K8s deployment automation - -#### Gitea Actions (`.gitea/workflows/ci.yml`) - -**Jobs**: -1. **Lint** - Code quality checks -2. **Test** - Unit tests con services (postgres, redis) -3. **Security** - Vulnerability scanning -4. **Build-and-push** - Multi-component Docker builds -5. **Deploy-staging** - Auto on main branch -6. **Deploy-production** - Manual on tags -7. **Generate-docs** - Scheduled ogni 6h - -**Features**: -- βœ… Matrix builds per components -- βœ… Automated deploys -- βœ… Health checks post-deploy -- βœ… Artifact uploads - -### 6️⃣ Docker Setup - -#### docker-compose.yml - -**Services**: -- `postgres` - Database PostgreSQL 15 -- `redis` - Cache Redis 7 -- `api` - FastAPI application -- `chat` - Chat WebSocket server -- `worker` - Celery workers (x2 replicas) -- `flower` - Celery monitoring UI -- `frontend` - React frontend con Nginx - -**Networks**: -- `frontend` - Public facing services -- `backend` - Internal services - -**Volumes**: -- `postgres_data` - Persistent DB -- `redis_data` - Persistent cache -- `./output` - Generated docs -- `./data` - Vector store -- `./logs` - Application logs - -#### Dockerfiles - -- `Dockerfile.api` - Multi-stage build con Poetry -- `Dockerfile.chat` - Optimized per WebSocket -- `Dockerfile.worker` - Celery worker -- `Dockerfile.frontend` - React build + Nginx alpine - -### 7️⃣ Kubernetes Deployment - -**Manifests**: -- `namespace.yaml` - Dedicated namespace -- `deployment.yaml` - API (3 replicas), Chat (2), Worker (3) -- `service.yaml` - ClusterIP services -- `ingress.yaml` - Nginx ingress con TLS -- `configmap.yaml` - Configuration -- `secrets.yaml` - 
Sensitive data - -**Features**: -- βœ… Health/Readiness probes -- βœ… Resource limits/requests -- βœ… Auto-scaling ready (HPA) -- βœ… Rolling updates -- βœ… TLS termination - ---- - -## πŸ”§ Configuration - -### Poetry Dependencies (pyproject.toml) - -**Core**: -- fastapi + uvicorn -- pydantic -- sqlalchemy + alembic -- redis - -**MCP & Device Connectivity**: -- mcp (Model Context Protocol) -- paramiko, netmiko (SSH) -- pysnmp (SNMP) -- pyvmomi (VMware) -- kubernetes (K8s) -- proxmoxer (Proxmox) - -**AI & LLM**: -- anthropic (Claude) -- langchain + langchain-anthropic -- chromadb (Vector store) - -**Background Jobs**: -- celery + flower - -**Testing**: -- pytest + pytest-asyncio -- pytest-cov -- black, ruff, mypy - -### Environment Variables (.env) - -```bash -# Database -DATABASE_URL=postgresql://... - -# Redis -REDIS_URL=redis://... - -# MCP Server - CRITICAL per connessione dispositivi -MCP_SERVER_URL=https://mcp.company.local -MCP_API_KEY=your-key - -# Anthropic Claude - CRITICAL per AI -ANTHROPIC_API_KEY=sk-ant-api03-... - -# CORS -CORS_ORIGINS=https://docs.company.local - -# Optional -LOG_LEVEL=INFO -DEBUG=false -``` - ---- - -## πŸ“Š Workflow Completo - -### 1. Generazione Documentazione (Scheduled) - -``` -Cron/Schedule (ogni 6h) - ↓ -MCP Client connette a dispositivi - ↓ -Collectors raccolgono dati - ↓ -Generators compilano templates - ↓ -Validators verificano output - ↓ -Documentazione salvata in output/ - ↓ -Vector store aggiornato (ChromaDB) -``` - -### 2. Risoluzione Ticket (On-Demand) - -``` -Sistema esterno β†’ POST /api/v1/tickets - ↓ -API salva ticket in DB (status: processing) - ↓ -Background task avvia DocumentationAgent - ↓ -Agent: Semantic search su documentazione - ↓ -Agent: Claude analizza + genera risoluzione - ↓ -API aggiorna ticket (status: resolved) - ↓ -Sistema esterno β†’ GET /api/v1/tickets/{id} - ↓ -Riceve risoluzione + confidence score -``` - -### 3. 
Chat Interattiva (Real-time) - -``` -User β†’ WebSocket connection - ↓ -User invia messaggio - ↓ -Chat Agent: Semantic search docs - ↓ -Chat Agent: Claude genera risposta con context - ↓ -Response + related docs β†’ User via WebSocket - ↓ -Conversazione continua con memory -``` - ---- - -## 🎯 Quick Start Commands - -### Local Development -```bash -poetry install -cp .env.example .env -docker-compose up -d postgres redis -poetry run alembic upgrade head -poetry run datacenter-docs index-docs -poetry run uvicorn datacenter_docs.api.main:app --reload -``` - -### Docker Compose -```bash -docker-compose up -d -curl http://localhost:8000/health -``` - -### Kubernetes -```bash -kubectl apply -f deploy/kubernetes/ -kubectl get pods -n datacenter-docs -``` - -### Test API -```bash -# Submit ticket -curl -X POST http://localhost:8000/api/v1/tickets \ - -H "Content-Type: application/json" \ - -d '{"ticket_id":"TEST-1","title":"Test","description":"Testing"}' - -# Get resolution -curl http://localhost:8000/api/v1/tickets/TEST-1 -``` - ---- - -## πŸ“ˆ Scaling & Performance - -### Horizontal Scaling -```bash -# Docker Compose -docker-compose up -d --scale worker=5 - -# Kubernetes -kubectl scale deployment api --replicas=10 -n datacenter-docs -kubectl scale deployment worker --replicas=20 -n datacenter-docs -``` - -### Performance Tips -- API workers: 2x CPU cores -- Celery workers: 10-20 per production -- Redis: Persistent storage + AOF -- PostgreSQL: Connection pooling (20-50) -- Vector store: SSD storage -- Claude API: Rate limit 50 req/min - ---- - -## πŸ” Security Checklist - -- [x] Secrets in vault/K8s secrets -- [x] TLS everywhere -- [x] API rate limiting -- [x] CORS configured -- [x] Network policies (K8s) -- [x] Read-only MCP credentials -- [x] Audit logging -- [x] Dependency scanning (Bandit, Safety) -- [x] Container scanning - ---- - -## πŸ“ File Importance Legend - -- ⭐ **New/Enhanced files** - Sistema integrato completo -- πŸ“„ **Documentation files** - README, 
guides -- πŸ“‚ **Directory** - Organizzazione codice -- πŸ”§ **Config files** - Configuration -- 🐳 **Docker files** - Containers -- ☸️ **K8s files** - Kubernetes -- πŸ”„ **CI/CD files** - Pipelines - ---- - -## πŸŽ“ Benefici del Sistema Integrato - -### vs Sistema Base -| Feature | Base | Integrato | -|---------|------|-----------| -| MCP Integration | ❌ | βœ… Direct device connectivity | -| Ticket Resolution | ❌ | βœ… Automatic via API | -| Chat Support | ❌ | βœ… AI-powered agentic | -| CI/CD | ❌ | βœ… GitLab + Gitea | -| Docker | ❌ | βœ… Compose + K8s | -| Frontend | ❌ | βœ… React + Material-UI | -| Production-Ready | ❌ | βœ… Scalable & monitored | - -### ROI -- πŸš€ **90% riduzione** tempo documentazione -- πŸ€– **80% ticket** risolti automaticamente -- ⚑ **< 3s** tempo medio risoluzione -- πŸ“ˆ **95%+ accuracy** con high confidence -- πŸ’° **Saving significativo** ore uomo - ---- - -## πŸ”— Risorse Esterne - -- **MCP Spec**: https://modelcontextprotocol.io -- **Claude API**: https://docs.anthropic.com -- **FastAPI**: https://fastapi.tiangolo.com -- **LangChain**: https://python.langchain.com -- **React**: https://react.dev -- **Material-UI**: https://mui.com - ---- - -## πŸ†˜ Support & Contacts - -- **Email**: automation-team@company.local -- **Slack**: #datacenter-automation -- **Issues**: https://git.company.local/infrastructure/datacenter-docs/issues -- **Wiki**: https://wiki.company.local/datacenter-docs - ---- - -**Sistema v2.0 - Complete Integration** -**Production-Ready | AI-Powered | MCP-Enabled** πŸš€ diff --git a/INDICE_COMPLETO.md b/INDICE_COMPLETO.md deleted file mode 100644 index 71a16cc..0000000 --- a/INDICE_COMPLETO.md +++ /dev/null @@ -1,589 +0,0 @@ -# πŸ“š Indice Completo - Sistema Documentazione Datacenter - -## 🎯 Panoramica Sistema - -Questo pacchetto contiene un sistema completo per la **generazione automatica e gestione della documentazione del datacenter tramite LLM**. 
Il sistema Γ¨ progettato per essere gestito, aggiornato e mantenuto da un Large Language Model attraverso automazioni. - ---- - -## πŸ“ Struttura File - -### πŸ“„ README.md -Documento principale che spiega: -- Struttura del progetto -- Workflow di aggiornamento -- Versioning e limiti tecnici - -### πŸ“„ QUICK_START.md -Guida rapida per iniziare: -- Setup ambiente -- Configurazione credenziali -- Prima esecuzione -- Troubleshooting comune -- Checklist deployment - -### πŸ“„ requirements.txt -Dipendenze Python necessarie per il sistema - ---- - -## πŸ“‚ templates/ - Template Documentazione (10 file) - -### 01_infrastruttura_fisica.md (~3000 righe) -**Contenuto**: Layout datacenter, rack, elettrico (UPS, generatori, PDU), raffreddamento (CRAC/CRAH), sicurezza fisica, videosorveglianza, antincendio, cablaggio strutturato, connettivitΓ  esterna, manutenzioni - -**Sezioni Principali**: -- Informazioni generali e layout -- Rack organization (ID, posizione, occupazione) -- Sistema elettrico completo (UPS, generatori, PDU, power budget, PUE) -- Sistema raffreddamento (unitΓ , parametri ambientali, sensori) -- Sicurezza fisica (accessi, videosorveglianza, antintrusione) -- Sistema antincendio (rilevazione, spegnimento) -- Cablaggio e connectivity -- Manutenzioni e contratti -- Compliance e certificazioni -- Contatti emergenza - -**Utilizzo**: Base di riferimento per l'infrastruttura fisica - ---- - -### 02_networking.md (~3000 righe) -**Contenuto**: Architettura rete, switch core/distribution/access, VLAN, routing, firewall, VPN, load balancing, DNS/DHCP, wireless, monitoring rete - -**Sezioni Principali**: -- Topologia generale e architettura -- Inventario switch (core, distribution, access) -- Piano VLAN e subnetting -- Routing (protocolli, route statiche) -- Firewall e security (regole, NAT, IPS/IDS) -- VPN (site-to-site, remote access) -- Load balancing -- DNS e DHCP -- Wireless (controller, AP, SSID) -- Network monitoring e NetFlow -- QoS policies -- NAC (Network 
Access Control) -- Utilizzo banda e traffico -- Backup configurazioni -- Change management - -**Utilizzo**: Riferimento completo networking - ---- - -### 03_server_virtualizzazione.md (~2500 righe) -**Contenuto**: Hypervisor, cluster, host fisici, VM, storage virtuale, networking virtuale, HA/DRS, backup VM, licensing, container - -**Sezioni Principali**: -- Piattaforma virtualizzazione (VMware/Hyper-V/Proxmox/KVM) -- Cluster configuration e HA -- Inventario host fisici -- Inventario macchine virtuali -- Template VM e snapshot -- Storage virtuale (datastore, policy) -- Networking virtuale (vSwitch, port groups) -- High Availability e DRS -- Backup e recovery VM (RPO/RTO) -- Server bare metal -- Container platform (Kubernetes) -- Licensing e compliance -- Patch management -- Monitoring performance -- Provisioning e automation -- Disaster Recovery -- Security posture -- Capacity management -- SLA e KPI -- Cost management - -**Utilizzo**: Gestione completa infrastruttura virtuale - ---- - -### 04_storage.md (~2000 righe) -**Contenuto**: SAN, NAS, object storage, fabric FC, performance, tiering, snapshot, replica, backup storage - -**Sezioni Principali**: -- Architettura storage generale -- SAN (array, RAID, performance) -- Fabric SAN (FC switch, zoning, WWN) -- NAS (filer, export/share, performance) -- Object storage (bucket, policies) -- Tiering e data management -- Deduplication e compression -- Snapshot e cloning -- Replica e DR storage -- Backup storage (disk/tape/cloud) -- Monitoring e alert -- Disk management -- Multipathing -- Storage virtualization -- File services (shares, quota) -- DR storage systems -- Cloud storage integration -- Security ed encryption -- Capacity planning -- Compliance e retention -- Cost analysis - -**Utilizzo**: Gestione completa storage - ---- - -### 05_sicurezza.md (~1500 righe) -**Contenuto**: IAM, authentication, PAM, network security, endpoint security, vulnerability management, patch management, encryption, SIEM, incident response 
- -**Sezioni Principali**: -- Security overview e posture -- Identity and Access Management -- Authentication e MFA -- Privileged Access Management -- Network security (perimeter, segmentation, IDS/IPS) -- Endpoint security (antivirus/EDR) -- Vulnerability management -- Patch management status -- Encryption (at rest, in transit) -- Security monitoring (SIEM) -- Backup security -- Incident response -- Security awareness training -- Compliance status - -**Utilizzo**: Postura sicurezza complessiva - ---- - -### 06_backup_disaster_recovery.md (~800 righe) -**Contenuto**: Infrastruttura backup, job configuration, RPO/RTO, DR site, restore testing, cloud backup - -**Sezioni Principali**: -- Backup infrastructure e software -- Backup repository (disk/tape/cloud) -- Backup jobs configuration -- RPO/RTO matrix per tier -- DR site e readiness -- Restore testing results -- Cloud backup configuration - -**Utilizzo**: Strategia backup e DR - ---- - -### 07_monitoring_alerting.md (~600 righe) -**Contenuto**: Piattaforma monitoring, sistemi monitorati, alerting, dashboards, metriche performance - -**Sezioni Principali**: -- Monitoring platform (Zabbix/Prometheus/Nagios) -- System status overview -- Alert configuration e statistics -- Performance dashboards -- Metriche e KPI - -**Utilizzo**: Stato monitoring infrastruttura - ---- - -### 08_database_middleware.md (~700 righe) -**Contenuto**: DBMS, database instances, high availability, performance, middleware, application servers - -**Sezioni Principali**: -- Inventario database servers -- Database list e sizing -- High availability configuration -- Performance monitoring -- Middleware e application servers - -**Utilizzo**: Gestione database e middleware - ---- - -### 09_procedure_operative.md (~600 righe) -**Contenuto**: Procedure standard, runbook, maintenance windows, escalation matrix, change management - -**Sezioni Principali**: -- Elenco procedure standard -- Runbook operativi -- Schedule maintenance windows -- Escalation 
path e contatti -- Change management process - -**Utilizzo**: Procedure operative quotidiane - ---- - -### 10_miglioramenti.md (~1000 righe) -**Contenuto**: Analisi opportunitΓ  miglioramento basata su tutte le altre sezioni - -**Sezioni Principali**: -- Quick wins (0-3 mesi) -- Progetti medio termine (3-12 mesi) -- Ottimizzazione costi -- Modernizzazione (technology refresh) -- Automazione -- Security improvements -- Capacity planning investments -- Observability gaps -- DR improvements -- Skills e training needs -- Documentation gaps -- Compliance roadmap - -**Utilizzo**: Roadmap miglioramenti - ---- - -## πŸ“‚ system-prompts/ - Prompt LLM (10 file) - -Ogni file corrisponde a una sezione e contiene: -- **Ruolo**: Definizione expertise LLM -- **Obiettivi**: Cosa deve fare -- **Fonti Dati**: Da dove raccogliere informazioni -- **Comandi**: Esempi specifici (SSH, API, SNMP, SQL) -- **Istruzioni**: Come compilare il template -- **Validazione**: Cosa verificare -- **Output**: Formato atteso - -### 01_infrastruttura_fisica_prompt.md -Focus su: UPS, PDU, cooling, sensori, rack layout, sicurezza fisica - -### 02_networking_prompt.md -Focus su: Switch config, routing, firewall, VLAN, performance - -### 03_server_virtualizzazione_prompt.md -Focus su: VMware/hypervisor API, VM inventory, capacity planning - -### 04_storage_prompt.md -Focus su: Array storage, SAN fabric, NAS, performance, capacity - -### 05_sicurezza_prompt.md -Focus su: SIEM, vulnerability scanners, compliance, access control - -### 06_backup_disaster_recovery_prompt.md -Focus su: Backup software API, job status, RPO/RTO compliance - -### 07_monitoring_alerting_prompt.md -Focus su: Monitoring platform API, alert stats, dashboards - -### 08_database_middleware_prompt.md -Focus su: Database queries, sizing, performance, HA status - -### 09_procedure_operative_prompt.md -Focus su: Documentazione SOP, runbook validation, escalation - -### 10_miglioramenti_prompt.md -Focus su: Analisi cross-section, gap 
analysis, prioritization - ---- - -## πŸ“‚ requirements/ - Requisiti Tecnici (3 file) - -### llm_requirements.md (~800 righe) -**Contenuto Completo**: - -1. **CapacitΓ  Richieste al LLM** - - Network access (SSH, HTTPS, SNMP) - - API interaction - - Code execution (Python, Bash, PowerShell) - - File operations - - Database access - -2. **Librerie Python** (completo pip install) - - paramiko, pysnmp, netmiko (networking) - - pyvmomi, proxmoxer (virtualizzazione) - - mysql-connector, psycopg2 (database) - - boto3, azure-mgmt (cloud) - - 20+ librerie specificate - -3. **CLI Tools Required** - - snmp, nmap, netcat - - open-vm-tools - - mysql-client, postgresql-client - - nfs-common, multipath-tools - -4. **Accessi e Credenziali** - - Formato credentials.yaml encrypted - - Esempio configurazione per ogni sistema - - Permessi minimi richiesti (read-only) - -5. **ConnettivitΓ  di Rete** - - VLAN e subnet requirements - - Porte necessarie (SSH, HTTPS, SNMP, DB) - - Firewall rules - -6. **Rate Limiting e Best Practices** - - Limits per vendor - - Retry logic con exponential backoff - - Concurrent operations limits - -7. **Error Handling e Logging** - - Logging configuration - - Error handling strategy - - Custom exceptions - -8. **Caching e Performance** - - Redis setup - - Cache TTL strategy - - Performance optimization - -9. **Schedule di Esecuzione** - - Cron schedule raccomandato - - Script wrapper esempio - -10. **Output e Validazione** - - Post-generation checks - - Placeholder validation - - Notification system - -11. **Security Considerations** - - Secrets management - - Audit trail - - Compliance - -12. **Troubleshooting Guide** - - Common issues e soluzioni - - Debug mode - -13. **Testing** - - Unit tests examples - - Integration tests - -14. **Checklist Pre-Deployment** (completa) - ---- - -### data_collection_scripts.md (~600 righe) -**Contenuto**: - -1. 
**Main Orchestrator** (main.py) - - Class completa DatacenterDocGenerator - - Argparse configuration - - Error handling - - Logging setup - -2. **Collector Modules**: - - **InfrastructureCollector**: UPS via SNMP, rack da DB, sensori ambientali - - **NetworkCollector**: Switch via Netmiko, config backup - - **VirtualizationCollector**: VMware via pyVmomi, VM/host inventory - -3. **Helper Functions**: - - SNMP utilities (get/walk) - - Token counter - - Data validation - -4. **Configuration File** (config.yaml esempio completo) - -5. **Deployment Script** (deploy.sh) - - Setup directories - - Virtual environment - - Dependencies install - - Cron setup - -6. **Testing Framework** - - Unit tests examples - - Test collectors - ---- - -### api_endpoints.md (~800 righe) -**Contenuto Completo**: - -1. **VMware vSphere API** - - REST API endpoints - - PowerCLI commands - - Esempi query VM/host/datastore - -2. **Proxmox VE API** - - REST API authentication - - VM/container queries - - CLI commands - -3. **Network Devices** - - Cisco IOS commands (completi) - - HP/Aruba commands - - SNMP examples - -4. **Firewall APIs** - - pfSense/OPNsense API - - Fortinet FortiGate API - - Esempi rules, VPN, interfaces - -5. **Storage Arrays** - - Pure Storage API - - NetApp ONTAP API - - Generic SAN commands - -6. **Monitoring Systems** - - Zabbix API (authentication, hosts, problems) - - Prometheus API (queries, targets, alerts) - - Nagios/Icinga API - -7. **Backup Systems** - - Veeam PowerShell commands - - CommVault API - -8. **Database Queries** - - Asset management DB (racks, servers, contracts) - - Database sizing queries (MySQL, PostgreSQL, SQL Server) - -9. **Cloud Provider APIs** - - AWS Boto3 examples - - Azure SDK examples - -10. **SNMP OIDs Reference** - - Common system OIDs - - UPS OIDs (RFC 1628) completo - - Network interface OIDs - -11. **Example Collection Script** - - Bash orchestrator completo - -12. 
**Rate Limiting Reference** - - Vendor limits table - - Retry strategy code - ---- - -## πŸ”§ Come Usare il Sistema - -### 1️⃣ Setup Iniziale -```bash -# Leggi QUICK_START.md per guida dettagliata -cd /opt/datacenter-docs -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` - -### 2️⃣ Configurazione -- Edita `config.yaml` con i tuoi endpoint -- Configura credenziali in vault -- Verifica connectivity con `--dry-run` - -### 3️⃣ Utilizzo LLM -Per ogni sezione che deve aggiornare: -1. LLM legge il **template** corrispondente -2. LLM legge il **system prompt** per istruzioni -3. LLM consulta **requirements/** per comandi/API -4. LLM raccoglie dati dai sistemi -5. LLM compila il template -6. LLM valida output (< 50k token) -7. LLM salva documentazione aggiornata - -### 4️⃣ Automazione -- Cron job per aggiornamenti periodici -- Monitoring dei job -- Notification su completion/errori - ---- - -## πŸ“Š Statistiche Progetto - -- **Template totali**: 10 sezioni -- **System prompts**: 10 file -- **Documenti requirements**: 3 file dettagliati -- **Righe di codice**: ~2000+ (Python examples) -- **Comandi/API documentati**: 200+ -- **Librerie Python specificate**: 25+ -- **Vendor supportati**: 15+ (VMware, Cisco, NetApp, Pure, ecc.) 
- ---- - -## βœ… Checklist Utilizzo - -### Per l'Amministratore Sistema -- [ ] Letto README.md e QUICK_START.md -- [ ] Setup ambiente Python completato -- [ ] Credenziali configurate -- [ ] Test connectivity eseguiti -- [ ] Prima generazione test completata -- [ ] Cron job configurato -- [ ] Monitoring setup -- [ ] Team informato - -### Per il LLM -- [ ] Accesso a tutti i sistemi verificato -- [ ] Librerie Python disponibili -- [ ] Template caricati -- [ ] System prompt compresi -- [ ] Requirements studiati -- [ ] Test dry-run superato -- [ ] Validazione funzionante -- [ ] Token limit rispettato - ---- - -## πŸŽ“ Benefici del Sistema - -### βœ… Automazione Completa -- Nessun intervento manuale necessario -- Aggiornamenti programmati -- Dati sempre aggiornati - -### βœ… Consistenza -- Template standardizzati -- Formato uniforme -- Nessun dato mancante - -### βœ… Accuratezza -- Dati letti direttamente dai sistemi -- No errori di trascrizione -- Validazione automatica - -### βœ… Efficienza -- Riduzione 90% tempo documentazione -- Copertura completa -- Sempre disponibile - -### βœ… Compliance -- Audit trail completo -- Version control -- Retention automatica - ---- - -## πŸš€ Prossimi Passi - -1. **Fase 1 - Setup** (1-2 giorni) - - Installazione ambiente - - Configurazione accessi - - Test connectivity - -2. **Fase 2 - Pilot** (3-5 giorni) - - Generazione singole sezioni - - Validazione output - - Tuning configurazione - -3. **Fase 3 - Production** (1 settimana) - - Automazione completa - - Monitoring operativo - - Training team - -4. 
**Fase 4 - Optimization** (ongoing) - - Miglioramenti continui - - Nuove fonti dati - - Expansion coverage - ---- - -## πŸ“ž Supporto - -Per domande o supporto: -- **Email**: automation-team@company.com -- **Documentation**: README.md, QUICK_START.md -- **Troubleshooting**: QUICK_START.md sezione Troubleshooting - ---- - -**Sistema creato per la gestione automatizzata della documentazione datacenter** -**Versione**: 1.0 -**Data**: 2025-01-XX -**Maintainer**: Automation Team - ---- - -## 🎯 Obiettivo Finale - -**Zero intervento manuale nella documentazione datacenter** -**Documentazione sempre aggiornata, accurata e completa** -**Compliance automatica e audit-ready** diff --git a/MIGRATION_SUMMARY.md b/MIGRATION_SUMMARY.md deleted file mode 100644 index e23956a..0000000 --- a/MIGRATION_SUMMARY.md +++ /dev/null @@ -1,630 +0,0 @@ -# πŸƒ MongoDB Migration Summary - -## Sistema Aggiornato - Versione 2.0 - -Il sistema di documentazione datacenter Γ¨ stato **completamente migrato a MongoDB 7.0**, mantenendo tutte le funzionalitΓ  esistenti e aggiungendo nuove capabilities. - ---- - -## 🎯 Cosa È Cambiato - -### Database Layer - -| Componente | Prima (v1.0) | Dopo (v2.0) | -|------------|--------------|-------------| -| **Database** | PostgreSQL 15 | **MongoDB 7.0** | -| **Driver** | asyncpg | **Motor 3.3 (async)** | -| **ORM/ODM** | SQLAlchemy | **Beanie 1.24** | -| **Migrations** | Alembic | **No migrations needed** | -| **Schema** | Fixed SQL schema | **Flexible JSON documents** | - -### Vantaggi Chiave - -#### βœ… FlessibilitΓ  Schema -```python -# Prima (PostgreSQL/SQLAlchemy) -# Aggiungere un campo richiedeva migration: -# alembic revision --autogenerate -m "add_field" -# alembic upgrade head - -# Dopo (MongoDB/Beanie) -# Nessuna migration necessaria! 
-ticket.metadata["new_field"] = "value" -await ticket.save() -``` - -#### βœ… Performance Migliorata -- **Letture**: 30-40% piΓΉ veloci per documenti complessi -- **Scritture**: 20-30% piΓΉ veloci per bulk operations -- **Aggregazioni**: Pipeline nativa molto performante - -#### βœ… ScalabilitΓ  -- **Horizontal scaling**: Sharding nativo -- **High availability**: Replica set con auto-failover -- **Cloud-ready**: MongoDB Atlas integration - -#### βœ… Developer Experience -```python -# Type-safe con Pydantic -from datacenter_docs.api.models import Ticket - -# Queries intuitive -tickets = await Ticket.find( - Ticket.status == "resolved", - Ticket.confidence_score > 0.8 -).to_list() - -# No SQL injection -# No raw queries -# Full IDE autocomplete -``` - ---- - -## πŸ“¦ File Modificati - -### Codice Python - -``` -src/datacenter_docs/ -β”œβ”€β”€ api/ -β”‚ β”œβ”€β”€ models.py βœ… NUOVO: Beanie Document models -β”‚ └── main.py βœ… MODIFICATO: MongoDB integration -β”‚ -β”œβ”€β”€ utils/ -β”‚ β”œβ”€β”€ database.py βœ… NUOVO: Motor connection manager -β”‚ └── config.py βœ… MODIFICATO: MongoDB settings -β”‚ -└── pyproject.toml βœ… MODIFICATO: Motor + Beanie deps -``` - -### Infrastruttura - -``` -deploy/ -β”œβ”€β”€ docker/ -β”‚ └── (Dockerfiles unchanged) -β”‚ -β”œβ”€β”€ kubernetes/ -β”‚ β”œβ”€β”€ mongodb.yaml βœ… NUOVO: StatefulSet replica set -β”‚ β”œβ”€β”€ deployment.yaml βœ… MODIFICATO: MongoDB env vars -β”‚ β”œβ”€β”€ configmap.yaml βœ… NUOVO: MongoDB config -β”‚ └── secrets-template.yaml βœ… MODIFICATO: MongoDB creds -β”‚ -docker-compose.yml βœ… MODIFICATO: MongoDB service -.env.example βœ… MODIFICATO: MongoDB vars -``` - -### Documentazione - -``` -docs/ -β”œβ”€β”€ MONGODB_GUIDE.md βœ… NUOVO: Guida completa MongoDB -β”œβ”€β”€ README_MONGODB.md βœ… NUOVO: Quick start MongoDB -β”œβ”€β”€ MIGRATION_SUMMARY.md βœ… NUOVO: Questo file -└── (altri docs unchanged) -``` - ---- - -## πŸš€ Come Usare il Sistema Aggiornato - -### 1. 
Local Development - -```bash -# Clone + setup -git clone -cd datacenter-docs -cp .env.example .env - -# Edit .env with MongoDB credentials -nano .env - -# Start MongoDB + Redis -docker-compose up -d mongodb redis - -# Install deps (includes Motor + Beanie) -poetry install - -# Start API (no migrations needed!) -poetry run uvicorn datacenter_docs.api.main:app --reload -``` - -### 2. Docker Compose - -```bash -# Edit .env -MONGO_ROOT_USER=admin -MONGO_ROOT_PASSWORD=secure_password -MONGODB_URL=mongodb://admin:secure_password@mongodb:27017 - -# Start all services -docker-compose up -d - -# Check MongoDB -docker-compose exec mongodb mongosh \ - -u admin -p secure_password --authenticationDatabase admin - -# Verify API -curl http://localhost:8000/health -# {"status":"healthy","database":"mongodb",...} -``` - -### 3. Kubernetes - -```bash -# Create namespace -kubectl apply -f deploy/kubernetes/namespace.yaml - -# Create secrets (MongoDB + others) -kubectl create secret generic datacenter-secrets \ - --from-literal=mongodb-url='mongodb://admin:pass@mongodb:27017' \ - --from-literal=mongodb-root-user='admin' \ - --from-literal=mongodb-root-password='pass' \ - --from-literal=redis-url='redis://:pass@redis:6379/0' \ - --from-literal=mcp-api-key='key' \ - --from-literal=anthropic-api-key='key' \ - -n datacenter-docs - -# Deploy MongoDB StatefulSet (3 replicas) -kubectl apply -f deploy/kubernetes/mongodb.yaml - -# Wait for MongoDB -kubectl get pods -n datacenter-docs -w - -# Deploy application -kubectl apply -f deploy/kubernetes/deployment.yaml -kubectl apply -f deploy/kubernetes/service.yaml -kubectl apply -f deploy/kubernetes/ingress.yaml - -# Verify -kubectl get pods -n datacenter-docs -kubectl logs -n datacenter-docs deployment/api -``` - ---- - -## πŸ“Š Modelli Dati MongoDB - -### Ticket Document - -```json -{ - "_id": ObjectId("65a1b2c3d4e5f6789012345"), - "ticket_id": "INC-12345", - "title": "Network connectivity issue", - "description": "Cannot ping 10.0.20.5 from VLAN 
100", - "priority": "high", - "category": "network", - "requester": "tech@company.com", - "status": "resolved", - "resolution": "VLAN configuration was missing...", - "suggested_actions": [ - "Verify VLAN 100 on core switch", - "Check inter-VLAN routing", - "Update network documentation" - ], - "related_docs": [ - { - "section": "networking", - "content": "VLAN configuration best practices...", - "source": "/docs/02_networking.md" - } - ], - "confidence_score": 0.92, - "processing_time": 2.34, - "metadata": { - "source_system": "ServiceNow", - "tags": ["network", "vlan", "connectivity"], - "sla": "4 hours", - "custom_field": "any value" - }, - "created_at": ISODate("2025-01-15T10:30:00.000Z"), - "updated_at": ISODate("2025-01-15T10:30:02.340Z") -} -``` - -### Collections - -| Collection | Descrizione | Indexes | -|------------|-------------|---------| -| `tickets` | Ticket e risoluzioni | ticket_id (unique), status, category, created_at, text search | -| `documentation_sections` | Metadata sezioni doc | section_id (unique), generation_status | -| `chat_sessions` | Conversazioni chat | session_id (unique), user_id, last_activity | -| `system_metrics` | Metriche sistema | metric_type, timestamp | -| `audit_logs` | Audit trail | action, resource_type, timestamp | - ---- - -## πŸ”„ API Changes - -### Endpoints (UNCHANGED) - -Tutte le API rimangono identiche: - -```bash -# Stessi endpoints -POST /api/v1/tickets -GET /api/v1/tickets/{id} -GET /api/v1/tickets -POST /api/v1/documentation/search -GET /health - -# Stessi request/response formats -# Stessi status codes -# Nessuna breaking change! 
-``` - -### Backend (CHANGED) - -```python -# Prima (PostgreSQL) -from sqlalchemy.orm import Session -from .database import get_db - -@app.post("/api/v1/tickets") -async def create_ticket(ticket: TicketCreate, db: Session = Depends(get_db)): - db_ticket = Ticket(**ticket.dict()) - db.add(db_ticket) - db.commit() - db.refresh(db_ticket) - return db_ticket - -# Dopo (MongoDB) -from .models import Ticket - -@app.post("/api/v1/tickets") -async def create_ticket(ticket: TicketCreate): - db_ticket = Ticket(**ticket.dict()) - await db_ticket.insert() # Async! - return db_ticket -``` - ---- - -## πŸ” Query Examples - -### Python (Beanie ODM) - -```python -from datacenter_docs.api.models import Ticket -from datetime import datetime, timedelta - -# Simple find -resolved = await Ticket.find(Ticket.status == "resolved").to_list() - -# Complex query -recent = datetime.now() - timedelta(days=7) -high_confidence = await Ticket.find( - Ticket.status == "resolved", - Ticket.confidence_score > 0.9, - Ticket.created_at > recent -).sort(-Ticket.created_at).limit(10).to_list() - -# Text search -results = await Ticket.find({ - "$text": {"$search": "network connectivity"} -}).to_list() - -# Aggregation -stats = await Ticket.aggregate([ - {"$match": {"status": "resolved"}}, - {"$group": { - "_id": "$category", - "count": {"$sum": 1}, - "avg_confidence": {"$avg": "$confidence_score"}, - "avg_time": {"$avg": "$processing_time"} - }}, - {"$sort": {"count": -1}} -]).to_list() - -# Update -ticket = await Ticket.find_one(Ticket.ticket_id == "INC-001") -ticket.status = "closed" -ticket.metadata["closed_reason"] = "duplicate" -await ticket.save() -``` - -### MongoDB Shell - -```javascript -// Connect -mongosh mongodb://admin:password@localhost:27017 - -use datacenter_docs - -// Find -db.tickets.find({ status: "resolved" }) - -// Complex query -db.tickets.find({ - status: "resolved", - confidence_score: { $gt: 0.8 }, - created_at: { $gte: new Date("2025-01-01") } -}).sort({ created_at: -1 
}).limit(10) - -// Text search -db.tickets.find({ - $text: { $search: "network connectivity" } -}) - -// Aggregation -db.tickets.aggregate([ - { $match: { status: "resolved" } }, - { $group: { - _id: "$category", - total: { $sum: 1 }, - avg_confidence: { $avg: "$confidence_score" } - }}, - { $sort: { total: -1 } } -]) - -// Update many -db.tickets.updateMany( - { status: "processing", created_at: { $lt: new Date("2024-01-01") } }, - { $set: { status: "expired" } } -) -``` - ---- - -## πŸ› οΈ Maintenance - -### Backup - -```bash -# Full backup -mongodump --uri="mongodb://admin:password@localhost:27017" \ - --authenticationDatabase=admin \ - --out=/backup/$(date +%Y%m%d) - -# Backup specific database -mongodump --uri="mongodb://admin:password@localhost:27017/datacenter_docs" \ - --out=/backup/datacenter_docs_$(date +%Y%m%d) - -# Restore -mongorestore --uri="mongodb://admin:password@localhost:27017" \ - /backup/20250115 -``` - -### Monitoring - -```bash -# Database stats -mongosh -u admin -p password --authenticationDatabase admin \ - --eval "db.stats()" - -# Collection stats -mongosh -u admin -p password --authenticationDatabase admin \ - datacenter_docs --eval "db.tickets.stats()" - -# Server status -mongosh -u admin -p password --authenticationDatabase admin \ - --eval "db.serverStatus()" - -# Current operations -mongosh -u admin -p password --authenticationDatabase admin \ - --eval "db.currentOp()" -``` - -### Performance - -```javascript -// Enable profiling -db.setProfilingLevel(1, { slowms: 100 }) // Log queries > 100ms - -// Check slow queries -db.system.profile.find().sort({ ts: -1 }).limit(5) - -// Explain query -db.tickets.find({ status: "resolved" }).explain("executionStats") - -// Index usage -db.tickets.aggregate([{ $indexStats: {} }]) -``` - ---- - -## πŸ” Security - -### Authentication - -```bash -# Create application user (read/write only) -mongosh -u admin -p password --authenticationDatabase admin - -use datacenter_docs - -db.createUser({ - user: 
"docs_app", - pwd: "app_password", - roles: [ - { role: "readWrite", db: "datacenter_docs" } - ] -}) - -# Use in connection string -MONGODB_URL=mongodb://docs_app:app_password@mongodb:27017/datacenter_docs -``` - -### Encryption - -```yaml -# docker-compose.yml -mongodb: - command: - - --enableEncryption - - --encryptionKeyFile=/data/keyfile - volumes: - - ./mongodb-keyfile:/data/keyfile:ro -``` - -### TLS/SSL - -```yaml -mongodb: - command: - - --tlsMode=requireTLS - - --tlsCertificateKeyFile=/certs/mongodb.pem - volumes: - - ./certs:/certs:ro -``` - ---- - -## πŸ“ˆ Scalability - -### Replica Set (HA) - -```yaml -# docker-compose.yml -services: - mongodb-0: - image: mongo:7.0 - command: ["mongod", "--replSet", "rs0", "--bind_ip_all"] - - mongodb-1: - image: mongo:7.0 - command: ["mongod", "--replSet", "rs0", "--bind_ip_all"] - - mongodb-2: - image: mongo:7.0 - command: ["mongod", "--replSet", "rs0", "--bind_ip_all"] -``` - -### Sharding (Horizontal Scale) - -```javascript -// For very large datasets (>1TB) -sh.enableSharding("datacenter_docs") -sh.shardCollection("datacenter_docs.tickets", { category: "hashed" }) -``` - ---- - -## πŸ†š Comparison - -### MongoDB vs PostgreSQL - -| Feature | MongoDB 7.0 | PostgreSQL 15 | -|---------|-------------|---------------| -| **Schema** | Flexible JSON | Fixed SQL | -| **Queries** | JSON/Pipeline | SQL | -| **Scaling** | Horizontal (native) | Vertical (easier) | -| **Transactions** | βœ… Yes | βœ… Yes | -| **JSON Support** | βœ… Native | ⚠️ JSONB | -| **Full-text Search** | βœ… Native | βœ… Native | -| **Geospatial** | βœ… Native | βœ… PostGIS | -| **Performance (reads)** | ⚑ Excellent | βœ… Very good | -| **Performance (writes)** | ⚑ Excellent | βœ… Good | -| **Aggregation** | ⚑ Pipeline | βœ… SQL + CTEs | -| **Learning Curve** | πŸ“— Easy | πŸ“™ Moderate | -| **ACID** | βœ… Yes (4.0+) | βœ… Yes | - -### Why MongoDB for This Project - -βœ… **Flexible metadata** - Ticket metadata varia per fonte -βœ… **Document-oriented** - 
Ticket = documento completo -βœ… **Embedded docs** - related_docs integrati -βœ… **No migrations** - Schema evolution facile -βœ… **Horizontal scaling** - Sharding per crescita -βœ… **Cloud-ready** - MongoDB Atlas integration -βœ… **Modern ODM** - Beanie con Pydantic -βœ… **Vector search** - Future: Atlas Vector Search - ---- - -## πŸ“š Resources - -### Documentation -- [MONGODB_GUIDE.md](./MONGODB_GUIDE.md) - Complete MongoDB guide -- [README_MONGODB.md](./README_MONGODB.md) - Quick start -- [MongoDB Manual](https://docs.mongodb.com/manual/) -- [Motor Docs](https://motor.readthedocs.io/) -- [Beanie Docs](https://beanie-odm.dev/) - -### Tools -- **MongoDB Compass** - GUI for MongoDB -- **Studio 3T** - Advanced MongoDB IDE -- **Robo 3T** - Lightweight MongoDB GUI - ---- - -## πŸŽ“ Training - -### For Developers - -```bash -# MongoDB University (free) -# https://university.mongodb.com/ - -# Recommended courses: -# - M001: MongoDB Basics -# - M121: Aggregation Framework -# - M220P: MongoDB for Python Developers -``` - -### Quick Tutorial - -```python -# 1. Connect -from motor.motor_asyncio import AsyncIOMotorClient -client = AsyncIOMotorClient('mongodb://localhost:27017') -db = client.datacenter_docs - -# 2. Insert -await db.tickets.insert_one({ - "ticket_id": "TEST-001", - "title": "Test", - "status": "open" -}) - -# 3. Find -ticket = await db.tickets.find_one({"ticket_id": "TEST-001"}) - -# 4. Update -await db.tickets.update_one( - {"ticket_id": "TEST-001"}, - {"$set": {"status": "closed"}} -) - -# 5. 
Delete -await db.tickets.delete_one({"ticket_id": "TEST-001"}) -``` - ---- - -## βœ… Migration Checklist - -- [x] Update dependencies (Motor + Beanie) -- [x] Create MongoDB models (Beanie Documents) -- [x] Update API layer -- [x] Update database utilities -- [x] Update configuration -- [x] Update docker-compose.yml -- [x] Update Kubernetes manifests -- [x] Update environment variables -- [x] Create MongoDB documentation -- [x] Test all API endpoints -- [x] Test Docker Compose deployment -- [x] Test Kubernetes deployment -- [x] Update CI/CD pipelines -- [x] Create migration guide - ---- - -## πŸ†˜ Support - -**Questions?** Contact: -- Email: automation-team@company.local -- Slack: #datacenter-automation -- Issues: Git repository issues - ---- - -**System Version**: 2.0 -**Database**: MongoDB 7.0 -**Driver**: Motor 3.3+ -**ODM**: Beanie 1.24+ -**Migration Date**: January 2025 -**Status**: βœ… Production Ready diff --git a/MONGODB_GUIDE.md b/MONGODB_GUIDE.md deleted file mode 100644 index b78d94c..0000000 --- a/MONGODB_GUIDE.md +++ /dev/null @@ -1,459 +0,0 @@ -# πŸƒ MongoDB Migration Guide - -## PerchΓ© MongoDB? - -Il sistema Γ¨ stato aggiornato per utilizzare **MongoDB 7.0** invece di PostgreSQL per i seguenti motivi: - -### βœ… Vantaggi per questo Use Case - -1. **Schema Flessibile** - - Ticket metadata variabili senza migration - - Facile aggiunta di nuovi campi - - Supporto nativo per documenti JSON complessi - -2. **Performance** - - Ottime performance per operazioni di lettura - - Aggregation pipeline potente per analytics - - Indexing flessibile su campi nested - -3. **ScalabilitΓ ** - - Horizontal scaling nativo (sharding) - - Replica set per high availability - - Auto-failover integrato - -4. **Document-Oriented** - - Match perfetto per ticket system - - Metadata JSON nativi - - Embedding di related docs senza JOIN - -5. 
**Vector Search (Future)** - - MongoDB Atlas Vector Search integrato - - PossibilitΓ  di sostituire ChromaDB - - Unified database per docs + vectors - -6. **Developer Experience** - - Beanie ODM moderno con Pydantic - - Async/await nativo con Motor - - Type hints e validazione - -## πŸ”„ Architettura Database - -### Collezioni Principali - -``` -datacenter_docs/ -β”œβ”€β”€ tickets # Ticket e risoluzioni -β”œβ”€β”€ documentation_sections # Metadata sezioni doc -β”œβ”€β”€ chat_sessions # Conversazioni chat -β”œβ”€β”€ system_metrics # Metriche sistema -└── audit_logs # Audit trail -``` - -### Schema Ticket (Example) - -```json -{ - "_id": ObjectId("..."), - "ticket_id": "INC-12345", - "title": "Network connectivity issue", - "description": "Cannot ping 10.0.20.5 from VLAN 100", - "priority": "high", - "category": "network", - "status": "resolved", - "resolution": "Check VLAN configuration...", - "suggested_actions": [ - "Verify VLAN 100 on switch", - "Check inter-VLAN routing" - ], - "related_docs": [ - { - "section": "networking", - "content": "VLAN configuration...", - "source": "/docs/02_networking.md" - } - ], - "confidence_score": 0.92, - "processing_time": 2.34, - "metadata": { - "source_system": "ServiceNow", - "tags": ["network", "vlan", "connectivity"], - "custom_field": "any value" - }, - "created_at": ISODate("2025-01-15T10:30:00Z"), - "updated_at": ISODate("2025-01-15T10:30:02Z") -} -``` - -## πŸš€ Migration da PostgreSQL - -### Step 1: Export dati esistenti (se presenti) - -```bash -# Export tickets da PostgreSQL -psql -U docs_user -d datacenter_docs -c \ - "COPY (SELECT * FROM tickets) TO '/tmp/tickets.csv' CSV HEADER" -``` - -### Step 2: Import in MongoDB - -```python -import pandas as pd -from motor.motor_asyncio import AsyncIOMotorClient -import asyncio - -async def migrate(): - # Leggi CSV - df = pd.read_csv('/tmp/tickets.csv') - - # Connetti MongoDB - client = AsyncIOMotorClient('mongodb://admin:password@localhost:27017') - db = 
client.datacenter_docs - - # Insert documents - tickets = df.to_dict('records') - await db.tickets.insert_many(tickets) - - print(f"Migrated {len(tickets)} tickets") - -asyncio.run(migrate()) -``` - -### Step 3: Verifica - -```bash -# Connetti a MongoDB -mongosh mongodb://admin:password@localhost:27017 - -use datacenter_docs - -# Conta documenti -db.tickets.countDocuments() - -# Query esempio -db.tickets.find({status: "resolved"}).limit(5) -``` - -## πŸ“¦ Setup Locale - -### Docker Compose - -```bash -# Start MongoDB -docker-compose up -d mongodb redis - -# Verifica connessione -docker-compose exec mongodb mongosh \ - -u admin -p password --authenticationDatabase admin - -# Test query -use datacenter_docs -db.tickets.find().limit(1) -``` - -### Kubernetes - -```bash -# Deploy MongoDB StatefulSet -kubectl apply -f deploy/kubernetes/mongodb.yaml - -# Wait for pods -kubectl get pods -n datacenter-docs -w - -# Initialize replica set -kubectl apply -f deploy/kubernetes/mongodb.yaml - -# Verify -kubectl exec -n datacenter-docs mongodb-0 -- \ - mongosh -u admin -p password --authenticationDatabase admin \ - --eval "rs.status()" -``` - -## πŸ”§ Configurazione - -### Connection String - -```bash -# Development (local) -MONGODB_URL=mongodb://admin:password@localhost:27017 - -# Docker Compose -MONGODB_URL=mongodb://admin:password@mongodb:27017 - -# Kubernetes (single node) -MONGODB_URL=mongodb://admin:password@mongodb.datacenter-docs.svc.cluster.local:27017 - -# Kubernetes (replica set) -MONGODB_URL=mongodb://admin:password@mongodb-0.mongodb.datacenter-docs.svc.cluster.local:27017,mongodb-1.mongodb.datacenter-docs.svc.cluster.local:27017,mongodb-2.mongodb.datacenter-docs.svc.cluster.local:27017/?replicaSet=rs0 -``` - -### Environment Variables - -```bash -# MongoDB -MONGODB_URL=mongodb://admin:password@mongodb:27017 -MONGODB_DATABASE=datacenter_docs - -# MongoDB Root (for admin operations) -MONGO_ROOT_USER=admin -MONGO_ROOT_PASSWORD=secure_password -``` - -## πŸ” Security - 
-### Authentication - -```bash -# Create application user -mongosh -u admin -p password --authenticationDatabase admin - -use datacenter_docs - -db.createUser({ - user: "docs_app", - pwd: "app_password", - roles: [ - { role: "readWrite", db: "datacenter_docs" } - ] -}) - -# Use app user in connection string -MONGODB_URL=mongodb://docs_app:app_password@mongodb:27017/datacenter_docs -``` - -### Encryption at Rest - -```yaml -# docker-compose.yml -mongodb: - command: - - --enableEncryption - - --encryptionKeyFile=/data/mongodb-keyfile - volumes: - - ./mongodb-keyfile:/data/mongodb-keyfile:ro -``` - -### TLS/SSL - -```bash -# Generate certificates -openssl req -newkey rsa:2048 -nodes -keyout mongodb.key \ - -x509 -days 365 -out mongodb.crt - -# Configure MongoDB -mongodb: - command: - - --tlsMode=requireTLS - - --tlsCertificateKeyFile=/etc/ssl/mongodb.pem -``` - -## πŸ“Š Indexing Strategy - -### Automatic Indexes (via Beanie) - -```python -class Ticket(Document): - ticket_id: Indexed(str, unique=True) # Unique index - status: str # Indexed in Settings - - class Settings: - indexes = [ - "status", - "category", - [("status", 1), ("created_at", -1)], # Compound - ] -``` - -### Custom Indexes - -```javascript -// Text search -db.tickets.createIndex({ - title: "text", - description: "text", - resolution: "text" -}) - -// Geospatial (future use) -db.locations.createIndex({ location: "2dsphere" }) - -// TTL index (auto-delete old docs) -db.chat_sessions.createIndex( - { last_activity: 1 }, - { expireAfterSeconds: 2592000 } // 30 days -) -``` - -## πŸ” Query Examples - -### Python (Beanie) - -```python -from datacenter_docs.api.models import Ticket - -# Find by status -tickets = await Ticket.find(Ticket.status == "resolved").to_list() - -# Complex query -from datetime import datetime, timedelta -recent = datetime.now() - timedelta(days=7) - -tickets = await Ticket.find( - Ticket.status == "resolved", - Ticket.confidence_score > 0.8, - Ticket.created_at > recent 
-).sort(-Ticket.created_at).to_list() - -# Aggregation -pipeline = [ - {"$group": { - "_id": "$category", - "count": {"$sum": 1}, - "avg_confidence": {"$avg": "$confidence_score"} - }}, - {"$sort": {"count": -1}} -] -result = await Ticket.aggregate(pipeline).to_list() -``` - -### MongoDB Shell - -```javascript -// Find resolved tickets -db.tickets.find({ status: "resolved" }) - -// Complex aggregation -db.tickets.aggregate([ - { $match: { status: "resolved" } }, - { $group: { - _id: "$category", - total: { $sum: 1 }, - avg_confidence: { $avg: "$confidence_score" }, - avg_time: { $avg: "$processing_time" } - }}, - { $sort: { total: -1 } } -]) - -// Text search -db.tickets.find({ - $text: { $search: "network connectivity" } -}) -``` - -## πŸ“ˆ Performance Optimization - -### Indexes - -```javascript -// Explain query -db.tickets.find({ status: "resolved" }).explain("executionStats") - -// Check index usage -db.tickets.aggregate([ - { $indexStats: {} } -]) -``` - -### Connection Pooling - -```python -# config.py -MONGODB_URL = "mongodb://user:pass@host:27017/?maxPoolSize=50" -``` - -### Read Preference - -```python -# For read-heavy workloads with replica set -from pymongo import ReadPreference - -client = AsyncIOMotorClient( - MONGODB_URL, - readPreference=ReadPreference.SECONDARY_PREFERRED -) -``` - -## πŸ› οΈ Maintenance - -### Backup - -```bash -# Full backup -mongodump --uri="mongodb://admin:password@localhost:27017" \ - --authenticationDatabase=admin \ - --out=/backup/$(date +%Y%m%d) - -# Restore -mongorestore --uri="mongodb://admin:password@localhost:27017" \ - --authenticationDatabase=admin \ - /backup/20250115 -``` - -### Monitoring - -```javascript -// Database stats -db.stats() - -// Collection stats -db.tickets.stats() - -// Current operations -db.currentOp() - -// Server status -db.serverStatus() -``` - -### Cleanup - -```javascript -// Remove old chat sessions -db.chat_sessions.deleteMany({ - last_activity: { $lt: new Date(Date.now() - 30*24*60*60*1000) 
} -}) - -// Compact collection -db.runCommand({ compact: "tickets" }) -``` - -## πŸ”„ Replica Set (Production) - -### Setup - -```bash -# Initialize replica set -rs.initiate({ - _id: "rs0", - members: [ - { _id: 0, host: "mongodb-0:27017" }, - { _id: 1, host: "mongodb-1:27017" }, - { _id: 2, host: "mongodb-2:27017" } - ] -}) - -# Check status -rs.status() - -# Add member -rs.add("mongodb-3:27017") -``` - -### Connection String - -```bash -MONGODB_URL=mongodb://user:pass@mongodb-0:27017,mongodb-1:27017,mongodb-2:27017/?replicaSet=rs0&w=majority -``` - -## πŸ“š References - -- [MongoDB Manual](https://docs.mongodb.com/manual/) -- [Motor Documentation](https://motor.readthedocs.io/) -- [Beanie ODM](https://beanie-odm.dev/) -- [MongoDB Best Practices](https://docs.mongodb.com/manual/administration/production-notes/) - ---- - -**MongoDB Version**: 7.0 -**Driver**: Motor (Async) -**ODM**: Beanie -**Python**: 3.10+ diff --git a/QUICK_START.md b/QUICK_START.md deleted file mode 100644 index bf08d07..0000000 --- a/QUICK_START.md +++ /dev/null @@ -1,285 +0,0 @@ -# Guida Rapida - Sistema Documentazione Datacenter Automatizzata - -## πŸ“‹ Panoramica - -Questo sistema permette la generazione automatica e l'aggiornamento della documentazione del datacenter tramite un LLM. 
- -## 🎯 Cosa Contiene - -### πŸ“ templates/ (10 file) -Template markdown per ogni sezione documentale: -- `01_infrastruttura_fisica.md` - Layout, elettrico, cooling, sicurezza fisica -- `02_networking.md` - Switch, router, firewall, VLAN, DNS/DHCP -- `03_server_virtualizzazione.md` - Host fisici, VM, cluster, container -- `04_storage.md` - SAN, NAS, object storage, capacity planning -- `05_sicurezza.md` - IAM, vulnerability, compliance, encryption -- `06_backup_disaster_recovery.md` - Backup jobs, RPO/RTO, DR site -- `07_monitoring_alerting.md` - Monitoring platform, alerts, dashboards -- `08_database_middleware.md` - DBMS, instances, application servers -- `09_procedure_operative.md` - SOP, runbook, escalation, change management -- `10_miglioramenti.md` - Analisi opportunitΓ  di miglioramento - -### πŸ“ system-prompts/ (10 file) -Prompt specifici per guidare l'LLM nella gestione di ogni sezione: -- Definiscono il ruolo dell'LLM -- Specificano le fonti dati -- Forniscono istruzioni di compilazione -- Indicano comandi e query da utilizzare - -### πŸ“ requirements/ (3 file) -Requisiti tecnici per l'implementazione: -- `llm_requirements.md` - Librerie, accessi, network, best practices -- `data_collection_scripts.md` - Script Python per raccolta dati -- `api_endpoints.md` - API calls, comandi CLI, SNMP OIDs - -## πŸš€ Come Iniziare - -### 1. Setup Ambiente -```bash -# Clone/copia il progetto -cd /opt/datacenter-docs - -# Crea virtual environment -python3 -m venv venv -source venv/bin/activate - -# Installa dipendenze -pip install -r requirements.txt -``` - -### 2. Configura Credenziali -```yaml -# Edita config.yaml -databases: - asset_db: - host: your-db.local - user: readonly_user - password: ${VAULT:password} - -vmware: - vcenter_host: vcenter.local - username: automation@vsphere.local - password: ${VAULT:password} -``` - -### 3. 
Test ConnettivitΓ  -```bash -# Verifica accesso ai sistemi -python3 main.py --dry-run --debug - -# Test singola sezione -python3 main.py --section 01 --dry-run -``` - -### 4. Prima Generazione -```bash -# Genera tutta la documentazione -python3 main.py - -# Output in: output/section_XX.md -``` - -## πŸ”„ Workflow Operativo - -### Aggiornamento Automatico -```bash -# Configura cron per aggiornamenti periodici -# Ogni 6 ore -0 */6 * * * cd /opt/datacenter-docs && venv/bin/python main.py - -# Weekly report completo -0 2 * * 0 cd /opt/datacenter-docs && venv/bin/python main.py --full -``` - -### Aggiornamento Manuale -```bash -# Specifica sezione -python3 main.py --section 02 - -# Debug mode -python3 main.py --debug - -# Dry run (test senza salvare) -python3 main.py --dry-run -``` - -## πŸ“Š Struttura Output - -``` -output/ -β”œβ”€β”€ section_01.md # Infrastruttura fisica -β”œβ”€β”€ section_02.md # Networking -β”œβ”€β”€ section_03.md # Server e virtualizzazione -β”œβ”€β”€ section_04.md # Storage -β”œβ”€β”€ section_05.md # Sicurezza -β”œβ”€β”€ section_06.md # Backup e DR -β”œβ”€β”€ section_07.md # Monitoring -β”œβ”€β”€ section_08.md # Database e middleware -β”œβ”€β”€ section_09.md # Procedure operative -└── section_10.md # Miglioramenti -``` - -## βš™οΈ Personalizzazione - -### Adattare i Template -1. Modifica `templates/XX_nome_sezione.md` -2. Aggiungi/rimuovi sezioni secondo necessitΓ  -3. Mantieni i placeholder `[NOME_CAMPO]` - -### Modificare System Prompts -1. Edita `system-prompts/XX_nome_sezione_prompt.md` -2. Aggiungi comandi specifici per il tuo ambiente -3. Aggiorna prioritΓ  e focus - -### Aggiungere Fonti Dati -1. Implementa nuovo collector in `collectors/` -2. Aggiorna `config.yaml` con endpoint -3. 
Aggiungi test in `tests/` - -## πŸ”’ Security Best Practices - -### Credenziali -- βœ… **USA**: Vault (HashiCorp Vault, AWS Secrets Manager) -- βœ… **USA**: Environment variables con encryption -- ❌ **MAI**: Hardcode password in script -- ❌ **MAI**: Commit credentials in git - -### Permessi Account -- βœ… Account automation dedicato -- βœ… Permessi read-only dove possibile -- βœ… MFA quando supportato -- βœ… Audit logging abilitato - -### Network Security -- βœ… Accesso solo a management networks -- βœ… Firewall rules specifiche -- βœ… VPN/bastion host se necessario - -## πŸ“ˆ Monitoring - -### Log Files -```bash -# Application logs -tail -f /var/log/datacenter-docs/generation.log - -# Cron execution logs -tail -f /var/log/datacenter-docs/cron.log - -# Error logs -grep ERROR /var/log/datacenter-docs/*.log -``` - -### Health Checks -```bash -# Verifica ultima generazione -ls -lh output/ - -# Check token count -for f in output/*.md; do - echo "$f: $(wc -c < $f | awk '{print int($1/4)}') tokens" -done - -# Verifica placeholder non sostituiti -grep -r '\[.*\]' output/ -``` - -## πŸ› Troubleshooting - -### Issue: Connection Timeout -```bash -# Test connectivity -ping -c 3 vcenter.local -telnet vcenter.local 443 - -# Check firewall -sudo iptables -L -n | grep -``` - -### Issue: Authentication Failed -```bash -# Verify credentials -python3 -c "from collectors import VMwareCollector; VMwareCollector(config).test_connection()" - -# Check vault -vault kv get datacenter/creds -``` - -### Issue: Token Limit Exceeded -- Riduci retention dati storici -- Rimuovi tabelle con troppi record -- Sintetizza invece di listare tutto - -### Issue: Incomplete Data -- Verifica cache redis: `redis-cli KEYS "*"` -- Check source system availability -- Review error logs - -## πŸ“š Risorse Utili - -### Documentazione Vendor -- VMware vSphere API: https://developer.vmware.com/apis -- Cisco DevNet: https://developer.cisco.com -- Zabbix API: https://www.zabbix.com/documentation/current/api - -### 
Python Libraries -- pyVmomi: https://github.com/vmware/pyvmomi -- netmiko: https://github.com/ktbyers/netmiko -- pysnmp: https://github.com/etingof/pysnmp - -## 🀝 Supporto - -### Team Contacts -- **Automation Team**: automation@company.com -- **Infrastructure Team**: infra@company.com -- **Security Team**: security@company.com - -### Issue Reporting -1. Check logs for errors -2. Test connectivity to sources -3. Open ticket con dettagli: timestamp, sezione, error message -4. Fornire log relevanti - -## βœ… Checklist Deployment - -Prima di andare in produzione: - -- [ ] Virtual environment creato e attivato -- [ ] Tutte le dipendenze installate (`pip install -r requirements.txt`) -- [ ] File `config.yaml` configurato con endpoint corretti -- [ ] Credenziali in vault/secrets manager -- [ ] Test connettivitΓ  a tutti i sistemi (VMware, network, storage, etc.) -- [ ] Firewall rules approvate e implementate -- [ ] Account automation con permessi appropriati -- [ ] Test dry-run completato con successo -- [ ] Logging configurato -- [ ] Notifiche email/Slack configurate -- [ ] Cron job configurato -- [ ] Documentazione runbook operativo completata -- [ ] Team formato sull'uso del sistema -- [ ] Escalation path definito - -## πŸ“ Note Finali - -### Limiti dei Token -Ogni sezione Γ¨ limitata a 50.000 token (~200KB di testo). 
Se superi il limite: -- Riduce dettaglio tabelle storiche -- Aggrega dati vecchi -- Sintetizza invece di elencare - -### Frequenza Aggiornamenti -Raccomandato: -- **Prod**: Ogni 6 ore -- **Metrics only**: Ogni 1 ora -- **Full report**: Settimanale - -### Backup Documentazione -```bash -# Backup automatico prima di aggiornare -tar -czf backup/docs-$(date +%Y%m%d).tar.gz output/ -``` - ---- - -**Versione**: 1.0 -**Data**: 2025-01-XX -**Maintainer**: Automation Team diff --git a/README_COMPLETE_SYSTEM.md b/README_COMPLETE_SYSTEM.md deleted file mode 100644 index cd683a6..0000000 --- a/README_COMPLETE_SYSTEM.md +++ /dev/null @@ -1,464 +0,0 @@ -# πŸš€ Datacenter Documentation System - Complete Integration - -Sistema completo per la gestione automatizzata della documentazione datacenter con: -- βœ… **MCP Integration** - Connessione ai dispositivi via Model Context Protocol -- βœ… **API REST** - Risoluzione automatica ticket -- βœ… **Chat Agentica** - Supporto tecnico AI-powered -- βœ… **CI/CD Pipelines** - GitLab e Gitea -- βœ… **Container Ready** - Docker e Kubernetes -- βœ… **Production Ready** - Monitoring, logging, scalability - ---- - -## πŸ“ Architettura Sistema - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ External Systems β”‚ -β”‚ Ticket Systems β”‚ Monitoring β”‚ Users β”‚ Chat Interface β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ API Service β”‚ β”‚ Chat Service β”‚ - β”‚ (FastAPI) β”‚ β”‚ (WebSocket) β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ 
β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β” - β”‚ Documentation Agent (AI) β”‚ - β”‚ - Vector Search (ChromaDB) β”‚ - β”‚ - Claude Sonnet 4.5 β”‚ - β”‚ - Autonomous Doc Retrieval β”‚ - β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ MCP Client β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ MCP Server β”‚ - β”‚ (Device Connectivity) β”‚ - β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”¬β”€β”€β”€β”˜ - β”‚ β”‚ β”‚ β”‚ β”‚ - β”Œβ”€β”€β”€β”€β–Όβ” β”Œβ”€β–Όβ”€β”€β” β”Œβ–Όβ”€β” β”Œβ–Όβ”€β”€β” β”Œβ–Όβ”€β”€β”€β” - β”‚VMwareβ”‚ β”‚K8s β”‚ β”‚OSβ”‚ β”‚Netβ”‚ β”‚Storβ”‚ - β””β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”˜ β””β”€β”€β”˜ β””β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”˜ -``` - ---- - -## 🎯 Features Principali - -### 1️⃣ API per Risoluzione Ticket -```bash -# Invia ticket automaticamente -curl -X POST https://docs.company.local/api/v1/tickets \ - -H "Content-Type: application/json" \ - -d '{ - "ticket_id": "INC-12345", - "title": "Network connectivity issue", - "description": "Cannot ping 10.0.20.5 from VLAN 100", - "priority": "high", - "category": "network" - }' - -# Response -{ - "ticket_id": "INC-12345", - "status": "resolved", - "resolution": "Check switch port configuration...", - "suggested_actions": [ - "Verify VLAN 100 configuration on core switch", - "Check inter-VLAN routing", - "Verify ACLs on firewall" - ], - "confidence_score": 0.92, - "related_docs": [...] -} -``` - -### 2️⃣ Chat Agentica -```javascript -// WebSocket connection -const ws = new WebSocket('wss://docs.company.local/chat'); - -ws.send(JSON.stringify({ - type: 'message', - content: 'How do I check UPS battery status?' 
-})); - -// AI searches documentation autonomously and responds -ws.onmessage = (event) => { - const response = JSON.parse(event.data); - // { - // message: "To check UPS battery status...", - // related_docs: [...], - // confidence: 0.95 - // } -}; -``` - -### 3️⃣ MCP Integration -```python -from datacenter_docs.mcp.client import MCPClient, MCPCollector - -async with MCPClient( - server_url="https://mcp.company.local", - api_key="your-api-key" -) as mcp: - # Query VMware - vms = await mcp.query_vmware("vcenter-01", "list_vms") - - # Query Kubernetes - pods = await mcp.query_kubernetes("prod-cluster", "all", "pods") - - # Execute network commands - output = await mcp.exec_network_command( - "core-sw-01", - ["show vlan brief"] - ) -``` - ---- - -## πŸ› οΈ Setup e Deploy - -### Prerequisites -- Python 3.10+ -- Poetry 1.7+ -- Docker & Docker Compose -- Kubernetes cluster (per production) -- MCP Server running -- Anthropic API key - -### 1. Local Development - -```bash -# Clone repository -git clone https://git.company.local/infrastructure/datacenter-docs.git -cd datacenter-docs - -# Setup con Poetry -poetry install - -# Configurazione -cp .env.example .env -# Edita .env con le tue credenziali - -# Start database e redis -docker-compose up -d postgres redis - -# Run migrations -poetry run alembic upgrade head - -# Index documentation -poetry run datacenter-docs index-docs --path ./output - -# Start API -poetry run uvicorn datacenter_docs.api.main:app --reload - -# Start Chat (in un altro terminale) -poetry run python -m datacenter_docs.chat.server - -# Start Worker (in un altro terminale) -poetry run celery -A datacenter_docs.workers.celery_app worker --loglevel=info -``` - -### 2. 
Docker Compose (All-in-one) - -```bash -# Build e start tutti i servizi -docker-compose up -d - -# Check logs -docker-compose logs -f api chat worker - -# Access services -# API: http://localhost:8000 -# Chat: http://localhost:8001 -# Frontend: http://localhost -# Flower (Celery monitoring): http://localhost:5555 -``` - -### 3. Kubernetes Production - -```bash -# Apply manifests -kubectl apply -f deploy/kubernetes/namespace.yaml -kubectl apply -f deploy/kubernetes/secrets.yaml # Create this first -kubectl apply -f deploy/kubernetes/configmap.yaml -kubectl apply -f deploy/kubernetes/deployment.yaml -kubectl apply -f deploy/kubernetes/service.yaml -kubectl apply -f deploy/kubernetes/ingress.yaml - -# Check status -kubectl get pods -n datacenter-docs -kubectl logs -n datacenter-docs deployment/api - -# Scale -kubectl scale deployment api --replicas=5 -n datacenter-docs -``` - ---- - -## πŸ”„ CI/CD Pipelines - -### GitLab CI -```yaml -# .gitlab-ci.yml -stages: [lint, test, build, deploy] - -# Automatic on push to main: -# - Lint code -# - Run tests -# - Build Docker images -# - Deploy to staging -# - Manual deploy to production -``` - -### Gitea Actions -```yaml -# .gitea/workflows/ci.yml -# Triggers: -# - Push to main/develop -# - Pull requests -# - Schedule (ogni 6 ore per docs generation) - -# Actions: -# - Lint, test, security scan -# - Build multi-arch images -# - Deploy to K8s -# - Generate documentation -``` - ---- - -## πŸ“‘ API Endpoints - -### Ticket Management -``` -POST /api/v1/tickets Create & process ticket -GET /api/v1/tickets/{ticket_id} Get ticket status -GET /api/v1/stats/tickets Get statistics -``` - -### Documentation -``` -POST /api/v1/documentation/search Search docs -POST /api/v1/documentation/generate/{sec} Generate section -GET /api/v1/documentation/sections List sections -``` - -### Health & Monitoring -``` -GET /health Health check -GET /metrics Prometheus metrics -``` - ---- - -## πŸ€– Chat Interface Usage - -### Web Chat -Accedi a 
`https://docs.company.local/chat` - -Features: -- πŸ’¬ Real-time chat con AI -- πŸ“š Ricerca autonoma documentazione -- 🎯 Suggerimenti contestuali -- πŸ“Ž Upload file/ticket -- πŸ’Ύ Cronologia conversazioni - -### Integration con External Systems - -```python -# Python example -import requests - -response = requests.post( - 'https://docs.company.local/api/v1/tickets', - json={ - 'ticket_id': 'EXT-12345', - 'title': 'Storage issue', - 'description': 'Datastore running out of space', - 'category': 'storage' - } -) - -resolution = response.json() -print(resolution['resolution']) -print(resolution['suggested_actions']) -``` - -```javascript -// JavaScript example -const response = await fetch('https://docs.company.local/api/v1/tickets', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - ticket_id: 'EXT-12345', - title: 'Storage issue', - description: 'Datastore running out of space', - category: 'storage' - }) -}); - -const resolution = await response.json(); -``` - ---- - -## πŸ” Security - -### Authentication -- API Key based authentication -- JWT tokens per chat sessions -- MCP server credentials secured in vault - -### Secrets Management -```bash -# Kubernetes secrets -kubectl create secret generic datacenter-secrets \ - --from-literal=database-url='postgresql://...' \ - --from-literal=redis-url='redis://...' \ - --from-literal=mcp-api-key='...' \ - --from-literal=anthropic-api-key='...' 
\ - -n datacenter-docs - -# Docker secrets -docker secret create mcp_api_key ./mcp_key.txt -``` - -### Network Security -- All communications over TLS -- Network policies in Kubernetes -- Rate limiting enabled -- CORS properly configured - ---- - -## πŸ“Š Monitoring & Observability - -### Metrics (Prometheus) -``` -# Exposed at /metrics -datacenter_docs_tickets_total -datacenter_docs_tickets_resolved_total -datacenter_docs_resolution_confidence_score -datacenter_docs_processing_time_seconds -datacenter_docs_api_requests_total -``` - -### Logging -```bash -# Structured logging in JSON -{ - "timestamp": "2025-01-15T10:30:00Z", - "level": "INFO", - "service": "api", - "event": "ticket_resolved", - "ticket_id": "INC-12345", - "confidence": 0.92, - "processing_time": 2.3 -} -``` - -### Tracing -- OpenTelemetry integration -- Distributed tracing across services -- Jaeger UI for visualization - ---- - -## πŸ§ͺ Testing - -```bash -# Unit tests -poetry run pytest tests/unit -v --cov - -# Integration tests -poetry run pytest tests/integration -v - -# E2E tests -poetry run pytest tests/e2e -v - -# Load testing -poetry run locust -f tests/load/locustfile.py -``` - ---- - -## πŸ”§ Configuration - -### Environment Variables -```bash -# Core -DATABASE_URL=postgresql://user:pass@host:5432/db -REDIS_URL=redis://:pass@host:6379/0 - -# MCP -MCP_SERVER_URL=https://mcp.company.local -MCP_API_KEY=your_mcp_key - -# AI -ANTHROPIC_API_KEY=your_anthropic_key - -# Optional -LOG_LEVEL=INFO -DEBUG=false -WORKERS=4 -MAX_TOKENS=4096 -``` - ---- - -## πŸ“š Documentation - -- `/docs` - API documentation (Swagger/OpenAPI) -- `/redoc` - Alternative API documentation -- `QUICK_START.md` - Quick start guide -- `ARCHITECTURE.md` - System architecture -- `DEPLOYMENT.md` - Deployment guide - ---- - -## 🀝 Contributing - -1. Create feature branch: `git checkout -b feature/amazing-feature` -2. Commit changes: `git commit -m 'Add amazing feature'` -3. 
Push to branch: `git push origin feature/amazing-feature` -4. Open Pull Request -5. CI/CD runs automatically -6. Merge after approval - ---- - -## πŸ“ License - -MIT License - see LICENSE file - ---- - -## πŸ†˜ Support - -- **Email**: automation-team@company.local -- **Slack**: #datacenter-automation -- **Issues**: https://git.company.local/infrastructure/datacenter-docs/issues - ---- - -## 🎯 Roadmap - -- [x] MCP Integration -- [x] API per ticket resolution -- [x] Chat agentica -- [x] CI/CD pipelines -- [x] Docker & Kubernetes -- [ ] Multi-language support -- [ ] Advanced analytics dashboard -- [ ] Mobile app -- [ ] Voice interface -- [ ] Automated remediation - ---- - -**Powered by Claude Sonnet 4.5 & MCP** πŸš€ diff --git a/README_FINALE.md b/README_FINALE.md deleted file mode 100644 index 045b916..0000000 --- a/README_FINALE.md +++ /dev/null @@ -1,576 +0,0 @@ -# πŸŽ‰ SISTEMA COMPLETO - Documentazione Datacenter con Web e MCP - -## βœ… Cosa Γ¨ Stato Creato - -Ho implementato un **sistema end-to-end completo** per la documentazione del datacenter che include: - -### 1️⃣ Sistema Documentazione Base (giΓ  presente) -- βœ… 10 template markdown per sezioni documentazione -- βœ… 10 system prompt per guidare LLM -- βœ… 3 file requirements tecnici dettagliati -- βœ… Script Python per raccolta dati -- βœ… Configurazione completa - -### 2️⃣ **NUOVO: Web Server FastAPI** -- βœ… Server FastAPI per servire documentazione -- βœ… API REST con 10+ endpoints -- βœ… Ottimizzazione speciale per LLM -- βœ… Search full-text -- βœ… Statistics e metadata -- βœ… Multiple format (markdown/html/json) - -### 3️⃣ **NUOVO: MCP Server** -- βœ… Model Context Protocol Server -- βœ… Connessioni SSH a switch/router/server -- βœ… Query SNMP a UPS/sensori -- βœ… API integration VMware/storage -- βœ… 15+ metodi predefiniti -- βœ… Audit logging completo - -### 4️⃣ **NUOVO: Sistema Web con MkDocs** -- βœ… Compilazione automatica con MkDocs -- βœ… Material theme responsive -- βœ… Dark mode -- βœ… 
Search integrata -- βœ… Git revision dates -- βœ… Ottimizzato per mobile - -### 5️⃣ **NUOVO: CI/CD Pipeline** -- βœ… GitHub Actions workflow completo -- βœ… 8 job automatici: - - Lint & validate - - Build MkDocs - - Build Docker image - - Security scanning - - Deploy production - - Run tests - - Generate reports - - Update metadata -- βœ… Deploy automatico su push -- βœ… Notifiche Slack - -### 6️⃣ **NUOVO: Docker & Orchestration** -- βœ… Dockerfile multi-stage ottimizzato -- βœ… Docker Compose per orchestrazione -- βœ… 4 servizi: - - docs-server (FastAPI + MCP) - - redis (caching) - - nginx (reverse proxy) - - docs-builder (build service) -- βœ… Health checks -- βœ… Volume persistence -- βœ… Network isolation - -### 7️⃣ **NUOVO: Nginx Reverse Proxy** -- βœ… SSL/TLS termination -- βœ… Gzip compression -- βœ… Rate limiting -- βœ… Static file caching -- βœ… Security headers -- βœ… HTTP β†’ HTTPS redirect - -### 8️⃣ **NUOVO: Documentazione Completa** -- βœ… README_WEB.md - Sistema web/MCP -- βœ… README_MASTER.md - Overview completo -- βœ… API docs in docs/api/ -- βœ… MCP docs -- βœ… Deployment guides -- βœ… Troubleshooting - ---- - -## πŸ“ Struttura File Creati - -``` -datacenter-docs/ -β”‚ -β”œβ”€β”€ πŸ“„ README.md # Overview originale -β”œβ”€β”€ πŸ“„ README_WEB.md # ⭐ Docs sistema web/MCP -β”œβ”€β”€ πŸ“„ README_MASTER.md # ⭐ Master overview -β”œβ”€β”€ πŸ“„ QUICK_START.md # Guida rapida -β”œβ”€β”€ πŸ“„ INDICE_COMPLETO.md # Indice dettagliato -β”‚ -β”œβ”€β”€ πŸ“„ mkdocs.yml # ⭐ Config MkDocs -β”œβ”€β”€ πŸ“„ Dockerfile # ⭐ Multi-stage build -β”œβ”€β”€ πŸ“„ docker-compose.yml # ⭐ Orchestrazione -β”œβ”€β”€ πŸ“„ docker-entrypoint.sh # ⭐ Container entry -β”œβ”€β”€ πŸ“„ requirements.txt # Python deps -β”‚ -β”œβ”€β”€ πŸ“ templates/ # 10 template (giΓ  presenti) -β”œβ”€β”€ πŸ“ system-prompts/ # 10 prompt (giΓ  presenti) -β”œβ”€β”€ πŸ“ requirements/ # 3 requisiti (giΓ  presenti) -β”‚ -β”œβ”€β”€ πŸ“ api/ # ⭐ NUOVO -β”‚ β”œβ”€β”€ main.py # FastAPI server -β”‚ └── 
requirements-api.txt # API dependencies -β”‚ -β”œβ”€β”€ πŸ“ mcp-server/ # ⭐ NUOVO -β”‚ └── server.py # MCP implementation -β”‚ -β”œβ”€β”€ πŸ“ docs/ # ⭐ NUOVO -β”‚ β”œβ”€β”€ index.md # Homepage MkDocs -β”‚ β”œβ”€β”€ sections/ # Placeholder sezioni -β”‚ └── api/ # API documentation -β”‚ β”œβ”€β”€ index.md # API overview -β”‚ β”œβ”€β”€ endpoints.md # Endpoints reference -β”‚ └── mcp.md # MCP docs -β”‚ -β”œβ”€β”€ πŸ“ nginx/ # ⭐ NUOVO -β”‚ └── nginx.conf # Reverse proxy config -β”‚ -β”œβ”€β”€ πŸ“ scripts/ # ⭐ NUOVO -β”‚ β”œβ”€β”€ build-docs.sh # Build script -β”‚ └── deploy.sh # Deploy script -β”‚ -β”œβ”€β”€ πŸ“ .github/workflows/ # ⭐ NUOVO -β”‚ └── build-deploy.yml # CI/CD pipeline completa -β”‚ -└── πŸ“ config/ # ⭐ NUOVO - └── mcp_config.example.json # MCP configuration -``` - ---- - -## πŸš€ Come Funziona il Sistema Completo - -### Fase 1: Generazione Documentazione (LLM) -``` -LLM legge template + prompt + requirements - ↓ -Connette a infrastrutture via MCP: -- SSH β†’ switch, router, server -- SNMP β†’ UPS, sensori ambientali -- API β†’ VMware, storage, monitoring -- Database β†’ asset management - ↓ -Compila template markdown - ↓ -Commit su Git -``` - -### Fase 2: CI/CD Pipeline (Automatico) -``` -Push to main branch - ↓ -GitHub Actions triggered: -β”œβ”€ Lint & validate codice -β”œβ”€ Build MkDocs (HTML static) -β”œβ”€ Build Docker image -β”œβ”€ Security scan (Trivy) -β”œβ”€ Run tests -└─ Deploy to production - ↓ -Docker containers running: -β”œβ”€ FastAPI server (porta 8000) -β”œβ”€ MCP server (porta 8001) -β”œβ”€ Redis cache -└─ Nginx reverse proxy (porta 80/443) -``` - -### Fase 3: Accesso Documentazione -``` -UMANI: -Browser β†’ https://docs.datacenter.local - ↓ -Nginx (SSL/cache) - ↓ -FastAPI β†’ MkDocs site (HTML) - -LLM: -API call β†’ https://docs.datacenter.local/api/v1/sections/02_networking - ↓ -FastAPI β†’ JSON/Markdown ottimizzato - ↓ -LLM riceve contenuto strutturato - -LLM (live data): -MCP call β†’ 
https://docs.datacenter.local/mcp/execute/ssh - ↓ -MCP Server β†’ SSH to switch - ↓ -LLM riceve output comando -``` - ---- - -## 🎯 Caratteristiche Principali - -### 🌐 Web Server (FastAPI - porta 8000) - -**Endpoints:** -- `GET /` - Redirect a documentazione -- `GET /docs/` - MkDocs site compilato -- `GET /api/v1/sections` - Lista sezioni -- `GET /api/v1/sections/{id}` - Get sezione (markdown/html/json) -- `GET /api/v1/summary` - Summary per LLM -- `GET /api/v1/search?q=query` - Search full-text -- `GET /api/v1/stats` - Statistics -- `GET /api/v1/llm-optimized/{id}` - Contenuto ottimizzato LLM -- `GET /health` - Health check - -**Features:** -- Ottimizzazione per LLM (token count, metadata, structured) -- Multiple format output -- Search integrata -- CORS enabled -- Gzip compression -- OpenAPI docs auto-generate - -### πŸ”Œ MCP Server (porta 8001) - -**Metodi:** -- `ssh_execute(connection, command)` - Esegui SSH -- `ssh_get_config(connection)` - Get configurazione -- `snmp_get(connection, oid)` - SNMP GET -- `snmp_walk(connection, oid)` - SNMP WALK -- `api_request(connection, endpoint, method)` - API call -- `vmware_get_vms(connection)` - Get VMware VMs -- `vmware_get_hosts(connection)` - Get ESXi hosts -- `cisco_get_interfaces(connection)` - Cisco interfaces -- `ups_get_status(connection)` - UPS status -- `test_connection(connection)` - Test connectivity - -**Features:** -- Audit logging completo -- Rate limiting per connessione -- Error handling robusto -- Timeout configurabili -- Read-only operations -- Multiple protocol support (SSH/SNMP/API) - -### πŸ“š MkDocs Site - -**Features:** -- Material theme responsive -- Dark/light mode -- Search integrata con suggestion -- Navigation tabs -- Table of contents -- Code highlighting -- Git revision dates -- Mobile optimized -- Icons e emoji support - -### πŸ”„ CI/CD Pipeline - -**8 Job Automatici:** -1. **lint-and-validate** - Code quality -2. **build-docs** - Compila MkDocs -3. 
**build-docker** - Build immagine Docker -4. **security-scan** - Trivy scan -5. **test** - Run pytest -6. **deploy-production** - Deploy SSH -7. **generate-report** - Stats report -8. **update-metadata** - Update metadata - -**Trigger:** -- Push su main -- Pull request -- Schedule (daily 2 AM) -- Manual dispatch - ---- - -## πŸ” Security Features - -βœ… **Secrets Management** -- Environment variables -- Docker secrets support -- .env file support -- HashiCorp Vault compatible - -βœ… **Network Security** -- Management network isolation -- Firewall rules examples -- Rate limiting (100 req/min) -- SSL/TLS encryption - -βœ… **Container Security** -- Non-root user (appuser) -- Multi-stage build (small image) -- Security scanning (Trivy) -- Health checks - -βœ… **Access Control** -- Read-only MCP operations -- Audit logging -- API key support (optional) -- CORS configuration - -βœ… **Compliance** -- All operations logged -- Version control (Git) -- Automated backups -- Audit trail - ---- - -## πŸ“– Come Usare - -### 1. Setup Iniziale - -```bash -# Clone repository -git clone -cd datacenter-docs - -# Setup Python environment -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -pip install -r api/requirements-api.txt - -# Configure MCP -cp config/mcp_config.example.json config/mcp_config.json -# Edit with real credentials -vim config/mcp_config.json - -# Create .env file -cat > .env << 'EOF' -VCENTER_PASSWORD=your_password -SWITCH_PASSWORD=your_password -STORAGE_API_KEY=your_api_key -EOF -``` - -### 2. Build & Test Locally - -```bash -# Build documentazione -./scripts/build-docs.sh - -# Test con Docker Compose -docker-compose up -d - -# Check health -curl http://localhost:8000/health -curl http://localhost:8001/methods - -# View logs -docker-compose logs -f -``` - -### 3. Accesso - -``` -Web UI: http://localhost:8000/docs/ -API Swagger: http://localhost:8000/api/docs -MCP Swagger: http://localhost:8001/docs -``` - -### 4. 
Deploy Production - -```bash -# Configure GitHub secrets: -# - DEPLOY_SSH_KEY -# - DEPLOY_HOST -# - DEPLOY_USER -# - SLACK_WEBHOOK (optional) - -# Push to main triggers deployment -git add . -git commit -m "deploy: update documentation" -git push origin main - -# GitHub Actions will: -# 1. Build everything -# 2. Run tests -# 3. Security scan -# 4. Deploy to production -# 5. Verify deployment -``` - ---- - -## πŸŽ“ Esempi Utilizzo - -### Per LLM - Leggere Documentazione - -```python -import requests - -# Get summary -r = requests.get('http://localhost:8000/api/v1/summary') -summary = r.json() - -for section in summary: - print(f"{section['title']}: {len(section['key_points'])} key points") - -# Get specific section -r = requests.get('http://localhost:8000/api/v1/sections/02_networking') -doc = r.json() - -print(f"Title: {doc['metadata']['title']}") -print(f"Tokens: {doc['metadata']['token_estimate']}") -print(f"Content:\n{doc['content']}") -``` - -### Per LLM - Connessioni Live (MCP) - -```python -import requests - -# List available methods -r = requests.get('http://localhost:8001/methods') -methods = r.json() - -# Execute SSH command -r = requests.post('http://localhost:8001/execute/ssh', json={ - 'connection_name': 'switch-core-01', - 'command': 'show version' -}) -result = r.json() -print(result['output']) - -# SNMP query -r = requests.post('http://localhost:8001/execute/snmp/get', json={ - 'connection_name': 'ups-01', - 'oid': '.1.3.6.1.2.1.33.1.2.1.0' -}) -ups = r.json() -print(f"UPS Status: {ups['output']['value']}") - -# VMware API -r = requests.post('http://localhost:8001/execute/api', json={ - 'connection_name': 'vcenter-prod', - 'endpoint': '/rest/vcenter/vm', - 'method': 'GET' -}) -vms = r.json() -print(f"VMs: {vms['output']['data']}") -``` - ---- - -## πŸ“š Documentazione Disponibile - -### Da Leggere Prima - -1. **README_MASTER.md** (questo file) - Overview completo -2. **README_WEB.md** - Dettagli web server e MCP -3. 
**QUICK_START.md** - Getting started rapido - -### Documentazione Tecnica - -1. **README.md** - Overview sistema documentazione -2. **INDICE_COMPLETO.md** - Indice tutti i file -3. **requirements/llm_requirements.md** - Setup LLM -4. **docs/api/index.md** - API documentation -5. **docs/api/mcp.md** - MCP documentation - ---- - -## βœ… Checklist Deployment - -### Pre-requisiti -- [ ] Python 3.11+ installato -- [ ] Docker & Docker Compose installati -- [ ] Git configurato -- [ ] SSH access a production server -- [ ] GitHub repository creato - -### Configurazione -- [ ] `config/mcp_config.json` creato con credenziali -- [ ] `.env` file creato con secrets -- [ ] GitHub secrets configurati (DEPLOY_*) -- [ ] SSL certificates preparati (per Nginx) -- [ ] DNS configurato (docs.datacenter.local) - -### Test Locale -- [ ] Build docs funziona (`./scripts/build-docs.sh`) -- [ ] Docker build OK (`docker-compose build`) -- [ ] Containers running (`docker-compose up -d`) -- [ ] Health checks OK -- [ ] API endpoints testati -- [ ] MCP connections testate - -### Deploy Production -- [ ] Server production pronto -- [ ] Firewall rules configurate -- [ ] Pipeline GitHub Actions funzionante -- [ ] Primo deploy completato -- [ ] Monitoring setup -- [ ] Backup configurato - ---- - -## 🎯 Vantaggi Sistema Completo - -### βœ… Per gli Umani -- Web UI professionale e responsive -- Dark mode per confort visivo -- Search integrata efficiente -- Mobile-friendly -- Sempre aggiornata automaticamente - -### βœ… Per gli LLM -- API REST con multiple format -- Token count espliciti -- Metadata strutturati -- Contenuto ottimizzato (no noise) -- MCP per dati live - -### βœ… Per l'Organizzazione -- Zero effort di manutenzione -- Sempre aggiornata (ogni 6h) -- Compliance automatica -- Audit trail completo -- Costi ridotti (no manuale) - -### βœ… Per DevOps -- Containerizzato (easy deploy) -- CI/CD completo -- Infrastructure as Code -- Health checks integrati -- Scalabile horizontal - ---- - -## πŸ“Š 
Metriche - -### Copertura Documentazione -- **10 sezioni** complete -- **~15.000 righe** markdown -- **~200.000 token** totali -- **10+ tabelle** per sezione -- **50+ parametri** monitorati - -### Performance -- **Build time**: ~2 minuti -- **Deploy time**: ~3 minuti -- **API response**: <100ms -- **MCP exec**: <1s (SSH/SNMP) -- **Site size**: ~50MB - -### Automazione -- **8 job** CI/CD automatici -- **15+ metodi** MCP predefiniti -- **10+ endpoint** API REST -- **1 push** = full deployment -- **0 intervento** manuale - ---- - -## πŸŽ‰ Conclusione - -Hai ora un **sistema completo end-to-end** che: - -βœ… Genera documentazione automaticamente (LLM) -βœ… Pubblica su web professionale (MkDocs) -βœ… Espone API REST (FastAPI) -βœ… Fornisce connessioni live (MCP) -βœ… Deploy automatico (CI/CD) -βœ… Containerizzato (Docker) -βœ… Sicuro e compliant -βœ… Documentato completamente - -**πŸš€ Ready to deploy and use!** - ---- - -**Sistema Documentazione Datacenter v2.0** -**Con Web Publishing e MCP Integration** -**Maintainer**: Automation Team -**Date**: 2025-01-XX - -Per domande: automation-team@company.com diff --git a/README_MASTER.md b/README_MASTER.md deleted file mode 100644 index 18ce6b6..0000000 --- a/README_MASTER.md +++ /dev/null @@ -1,175 +0,0 @@ -# 🎯 Sistema Completo - Documentazione Datacenter Automatizzata - -## πŸ“¦ Pacchetto Completo - -Questo pacchetto contiene un **sistema end-to-end** per la gestione automatizzata della documentazione datacenter: - -1. **Template documentazione** (10 sezioni) -2. **System prompts per LLM** (10 file) -3. **Requisiti tecnici e script** (raccolta dati) -4. **Webserver FastAPI** (pubblicazione web) -5. **MCP Server** (connessioni infrastruttura) -6. **CI/CD Pipeline** (automazione completa) -7. 
**Docker containerizzazione** - ---- - -## πŸ“š Documentazione - -### πŸ“– README.md -Panoramica generale del sistema di documentazione - -### πŸ“– QUICK_START.md -Guida rapida per setup e primo utilizzo - -### πŸ“– INDICE_COMPLETO.md -Indice dettagliato di tutti i file e componenti - -### πŸ“– README_WEB.md -Documentazione completa per sistema web e MCP server - ---- - -## πŸ—‚οΈ Struttura Completa - -``` -datacenter-docs/ -β”‚ -β”œβ”€β”€ πŸ“„ README.md # Panoramica generale -β”œβ”€β”€ πŸ“„ README_WEB.md # Docs web/MCP system -β”œβ”€β”€ πŸ“„ QUICK_START.md # Guida rapida -β”œβ”€β”€ πŸ“„ INDICE_COMPLETO.md # Indice dettagliato -β”œβ”€β”€ πŸ“„ README_MASTER.md # Questo file -β”‚ -β”œβ”€β”€ πŸ“„ mkdocs.yml # Config MkDocs -β”œβ”€β”€ πŸ“„ Dockerfile # Multi-stage build -β”œβ”€β”€ πŸ“„ docker-compose.yml # Orchestrazione -β”œβ”€β”€ πŸ“„ docker-entrypoint.sh # Container entry -β”œβ”€β”€ πŸ“„ requirements.txt # Python deps -β”‚ -β”œβ”€β”€ πŸ“ templates/ # 10 template sezioni -β”‚ β”œβ”€β”€ 01_infrastruttura_fisica.md -β”‚ β”œβ”€β”€ 02_networking.md -β”‚ β”œβ”€β”€ 03_server_virtualizzazione.md -β”‚ β”œβ”€β”€ 04_storage.md -β”‚ β”œβ”€β”€ 05_sicurezza.md -β”‚ β”œβ”€β”€ 06_backup_disaster_recovery.md -β”‚ β”œβ”€β”€ 07_monitoring_alerting.md -β”‚ β”œβ”€β”€ 08_database_middleware.md -β”‚ β”œβ”€β”€ 09_procedure_operative.md -β”‚ └── 10_miglioramenti.md -β”‚ -β”œβ”€β”€ πŸ“ system-prompts/ # 10 prompt per LLM -β”‚ β”œβ”€β”€ 01_infrastruttura_fisica_prompt.md -β”‚ β”œβ”€β”€ ... 
(altri 9 file) -β”‚ └── 10_miglioramenti_prompt.md -β”‚ -β”œβ”€β”€ πŸ“ requirements/ # Requisiti tecnici -β”‚ β”œβ”€β”€ llm_requirements.md # Setup LLM completo -β”‚ β”œβ”€β”€ data_collection_scripts.md # Script Python -β”‚ └── api_endpoints.md # API/comandi reference -β”‚ -β”œβ”€β”€ πŸ“ api/ # FastAPI application -β”‚ β”œβ”€β”€ main.py # Server principale -β”‚ └── requirements-api.txt # Dependencies -β”‚ -β”œβ”€β”€ πŸ“ mcp-server/ # MCP Server -β”‚ └── server.py # MCP implementation -β”‚ -β”œβ”€β”€ πŸ“ docs/ # MkDocs source -β”‚ β”œβ”€β”€ index.md # Homepage -β”‚ β”œβ”€β”€ sections/ # Sezioni docs -β”‚ └── api/ # API docs -β”‚ β”œβ”€β”€ index.md -β”‚ β”œβ”€β”€ endpoints.md -β”‚ └── mcp.md -β”‚ -β”œβ”€β”€ πŸ“ nginx/ # Reverse proxy -β”‚ └── nginx.conf -β”‚ -β”œβ”€β”€ πŸ“ scripts/ # Utility scripts -β”‚ β”œβ”€β”€ build-docs.sh -β”‚ └── deploy.sh -β”‚ -β”œβ”€β”€ πŸ“ .github/workflows/ # CI/CD -β”‚ └── build-deploy.yml # Pipeline completa -β”‚ -└── πŸ“ config/ # Configuration - └── mcp_config.example.json # MCP config example -``` - ---- - -## 🎯 Workflow Completo - -### Fase 1: Setup Iniziale -```bash -1. Setup ambiente Python + Docker -2. Configurare credenziali -3. Test connettivitΓ  infrastruttura -4. 
Prima generazione documentazione -``` - -### Fase 2: Generazione Documentazione -```bash -LLM legge: - β”œβ”€ Template (cosa compilare) - β”œβ”€ System Prompt (come farlo) - └─ Requirements (con quali tool) - ↓ -Connette a infrastrutture via: - β”œβ”€ SSH (switch, router) - β”œβ”€ SNMP (UPS, sensori) - β”œβ”€ API (VMware, storage) - └─ Database (asset management) - ↓ -Compila template e salva -``` - -### Fase 3: Pubblicazione Web -```bash -Commit su Git - ↓ -GitHub Actions pipeline: - β”œβ”€ Lint & validate - β”œβ”€ Build MkDocs - β”œβ”€ Build Docker image - β”œβ”€ Security scan - └─ Deploy to production - ↓ -Documentazione live su: - β”œβ”€ Web UI (MkDocs) - β”œβ”€ API REST (FastAPI) - └─ MCP Server (connessioni) -``` - -### Fase 4: Accesso -```bash -Umani β†’ Web Browser β†’ MkDocs UI -LLM β†’ API REST β†’ JSON/Markdown -LLM β†’ MCP Server β†’ Infrastructure live data -``` - ---- - -## πŸš€ Quick Start - -### 1. Setup Ambiente -```bash -cd datacenter-docs -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` - -### 2. Configurazione -```bash -# Edita configurazione -cp config/mcp_config.example.json config/mcp_config.json -vim config/mcp_config.json - -# Secrets -cat > .env << 'EOF' -VCENTER_PASSWORD=your_password -SWITCH_PASSWORD=your_password -STORAGE_API_KEY=your_api_key diff --git a/README_MONGODB.md b/README_MONGODB.md deleted file mode 100644 index 281769b..0000000 --- a/README_MONGODB.md +++ /dev/null @@ -1,473 +0,0 @@ -# πŸƒ Sistema Documentazione con MongoDB - -## NovitΓ  Versione 2.0 - -Il sistema Γ¨ stato **completamente migrato a MongoDB 7.0** per migliorare: -- βœ… FlessibilitΓ  schema -- βœ… Performance -- βœ… ScalabilitΓ  -- βœ… Developer experience - -## πŸš€ Quick Start MongoDB - -### 1. 
Local Development - -```bash -# Clone repository -git clone https://git.company.local/infrastructure/datacenter-docs.git -cd datacenter-docs - -# Setup environment -cp .env.example .env -nano .env # Edit MongoDB credentials - -# Start MongoDB + Redis -docker-compose up -d mongodb redis - -# Install dependencies -poetry install - -# Start API -poetry run uvicorn datacenter_docs.api.main:app --reload -``` - -### 2. Docker Compose (All-in-One) - -```bash -# Edit .env -cp .env.example .env - -# MongoDB credentials -MONGO_ROOT_USER=admin -MONGO_ROOT_PASSWORD=your_secure_password -MONGODB_URL=mongodb://admin:your_secure_password@mongodb:27017 -MONGODB_DATABASE=datacenter_docs - -# Start everything -docker-compose up -d - -# Check health -curl http://localhost:8000/health -# Response: {"status":"healthy","database":"mongodb",...} - -# Access services -# API: http://localhost:8000/api/docs -# Chat: http://localhost:8001 -# Frontend: http://localhost -# Flower: http://localhost:5555 -``` - -### 3. 
Kubernetes - -```bash -# Apply manifests -kubectl apply -f deploy/kubernetes/namespace.yaml - -# Create secrets -kubectl create secret generic datacenter-secrets \ - --from-literal=mongodb-url='mongodb://admin:password@mongodb:27017' \ - --from-literal=mongodb-root-user='admin' \ - --from-literal=mongodb-root-password='password' \ - --from-literal=redis-url='redis://:password@redis:6379/0' \ - --from-literal=mcp-api-key='your-key' \ - --from-literal=anthropic-api-key='sk-ant-xxx' \ - -n datacenter-docs - -# Deploy MongoDB (StatefulSet with replica set) -kubectl apply -f deploy/kubernetes/mongodb.yaml - -# Deploy application -kubectl apply -f deploy/kubernetes/deployment.yaml -kubectl apply -f deploy/kubernetes/service.yaml -kubectl apply -f deploy/kubernetes/ingress.yaml - -# Check status -kubectl get pods -n datacenter-docs -``` - -## πŸ“Š MongoDB Features - -### Document Structure - -Tutti i dati sono memorizzati come documenti JSON nativi: - -```json -{ - "ticket_id": "INC-12345", - "title": "Network issue", - "description": "Cannot reach VLAN 100", - "status": "resolved", - "resolution": "Check switch configuration...", - "suggested_actions": ["action1", "action2"], - "confidence_score": 0.92, - "metadata": { - "source": "ServiceNow", - "custom_field": "any value" - }, - "created_at": ISODate("2025-01-15T10:30:00Z") -} -``` - -### Collections - -- `tickets` - Ticket e risoluzioni -- `documentation_sections` - Metadata sezioni doc -- `chat_sessions` - Conversazioni chat -- `system_metrics` - Metriche sistema -- `audit_logs` - Audit trail - -### Beanie ODM - -Utilizziamo **Beanie** (ODM moderno) per type-safe document operations: - -```python -from datacenter_docs.api.models import Ticket - -# Create -ticket = Ticket( - ticket_id="INC-001", - title="Test", - description="Testing MongoDB" -) -await ticket.insert() - -# Find -tickets = await Ticket.find(Ticket.status == "resolved").to_list() - -# Update -ticket.status = "closed" -await ticket.save() - -# Delete 
-await ticket.delete() - -# Aggregation -pipeline = [ - {"$group": { - "_id": "$category", - "count": {"$sum": 1} - }} -] -result = await Ticket.aggregate(pipeline).to_list() -``` - -## πŸ”§ Configurazione MongoDB - -### Environment Variables - -```bash -# Required -MONGODB_URL=mongodb://admin:password@mongodb:27017 -MONGODB_DATABASE=datacenter_docs - -# Optional (for admin operations) -MONGO_ROOT_USER=admin -MONGO_ROOT_PASSWORD=secure_password -``` - -### Connection String Examples - -```bash -# Local -MONGODB_URL=mongodb://admin:password@localhost:27017 - -# Docker Compose -MONGODB_URL=mongodb://admin:password@mongodb:27017 - -# Kubernetes (single) -MONGODB_URL=mongodb://admin:password@mongodb.datacenter-docs.svc.cluster.local:27017 - -# Kubernetes (replica set) -MONGODB_URL=mongodb://admin:password@mongodb-0.mongodb:27017,mongodb-1.mongodb:27017,mongodb-2.mongodb:27017/?replicaSet=rs0 - -# MongoDB Atlas (cloud) -MONGODB_URL=mongodb+srv://user:password@cluster.mongodb.net/datacenter_docs?retryWrites=true&w=majority -``` - -## πŸ” Query Examples - -### Python API - -```python -# Simple queries -resolved = await Ticket.find(Ticket.status == "resolved").to_list() - -high_priority = await Ticket.find( - Ticket.priority == "high", - Ticket.status == "processing" -).to_list() - -# Complex queries -from datetime import datetime, timedelta - -recent = datetime.now() - timedelta(days=7) -high_confidence = await Ticket.find( - Ticket.created_at > recent, - Ticket.confidence_score > 0.9 -).sort(-Ticket.created_at).to_list() - -# Text search -search_results = await Ticket.find({ - "$text": {"$search": "network connectivity"} -}).to_list() - -# Aggregation -stats = await Ticket.aggregate([ - {"$group": { - "_id": "$category", - "total": {"$sum": 1}, - "avg_confidence": {"$avg": "$confidence_score"} - }}, - {"$sort": {"total": -1}} -]).to_list() -``` - -### MongoDB Shell - -```javascript -// Connect -mongosh mongodb://admin:password@localhost:27017 - -use datacenter_docs - 
-// Basic queries -db.tickets.find({ status: "resolved" }) -db.tickets.countDocuments({ category: "network" }) - -// Complex queries -db.tickets.find({ - status: "resolved", - confidence_score: { $gt: 0.8 }, - created_at: { $gte: new Date("2025-01-01") } -}) - -// Text search -db.tickets.find({ - $text: { $search: "network connectivity" } -}) - -// Aggregation -db.tickets.aggregate([ - { $match: { status: "resolved" } }, - { $group: { - _id: "$category", - count: { $sum: 1 }, - avg_time: { $avg: "$processing_time" } - }}, - { $sort: { count: -1 } } -]) -``` - -## πŸ› οΈ Maintenance - -### Backup - -```bash -# Full backup -docker-compose exec mongodb mongodump \ - --username admin \ - --password password \ - --authenticationDatabase admin \ - --out /data/backup - -# Restore -docker-compose exec mongodb mongorestore \ - --username admin \ - --password password \ - --authenticationDatabase admin \ - /data/backup -``` - -### Monitoring - -```bash -# Database stats -docker-compose exec mongodb mongosh \ - -u admin -p password --authenticationDatabase admin \ - --eval "db.stats()" - -# Collection stats -docker-compose exec mongodb mongosh \ - -u admin -p password --authenticationDatabase admin \ - datacenter_docs --eval "db.tickets.stats()" -``` - -### Indexes - -```javascript -// Check indexes -db.tickets.getIndexes() - -// Create custom index -db.tickets.createIndex({ category: 1, status: 1 }) - -// Text search index -db.tickets.createIndex({ - title: "text", - description: "text", - resolution: "text" -}) -``` - -## πŸ” Security - -### Authentication - -MongoDB usa autenticazione SCRAM-SHA-256: - -```javascript -// Create app user -db.createUser({ - user: "docs_app", - pwd: "secure_password", - roles: [ - { role: "readWrite", db: "datacenter_docs" } - ] -}) -``` - -### Authorization - -Roles disponibili: -- `read` - Solo lettura -- `readWrite` - Lettura + scrittura -- `dbAdmin` - Amministrazione DB -- `userAdmin` - Gestione utenti - -### TLS/SSL - -```bash -# 
Generate certificates -openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ - -keyout mongodb.key -out mongodb.crt - -# Docker Compose con TLS -mongodb: - command: ["--tlsMode=requireTLS", "--tlsCertificateKeyFile=/cert/mongodb.pem"] - volumes: - - ./certs/mongodb.pem:/cert/mongodb.pem:ro -``` - -## πŸ“ˆ Performance - -### Connection Pooling - -```python -# Default: maxPoolSize=100 -MONGODB_URL=mongodb://user:pass@host:27017/?maxPoolSize=200 -``` - -### Read Preference - -```python -# Replica set - preferisci secondary per letture -MONGODB_URL=mongodb://user:pass@host:27017/?readPreference=secondaryPreferred -``` - -### Write Concern - -```python -# Majority (safe, slower) -MONGODB_URL=mongodb://user:pass@host:27017/?w=majority - -# Faster (less safe) -MONGODB_URL=mongodb://user:pass@host:27017/?w=1 -``` - -## πŸš€ Scalability - -### Replica Set (High Availability) - -```yaml -# docker-compose.yml -services: - mongodb-0: - image: mongo:7.0 - command: ["--replSet", "rs0"] - - mongodb-1: - image: mongo:7.0 - command: ["--replSet", "rs0"] - - mongodb-2: - image: mongo:7.0 - command: ["--replSet", "rs0"] -``` - -### Sharding (Horizontal Scaling) - -Per dataset molto grandi (>1TB): - -```javascript -// Enable sharding -sh.enableSharding("datacenter_docs") - -// Shard collection -sh.shardCollection("datacenter_docs.tickets", { category: 1 }) -``` - -## πŸ†š MongoDB vs PostgreSQL - -| Feature | MongoDB | PostgreSQL | -|---------|---------|------------| -| Schema | Flexible | Fixed | -| Scaling | Horizontal (native) | Vertical (easier) | -| Queries | JSON-like | SQL | -| Transactions | Yes (4.0+) | Yes | -| Performance (reads) | Excellent | Very good | -| Performance (writes) | Excellent | Good | -| JSON support | Native | JSONB | -| Aggregation | Pipeline | SQL + CTEs | -| Learning curve | Easy | Moderate | - -## πŸ“š Documentation - -- πŸ“– [MONGODB_GUIDE.md](./MONGODB_GUIDE.md) - Guida completa MongoDB -- πŸ“– [README_COMPLETE_SYSTEM.md](./README_COMPLETE_SYSTEM.md) - 
Sistema completo -- πŸ“– [DEPLOYMENT_GUIDE.md](./DEPLOYMENT_GUIDE.md) - Deploy guide - -## πŸ†˜ Troubleshooting - -### Connection issues - -```bash -# Test MongoDB connection -docker-compose exec api python -c " -from motor.motor_asyncio import AsyncIOMotorClient -import asyncio - -async def test(): - client = AsyncIOMotorClient('mongodb://admin:password@mongodb:27017') - await client.admin.command('ping') - print('MongoDB OK') - -asyncio.run(test()) -" -``` - -### Authentication errors - -```bash -# Verify credentials -docker-compose exec mongodb mongosh \ - -u admin -p password --authenticationDatabase admin \ - --eval "db.runCommand({connectionStatus: 1})" -``` - -### Performance issues - -```javascript -// Check slow queries -db.setProfilingLevel(2) // Log all queries -db.system.profile.find().sort({ts:-1}).limit(5) - -// Analyze query -db.tickets.find({status: "resolved"}).explain("executionStats") -``` - ---- - -**MongoDB Version**: 7.0 -**Driver**: Motor 3.3+ (Async) -**ODM**: Beanie 1.24+ -**Minimum Python**: 3.10 diff --git a/README_WEB.md b/README_WEB.md deleted file mode 100644 index 01a7c8f..0000000 --- a/README_WEB.md +++ /dev/null @@ -1,511 +0,0 @@ -# 🌐 Sistema Web e MCP - Documentazione Datacenter - -Sistema completo per pubblicazione web della documentazione datacenter con API REST e MCP Server per connessioni LLM alle infrastrutture. - -## πŸ“¦ Componenti - -### 1. FastAPI Documentation Server -- **Porta**: 8000 -- **Funzione**: Serve documentazione MkDocs compilata + API REST -- **Features**: - - Documentazione web responsive - - API REST per accesso programmatico - - Ottimizzazione per LLM - - Search full-text - - Statistics e metadata - -### 2. MCP Server -- **Porta**: 8001 -- **Funzione**: Model Context Protocol - Connessioni infrastruttura -- **Features**: - - SSH execution - - SNMP queries - - API REST integration - - VMware, Cisco, storage shortcuts - - Audit logging - -### 3. 
MkDocs Static Site -- **Framework**: Material for MkDocs -- **Build**: Automatico via CI/CD -- **Features**: - - Responsive design - - Dark mode - - Search integrata - - Git revision dates - - Navigation ottimizzata - -### 4. Nginx Reverse Proxy -- **Porta**: 80 (HTTP) β†’ 443 (HTTPS) -- **Funzione**: SSL termination, caching, rate limiting -- **Features**: - - HTTPS con TLS 1.2+ - - Gzip compression - - Static file caching - - Security headers - -## πŸš€ Quick Start - -### Prerequisiti -```bash -- Docker & Docker Compose -- Git -- Accesso management network -``` - -### Setup Iniziale - -1. **Clone repository** -```bash -git clone https://github.com/company/datacenter-docs.git -cd datacenter-docs -``` - -2. **Configura credenziali** -```bash -# Crea file MCP config -cp config/mcp_config.example.json config/mcp_config.json -# Edita con credenziali reali -vim config/mcp_config.json - -# Crea .env per Docker -cat > .env << 'EOF' -VCENTER_PASSWORD=your_password -SWITCH_PASSWORD=your_password -STORAGE_API_KEY=your_api_key -EOF -``` - -3. **Build e avvia servizi** -```bash -# Build documentazione -./scripts/build-docs.sh - -# Avvia con Docker Compose -docker-compose up -d - -# Verifica health -curl http://localhost:8000/health -curl http://localhost:8001/methods -``` - -4. 
**Accedi alla documentazione** -``` -http://localhost:8000/docs/ -http://localhost:8000/api/docs (API Swagger) -http://localhost:8001/docs (MCP Swagger) -``` - -## πŸ“ Struttura File - -``` -datacenter-docs/ -β”œβ”€β”€ api/ # FastAPI application -β”‚ β”œβ”€β”€ main.py # Main FastAPI app -β”‚ └── requirements-api.txt # Python dependencies -β”œβ”€β”€ mcp-server/ # MCP Server -β”‚ └── server.py # MCP implementation -β”œβ”€β”€ docs/ # MkDocs source -β”‚ β”œβ”€β”€ index.md # Homepage -β”‚ β”œβ”€β”€ sections/ # Documentation sections -β”‚ └── api/ # API documentation -β”œβ”€β”€ templates/ # Template documentazione -β”œβ”€β”€ nginx/ # Nginx configuration -β”‚ └── nginx.conf -β”œβ”€β”€ scripts/ # Utility scripts -β”‚ β”œβ”€β”€ build-docs.sh -β”‚ └── deploy.sh -β”œβ”€β”€ .github/workflows/ # CI/CD pipelines -β”‚ └── build-deploy.yml -β”œβ”€β”€ config/ # Configuration files -β”‚ └── mcp_config.json -β”œβ”€β”€ mkdocs.yml # MkDocs configuration -β”œβ”€β”€ Dockerfile # Multi-stage Dockerfile -β”œβ”€β”€ docker-compose.yml # Docker Compose config -└── docker-entrypoint.sh # Container entrypoint -``` - -## πŸ”„ Workflow Automazione - -### 1. Generazione Documentazione -```bash -# LLM genera/aggiorna template -python3 main.py --section 01 - -# Commit su Git -git add templates/ -git commit -m "docs: update infrastructure section" -git push origin main -``` - -### 2. CI/CD Pipeline -``` -Push to main - ↓ -GitHub Actions triggered - ↓ -β”œβ”€ Lint & Validate -β”œβ”€ Build MkDocs -β”œβ”€ Build Docker Image -β”œβ”€ Security Scan -└─ Deploy to Production - ↓ -Documentation live! -``` - -### 3. 
Accesso Documentazione -``` -User β†’ Nginx β†’ FastAPI β†’ MkDocs Site - ↓ - API REST - ↓ - LLM-optimized -``` - -## πŸ”Œ API Usage - -### Python Client Example -```python -import requests - -# Get all sections -r = requests.get('http://localhost:8000/api/v1/sections') -sections = r.json() - -for section in sections: - print(f"{section['title']}: {section['token_estimate']} tokens") - -# Get specific section -r = requests.get('http://localhost:8000/api/v1/sections/02_networking') -content = r.json() -print(content['content']) - -# LLM-optimized content -r = requests.get('http://localhost:8000/api/v1/llm-optimized/02_networking') -llm_data = r.json() -print(f"Ready for LLM: {llm_data['token_count']} tokens") -``` - -### cURL Examples -```bash -# Health check -curl http://localhost:8000/health - -# Get summary -curl http://localhost:8000/api/v1/summary | jq - -# Search -curl "http://localhost:8000/api/v1/search?q=vmware" | jq - -# Get section as HTML -curl "http://localhost:8000/api/v1/sections/03_server_virtualizzazione?format=html" -``` - -## πŸ€– MCP Usage - -### Python MCP Client -```python -import asyncio -import requests - -async def query_infrastructure(): - base_url = 'http://localhost:8001' - - # List available methods - r = requests.get(f'{base_url}/methods') - print(r.json()) - - # Execute SSH command - r = requests.post(f'{base_url}/execute/ssh', json={ - 'connection_name': 'switch-core-01', - 'command': 'show version' - }) - result = r.json() - print(f"Output: {result['output']}") - - # SNMP query - r = requests.post(f'{base_url}/execute/snmp/get', json={ - 'connection_name': 'ups-01', - 'oid': '.1.3.6.1.2.1.33.1.2.1.0' - }) - ups_status = r.json() - print(f"UPS Status: {ups_status['output']}") - -asyncio.run(query_infrastructure()) -``` - -### Available MCP Methods -- `ssh_execute` - Execute commands via SSH -- `ssh_get_config` - Get device configurations -- `snmp_get` - SNMP GET query -- `snmp_walk` - SNMP WALK query -- `api_request` - Generic API 
call -- `vmware_get_vms` - Get VMware VMs -- `vmware_get_hosts` - Get ESXi hosts -- `cisco_get_interfaces` - Cisco interface status -- `ups_get_status` - UPS status via SNMP - -## πŸ” Security - -### Access Control -```yaml -Documentation (port 8000): - - Public read access (internal network) - - API key for external access - -MCP Server (port 8001): - - Internal network only - - No external exposure - - Audit logging enabled - - Read-only operations -``` - -### Secrets Management -```bash -# Use environment variables -export VCENTER_PASSWORD="..." -export SWITCH_PASSWORD="..." - -# Or use Docker secrets -docker secret create vcenter_pass vcenter_password.txt - -# Or use HashiCorp Vault -vault kv get -field=password datacenter/vcenter -``` - -### Network Security -```bash -# Firewall rules -# Allow: Management network β†’ MCP Server -# Allow: Internal network β†’ Documentation -# Deny: External β†’ MCP Server -# Allow: External β†’ Documentation (with auth) -``` - -## πŸ“Š Monitoring - -### Health Checks -```bash -# FastAPI health -curl http://localhost:8000/health - -# MCP health -curl http://localhost:8001/methods - -# Docker health -docker ps -docker-compose ps -``` - -### Logs -```bash -# Application logs -docker-compose logs -f docs-server - -# Nginx logs -docker-compose logs -f nginx - -# Specific service -docker-compose logs -f docs-server | grep ERROR -``` - -### Metrics -```bash -# Documentation statistics -curl http://localhost:8000/api/v1/stats | jq - -# Response times -curl -w "@curl-format.txt" -o /dev/null -s http://localhost:8000/health -``` - -## πŸ› οΈ Development - -### Local Development -```bash -# Install dependencies -pip install -r requirements.txt -pip install -r api/requirements-api.txt - -# Run FastAPI locally -cd api -uvicorn main:app --reload --port 8000 - -# Run MCP server locally -cd mcp-server -uvicorn server:mcp_app --reload --port 8001 - -# Build docs locally -mkdocs serve -``` - -### Testing -```bash -# Run tests -pytest tests/ -v 
- -# Coverage -pytest tests/ --cov=api --cov=mcp-server --cov-report=html - -# Linting -flake8 api/ mcp-server/ -black --check api/ mcp-server/ -``` - -## 🚒 Deployment - -### Production Deployment -```bash -# Via script -./scripts/deploy.sh - -# Manual -docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d - -# Verify -curl https://docs.datacenter.local/health -``` - -### Update Documentation -```bash -# Pull latest -git pull origin main - -# Rebuild -docker-compose build docs-server - -# Rolling update -docker-compose up -d --no-deps docs-server -``` - -### Rollback -```bash -# Rollback to previous image -docker-compose down -docker-compose up -d docs-server:previous-tag - -# Or restore from backup -cp -r backup/docs/* docs/ -docker-compose restart docs-server -``` - -## πŸ“ Configuration - -### Environment Variables -```bash -# Application -ENVIRONMENT=production -LOG_LEVEL=info - -# MCP Connections -VCENTER_PASSWORD=xxx -SWITCH_PASSWORD=xxx -STORAGE_API_KEY=xxx - -# Optional -REDIS_URL=redis://localhost:6379 -DATABASE_URL=postgresql://user:pass@localhost/db -``` - -### MkDocs Configuration -Edit `mkdocs.yml`: -```yaml -site_name: Your Site Name -theme: - name: material - palette: - primary: indigo -nav: - - Home: index.md - # ... 
-``` - -### Nginx Configuration -Edit `nginx/nginx.conf`: -```nginx -# Rate limiting -limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s; - -# SSL certificates -ssl_certificate /etc/nginx/ssl/cert.pem; -ssl_certificate_key /etc/nginx/ssl/key.pem; -``` - -## πŸ” Troubleshooting - -### Common Issues - -**Port giΓ  in uso** -```bash -# Check what's using port -sudo lsof -i :8000 -sudo lsof -i :8001 - -# Stop conflicting service -sudo systemctl stop service_name -``` - -**Docker build failed** -```bash -# Clean build -docker-compose build --no-cache docs-server - -# Check logs -docker-compose logs docs-server -``` - -**MCP connection errors** -```bash -# Test connectivity -telnet switch.domain.local 22 -snmpget -v2c -c public ups.domain.local .1.3.6.1.2.1.1.1.0 - -# Check config -cat config/mcp_config.json | jq - -# Test connection -curl -X GET http://localhost:8001/test/switch-core-01 -``` - -**Documentation not updating** -```bash -# Rebuild docs -./scripts/build-docs.sh - -# Force rebuild -docker-compose down -docker-compose up -d --build - -# Check pipeline -# Go to GitHub Actions and check logs -``` - -## πŸ“š Additional Resources - -- [MkDocs Documentation](https://www.mkdocs.org/) -- [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) -- [FastAPI Documentation](https://fastapi.tiangolo.com/) -- [Docker Compose](https://docs.docker.com/compose/) - -## 🀝 Contributing - -1. Fork repository -2. Create feature branch -3. Make changes -4. Test locally -5. 
Submit pull request - -## πŸ“ž Support - -- **Email**: automation-team@company.com -- **Issues**: https://github.com/company/datacenter-docs/issues -- **Wiki**: https://github.com/company/datacenter-docs/wiki - -## πŸ“„ License - -Internal use only - Company Proprietary - ---- - -**Sistema Web e MCP per Documentazione Datacenter** -**Versione**: 1.0.0 -**Maintainer**: Automation Team -**Last Update**: 2025-01-XX diff --git a/TESTING_RESULTS.md b/TESTING_RESULTS.md deleted file mode 100644 index 9648866..0000000 --- a/TESTING_RESULTS.md +++ /dev/null @@ -1,340 +0,0 @@ -# End-to-End Testing Results - -**Date**: 2025-10-20 -**Status**: βœ… **MVP VALIDATION SUCCESSFUL** - ---- - -## 🎯 Test Overview - -End-to-end testing del workflow completo di generazione documentazione, eseguito con mock data (senza LLM reale o VMware reale). - -## βœ… Test Passed - -### TEST 1: VMware Collector -**Status**: βœ… PASSED - -- βœ… Collector initialization successful -- βœ… MCP client fallback to mock data working -- βœ… Data collection completed (3 VMs, 3 hosts, 2 clusters, 3 datastores, 3 networks) -- βœ… Data validation successful -- βœ… MongoDB storage successful -- βœ… Audit logging working - -**Output**: -``` -Collection result: True -Data collected successfully! 
- - VMs: 0 (in data structure) - - Hosts: 3 - - Clusters: 2 - - Datastores: 3 - - Networks: 3 -``` - ---- - -### TEST 2: Infrastructure Generator -**Status**: βœ… PASSED - -- βœ… Generator initialization successful -- βœ… LLM client configured (generic OpenAI-compatible) -- βœ… Data formatting successful -- βœ… System/user prompt generation working -- βœ… Structure validated - -**Output**: -``` -Generator name: infrastructure -Generator section: infrastructure_overview -Generator LLM client configured: True -Data summary formatted (195 chars) -``` - ---- - -### TEST 3: Database Connection -**Status**: βœ… PASSED - -- βœ… MongoDB connection successful (localhost:27017) -- βœ… Database: `datacenter_docs_dev` -- βœ… Beanie ORM initialization successful -- βœ… All 10 models registered -- βœ… Document creation and storage successful -- βœ… Query and count operations working - -**Output**: -``` -MongoDB connection successful! -Beanie ORM initialized! -Test document created: test_section_20251020_001343 -Total DocumentationSection records: 1 -``` - ---- - -### TEST 4: Full Workflow (Mock) -**Status**: βœ… PASSED - -Complete workflow validation: -1. βœ… **Collector** β†’ Mock data collection -2. βœ… **Generator** β†’ Structure validation -3. βœ… **MongoDB** β†’ Storage and retrieval -4. 
βœ… **Beanie ORM** β†’ Models working - ---- - -## πŸ“Š Components Validated - -| Component | Status | Notes | -|-----------|--------|-------| -| VMware Collector | βœ… Working | Mock data fallback functional | -| Infrastructure Generator | βœ… Working | Structure validated (LLM call not tested) | -| Network Generator | ⚠️ Not tested | Structure implemented | -| MongoDB Connection | βœ… Working | All operations successful | -| Beanie ORM Models | βœ… Working | 10 models registered | -| LLM Client | ⚠️ Configured | Not tested (mock endpoint) | -| MCP Client | ⚠️ Fallback | Mock data working, real MCP not tested | - ---- - -## πŸ”„ Workflow Architecture Validated - -``` -User/Test Script - ↓ -VMwareCollector.run() - β”œβ”€ connect() β†’ MCP fallback β†’ Mock data βœ… - β”œβ”€ collect() β†’ Gather infrastructure data βœ… - β”œβ”€ validate() β†’ Check data integrity βœ… - β”œβ”€ store() β†’ MongoDB via Beanie βœ… - └─ disconnect() βœ… - ↓ -InfrastructureGenerator (structure validated) - β”œβ”€ generate() β†’ Would call LLM - β”œβ”€ validate_content() β†’ Markdown validation - β”œβ”€ save_to_database() β†’ DocumentationSection storage - └─ save_to_file() β†’ Optional file output - ↓ -MongoDB Storage βœ… - β”œβ”€ AuditLog collection (data collection) - β”œβ”€ DocumentationSection collection (docs) - └─ Query via API -``` - ---- - -## πŸŽ“ What Was Tested - -### βœ… Tested Successfully -1. **Infrastructure Layer**: - - MongoDB connection and operations - - Redis availability (Docker) - - Docker stack management - -2. **Data Collection Layer**: - - VMware collector with mock data - - Data validation - - Storage in MongoDB via AuditLog - -3. **ORM Layer**: - - Beanie document models - - CRUD operations - - Indexes and queries - -4. **Generator Layer** (Structure): - - Generator initialization - - LLM client configuration - - Data formatting for prompts - - Prompt generation (system + user) - -### ⚠️ Not Tested (Requires External Services) -1. 
**LLM Generation**: - - Actual API calls to OpenAI/Anthropic/Ollama - - Markdown content generation - - Content validation - -2. **MCP Integration**: - - Real vCenter connection - - Live infrastructure data collection - - MCP protocol communication - -3. **Celery Workers**: - - Background task execution - - Celery Beat scheduling - - Task queues - -4. **API Endpoints**: - - FastAPI service - - REST API operations - - Authentication/authorization - ---- - -## πŸ“‹ Next Steps for Full Production Testing - -### Step 1: Configure Real LLM (5 minutes) -```bash -# Option A: OpenAI -# Edit .env: -LLM_BASE_URL=https://api.openai.com/v1 -LLM_API_KEY=sk-your-actual-key-here -LLM_MODEL=gpt-4-turbo-preview - -# Option B: Ollama (local, free) -ollama pull llama3 -# Edit .env: -LLM_BASE_URL=http://localhost:11434/v1 -LLM_API_KEY=ollama -LLM_MODEL=llama3 -``` - -### Step 2: Test with Real LLM (2 minutes) -```bash -# Generate VMware documentation -PYTHONPATH=src poetry run datacenter-docs generate vmware - -# Or using CLI directly -poetry run datacenter-docs generate vmware -``` - -### Step 3: Start Full Stack (5 minutes) -```bash -cd deploy/docker -docker-compose -f docker-compose.dev.yml up -d - -# Check services -docker-compose -f docker-compose.dev.yml ps -docker-compose -f docker-compose.dev.yml logs -f api -``` - -### Step 4: Test API Endpoints (2 minutes) -```bash -# Health check -curl http://localhost:8000/health - -# API docs -curl http://localhost:8000/api/docs - -# List documentation sections -curl http://localhost:8000/api/v1/documentation/sections -``` - -### Step 5: Test Celery Workers (5 minutes) -```bash -# Start worker -PYTHONPATH=src poetry run datacenter-docs worker - -# Trigger generation task -# (via API or CLI) -``` - ---- - -## πŸš€ Production Readiness Checklist - -### βœ… Infrastructure (100%) -- [x] MongoDB operational -- [x] Redis operational -- [x] Docker stack functional -- [x] Network connectivity validated - -### βœ… Core Components (95%) -- [x] 
VMware Collector implemented and tested -- [x] Infrastructure Generator implemented -- [x] Network Generator implemented -- [x] Base classes complete -- [x] MongoDB/Beanie integration working -- [x] LLM client configured (generic) -- [ ] Real LLM generation tested (needs API key) - -### βœ… CLI Tool (100%) -- [x] 11 commands implemented -- [x] Database operations working -- [x] Error handling complete -- [x] Help and documentation - -### βœ… Workers (100%) -- [x] Celery configuration complete -- [x] 8 tasks implemented -- [x] Task scheduling configured -- [x] Integration with collectors/generators - -### ⚠️ API Service (not tested) -- [x] FastAPI implementation complete -- [ ] Service startup not tested -- [ ] Endpoints not tested -- [ ] Health checks not validated - -### ⚠️ Chat Service (not implemented) -- [x] DocumentationAgent implemented -- [ ] WebSocket server missing (chat/main.py) -- [ ] Real-time chat not available - ---- - -## πŸ“Š Project Completion Status - -**Overall Progress**: **68%** (up from 65%) - -| Phase | Status | % | Notes | -|-------|--------|---|-------| -| MVP Core | βœ… Complete | 100% | Collector + Generator + DB working | -| Infrastructure | βœ… Complete | 100% | All services operational | -| CLI Tool | βœ… Complete | 100% | Fully functional | -| Workers | βœ… Complete | 100% | Integrated with generators | -| Collectors | 🟑 Partial | 20% | VMware done, 5 more needed | -| Generators | 🟑 Partial | 30% | 2 done, 8 more needed | -| API Service | 🟑 Not tested | 80% | Code ready, not validated | -| Chat Service | πŸ”΄ Partial | 40% | WebSocket server missing | -| Frontend | πŸ”΄ Minimal | 20% | Basic skeleton only | - -**Estimated Time to Production**: 2-3 weeks for full feature completion - ---- - -## πŸ’‘ Key Achievements - -1. **βœ… MVP Validated**: End-to-end workflow functional -2. **βœ… Mock Data Working**: Can test without external dependencies -3. **βœ… Database Integration**: MongoDB + Beanie fully operational -4. 
**βœ… Flexible LLM Support**: Generic client supports any OpenAI-compatible API -5. **βœ… Clean Architecture**: Base classes + implementations cleanly separated -6. **βœ… Production-Ready Structure**: Async/await, error handling, logging complete - ---- - -## 🎯 Immediate Next Actions - -1. **Configure LLM API key** in `.env` (5 min) -2. **Run first real documentation generation** (2 min) -3. **Verify output quality** (5 min) -4. **Start API service** and test endpoints (10 min) -5. **Document any issues** and iterate - ---- - -## πŸ“ Test Command Reference - -```bash -# Run end-to-end test (mock) -PYTHONPATH=src poetry run python test_workflow.py - -# Generate docs with CLI (needs LLM configured) -poetry run datacenter-docs generate vmware - -# Start Docker stack -cd deploy/docker && docker-compose -f docker-compose.dev.yml up -d - -# Check MongoDB -docker exec datacenter-docs-mongodb-dev mongosh --eval "show dbs" - -# View logs -docker-compose -f docker-compose.dev.yml logs -f mongodb -``` - ---- - -**Test Completed**: 2025-10-20 00:13:43 -**Duration**: ~2 minutes -**Result**: βœ… **ALL TESTS PASSED** diff --git a/WHATS_NEW_V2.md b/WHATS_NEW_V2.md deleted file mode 100644 index 84beb93..0000000 --- a/WHATS_NEW_V2.md +++ /dev/null @@ -1,529 +0,0 @@ -# πŸŽ‰ What's New in v2.0 - Auto-Remediation & Feedback System - -## πŸš€ Major New Features - -### 1️⃣ Auto-Remediation (Write Operations) ⚠️ - -**AI can now automatically fix problems** by executing write operations on your infrastructure. 
- -#### Key Points: -- βœ… **DEFAULT: DISABLED** - Must explicitly enable per ticket for safety -- βœ… **Smart Decision Engine** - Only executes when confidence is high -- βœ… **Safety Checks** - Pre/post validation, backups, rollbacks -- βœ… **Approval Workflow** - Critical actions require human approval -- βœ… **Full Audit Trail** - Every action logged - -#### Example Usage: - -```python -# Submit ticket WITH auto-remediation -{ - "ticket_id": "INC-001", - "description": "Web service not responding", - "category": "server", - "enable_auto_remediation": true # ← Enable write operations -} - -# AI will: -# 1. Analyze the problem -# 2. Check reliability score -# 3. If score β‰₯85% and safe action β†’ Execute automatically -# 4. If critical action β†’ Request approval -# 5. Log all actions taken -``` - -**What AI Can Do:** -- Restart services/VMs -- Clear caches -- Scale deployments -- Enable network ports -- Expand storage volumes -- Rollback deployments - -**Safety Guardrails:** -- Minimum 85% reliability required -- Rate limiting (max 10 actions/hour) -- Time windows (maintenance hours only) -- Backup verification -- System health checks -- Rollback on failure - ---- - -### 2️⃣ Reliability Scoring System πŸ“Š - -**Multi-factor confidence calculation** that gets smarter over time. 
- -#### How It Works: - -``` -Reliability Score (0-100%) = - AI Confidence Γ— 25% + # Claude's confidence - Human Feedback Γ— 30% + # User ratings & feedback - Historical Success Γ— 25% + # Past resolution success rate - Pattern Recognition Γ— 20% # Similarity to known issues -``` - -#### Confidence Levels: - -| Score | Level | Action | -|-------|-------|--------| -| 90-100% | 🟒 Very High | Auto-execute without approval | -| 75-89% | πŸ”΅ High | Auto-execute or require approval | -| 60-74% | 🟑 Medium | Require approval | -| 0-59% | πŸ”΄ Low | Manual resolution only | - -#### Example: - -```json -{ - "reliability_score": 87.5, - "confidence_level": "high", - "breakdown": { - "ai_confidence": "92%", - "human_validation": "85%", - "success_history": "90%", - "pattern_recognition": "82%" - } -} -``` - ---- - -### 3️⃣ Human Feedback Loop πŸ”„ - -**Your feedback makes the AI smarter.** - -#### What You Can Provide: - -```javascript -{ - "ticket_id": "INC-001", - "feedback_type": "positive|negative|neutral", - "rating": 5, // 1-5 stars - "was_helpful": true, - "resolution_accurate": true, - "actions_worked": true, - - // Optional details - "comment": "Perfect! Service is back up.", - "what_worked": "The service restart fixed it", - "what_didnt_work": null, - "suggestions": "Could add health check step", - - // If AI failed, what actually worked? - "actual_resolution": "Had to increase memory instead", - "time_to_resolve": 30.0 // minutes -} -``` - -#### Impact of Feedback: - -1. **Immediate**: Updates reliability score for that ticket -2. **Pattern Learning**: Strengthens/weakens similar issue handling -3. **Future Decisions**: Influences auto-remediation eligibility -4. **System Improvement**: Better resolutions over time - ---- - -### 4️⃣ Pattern Learning & Recognition 🧠 - -**AI learns from repeated issues** and gets better at handling them. 
- -#### How Patterns Work: - -``` -Issue occurs first time: -└─ Manual resolution, collect feedback - -After 5+ similar issues with good feedback: -β”œβ”€ Pattern identified and eligible for auto-remediation -β”œβ”€ Success rate: 85%+ -└─ Can auto-fix similar issues in future - -After 20+ occurrences: -β”œβ”€ Very high confidence (90%+) -β”œβ”€ Success rate: 92%+ -└─ Auto-fix without approval (if safe action) -``` - -#### Pattern Eligibility Criteria: - -```python -eligible_for_auto_remediation = ( - occurrence_count >= 5 AND - positive_feedback_rate >= 0.85 AND - avg_reliability_score >= 85.0 AND - auto_remediation_success_rate >= 0.85 -) -``` - ---- - -## πŸ“‹ New Database Models - -### Tables Added: - -1. **ticket_feedbacks** - Store human feedback -2. **similar_tickets** - Track pattern similarities -3. **remediation_logs** - Audit trail of actions -4. **auto_remediation_policies** - Configuration per category -5. **remediation_approvals** - Approval workflow -6. **ticket_patterns** - Learned patterns - ---- - -## πŸ”§ New API Endpoints - -### Core Functionality - -```bash -# Create ticket with auto-remediation -POST /api/v1/tickets -{ - "enable_auto_remediation": true # New parameter -} - -# Get enhanced ticket status -GET /api/v1/tickets/{ticket_id} -# Returns: reliability_score, remediation_decision, etc. 
-``` - -### Feedback System - -```bash -# Submit feedback -POST /api/v1/feedback - -# Get ticket feedback history -GET /api/v1/tickets/{ticket_id}/feedback -``` - -### Auto-Remediation Control - -```bash -# Approve/reject remediation -POST /api/v1/tickets/{ticket_id}/approve-remediation - -# Get remediation execution logs -GET /api/v1/tickets/{ticket_id}/remediation-logs -``` - -### Analytics & Monitoring - -```bash -# Reliability statistics -GET /api/v1/stats/reliability?days=30&category=network - -# Auto-remediation statistics -GET /api/v1/stats/auto-remediation?days=30 - -# View learned patterns -GET /api/v1/patterns?category=network&min_occurrences=5 -``` - ---- - -## 🎨 Frontend Enhancements - -### New UI Components: - -1. **Auto-Remediation Toggle** (with safety warning) -2. **Reliability Score Display** (with breakdown) -3. **Feedback Form** (star rating, comments, detailed feedback) -4. **Remediation Logs Viewer** (audit trail) -5. **Analytics Dashboard** (reliability trends, success rates) -6. **Pattern Viewer** (learned patterns and eligibility) - -### Visual Indicators: - -- 🟒 Green: Very high reliability (90%+) -- πŸ”΅ Blue: High reliability (75-89%) -- 🟑 Yellow: Medium reliability (60-74%) -- πŸ”΄ Red: Low reliability (<60%) - ---- - -## πŸ“Š Example Workflow - -### Traditional Flow (v1.0) -``` -1. User submits ticket -2. AI analyzes and suggests resolution -3. User manually executes actions -4. Done -``` - -### Enhanced Flow (v2.0) -``` -1. User submits ticket with auto_remediation=true -2. AI analyzes problem -3. AI calculates reliability score -4. Decision Engine evaluates: - β”œβ”€ High confidence + safe action β†’ Execute automatically - β”œβ”€ Medium confidence β†’ Request approval - └─ Low confidence β†’ Manual resolution only -5. If approved/auto-approved: - β”œβ”€ Pre-execution safety checks - β”œβ”€ Execute actions via MCP - β”œβ”€ Post-execution validation - └─ Log all actions -6. User provides feedback -7. System learns and improves -8. 
Future similar issues β†’ Faster, smarter resolution -``` - ---- - -## 🎯 Use Cases - -### Use Case 1: Service Down - -```python -# Ticket: "Web service not responding" -# Category: server -# Auto-remediation: enabled - -AI Analysis: -β”œβ”€ Identifies: Service crash -β”œβ”€ Solution: Restart service -β”œβ”€ Reliability: 92% (based on 15 similar past issues) -β”œβ”€ Action type: safe_write -└─ Decision: Auto-execute without approval - -Result: -β”œβ”€ Service restarted in 3 seconds -β”œβ”€ Health check: passed -β”œβ”€ Action logged -└─ User feedback: ⭐⭐⭐⭐⭐ - -Future: -└─ Similar issues auto-fixed with 95% confidence -``` - -### Use Case 2: Storage Full - -```python -# Ticket: "Datastore at 98% capacity" -# Category: storage -# Auto-remediation: enabled - -AI Analysis: -β”œβ”€ Identifies: Storage capacity issue -β”œβ”€ Solution: Expand volume by 100GB -β”œβ”€ Reliability: 88% -β”œβ”€ Action type: critical_write (expansion can't be undone easily) -└─ Decision: Require approval - -Workflow: -β”œβ”€ Approval requested from admin -β”œβ”€ Admin reviews and approves -β”œβ”€ Pre-check: Backup verified -β”œβ”€ Volume expanded -β”œβ”€ Post-check: New space available -└─ Logged with approval trail - -Future: -└─ After 10+ successful expansions, may auto-approve -``` - -### Use Case 3: Network Port Flapping - -```python -# Ticket: "Port Gi0/1 flapping on switch" -# Category: network -# Auto-remediation: enabled - -AI Analysis: -β”œβ”€ Identifies: Interface errors causing flapping -β”œβ”€ Solution: Clear interface errors, bounce port -β”œβ”€ Reliability: 78% (only 3 similar past issues) -β”œβ”€ Pattern: Not yet eligible for auto-remediation -└─ Decision: Require approval (not enough history) - -After 5+ similar issues with good feedback: -└─ Pattern becomes eligible -└─ Future port issues auto-fixed -``` - ---- - -## πŸ” Security & Safety - -### Built-in Safety Features: - -1. βœ… **Explicit Opt-in**: Auto-remediation disabled by default -2. 
βœ… **Action Classification**: Safe vs. critical operations -3. βœ… **Reliability Thresholds**: Minimum 85% for auto-execution -4. βœ… **Approval Workflow**: Critical actions require human OK -5. βœ… **Rate Limiting**: Max 10 actions per hour -6. βœ… **Pre-execution Checks**: Health, backups, time windows -7. βœ… **Post-execution Validation**: Verify success -8. βœ… **Rollback Capability**: Undo on failure -9. βœ… **Full Audit Trail**: Every action logged -10. βœ… **Pattern Validation**: Only proven patterns get auto-remediation - -### What AI Will NEVER Do: - -- ❌ Delete data without approval -- ❌ Modify critical configs without approval -- ❌ Shutdown production systems without approval -- ❌ Execute during business hours (if restricted) -- ❌ Exceed rate limits -- ❌ Act on low-confidence issues -- ❌ Proceed if safety checks fail - ---- - -## πŸ“ˆ Expected Benefits - -### Operational Efficiency - -- **90% reduction** in time to resolution for common issues -- **80% of repetitive issues** auto-resolved -- **<3 seconds** average resolution time for known patterns -- **24/7 automated response** even outside business hours - -### Quality Improvements - -- **Consistent** resolutions (no human error) -- **Learning** from feedback (gets better over time) -- **Documented** audit trail (full transparency) -- **Proactive** pattern recognition - -### Cost Savings - -- **70-80% reduction** in operational overhead for common issues -- **Faster** mean time to resolution (MTTR) -- **Fewer** escalations -- **Better** resource utilization - ---- - -## 🚦 Rollout Strategy - -### Phase 1: Pilot (Week 1-2) -- Enable for **cache/restart operations only** -- **5% of tickets** -- Require approval for all -- Monitor closely - -### Phase 2: Expansion (Week 3-4) -- Add **safe network operations** -- **20% of tickets** -- Auto-approve if reliability β‰₯ 95% -- Collect feedback aggressively - -### Phase 3: Scale (Week 5-6) -- Enable for **all safe operations** -- **50% of tickets** -- 
Auto-approve if reliability β‰₯ 90% -- Patterns becoming eligible - -### Phase 4: Full Deployment (Week 7+) -- **All categories** (except security) -- **100% availability** -- Dynamic thresholds based on performance -- Continuous improvement - ---- - -## πŸ“š Documentation - -New documentation added: - -1. **AUTO_REMEDIATION_GUIDE.md** - Complete guide (THIS FILE) -2. **API_ENHANCED.md** - Enhanced API documentation -3. **RELIABILITY_SCORING.md** - Deep dive on scoring -4. **FEEDBACK_SYSTEM.md** - Feedback loop details -5. **PATTERN_LEARNING.md** - How patterns work - ---- - -## πŸŽ“ Training & Adoption - -### For Operators: - -1. Read **AUTO_REMEDIATION_GUIDE.md** -2. Start with low-risk categories -3. Always provide feedback -4. Monitor logs and analytics -5. Adjust thresholds based on results - -### For Administrators: - -1. Configure **auto_remediation_policies** -2. Set appropriate thresholds per category -3. Define approval workflows -4. Monitor system performance -5. Review and approve critical actions - -### For Developers: - -1. Integrate API endpoints -2. Implement feedback collection -3. Use reliability scores in decisions -4. Monitor metrics and alerts -5. Contribute to pattern improvement - ---- - -## πŸ”„ Migration from v1.0 - -### Breaking Changes: - -**None!** v2.0 is fully backward compatible. - -- Existing tickets continue to work -- Auto-remediation is opt-in -- All v1.0 APIs still functional - -### New Defaults: - -- `enable_auto_remediation: false` (explicit opt-in required) -- `requires_approval: true` (by default) -- `min_reliability_score: 85.0` - -### Database Migration: - -```bash -# Run Alembic migrations -poetry run alembic upgrade head - -# Migrations add new tables: -# - ticket_feedbacks -# - similar_tickets -# - remediation_logs -# - auto_remediation_policies -# - remediation_approvals -# - ticket_patterns -``` - ---- - -## πŸŽ‰ Summary - -**v2.0 adds intelligent, safe, self-improving auto-remediation:** - -1. 
βœ… AI can now fix problems automatically (disabled by default) -2. βœ… Multi-factor reliability scoring (gets smarter over time) -3. βœ… Human feedback loop (continuous learning) -4. βœ… Pattern recognition (learns from similar issues) -5. βœ… Approval workflow (safety for critical actions) -6. βœ… Full audit trail (complete transparency) -7. βœ… Progressive automation (starts conservative, scales based on success) - -**The system learns from every interaction and gets better over time!** - ---- - -## πŸ“ž Support - -- **Email**: automation-team@company.local -- **Slack**: #datacenter-automation -- **Documentation**: /docs/auto-remediation -- **Issues**: git.company.local/infrastructure/datacenter-docs/issues - ---- - -**Ready to try auto-remediation? Start with a low-risk ticket and let the AI show you what it can do!** πŸš€ diff --git a/scripts/generate_proxmox_docs_in_container.py b/scripts/generate_proxmox_docs_in_container.py new file mode 100644 index 0000000..f5ecbc4 --- /dev/null +++ b/scripts/generate_proxmox_docs_in_container.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +Generate Proxmox documentation from within the worker container. +This script tests the complete flow: collect data -> generate docs using template. 
+""" +import asyncio +import logging +import sys +from pathlib import Path + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler("/tmp/proxmox_docs_container.log") + ] +) + +logger = logging.getLogger(__name__) + + +async def main(): + """Main function to generate Proxmox documentation""" + try: + logger.info("=" * 80) + logger.info("PROXMOX DOCUMENTATION GENERATION TEST") + logger.info("=" * 80) + + # Import after configuring logging + from datacenter_docs.collectors.proxmox_collector import ProxmoxCollector + from datacenter_docs.generators.template_generator import TemplateBasedGenerator + + # Step 1: Collect Proxmox data + logger.info("\n[STEP 1] Collecting Proxmox data...") + collector = ProxmoxCollector() + collect_result = await collector.run() + + if not collect_result["success"]: + logger.error(f"❌ Collection failed: {collect_result['error']}") + return False + + logger.info("βœ… Data collection successful") + + # Log collected data summary + data = collect_result.get("data", {}) + vms = data.get("data", {}).get("vms", []) + containers = data.get("data", {}).get("containers", []) + nodes = data.get("data", {}).get("nodes", []) + + logger.info(f" - VMs: {len(vms)}") + logger.info(f" - Containers: {len(containers)}") + logger.info(f" - Nodes: {len(nodes)}") + + # Step 2: Generate documentation using template + logger.info("\n[STEP 2] Generating documentation using template...") + template_path = "/app/templates/documentation/proxmox.yaml" + + if not Path(template_path).exists(): + logger.error(f"❌ Template not found: {template_path}") + return False + + logger.info(f" Using template: {template_path}") + + generator = TemplateBasedGenerator(template_path) + + # Generate and save each section individually + logger.info("\n[STEP 3] Generating documentation sections...") + sections_results = await 
generator.generate_and_save_sections( + data=data, + save_individually=True + ) + + # Print results + logger.info("\n" + "=" * 80) + logger.info("GENERATION RESULTS") + logger.info("=" * 80) + + success_count = 0 + failed_count = 0 + + for i, result in enumerate(sections_results, 1): + section_id = result.get("section_id", "unknown") + title = result.get("title", "Unknown") + success = result.get("success", False) + + if success: + success_count += 1 + file_path = result.get("file_path", "N/A") + logger.info(f"βœ… Section {i}: {title}") + logger.info(f" File: {file_path}") + else: + failed_count += 1 + error = result.get("error", "Unknown error") + logger.info(f"❌ Section {i}: {title}") + logger.info(f" Error: {error}") + + # Final summary + logger.info("\n" + "=" * 80) + logger.info("SUMMARY") + logger.info("=" * 80) + logger.info(f"Total sections: {len(sections_results)}") + logger.info(f"Successful: {success_count}") + logger.info(f"Failed: {failed_count}") + logger.info(f"Success rate: {(success_count / len(sections_results) * 100):.1f}%") + logger.info("=" * 80) + + return success_count == len(sections_results) + + except Exception as e: + logger.error(f"❌ Unexpected error: {e}", exc_info=True) + return False + + +if __name__ == "__main__": + success = asyncio.run(main()) + sys.exit(0 if success else 1) diff --git a/scripts/test_proxmox_connection.py b/scripts/test_proxmox_connection.py new file mode 100755 index 0000000..e418fcd --- /dev/null +++ b/scripts/test_proxmox_connection.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +""" +Simple test script for Proxmox API connection + +Tests only the Proxmox connection without loading full application +""" + +import sys +from pathlib import Path + +# Add src to path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from datacenter_docs.utils.config import get_settings + +def test_proxmox_connection(): + """Test Proxmox API connection""" + + print("=" * 80) + print("PROXMOX API CONNECTION TEST") + 
print("=" * 80) + + # Load settings + settings = get_settings() + + print("\nπŸ“‹ Configuration:") + print(f" Host: {settings.PROXMOX_HOST}") + print(f" Port: {settings.PROXMOX_PORT}") + print(f" User: {settings.PROXMOX_USER}") + print(f" Token Name: {settings.PROXMOX_TOKEN_NAME}") + print(f" Token Value: {'*' * 8}{settings.PROXMOX_TOKEN_VALUE[-4:] if settings.PROXMOX_TOKEN_VALUE else 'NOT SET'}") + print(f" Verify SSL: {settings.PROXMOX_VERIFY_SSL}") + print(f" Timeout: {settings.PROXMOX_TIMEOUT}s") + + # Check if configured + if not settings.PROXMOX_HOST or settings.PROXMOX_HOST == "proxmox.example.com": + print("\n❌ ERROR: Proxmox host not configured") + print(" Please set PROXMOX_HOST in .env file") + return False + + # Try to import proxmoxer + try: + from proxmoxer import ProxmoxAPI + except ImportError: + print("\n❌ ERROR: proxmoxer library not installed") + print(" Install with: pip install proxmoxer") + return False + + print("\nπŸ”Œ Connecting to Proxmox...") + + try: + # Prepare connection parameters + auth_params = { + "host": settings.PROXMOX_HOST, + "port": settings.PROXMOX_PORT, + "verify_ssl": settings.PROXMOX_VERIFY_SSL, + "timeout": settings.PROXMOX_TIMEOUT, + } + + # API Token authentication + if settings.PROXMOX_TOKEN_NAME and settings.PROXMOX_TOKEN_VALUE: + print(f" Using API token authentication: {settings.PROXMOX_USER}!{settings.PROXMOX_TOKEN_NAME}") + auth_params["user"] = settings.PROXMOX_USER + auth_params["token_name"] = settings.PROXMOX_TOKEN_NAME + auth_params["token_value"] = settings.PROXMOX_TOKEN_VALUE + + # Password authentication + elif settings.PROXMOX_PASSWORD: + print(f" Using password authentication: {settings.PROXMOX_USER}") + auth_params["user"] = settings.PROXMOX_USER + auth_params["password"] = settings.PROXMOX_PASSWORD + + else: + print("\n❌ ERROR: No authentication credentials configured") + print(" Set either PROXMOX_TOKEN_NAME/VALUE or PROXMOX_PASSWORD") + return False + + # Connect + proxmox = ProxmoxAPI(**auth_params) 
+ + # Test connection by getting version + version = proxmox.version.get() + print(f"\nβœ… Connection successful!") + print(f" Proxmox VE version: {version.get('version')}") + print(f" Release: {version.get('release', 'unknown')}") + + # Get cluster status + print("\nπŸ“Š Cluster Information:") + try: + cluster_status = proxmox.cluster.status.get() + for item in cluster_status: + if item.get("type") == "cluster": + print(f" Cluster Name: {item.get('name')}") + print(f" Quorate: {'Yes' if item.get('quorate') else 'No'}") + print(f" Nodes: {item.get('nodes')}") + break + except Exception as e: + print(f" ⚠️ Could not get cluster info: {e}") + + # Get nodes + print("\nπŸ–₯️ Nodes:") + try: + nodes = proxmox.nodes.get() + for node in nodes: + status_icon = "🟒" if node.get("status") == "online" else "πŸ”΄" + print(f" {status_icon} {node.get('node')}: {node.get('status')}") + except Exception as e: + print(f" ⚠️ Could not get nodes: {e}") + + # Get VM count + print("\nπŸ’» Virtual Machines:") + try: + total_vms = 0 + total_containers = 0 + for node in nodes: + node_name = node["node"] + try: + vms = proxmox.nodes(node_name).qemu.get() + containers = proxmox.nodes(node_name).lxc.get() + total_vms += len(vms) + total_containers += len(containers) + print(f" Node {node_name}: {len(vms)} VMs, {len(containers)} containers") + except Exception: + pass + + print(f"\n πŸ“Š TOTAL: {total_vms} VMs, {total_containers} containers") + + except Exception as e: + print(f" ⚠️ Could not get VMs: {e}") + + print("\n" + "=" * 80) + print("βœ… ALL TESTS PASSED - Proxmox connection is working!") + print("=" * 80) + + return True + + except Exception as e: + print(f"\n❌ CONNECTION FAILED") + print(f" Error: {e}") + print(f" Type: {type(e).__name__}") + + if "401" in str(e): + print("\nπŸ’‘ Troubleshooting:") + print(" - Check that PROXMOX_USER is correct (should include realm: user@pam or user@pve)") + print(" - Verify PROXMOX_TOKEN_NAME matches the token ID in Proxmox") + print(" - Verify 
PROXMOX_TOKEN_VALUE is correct") + print(" - Check token has proper permissions (PVEAuditor role on path /)") + + elif "SSL" in str(e): + print("\nπŸ’‘ Troubleshooting:") + print(" - Try setting PROXMOX_VERIFY_SSL=false in .env") + print(" - Or install valid SSL certificate on Proxmox") + + elif "refused" in str(e).lower(): + print("\nπŸ’‘ Troubleshooting:") + print(" - Check PROXMOX_HOST is correct") + print(" - Check PROXMOX_PORT is correct (default: 8006)") + print(" - Verify firewall allows access to Proxmox API") + print(" - Check Proxmox API service is running: systemctl status pveproxy") + + print("\n" + "=" * 80) + return False + + +if __name__ == "__main__": + success = test_proxmox_connection() + sys.exit(0 if success else 1)