From 16fc8e26593ab0df5f6654d3c65c44b067f1ac8a Mon Sep 17 00:00:00 2001
From: dnviti
Date: Mon, 20 Oct 2025 19:23:30 +0200
Subject: [PATCH] feat: implement template-based documentation generation
 system for Proxmox
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement a scalable system for automatic documentation generation from
infrastructure systems, preventing LLM context overload through
template-driven sectioning.

**New Features:**

1. **YAML Template System** (`templates/documentation/proxmox.yaml`)
   - Define documentation sections independently
   - Specify data requirements per section
   - Configure prompts, generation settings, and scheduling
   - Prevents LLM context overflow by sectioning data

2. **Template-Based Generator** (`src/datacenter_docs/generators/template_generator.py`)
   - Load and parse YAML templates
   - Generate documentation sections independently
   - Extract only required data for each section
   - Save sections individually to files and database
   - Combine sections with table of contents

3. **Celery Tasks** (`src/datacenter_docs/workers/documentation_tasks.py`)
   - `collect_and_generate_docs`: Collect data and generate docs
   - `generate_proxmox_docs`: Scheduled Proxmox documentation (daily at 2 AM)
   - `generate_all_docs`: Generate docs for all systems in parallel
   - `index_generated_docs`: Index generated docs into vector store for RAG
   - `full_docs_pipeline`: Complete workflow (collect → generate → index)

4. **Scheduled Jobs** (updated `celery_app.py`)
   - Daily Proxmox documentation generation
   - Every 6 hours: all systems documentation
   - Weekly: full pipeline with indexing
   - Proper task routing and rate limiting

5. **Test Script** (`scripts/test_proxmox_docs.py`)
   - End-to-end testing of documentation generation
   - Mock data collection from Proxmox
   - Template-based generation
   - File and database storage

6. **Configuration Updates** (`src/datacenter_docs/utils/config.py`)
   - Add port configuration fields for Docker services
   - Add MongoDB and Redis credentials
   - Support all required environment variables

**Proxmox Documentation Sections:**
- Infrastructure Overview (cluster, nodes, stats)
- Virtual Machines Inventory
- LXC Containers Inventory
- Storage Configuration
- Network Configuration
- Maintenance Procedures

**Benefits:**
- Scalable to multiple infrastructure systems
- Prevents LLM context window overflow
- Independent section generation
- Scheduled automatic updates
- Vector store integration for RAG chat
- Template-driven approach for consistency

šŸ¤– Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 scripts/test_proxmox_docs.py                  | 162 +++++++
 .../generators/template_generator.py          | 409 ++++++++++++++++++
 src/datacenter_docs/utils/config.py           |  14 +
 src/datacenter_docs/workers/celery_app.py     |  26 +-
 .../workers/documentation_tasks.py            | 347 +++++++++++++++
 templates/documentation/proxmox.yaml          | 221 ++++++++++
 6 files changed, 1178 insertions(+), 1 deletion(-)
 create mode 100755 scripts/test_proxmox_docs.py
 create mode 100644 src/datacenter_docs/generators/template_generator.py
 create mode 100644 src/datacenter_docs/workers/documentation_tasks.py
 create mode 100644 templates/documentation/proxmox.yaml

diff --git a/scripts/test_proxmox_docs.py b/scripts/test_proxmox_docs.py
new file mode 100755
index 0000000..938cf6e
--- /dev/null
+++ b/scripts/test_proxmox_docs.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""
+Test script for Proxmox documentation generation
+
+Tests the end-to-end workflow:
+1. Collect data from Proxmox (using mock data)
+2. Generate documentation using template
+3. Save sections to files and database
+4. Optionally index for RAG
+"""
+
+import asyncio
+import logging
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from datacenter_docs.collectors.proxmox_collector import ProxmoxCollector
+from datacenter_docs.generators.template_generator import TemplateBasedGenerator
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+async def test_proxmox_documentation() -> None:
+    """Test complete Proxmox documentation generation workflow"""
+
+    logger.info("=" * 80)
+    logger.info("PROXMOX DOCUMENTATION GENERATION TEST")
+    logger.info("=" * 80)
+
+    # Step 1: Collect data from Proxmox
+    logger.info("\nšŸ“Š STEP 1: Collecting data from Proxmox...")
+    logger.info("-" * 80)
+
+    collector = ProxmoxCollector()
+    collect_result = await collector.run()
+
+    if not collect_result["success"]:
+        logger.error(f"āŒ Data collection failed: {collect_result.get('error')}")
+        return
+
+    logger.info("āœ… Data collection successful!")
+    logger.info(f"   Collected at: {collect_result['data']['metadata']['collected_at']}")
+
+    # Show statistics
+    stats = collect_result["data"]["data"].get("statistics", {})
+    logger.info("\nšŸ“ˆ Infrastructure Statistics:")
+    logger.info(f"   Total VMs: {stats.get('total_vms', 0)}")
+    logger.info(f"   Running VMs: {stats.get('running_vms', 0)}")
+    logger.info(f"   Total Containers: {stats.get('total_containers', 0)}")
+    logger.info(f"   Running Containers: {stats.get('running_containers', 0)}")
+    logger.info(f"   Total Nodes: {stats.get('total_nodes', 0)}")
+    logger.info(f"   Total CPU Cores: {stats.get('total_cpu_cores', 0)}")
+    logger.info(f"   Total Memory: {stats.get('total_memory_gb', 0)} GB")
+    logger.info(f"   Total Storage: {stats.get('total_storage_tb', 0)} TB")
+
+    # Step 2: Generate documentation using template
+    logger.info("\nšŸ“ STEP 2: Generating documentation using template...")
+    logger.info("-" * 80)
+
+    template_path = "templates/documentation/proxmox.yaml"
+
+    try:
+        generator = TemplateBasedGenerator(template_path)
+    except FileNotFoundError:
+        logger.error(f"āŒ Template not found: {template_path}")
+        logger.info("   Creating template directory...")
+
+        # Create the template directory so the template file can be added
+        template_dir = Path(template_path).parent
+        template_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.error(
+            "   Please ensure the template file exists at: "
+            f"{Path(template_path).absolute()}"
+        )
+        return
+
+    logger.info(f"āœ… Template loaded: {generator.template.name}")
+    logger.info(f"   Sections to generate: {len(generator.template.sections)}")
+
+    # List all sections
+    for i, section in enumerate(generator.template.sections, 1):
+        logger.info(f"   {i}. {section.get('title')} (ID: {section.get('id')})")
+
+    # Step 3: Generate and save all sections
+    logger.info("\nšŸ”Ø STEP 3: Generating documentation sections...")
+    logger.info("-" * 80)
+
+    sections_results = await generator.generate_and_save_sections(
+        data=collect_result["data"], save_individually=True
+    )
+
+    # Show results
+    sections_generated = sum(1 for r in sections_results if r.get("success"))
+    sections_failed = sum(1 for r in sections_results if not r.get("success"))
+
+    logger.info("\nāœ… Generation completed!")
+    logger.info(f"   Sections generated: {sections_generated}")
+    logger.info(f"   Sections failed: {sections_failed}")
+
+    # Show each section result
+    logger.info("\nšŸ“‹ Section Results:")
+    for result in sections_results:
+        if result.get("success"):
+            logger.info(f"   āœ… {result.get('title')}")
+            if "file_path" in result:
+                logger.info(f"      File: {result.get('file_path')}")
+        else:
+            logger.info(f"   āŒ {result.get('section_id')}: {result.get('error')}")
+
+    # Step 4: Summary
+    logger.info("\n" + "=" * 80)
+    logger.info("SUMMARY")
+    logger.info("=" * 80)
+    logger.info("āœ… Data Collection: SUCCESS")
+    logger.info(
+        f"āœ… Documentation Generation: {sections_generated}/{len(sections_results)} sections"
+    )
+
+    if sections_failed == 0:
+        logger.info("\nšŸŽ‰ All tests passed successfully!")
+    else:
+        logger.warning(f"\nāš ļø {sections_failed} section(s) failed to generate")
+
+    # Show output directory
+    output_dir = Path(generator.template.output_config.get("directory", "output"))
+    if output_dir.exists():
+        logger.info(f"\nšŸ“ Generated files available in: {output_dir.absolute()}")
+        md_files = list(output_dir.glob("**/*.md"))
+        if md_files:
+            logger.info(f"   Total markdown files: {len(md_files)}")
+            for md_file in md_files[:10]:  # Show first 10
+                logger.info(f"   - {md_file.relative_to(output_dir)}")
+            if len(md_files) > 10:
+                logger.info(f"   ... and {len(md_files) - 10} more")
+
+    logger.info("\n" + "=" * 80)
+
+
+def main() -> None:
+    """Main entry point"""
+    try:
+        asyncio.run(test_proxmox_documentation())
+    except KeyboardInterrupt:
+        logger.info("\n\nāš ļø Test interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"\n\nāŒ Test failed with error: {e}", exc_info=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/datacenter_docs/generators/template_generator.py b/src/datacenter_docs/generators/template_generator.py
new file mode 100644
index 0000000..01f3c1f
--- /dev/null
+++ b/src/datacenter_docs/generators/template_generator.py
@@ -0,0 +1,409 @@
+"""
+Template-Based Documentation Generator
+
+Generates documentation using YAML templates that define sections and prompts.
+This approach prevents LLM context overload by generating documentation in sections.
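+Each section's prompt receives only the data keys listed in its
+data_requirements, so prompt size stays bounded even for large inventories.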
+""" + +import json +import logging +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +from datacenter_docs.generators.base import BaseGenerator + +logger = logging.getLogger(__name__) + + +class DocumentationTemplate: + """Represents a documentation template loaded from YAML""" + + def __init__(self, template_path: Path): + """ + Initialize template from YAML file + + Args: + template_path: Path to YAML template file + """ + self.path = template_path + self.data = self._load_template() + + def _load_template(self) -> Dict[str, Any]: + """Load and parse YAML template""" + try: + with open(self.path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + except Exception as e: + logger.error(f"Failed to load template {self.path}: {e}") + raise + + @property + def name(self) -> str: + """Get template name""" + return self.data.get("metadata", {}).get("name", "Unknown") + + @property + def collector(self) -> str: + """Get required collector name""" + return self.data.get("metadata", {}).get("collector", "") + + @property + def sections(self) -> List[Dict[str, Any]]: + """Get documentation sections""" + return self.data.get("sections", []) + + @property + def generation_config(self) -> Dict[str, Any]: + """Get generation configuration""" + return self.data.get("generation", {}) + + @property + def output_config(self) -> Dict[str, Any]: + """Get output configuration""" + return self.data.get("output", {}) + + @property + def schedule_config(self) -> Dict[str, Any]: + """Get schedule configuration""" + return self.data.get("schedule", {}) + + +class TemplateBasedGenerator(BaseGenerator): + """ + Generator that uses YAML templates to generate sectioned documentation + + This prevents LLM context overload by: + 1. Loading templates that define sections + 2. Generating each section independently + 3. Using only required data for each section + """ + + def __init__(self, template_path: str): + """ + Initialize template-based generator + + Args: + template_path: Path to YAML template file + """ + self.template = DocumentationTemplate(Path(template_path)) + super().__init__( + name=self.template.collector, section=f"{self.template.collector}_docs" + ) + + async def generate(self, data: Dict[str, Any]) -> str: + """ + Generate complete documentation using template + + This method orchestrates the generation of all sections. 
+
+        Args:
+            data: Collected infrastructure data
+
+        Returns:
+            Combined documentation (all sections)
+        """
+        self.logger.info(
+            f"Generating documentation for {self.template.name} using template"
+        )
+
+        # Validate data matches template collector
+        collector_name = data.get("metadata", {}).get("collector", "")
+        if collector_name != self.template.collector:
+            self.logger.warning(
+                f"Data collector ({collector_name}) doesn't match template ({self.template.collector})"
+            )
+
+        # Generate each section
+        sections_content = []
+        for section_def in self.template.sections:
+            section_content = await self.generate_section(section_def, data)
+            if section_content:
+                sections_content.append(section_content)
+
+        # Combine all sections
+        combined_doc = self._combine_sections(sections_content)
+
+        return combined_doc
+
+    async def generate_section(
+        self, section_def: Dict[str, Any], full_data: Dict[str, Any]
+    ) -> Optional[str]:
+        """
+        Generate a single documentation section
+
+        Args:
+            section_def: Section definition from template
+            full_data: Complete collected data
+
+        Returns:
+            Generated section content in Markdown, or None if generation failed
+        """
+        section_id = section_def.get("id", "unknown")
+        section_title = section_def.get("title", "Untitled Section")
+        data_requirements = section_def.get("data_requirements", [])
+        prompt_template = section_def.get("prompt_template", "")
+
+        self.logger.info(f"Generating section: {section_title}")
+
+        # Extract only required data for this section
+        section_data = self._extract_section_data(full_data, data_requirements)
+
+        # Build prompt by substituting placeholders
+        prompt = self._build_prompt(prompt_template, section_data)
+
+        # Get generation config
+        gen_config = self.template.generation_config
+        temperature = gen_config.get("temperature", 0.7)
+        max_tokens = gen_config.get("max_tokens", 4000)
+
+        # System prompt for documentation generation
+        system_prompt = """You are a technical documentation expert specializing in datacenter infrastructure.
+Generate clear, accurate, and well-structured documentation in Markdown format.
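+When a data placeholder reads "No data available", state that the information is missing rather than inventing values.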
+
+Guidelines:
+- Use proper Markdown formatting (headers, tables, lists, code blocks)
+- Be precise and factual based on provided data
+- Include practical examples and recommendations
+- Use tables for structured data
+- Use bullet points for lists
+- Use code blocks for commands/configurations
+- Organize content with clear sections
+- Write in a professional but accessible tone
+"""
+
+        try:
+            # Generate content using LLM
+            content = await self.generate_with_llm(
+                system_prompt=system_prompt,
+                user_prompt=prompt,
+                temperature=temperature,
+                max_tokens=max_tokens,
+            )
+
+            # Add section header
+            section_content = f"# {section_title}\n\n{content}\n\n"
+
+            self.logger.info(f"āœ“ Section '{section_title}' generated successfully")
+            return section_content
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate section '{section_title}': {e}")
+            return None
+
+    def _extract_section_data(
+        self, full_data: Dict[str, Any], requirements: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Extract only required data for a section
+
+        Args:
+            full_data: Complete collected data
+            requirements: List of required data keys
+
+        Returns:
+            Dictionary with only required data
+        """
+        section_data = {}
+        data_section = full_data.get("data", {})
+
+        for req in requirements:
+            if req in data_section:
+                section_data[req] = data_section[req]
+            else:
+                self.logger.warning(f"Required data '{req}' not found in collected data")
+                section_data[req] = None
+
+        return section_data
+
+    def _build_prompt(self, template: str, data: Dict[str, Any]) -> str:
+        """
+        Build prompt by substituting data into template
+
+        Args:
+            template: Prompt template with {placeholders}
+            data: Data to substitute
+
+        Returns:
+            Completed prompt
+        """
+        prompt = template
+
+        # Replace each placeholder with formatted data
+        for key, value in data.items():
+            placeholder = f"{{{key}}}"
+            if placeholder in prompt:
+                # Format data for prompt
+                formatted_value = self._format_data_for_prompt(value)
+                prompt = prompt.replace(placeholder, formatted_value)
+
+        return prompt
+
+    def _format_data_for_prompt(self, data: Any) -> str:
+        """
+        Format data for inclusion in LLM prompt
+
+        Args:
+            data: Data to format (dict, list, str, etc.)
+
+        Returns:
+            Formatted string representation
+        """
+        if data is None:
+            return "No data available"
+
+        if isinstance(data, (dict, list)):
+            # Pretty print JSON for structured data
+            try:
+                return json.dumps(data, indent=2, default=str)
+            except Exception:
+                return str(data)
+
+        return str(data)
+
+    def _combine_sections(self, sections: List[str]) -> str:
+        """
+        Combine all sections into a single document
+
+        Args:
+            sections: List of section contents
+
+        Returns:
+            Combined markdown document
+        """
+        # Add document header
+        header = f"""# {self.template.name} Documentation
+
+*Generated automatically from infrastructure data*
+
+---
+
+"""
+
+        # Add table of contents
+        toc = "## Table of Contents\n\n"
+        for i, section in enumerate(sections, 1):
+            # Extract section title from first line
+            lines = section.strip().split("\n")
+            if lines:
+                title = lines[0].replace("#", "").strip()
+                toc += f"{i}. [{title}](#{title.lower().replace(' ', '-')})\n"
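+                # (anchor slugs above are GitHub-style: lowercased titles with spaces replaced by hyphens)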
+        toc += "\n---\n\n"
+
+        # Combine all parts
+        combined = header + toc + "\n".join(sections)
+
+        return combined
+
+    async def generate_and_save_sections(
+        self, data: Dict[str, Any], save_individually: bool = True
+    ) -> List[Dict[str, Any]]:
+        """
+        Generate and save each section individually
+
+        This is useful for very large documentation where you want each
+        section as a separate file.
+
+        Args:
+            data: Collected infrastructure data
+            save_individually: Save each section as separate file
+
+        Returns:
+            List of results for each section
+        """
+        results = []
+        output_config = self.template.output_config
+        output_dir = output_config.get("directory", "output")
+        save_to_db = output_config.get("save_to_database", True)
+        save_to_file = output_config.get("save_to_file", True)
+
+        for section_def in self.template.sections:
+            section_id = section_def.get("id")
+            section_title = section_def.get("title")
+
+            # Generate section
+            content = await self.generate_section(section_def, data)
+
+            if not content:
+                results.append(
+                    {
+                        "section_id": section_id,
+                        "success": False,
+                        "error": "Generation failed",
+                    }
+                )
+                continue
+
+            result = {
+                "section_id": section_id,
+                "title": section_title,
+                "success": True,
+                "content": content,
+            }
+
+            # Save section if requested
+            if save_individually:
+                if save_to_file:
+                    # Save to file
+                    output_path = Path(output_dir)
+                    output_path.mkdir(parents=True, exist_ok=True)
+                    filename = f"{section_id}.md"
+                    file_path = output_path / filename
+                    file_path.write_text(content, encoding="utf-8")
+                    result["file_path"] = str(file_path)
+                    self.logger.info(f"Saved section to: {file_path}")
+
+                if save_to_db:
+                    # Save to database
+                    metadata = {
+                        "section_id": section_id,
+                        "template": str(self.template.path),
+                        "category": section_def.get("category", ""),
+                    }
+                    # Create temporary generator for this section
+                    temp_gen = BaseGenerator.__new__(BaseGenerator)
+                    temp_gen.name = self.name
+                    temp_gen.section = section_id
+                    temp_gen.logger = self.logger
+                    temp_gen.llm = self.llm
+                    await temp_gen.save_to_database(content, metadata)
+
+            results.append(result)
+
+        return results
+
+
+async def example_usage() -> None:
+    """Example of using template-based generator"""
+    from datacenter_docs.collectors.proxmox_collector import ProxmoxCollector
+
+    # Collect data
+    collector = ProxmoxCollector()
+    collect_result = await collector.run()
+
+    if not collect_result["success"]:
+        print(f"āŒ Collection failed: {collect_result.get('error')}")
+        return
+
+    # Generate documentation using template
+    template_path = "templates/documentation/proxmox.yaml"
+    generator = TemplateBasedGenerator(template_path)
+
+    # Generate and save all sections
+    sections_results = await generator.generate_and_save_sections(
+        data=collect_result["data"], save_individually=True
+    )
+
+    # Print results
+    for result in sections_results:
+        if result["success"]:
+            print(f"āœ… Section '{result['title']}' generated successfully")
+        else:
+            print(f"āŒ Section '{result.get('section_id')}' failed: {result.get('error')}")
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(example_usage())
diff --git a/src/datacenter_docs/utils/config.py b/src/datacenter_docs/utils/config.py
index 1ba3c0a..b74bec1 100644
--- a/src/datacenter_docs/utils/config.py
+++ b/src/datacenter_docs/utils/config.py
@@ -72,6 +72,20 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/0"
 
+    # Additional Port Configuration (for Docker services)
+    MONGODB_PORT: int = 27017
+    REDIS_PORT: int = 6379
+    CHAT_PORT: int = 8001
+    FLOWER_PORT: int = 5555
+    FRONTEND_PORT: int = 8080
+
+    # MongoDB Root Credentials (for Docker initialization)
+    MONGO_ROOT_USER: str = "admin"
+    MONGO_ROOT_PASSWORD: str = "admin123"
+
+    # Redis Password
+    REDIS_PASSWORD: str = ""
+
     @model_validator(mode="before")
     @classmethod
     def set_celery_defaults(cls, values: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/src/datacenter_docs/workers/celery_app.py b/src/datacenter_docs/workers/celery_app.py
index e353817..7aad66c 100644
--- a/src/datacenter_docs/workers/celery_app.py
+++ b/src/datacenter_docs/workers/celery_app.py
@@ -35,6 +35,7 @@ celery_app = Celery(
     backend=settings.CELERY_RESULT_BACKEND,
     include=[
         "datacenter_docs.workers.tasks",
+        "datacenter_docs.workers.documentation_tasks",
     ],
 )
 
@@ -67,6 +68,11 @@ celery_app.conf.update(
             "queue": "data_collection"
         },
         "datacenter_docs.workers.tasks.cleanup_old_data_task": {"queue": "maintenance"},
+        "collect_and_generate_docs": {"queue": "documentation"},
+        "generate_proxmox_docs": {"queue": "documentation"},
+        "generate_all_docs": {"queue": "documentation"},
+        "index_generated_docs": {"queue": "documentation"},
+        "full_docs_pipeline": {"queue": "documentation"},
     },
     # Task rate limits
     task_annotations={
@@ -77,10 +83,28 @@ celery_app.conf.update(
     },
     # Beat schedule (periodic tasks)
     beat_schedule={
+        # Generate Proxmox documentation daily at 2 AM
+        "generate-proxmox-docs-daily": {
+            "task": "generate_proxmox_docs",
+            "schedule": crontab(minute=0, hour=2),  # Daily at 2 AM
+            "options": {"queue": "documentation"},
+        },
         # Generate all documentation every 6 hours
         "generate-all-docs-every-6h": {
+            "task": "generate_all_docs",
+            "schedule": crontab(minute=30, hour="*/6"),  # Every 6 hours at :30
+            "options": {"queue": "documentation"},
+        },
+        # Full documentation pipeline weekly
+        "full-docs-pipeline-weekly": {
+            "task": "full_docs_pipeline",
+            "schedule": crontab(minute=0, hour=3, day_of_week=0),  # Sunday at 3 AM
+            "options": {"queue": "documentation"},
+        },
+        # Legacy tasks (keep for backward compatibility)
+        "generate-all-docs-legacy": {
             "task": "datacenter_docs.workers.tasks.generate_documentation_task",
-            "schedule": crontab(minute=0, hour="*/6"),  # Every 6 hours
+            "schedule": crontab(minute=0, hour="*/12"),  # Every 12 hours
             "args": (),
             "options": {"queue": "documentation"},
         },
diff --git a/src/datacenter_docs/workers/documentation_tasks.py b/src/datacenter_docs/workers/documentation_tasks.py
new file mode 100644
index 0000000..d28243b
--- /dev/null
+++ b/src/datacenter_docs/workers/documentation_tasks.py
@@ -0,0 +1,347 @@
+"""
+Celery Tasks for Documentation Generation
+
+Scheduled tasks for collecting data and generating documentation
+from infrastructure systems (Proxmox, VMware, Kubernetes, etc.)
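+
+All tasks are synchronous Celery entry points that drive the async
+collectors and generators on a dedicated event loop.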
+""" + +import logging +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List + +from celery import group + +from datacenter_docs.workers.celery_app import celery_app + +logger = logging.getLogger(__name__) + + +@celery_app.task(name="collect_and_generate_docs", bind=True) +def collect_and_generate_docs( + self, collector_name: str, template_path: str +) -> Dict[str, Any]: + """ + Collect data from infrastructure and generate documentation + + Args: + collector_name: Name of collector to use (e.g., 'proxmox', 'vmware') + template_path: Path to documentation template YAML file + + Returns: + Result dictionary with status and details + """ + import asyncio + + task_id = self.request.id + logger.info( + f"[{task_id}] Starting documentation generation: {collector_name} -> {template_path}" + ) + + result = { + "task_id": task_id, + "collector": collector_name, + "template": template_path, + "success": False, + "started_at": datetime.now().isoformat(), + "completed_at": None, + "error": None, + "sections_generated": 0, + "sections_failed": 0, + } + + try: + # Run async collection and generation + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + generation_result = loop.run_until_complete( + _async_collect_and_generate(collector_name, template_path) + ) + + loop.close() + + # Update result + result.update(generation_result) + result["success"] = True + result["completed_at"] = datetime.now().isoformat() + + logger.info( + f"[{task_id}] Documentation generation completed: " + f"{result['sections_generated']} sections generated, " + f"{result['sections_failed']} failed" + ) + + except Exception as e: + result["error"] = str(e) + result["completed_at"] = datetime.now().isoformat() + logger.error(f"[{task_id}] Documentation generation failed: {e}", exc_info=True) + + return result + + +async def _async_collect_and_generate( + collector_name: str, template_path: str +) -> Dict[str, Any]: + """ + Async implementation of collect and generate workflow + + Args: + collector_name: Collector name + template_path: Template path + + Returns: + Generation result + """ + from datacenter_docs.generators.template_generator import TemplateBasedGenerator + + # Import appropriate collector + collector = await _get_collector(collector_name) + + # Collect data + logger.info(f"Collecting data with {collector_name} collector...") + collect_result = await collector.run() + + if not collect_result["success"]: + raise Exception(f"Data collection failed: {collect_result.get('error')}") + + collected_data = collect_result["data"] + + # Generate documentation using template + logger.info(f"Generating documentation using template: {template_path}") + generator = TemplateBasedGenerator(template_path) + + sections_results = await generator.generate_and_save_sections( + data=collected_data, save_individually=True + ) + + # Count successes and failures + sections_generated = sum(1 for r in sections_results if r.get("success")) + sections_failed = sum(1 for r in sections_results if not r.get("success")) + + return { + "sections_generated": sections_generated, + "sections_failed": sections_failed, + "sections": sections_results, + "collector_stats": collect_result["data"].get("data", {}).get("statistics", {}), + } + + +async def _get_collector(collector_name: str) -> Any: + """ + Get collector instance by name + + Args: + collector_name: Name of collector + + Returns: + Collector instance + """ + from datacenter_docs.collectors.kubernetes_collector import KubernetesCollector + 
+    from datacenter_docs.collectors.proxmox_collector import ProxmoxCollector
+    from datacenter_docs.collectors.vmware_collector import VMwareCollector
+
+    collectors = {
+        "proxmox": ProxmoxCollector,
+        "vmware": VMwareCollector,
+        "kubernetes": KubernetesCollector,
+    }
+
+    if collector_name not in collectors:
+        raise ValueError(
+            f"Unknown collector: {collector_name}. Available: {list(collectors.keys())}"
+        )
+
+    return collectors[collector_name]()
+
+
+@celery_app.task(name="generate_proxmox_docs")
+def generate_proxmox_docs() -> Dict[str, Any]:
+    """
+    Scheduled task to generate Proxmox documentation
+
+    This task is scheduled via Celery Beat to run daily.
+
+    Returns:
+        Task result
+    """
+    logger.info("Scheduled Proxmox documentation generation started")
+
+    template_path = "templates/documentation/proxmox.yaml"
+
+    return collect_and_generate_docs(collector_name="proxmox", template_path=template_path)
+
+
+@celery_app.task(name="generate_all_docs")
+def generate_all_docs() -> Dict[str, Any]:
+    """
+    Generate documentation for all configured systems
+
+    This creates parallel tasks for each system.
+
+    Returns:
+        Result with task IDs
+    """
+    logger.info("Starting documentation generation for all systems")
+
+    # Define all systems and their templates
+    systems = [
+        {"collector": "proxmox", "template": "templates/documentation/proxmox.yaml"},
+        # Add more as templates are created:
+        # {"collector": "vmware", "template": "templates/documentation/vmware.yaml"},
+        # {"collector": "kubernetes", "template": "templates/documentation/k8s.yaml"},
+    ]
+
+    # Create parallel tasks
+    task_group = group(
+        [
+            collect_and_generate_docs.s(system["collector"], system["template"])
+            for system in systems
+        ]
+    )
+
+    # Execute group
+    result = task_group.apply_async()
+
+    return {
+        "task_group_id": result.id,
+        "systems": len(systems),
+        "message": "Documentation generation started for all systems",
+    }
+
+
+@celery_app.task(name="index_generated_docs")
+def index_generated_docs(output_dir: str = "output") -> Dict[str, Any]:
+    """
+    Index all generated documentation into vector store for RAG
+
+    This task should run after documentation generation to make
+    the new docs searchable in the chat interface.
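+    File and chunk counts in the result are size-based estimates, not
+    exact figures from the vector store.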
+
+    Args:
+        output_dir: Directory containing generated markdown files
+
+    Returns:
+        Indexing result
+    """
+    import asyncio
+
+    logger.info(f"Starting documentation indexing from {output_dir}")
+
+    result = {
+        "success": False,
+        "files_indexed": 0,
+        "chunks_created": 0,
+        "error": None,
+    }
+
+    try:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+        index_result = loop.run_until_complete(_async_index_docs(output_dir))
+
+        loop.close()
+
+        result.update(index_result)
+        result["success"] = True
+
+        logger.info(
+            f"Documentation indexing completed: {result['files_indexed']} files, "
+            f"{result['chunks_created']} chunks"
+        )
+
+    except Exception as e:
+        result["error"] = str(e)
+        logger.error(f"Documentation indexing failed: {e}", exc_info=True)
+
+    return result
+
+
+async def _async_index_docs(output_dir: str) -> Dict[str, Any]:
+    """
+    Async implementation of documentation indexing
+
+    Args:
+        output_dir: Output directory with markdown files
+
+    Returns:
+        Indexing result
+    """
+    from datacenter_docs.chat.agent import DocumentationAgent
+
+    agent = DocumentationAgent()
+
+    # Index all markdown files in output directory
+    docs_path = Path(output_dir)
+
+    if not docs_path.exists():
+        raise FileNotFoundError(f"Output directory not found: {output_dir}")
+
+    await agent.index_documentation(docs_path)
+
+    # Count indexed files and chunks
+    # (This is a simplified version; the real implementation would track this better)
+    md_files = list(docs_path.glob("**/*.md"))
+    files_indexed = len(md_files)
+
+    # Estimate chunks: ~1000 chars per chunk with 200 overlap -> ~800 effective chars each
+    total_chars = sum(f.stat().st_size for f in md_files)
+    chunks_created = total_chars // 800  # Rough estimate
+
+    return {"files_indexed": files_indexed, "chunks_created": chunks_created}
+
+
+@celery_app.task(name="full_docs_pipeline")
+def full_docs_pipeline() -> Dict[str, Any]:
+    """
+    Full documentation pipeline: collect -> generate -> index
+
+    This is the master task that orchestrates the entire workflow.
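+    Generation and indexing are linked in a Celery chain, so indexing is
+    queued only after the generation step has completed.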
+
+    Returns:
+        Pipeline result
+    """
+    logger.info("Starting full documentation pipeline")
+
+    from celery import chain
+
+    # Chain generation and indexing so indexing runs only after generation.
+    # index_generated_docs does not consume the previous task's result, so
+    # it uses an immutable signature (.si()).
+    pipeline = chain(
+        generate_all_docs.s(),
+        index_generated_docs.si("output"),
+    )
+
+    result = pipeline.apply_async()
+
+    return {
+        "pipeline_id": result.id,
+        "message": "Full documentation pipeline started",
+        "steps": ["generate_all_docs", "index_generated_docs"],
+    }
+
+
+# Periodic task configuration (if using Celery Beat)
+# Add to celery_app.py or separate beat configuration:
+"""
+from celery.schedules import crontab
+
+celery_app.conf.beat_schedule = {
+    'generate-proxmox-docs-daily': {
+        'task': 'generate_proxmox_docs',
+        'schedule': crontab(hour=2, minute=0),  # Daily at 2 AM
+    },
+    'generate-all-docs-daily': {
+        'task': 'generate_all_docs',
+        'schedule': crontab(hour=2, minute=30),  # Daily at 2:30 AM
+    },
+    'full-docs-pipeline-weekly': {
+        'task': 'full_docs_pipeline',
+        'schedule': crontab(hour=3, minute=0, day_of_week=0),  # Weekly on Sunday at 3 AM
+    },
+}
+"""
diff --git a/templates/documentation/proxmox.yaml b/templates/documentation/proxmox.yaml
new file mode 100644
index 0000000..2ab580c
--- /dev/null
+++ b/templates/documentation/proxmox.yaml
@@ -0,0 +1,221 @@
+# Proxmox Documentation Template
+# Defines documentation sections to generate from Proxmox data
+# Each section is generated independently to avoid LLM context overload
+
+metadata:
+  name: "Proxmox Virtual Environment"
+  collector: "proxmox"
+  version: "1.0.0"
+  description: "Documentation template for Proxmox VE infrastructure"
+
+# Documentation sections - each generates a separate markdown file
+sections:
+  - id: "proxmox_overview"
+    title: "Proxmox Infrastructure Overview"
+    category: "infrastructure"
+    priority: 1
+    description: "High-level overview of Proxmox cluster and resources"
+    data_requirements:
+      - "cluster"
+      - "statistics"
+      - "nodes"
+    prompt_template: |
+      Generate comprehensive documentation for our Proxmox Virtual Environment cluster.
+
+      **Cluster Information:**
+      {cluster}
+
+      **Infrastructure Statistics:**
+      {statistics}
+
+      **Nodes:**
+      {nodes}
+
+      Create a well-structured markdown document that includes:
+      1. Cluster overview with key statistics
+      2. Node inventory and status
+      3. Resource allocation summary (CPU, RAM, Storage)
+      4. High availability status
+      5. Capacity planning insights
+
+      Use tables, bullet points, and clear sections. Include actual values from the data.
+
+  - id: "proxmox_vms"
+    title: "Virtual Machines Inventory"
+    category: "virtualization"
+    priority: 2
+    description: "Complete inventory of QEMU virtual machines"
+    data_requirements:
+      - "vms"
+      - "nodes"
+    prompt_template: |
+      Generate detailed documentation for all virtual machines in the Proxmox cluster.
+
+      **Virtual Machines:**
+      {vms}
+
+      **Nodes:**
+      {nodes}
+
+      Create documentation that includes:
+      1. VM inventory table (VMID, Name, Node, Status, vCPU, RAM, Disk)
+      2. VMs grouped by node
+      3. VMs grouped by status (running/stopped)
+      4. Resource allocation per VM
+      5. Naming conventions and patterns observed
+      6. Recommendations for VM placement and balancing
+
+      Use markdown tables and organize information clearly.
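+
+  # Note: each {placeholder} above is replaced at generation time with
+  # pretty-printed JSON for the matching data_requirements key.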
+
+  - id: "proxmox_containers"
+    title: "LXC Containers Inventory"
+    category: "virtualization"
+    priority: 3
+    description: "Complete inventory of LXC containers"
+    data_requirements:
+      - "containers"
+      - "nodes"
+    prompt_template: |
+      Generate detailed documentation for all LXC containers in the Proxmox cluster.
+
+      **Containers:**
+      {containers}
+
+      **Nodes:**
+      {nodes}
+
+      Create documentation that includes:
+      1. Container inventory table (VMID, Name, Node, Status, vCPU, RAM, Disk)
+      2. Containers grouped by node
+      3. Containers grouped by status (running/stopped)
+      4. Resource allocation per container
+      5. Use cases and patterns for containers vs VMs
+      6. Recommendations for container management
+
+      Use markdown tables and clear organization.
+
+  - id: "proxmox_storage"
+    title: "Storage Configuration"
+    category: "storage"
+    priority: 4
+    description: "Storage pools and allocation"
+    data_requirements:
+      - "storage"
+      - "statistics"
+    prompt_template: |
+      Generate comprehensive storage documentation for the Proxmox cluster.
+
+      **Storage Pools:**
+      {storage}
+
+      **Overall Statistics:**
+      {statistics}
+
+      Create documentation that includes:
+      1. Storage inventory table (Name, Type, Total, Used, Available, Usage %)
+      2. Storage types explained (local, NFS, Ceph, etc.)
+      3. Content types per storage (images, ISO, containers)
+      4. Storage capacity analysis
+      5. Performance considerations
+      6. Backup storage recommendations
+      7. Capacity planning and alerts
+
+      Use markdown tables and clear sections.
+
+  - id: "proxmox_networking"
+    title: "Network Configuration"
+    category: "network"
+    priority: 5
+    description: "Network bridges and configuration"
+    data_requirements:
+      - "networks"
+      - "nodes"
+    prompt_template: |
+      Generate network configuration documentation for the Proxmox cluster.
+
+      **Network Interfaces:**
+      {networks}
+
+      **Nodes:**
+      {nodes}
+
+      Create documentation that includes:
+      1. Network bridges inventory (Bridge, Type, CIDR, Ports, Purpose)
+      2. Network topology diagram (text-based or description)
+      3. VLAN configuration if present
+      4. Network purposes (management, VM, storage, etc.)
+      5. Best practices for network separation
+      6. Troubleshooting guides for common network issues
+
+      Use markdown tables and clear explanations.
+
+  - id: "proxmox_maintenance"
+    title: "Maintenance Procedures"
+    category: "operations"
+    priority: 6
+    description: "Standard maintenance and operational procedures"
+    data_requirements:
+      - "nodes"
+      - "cluster"
+      - "vms"
+      - "containers"
+    prompt_template: |
+      Generate operational and maintenance documentation for the Proxmox cluster.
+
+      **Cluster Info:**
+      {cluster}
+
+      **Nodes:**
+      {nodes}
+
+      Based on the cluster configuration, create documentation that includes:
+      1. **Backup Procedures**
+         - VM/Container backup strategies
+         - Configuration backup
+         - Retention policies
+
+      2. **Update Procedures**
+         - Proxmox VE updates
+         - Kernel updates
+         - Rolling updates for HA clusters
+
+      3. **Monitoring**
+         - Key metrics to monitor
+         - Alert thresholds
+         - Dashboard recommendations
+
+      4. **Common Tasks**
+         - Creating VMs/Containers
+         - Migration procedures
+         - Storage management
+         - Snapshot management
+
+      5. **Troubleshooting**
+         - Common issues and solutions
+         - Log locations
+         - Recovery procedures
+
+      6. **Emergency Contacts**
+         - Escalation procedures
+         - Vendor support information
+
+      Make it practical and actionable for the operations team.
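+
+  # To cover an additional subsystem, append a section here. Hypothetical example:
+  # - id: "proxmox_backups"
+  #   title: "Backup Jobs"
+  #   category: "operations"
+  #   priority: 7
+  #   data_requirements:
+  #     - "backups"          # must match a key under "data" in the collector output
+  #   prompt_template: |
+  #     Document the configured backup jobs: {backups}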
+
+# Generation settings
+generation:
+  max_tokens: 4000
+  temperature: 0.7
+  language: "en"  # Default language, can be overridden
+
+# Output configuration
+output:
+  directory: "output/proxmox"
+  filename_pattern: "{section_id}.md"
+  save_to_database: true
+  save_to_file: true
+
+# Scheduling (for Celery tasks)
+schedule:
+  enabled: true
+  cron: "0 2 * * *"  # Daily at 2 AM
+  timezone: "UTC"
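
A minimal smoke test for the new tasks, assuming a running broker, a worker consuming the "documentation" queue, and the Redis result backend configured above (the 600-second timeout is arbitrary):

# Dispatch a one-off Proxmox run -- the same call the daily beat entry makes.
from datacenter_docs.workers.documentation_tasks import (
    collect_and_generate_docs,
    full_docs_pipeline,
)

async_result = collect_and_generate_docs.apply_async(
    args=["proxmox", "templates/documentation/proxmox.yaml"],
    queue="documentation",
)
print(async_result.get(timeout=600))  # result dict with sections_generated/failed

# Or kick off the whole collect -> generate -> index chain.
full_docs_pipeline.delay()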