"""
Infrastructure Documentation Generator

Generates comprehensive infrastructure documentation from collected VMware,
Kubernetes, and other infrastructure data.
"""

import json
import logging
from typing import Any, Dict

from datacenter_docs.generators.base import BaseGenerator

logger = logging.getLogger(__name__)

# (statistics key, label) pairs rendered as plain counts in the data summary.
_COUNT_STATS = (
    ("total_vms", "Total VMs"),
    ("powered_on_vms", "Powered On VMs"),
    ("total_hosts", "Total Hosts"),
    ("total_clusters", "Total Clusters"),
    ("total_datastores", "Total Datastores"),
)

# (statistics key, label) pairs rendered as terabyte figures with two decimals.
_STORAGE_STATS = (
    ("total_storage_tb", "Total Storage"),
    ("used_storage_tb", "Used Storage"),
)

# (data key, heading, noun) triples for the per-collection "N <noun> found" lines.
_INVENTORY_SECTIONS = (
    ("vms", "Virtual Machines", "VMs"),
    ("hosts", "ESXi Hosts", "hosts"),
    ("clusters", "Clusters", "clusters"),
    ("datastores", "Datastores", "datastores"),
    ("networks", "Networks", "networks"),
)


class InfrastructureGenerator(BaseGenerator):
    """
    Generator for comprehensive infrastructure documentation

    Creates detailed documentation covering:
    - VMware vSphere environment
    - Virtual machines and hosts
    - Clusters and resource pools
    - Storage and networking
    - Resource utilization
    - Best practices and recommendations
    """

    def __init__(self) -> None:
        """Initialize infrastructure generator"""
        super().__init__(name="infrastructure", section="infrastructure_overview")

    async def generate(self, data: Dict[str, Any]) -> str:
        """
        Generate infrastructure documentation from collected data

        Args:
            data: Collected infrastructure data from VMware collector. The
                "metadata" and "data" keys are read; a missing key or an
                explicit ``None`` value is treated as an empty dict.

        Returns:
            Markdown-formatted documentation
        """
        # `or {}` (rather than a .get() default) also covers keys that are
        # present but set to None, which would otherwise break the .get()
        # calls made further down.
        metadata = data.get("metadata") or {}
        infrastructure_data = data.get("data") or {}

        # Build comprehensive prompt
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(infrastructure_data, metadata)

        # Generate documentation using LLM
        # NOTE(review): self.logger and generate_with_llm are not defined in
        # this file; presumably both are provided by BaseGenerator - confirm.
        self.logger.info("Generating infrastructure documentation with LLM...")
        content = await self.generate_with_llm(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.7,
            max_tokens=8000,  # Longer for comprehensive docs
        )

        # Post-process content
        return self._post_process_content(content, metadata)

    def _build_system_prompt(self) -> str:
        """
        Build system prompt for LLM

        Returns:
            System prompt string
        """
        return """You are an expert datacenter infrastructure documentation specialist.

Your task is to generate comprehensive, professional infrastructure documentation in Markdown format.

Guidelines:
1. **Structure**: Use clear hierarchical headings (##, ###, ####)
2. **Clarity**: Write clear, concise descriptions that non-technical stakeholders can understand
3. **Completeness**: Cover all major infrastructure components
4. **Actionable**: Include recommendations and best practices
5. **Visual**: Use tables, lists, and code blocks for better readability
6. **Accurate**: Base all content strictly on the provided data

Documentation sections to include:
- Executive Summary (high-level overview)
- Infrastructure Overview (total resources, key metrics)
- Virtual Machines (VMs status, resource allocation)
- ESXi Hosts (hardware, versions, health)
- Clusters (DRS, HA, vSAN configuration)
- Storage (datastores, capacity, usage)
- Networking (networks, VLANs, connectivity)
- Resource Utilization (CPU, memory, storage trends)
- Health & Compliance (warnings, recommendations)
- Recommendations (optimization opportunities)

Format: Professional Markdown with proper headings, tables, and formatting.
Tone: Professional, clear, and authoritative.
"""

    def _build_user_prompt(
        self, infrastructure_data: Dict[str, Any], metadata: Dict[str, Any]
    ) -> str:
        """
        Build user prompt with infrastructure data

        The prompt contains the collection metadata, a short human-readable
        summary, and the complete data serialized as JSON.

        Args:
            infrastructure_data: Infrastructure data
            metadata: Collection metadata

        Returns:
            User prompt string
        """
        # Format data for better LLM understanding
        data_summary = self._format_data_summary(infrastructure_data)

        # default=str stringifies any value json cannot serialize natively
        # instead of raising.
        prompt = f"""Generate comprehensive infrastructure documentation based on the following data:

**Collection Metadata:**
- Collector: {metadata.get('collector', 'unknown')}
- Collected at: {metadata.get('collected_at', 'unknown')}
- Version: {metadata.get('version', 'unknown')}

**Infrastructure Data Summary:**
{data_summary}

**Complete Infrastructure Data (JSON):**
```json
{json.dumps(infrastructure_data, indent=2, default=str)}
```

Please generate a complete, professional infrastructure documentation in Markdown format following the guidelines provided.
"""
        return prompt

    @staticmethod
    def _format_storage_tb(value: Any) -> str:
        """
        Render a storage figure as terabytes with two decimals

        Args:
            value: Storage amount; anything ``float()`` accepts

        Returns:
            e.g. "32.50 TB", or "N/A" when the value cannot be converted
        """
        try:
            return f"{float(value):.2f} TB"
        except (TypeError, ValueError):
            # None or non-numeric text: degrade gracefully instead of
            # aborting the whole documentation run over one bad statistic.
            return "N/A"

    def _format_data_summary(self, data: Dict[str, Any]) -> str:
        """
        Format infrastructure data into human-readable summary

        Emits a "Statistics" section when statistics are present, followed by
        one "N items found" line for each non-empty inventory collection
        (vms, hosts, clusters, datastores, networks).

        Args:
            data: Infrastructure data

        Returns:
            Formatted summary string (empty when nothing is present)
        """
        summary_parts = []

        # Statistics
        stats = data.get("statistics") or {}
        if stats:
            summary_parts.append("**Statistics:**")
            summary_parts.extend(f"- {label}: {stats.get(key, 0)}" for key, label in _COUNT_STATS)
            summary_parts.extend(
                f"- {label}: {self._format_storage_tb(stats.get(key, 0))}"
                for key, label in _STORAGE_STATS
            )
            summary_parts.append("")

        # Per-collection summaries; empty or missing collections are skipped
        for key, heading, noun in _INVENTORY_SECTIONS:
            items = data.get(key)
            if items:
                summary_parts.append(f"**{heading}:** {len(items)} {noun} found")
                summary_parts.append("")

        return "\n".join(summary_parts)

    def _post_process_content(self, content: str, metadata: Dict[str, Any]) -> str:
        """
        Post-process generated content

        Wraps the generated body in a title/metadata header and a fixed
        informational footer.

        Args:
            content: Generated content
            metadata: Collection metadata

        Returns:
            Post-processed content
        """
        # Add header
        header = f"""# Infrastructure Documentation

**Generated:** {metadata.get('collected_at', 'N/A')}
**Source:** {metadata.get('collector', 'VMware Collector')}
**Version:** {metadata.get('version', 'N/A')}

---

"""

        # Add footer
        footer = """

---

**Document Information:**
- **Auto-generated:** This document was automatically generated from infrastructure data
- **Accuracy:** All information is based on live infrastructure state at time of collection
- **Updates:** Documentation should be regenerated periodically to reflect current state

**Disclaimer:** This documentation is for internal use only. Verify all critical information before making infrastructure changes.
"""

        return header + content + footer


# Example usage
async def example_usage() -> None:
    """Example of using the infrastructure generator"""

    # Sample VMware data (would come from VMware collector)
    sample_data = {
        "metadata": {
            "collector": "vmware",
            "collected_at": "2025-10-19T23:00:00",
            "version": "1.0.0",
        },
        "data": {
            "statistics": {
                "total_vms": 45,
                "powered_on_vms": 42,
                "total_hosts": 6,
                "total_clusters": 2,
                "total_datastores": 4,
                "total_storage_tb": 50.0,
                "used_storage_tb": 32.5,
            },
            "vms": [
                {
                    "name": "web-server-01",
                    "power_state": "poweredOn",
                    "num_cpu": 4,
                    "memory_mb": 8192,
                    "guest_os": "Ubuntu Linux (64-bit)",
                },
                # More VMs...
            ],
            "hosts": [
                {
                    "name": "esxi-host-01.example.com",
                    "num_cpu": 24,
                    "memory_mb": 131072,
                    "version": "7.0.3",
                }
            ],
            "clusters": [
                {
                    "name": "Production-Cluster",
                    "total_hosts": 3,
                    "drs_enabled": True,
                    "ha_enabled": True,
                }
            ],
        },
    }

    # Generate documentation
    generator = InfrastructureGenerator()
    result = await generator.run(
        data=sample_data, save_to_db=True, save_to_file=True, output_dir="output/docs"
    )

    if result["success"]:
        print("Documentation generated successfully!")
        print(f"Content length: {len(result['content'])} characters")
        if result["file_path"]:
            print(f"Saved to: {result['file_path']}")
    else:
        print(f"Generation failed: {result['error']}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(example_usage())