llm-automation-docs-and-rem…/src/datacenter_docs/generators/infrastructure_generator.py

"""
Infrastructure Documentation Generator

Generates comprehensive infrastructure documentation from collected VMware,
Kubernetes, and other infrastructure data.
"""

import json
import logging
from typing import Any, Dict

from datacenter_docs.generators.base import BaseGenerator

logger = logging.getLogger(__name__)


class InfrastructureGenerator(BaseGenerator):
    """
    Generator for comprehensive infrastructure documentation

    Creates detailed documentation covering:
    - VMware vSphere environment
    - Virtual machines and hosts
    - Clusters and resource pools
    - Storage and networking
    - Resource utilization
    - Best practices and recommendations
    """

    def __init__(self) -> None:
        """Initialize infrastructure generator"""
        super().__init__(name="infrastructure", section="infrastructure_overview")

    async def generate(self, data: Dict[str, Any]) -> str:
        """
        Generate infrastructure documentation from collected data

        Args:
            data: Collected infrastructure data from VMware collector

        Returns:
            Markdown-formatted documentation
        """
        # Extract metadata
        metadata = data.get("metadata", {})
        infrastructure_data = data.get("data", {})

        # Build comprehensive prompt
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(infrastructure_data, metadata)

        # Generate documentation using LLM
        self.logger.info("Generating infrastructure documentation with LLM...")
        content = await self.generate_with_llm(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.7,
            max_tokens=8000,  # Longer for comprehensive docs
        )

        # Post-process content
        content = self._post_process_content(content, metadata)

        return content

    def _build_system_prompt(self) -> str:
        """
        Build system prompt for LLM

        Returns:
            System prompt string
        """
        return """You are an expert datacenter infrastructure documentation specialist.

Your task is to generate comprehensive, professional infrastructure documentation in Markdown format.

Guidelines:
1. **Structure**: Use clear hierarchical headings (##, ###, ####)
2. **Clarity**: Write clear, concise descriptions that non-technical stakeholders can understand
3. **Completeness**: Cover all major infrastructure components
4. **Actionable**: Include recommendations and best practices
5. **Visual**: Use tables, lists, and code blocks for better readability
6. **Accurate**: Base all content strictly on the provided data

Documentation sections to include:
- Executive Summary (high-level overview)
- Infrastructure Overview (total resources, key metrics)
- Virtual Machines (VMs status, resource allocation)
- ESXi Hosts (hardware, versions, health)
- Clusters (DRS, HA, vSAN configuration)
- Storage (datastores, capacity, usage)
- Networking (networks, VLANs, connectivity)
- Resource Utilization (CPU, memory, storage trends)
- Health & Compliance (warnings, recommendations)
- Recommendations (optimization opportunities)

Format: Professional Markdown with proper headings, tables, and formatting.
Tone: Professional, clear, and authoritative.
"""

    def _build_user_prompt(
        self, infrastructure_data: Dict[str, Any], metadata: Dict[str, Any]
    ) -> str:
        """
        Build user prompt with infrastructure data

        Args:
            infrastructure_data: Infrastructure data
            metadata: Collection metadata

        Returns:
            User prompt string
        """
        # Format data for better LLM understanding
        data_summary = self._format_data_summary(infrastructure_data)

        prompt = f"""Generate comprehensive infrastructure documentation based on the following data:

**Collection Metadata:**
- Collector: {metadata.get('collector', 'unknown')}
- Collected at: {metadata.get('collected_at', 'unknown')}
- Version: {metadata.get('version', 'unknown')}

**Infrastructure Data Summary:**
{data_summary}

**Complete Infrastructure Data (JSON):**
```json
{json.dumps(infrastructure_data, indent=2, default=str)}
```

Please generate a complete, professional infrastructure documentation in Markdown format following the guidelines provided.
"""
        return prompt

    def _format_data_summary(self, data: Dict[str, Any]) -> str:
        """
        Format infrastructure data into human-readable summary

        Args:
            data: Infrastructure data

        Returns:
            Formatted summary string
        """
        summary_parts = []

        # Statistics
        stats = data.get("statistics", {})
        if stats:
            summary_parts.append("**Statistics:**")
            summary_parts.append(f"- Total VMs: {stats.get('total_vms', 0)}")
            summary_parts.append(f"- Powered On VMs: {stats.get('powered_on_vms', 0)}")
            summary_parts.append(f"- Total Hosts: {stats.get('total_hosts', 0)}")
            summary_parts.append(f"- Total Clusters: {stats.get('total_clusters', 0)}")
            summary_parts.append(f"- Total Datastores: {stats.get('total_datastores', 0)}")
            summary_parts.append(f"- Total Storage: {stats.get('total_storage_tb', 0):.2f} TB")
            summary_parts.append(f"- Used Storage: {stats.get('used_storage_tb', 0):.2f} TB")
            summary_parts.append("")

        # VMs summary
        vms = data.get("vms", [])
        if vms:
            summary_parts.append(f"**Virtual Machines:** {len(vms)} VMs found")
            summary_parts.append("")

        # Hosts summary
        hosts = data.get("hosts", [])
        if hosts:
            summary_parts.append(f"**ESXi Hosts:** {len(hosts)} hosts found")
            summary_parts.append("")

        # Clusters summary
        clusters = data.get("clusters", [])
        if clusters:
            summary_parts.append(f"**Clusters:** {len(clusters)} clusters found")
            summary_parts.append("")

        # Datastores summary
        datastores = data.get("datastores", [])
        if datastores:
            summary_parts.append(f"**Datastores:** {len(datastores)} datastores found")
            summary_parts.append("")

        # Networks summary
        networks = data.get("networks", [])
        if networks:
            summary_parts.append(f"**Networks:** {len(networks)} networks found")
            summary_parts.append("")

        return "\n".join(summary_parts)

    def _post_process_content(self, content: str, metadata: Dict[str, Any]) -> str:
        """
        Post-process generated content

        Args:
            content: Generated content
            metadata: Collection metadata

        Returns:
            Post-processed content
        """
        # Add header
        header = f"""# Infrastructure Documentation

**Generated:** {metadata.get('collected_at', 'N/A')}
**Source:** {metadata.get('collector', 'VMware Collector')}
**Version:** {metadata.get('version', 'N/A')}

---

"""

        # Add footer
        footer = """

---

**Document Information:**
- **Auto-generated:** This document was automatically generated from infrastructure data
- **Accuracy:** All information is based on live infrastructure state at time of collection
- **Updates:** Documentation should be regenerated periodically to reflect current state

**Disclaimer:** This documentation is for internal use only. Verify all critical information before making infrastructure changes.
"""

        return header + content + footer


# Example usage
async def example_usage() -> None:
    """Example of using the infrastructure generator"""

    # Sample VMware data (would come from VMware collector)
    sample_data = {
        "metadata": {
            "collector": "vmware",
            "collected_at": "2025-10-19T23:00:00",
            "version": "1.0.0",
        },
        "data": {
            "statistics": {
                "total_vms": 45,
                "powered_on_vms": 42,
                "total_hosts": 6,
                "total_clusters": 2,
                "total_datastores": 4,
                "total_storage_tb": 50.0,
                "used_storage_tb": 32.5,
            },
            "vms": [
                {
                    "name": "web-server-01",
                    "power_state": "poweredOn",
                    "num_cpu": 4,
                    "memory_mb": 8192,
                    "guest_os": "Ubuntu Linux (64-bit)",
                },
                # More VMs...
            ],
            "hosts": [
                {
                    "name": "esxi-host-01.example.com",
                    "num_cpu": 24,
                    "memory_mb": 131072,
                    "version": "7.0.3",
                }
            ],
            "clusters": [
                {
                    "name": "Production-Cluster",
                    "total_hosts": 3,
                    "drs_enabled": True,
                    "ha_enabled": True,
                }
            ],
        },
    }

    # Generate documentation
    generator = InfrastructureGenerator()
    result = await generator.run(
        data=sample_data, save_to_db=True, save_to_file=True, output_dir="output/docs"
    )

    if result["success"]:
        print("Documentation generated successfully!")
        print(f"Content length: {len(result['content'])} characters")
        if result["file_path"]:
            print(f"Saved to: {result['file_path']}")
    else:
        print(f"Generation failed: {result['error']}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(example_usage())