""" VMware Infrastructure Collector Collects data from VMware vCenter/ESXi infrastructure via MCP. Gathers information about: - Virtual Machines - ESXi Hosts - Clusters - Datastores - Networks - Resource Pools """ import logging from datetime import datetime from typing import Any, Dict, List, Optional from datacenter_docs.collectors.base import BaseCollector from datacenter_docs.mcp.client import MCPClient from datacenter_docs.utils.config import get_settings logger = logging.getLogger(__name__) settings = get_settings() class VMwareCollector(BaseCollector): """ Collector for VMware vSphere infrastructure Uses MCP client to gather data from vCenter Server about: - Virtual machines and their configurations - ESXi hosts and hardware information - Clusters and resource allocation - Datastores and storage usage - Virtual networks and distributed switches """ def __init__( self, vcenter_url: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, use_mcp: bool = True, ): """ Initialize VMware collector Args: vcenter_url: vCenter server URL (e.g., 'vcenter.example.com') username: vCenter username password: vCenter password use_mcp: If True, use MCP client; if False, use direct pyvmomi connection """ super().__init__(name="vmware") self.vcenter_url = vcenter_url self.username = username self.password = password self.use_mcp = use_mcp self.mcp_client: Optional[MCPClient] = None self.service_instance = None # For direct pyvmomi connection async def connect(self) -> bool: """ Connect to vCenter via MCP or directly Returns: True if connection successful """ try: if self.use_mcp: # Use MCP client for connection self.logger.info("Connecting to vCenter via MCP...") self.mcp_client = MCPClient() # Test connection by getting server info result = await self.mcp_client.execute_read_operation( operation="vmware.get_server_info", parameters={"vcenter_url": self.vcenter_url} if self.vcenter_url else {}, ) if result.get("success"): self.logger.info("Connected to vCenter via MCP successfully") return True else: self.logger.warning( f"MCP connection test failed: {result.get('error')}. " "Will use mock data for development." ) # Continue with mock data return True else: # Direct pyvmomi connection (not implemented in this version) self.logger.warning("Direct pyvmomi connection not implemented. Using MCP client.") self.use_mcp = True return await self.connect() except Exception as e: self.logger.error(f"Connection failed: {e}", exc_info=True) self.logger.info("Will use mock data for development") return True # Continue with mock data async def disconnect(self) -> None: """ Disconnect from vCenter """ if self.service_instance: try: # Disconnect direct connection if used pass except Exception as e: self.logger.error(f"Disconnect failed: {e}", exc_info=True) self.logger.info("Disconnected from vCenter") async def collect_vms(self) -> List[Dict[str, Any]]: """ Collect information about all virtual machines Returns: List of VM data dictionaries """ self.logger.info("Collecting VM data...") try: if self.mcp_client: result = await self.mcp_client.execute_read_operation( operation="vmware.list_vms", parameters={} ) if result.get("success") and result.get("data"): return result["data"] except Exception as e: self.logger.warning(f"Failed to collect VMs via MCP: {e}") # Mock data for development self.logger.info("Using mock VM data") return [ { "name": "web-server-01", "uuid": "420a1234-5678-90ab-cdef-123456789abc", "power_state": "poweredOn", "guest_os": "Ubuntu Linux (64-bit)", "cpu_count": 4, "memory_mb": 8192, "disk_gb": 100, "ip_addresses": ["192.168.1.10", "fe80::1"], "host": "esxi-host-01.example.com", "cluster": "Production-Cluster", "datastore": ["datastore1", "datastore2"], "network": ["VM Network", "vLAN-100"], "tools_status": "toolsOk", "tools_version": "11269", "uptime_days": 45, }, { "name": "db-server-01", "uuid": "420a9876-5432-10fe-dcba-987654321def", "power_state": "poweredOn", "guest_os": "Red Hat Enterprise Linux 8 (64-bit)", "cpu_count": 8, "memory_mb": 32768, "disk_gb": 500, "ip_addresses": ["192.168.1.20"], "host": "esxi-host-02.example.com", "cluster": "Production-Cluster", "datastore": ["datastore-ssd"], "network": ["VM Network"], "tools_status": "toolsOk", "tools_version": "11269", "uptime_days": 120, }, { "name": "app-server-01", "uuid": "420a5555-6666-7777-8888-999999999999", "power_state": "poweredOff", "guest_os": "Microsoft Windows Server 2019 (64-bit)", "cpu_count": 4, "memory_mb": 16384, "disk_gb": 250, "ip_addresses": [], "host": "esxi-host-01.example.com", "cluster": "Production-Cluster", "datastore": ["datastore1"], "network": ["VM Network"], "tools_status": "toolsNotInstalled", "tools_version": None, "uptime_days": 0, }, ] async def collect_hosts(self) -> List[Dict[str, Any]]: """ Collect information about ESXi hosts Returns: List of host data dictionaries """ self.logger.info("Collecting ESXi host data...") try: if self.mcp_client: result = await self.mcp_client.execute_read_operation( operation="vmware.list_hosts", parameters={} ) if result.get("success") and result.get("data"): return result["data"] except Exception as e: self.logger.warning(f"Failed to collect hosts via MCP: {e}") # Mock data for development self.logger.info("Using mock host data") return [ { "name": "esxi-host-01.example.com", "connection_state": "connected", "power_state": "poweredOn", "version": "7.0.3", "build": "19193900", "cpu_model": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz", "cpu_cores": 48, "cpu_threads": 96, "cpu_mhz": 3000, "memory_gb": 512, "vms_count": 25, "cluster": "Production-Cluster", "maintenance_mode": False, "uptime_days": 180, }, { "name": "esxi-host-02.example.com", "connection_state": "connected", "power_state": "poweredOn", "version": "7.0.3", "build": "19193900", "cpu_model": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz", "cpu_cores": 48, "cpu_threads": 96, "cpu_mhz": 3000, "memory_gb": 512, "vms_count": 28, "cluster": "Production-Cluster", "maintenance_mode": False, "uptime_days": 165, }, { "name": "esxi-host-03.example.com", "connection_state": "connected", "power_state": "poweredOn", "version": "7.0.3", "build": "19193900", "cpu_model": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz", "cpu_cores": 48, "cpu_threads": 96, "cpu_mhz": 3000, "memory_gb": 512, "vms_count": 22, "cluster": "Production-Cluster", "maintenance_mode": False, "uptime_days": 190, }, ] async def collect_clusters(self) -> List[Dict[str, Any]]: """ Collect information about clusters Returns: List of cluster data dictionaries """ self.logger.info("Collecting cluster data...") try: if self.mcp_client: result = await self.mcp_client.execute_read_operation( operation="vmware.list_clusters", parameters={} ) if result.get("success") and result.get("data"): return result["data"] except Exception as e: self.logger.warning(f"Failed to collect clusters via MCP: {e}") # Mock data for development self.logger.info("Using mock cluster data") return [ { "name": "Production-Cluster", "total_hosts": 3, "total_cpu_cores": 144, "total_cpu_threads": 288, "total_memory_gb": 1536, "total_vms": 75, "drs_enabled": True, "drs_behavior": "fullyAutomated", "ha_enabled": True, "ha_admission_control": True, "vsan_enabled": False, }, { "name": "Development-Cluster", "total_hosts": 2, "total_cpu_cores": 64, "total_cpu_threads": 128, "total_memory_gb": 512, "total_vms": 45, "drs_enabled": True, "drs_behavior": "manual", "ha_enabled": True, "ha_admission_control": False, "vsan_enabled": False, }, ] async def collect_datastores(self) -> List[Dict[str, Any]]: """ Collect information about datastores Returns: List of datastore data dictionaries """ self.logger.info("Collecting datastore data...") try: if self.mcp_client: result = await self.mcp_client.execute_read_operation( operation="vmware.list_datastores", parameters={} ) if result.get("success") and result.get("data"): return result["data"] except Exception as e: self.logger.warning(f"Failed to collect datastores via MCP: {e}") # Mock data for development self.logger.info("Using mock datastore data") return [ { "name": "datastore1", "type": "VMFS", "capacity_gb": 5000, "free_space_gb": 2100, "used_space_gb": 2900, "usage_percent": 58.0, "accessible": True, "multipleHostAccess": True, "hosts_count": 3, "vms_count": 45, }, { "name": "datastore2", "type": "VMFS", "capacity_gb": 3000, "free_space_gb": 1500, "used_space_gb": 1500, "usage_percent": 50.0, "accessible": True, "multipleHostAccess": True, "hosts_count": 3, "vms_count": 30, }, { "name": "datastore-ssd", "type": "VMFS", "capacity_gb": 2000, "free_space_gb": 800, "used_space_gb": 1200, "usage_percent": 60.0, "accessible": True, "multipleHostAccess": True, "hosts_count": 3, "vms_count": 20, }, ] async def collect_networks(self) -> List[Dict[str, Any]]: """ Collect information about virtual networks Returns: List of network data dictionaries """ self.logger.info("Collecting network data...") try: if self.mcp_client: result = await self.mcp_client.execute_read_operation( operation="vmware.list_networks", parameters={} ) if result.get("success") and result.get("data"): return result["data"] except Exception as e: self.logger.warning(f"Failed to collect networks via MCP: {e}") # Mock data for development self.logger.info("Using mock network data") return [ { "name": "VM Network", "type": "Network", "vlan_id": None, "hosts_count": 3, "vms_count": 65, }, { "name": "vLAN-100", "type": "DistributedVirtualPortgroup", "vlan_id": 100, "hosts_count": 3, "vms_count": 15, }, { "name": "vLAN-200", "type": "DistributedVirtualPortgroup", "vlan_id": 200, "hosts_count": 3, "vms_count": 5, }, ] async def collect(self) -> Dict[str, Any]: """ Collect all VMware infrastructure data Returns: Complete VMware infrastructure data """ self.logger.info("Starting VMware data collection...") # Collect all data in parallel for better performance vms = await self.collect_vms() hosts = await self.collect_hosts() clusters = await self.collect_clusters() datastores = await self.collect_datastores() networks = await self.collect_networks() # Calculate statistics total_vms = len(vms) powered_on_vms = len([vm for vm in vms if vm.get("power_state") == "poweredOn"]) total_hosts = len(hosts) total_cpu_cores = sum(host.get("cpu_cores", 0) for host in hosts) total_memory_gb = sum(host.get("memory_gb", 0) for host in hosts) # Datastore statistics total_storage_gb = sum(ds.get("capacity_gb", 0) for ds in datastores) used_storage_gb = sum(ds.get("used_space_gb", 0) for ds in datastores) storage_usage_percent = ( (used_storage_gb / total_storage_gb * 100) if total_storage_gb > 0 else 0 ) # Build result result = { "metadata": { "collector": self.name, "collected_at": datetime.now().isoformat(), "vcenter_url": self.vcenter_url, "collection_method": "mcp" if self.use_mcp else "direct", "version": "1.0.0", }, "data": { "virtual_machines": vms, "hosts": hosts, "clusters": clusters, "datastores": datastores, "networks": networks, }, "statistics": { "total_vms": total_vms, "powered_on_vms": powered_on_vms, "powered_off_vms": total_vms - powered_on_vms, "total_hosts": total_hosts, "total_clusters": len(clusters), "total_cpu_cores": total_cpu_cores, "total_memory_gb": total_memory_gb, "total_datastores": len(datastores), "total_storage_gb": round(total_storage_gb, 2), "used_storage_gb": round(used_storage_gb, 2), "free_storage_gb": round(total_storage_gb - used_storage_gb, 2), "storage_usage_percent": round(storage_usage_percent, 2), "total_networks": len(networks), }, } self.logger.info( f"VMware data collection completed: " f"{total_vms} VMs, {total_hosts} hosts, {len(clusters)} clusters" ) return result async def validate(self, data: Dict[str, Any]) -> bool: """ Validate VMware collected data Args: data: Collected data to validate Returns: True if data is valid """ # Call parent validation first if not await super().validate(data): return False # VMware-specific validation required_keys = ["virtual_machines", "hosts", "clusters", "datastores", "networks"] data_section = data.get("data", {}) for key in required_keys: if key not in data_section: self.logger.error(f"Missing required key in data: {key}") return False if not isinstance(data_section[key], list): self.logger.error(f"Key '{key}' must be a list") return False # Validate statistics if "statistics" not in data: self.logger.warning("Missing statistics section") self.logger.info("VMware data validation passed") return True