""" Base Collector Class Defines the interface for all infrastructure data collectors. """ import logging from abc import ABC, abstractmethod from datetime import datetime from typing import Any, Dict, Optional from motor.motor_asyncio import AsyncIOMotorClient from datacenter_docs.utils.config import get_settings logger = logging.getLogger(__name__) settings = get_settings() class BaseCollector(ABC): """ Abstract base class for all data collectors Collectors are responsible for gathering data from infrastructure components (VMware, Kubernetes, network devices, etc.) via MCP or direct connections. """ def __init__(self, name: str): """ Initialize collector Args: name: Collector name (e.g., 'vmware', 'kubernetes') """ self.name = name self.logger = logging.getLogger(f"{__name__}.{name}") self.collected_at: Optional[datetime] = None self.data: Dict[str, Any] = {} @abstractmethod async def connect(self) -> bool: """ Establish connection to the infrastructure component Returns: True if connection successful, False otherwise """ pass @abstractmethod async def disconnect(self) -> None: """ Close connection to the infrastructure component """ pass @abstractmethod async def collect(self) -> Dict[str, Any]: """ Collect all data from the infrastructure component Returns: Dict containing collected data with structure: { 'metadata': { 'collector': str, 'collected_at': datetime, 'version': str, ... }, 'data': { # Component-specific data } } """ pass async def validate(self, data: Dict[str, Any]) -> bool: """ Validate collected data Args: data: Collected data to validate Returns: True if data is valid, False otherwise """ # Basic validation if not isinstance(data, dict): self.logger.error("Data must be a dictionary") return False if "metadata" not in data: self.logger.warning("Data missing 'metadata' field") return False if "data" not in data: self.logger.warning("Data missing 'data' field") return False return True async def store(self, data: Dict[str, Any]) -> bool: """ Store collected data This method can be overridden to implement custom storage logic. By default, it stores data in MongoDB. Args: data: Data to store Returns: True if storage successful, False otherwise """ from beanie import init_beanie from datacenter_docs.api.models import ( AuditLog, AutoRemediationPolicy, ChatSession, DocumentationSection, RemediationApproval, RemediationLog, SystemMetric, Ticket, TicketFeedback, TicketPattern, ) try: # Connect to MongoDB client: AsyncIOMotorClient = AsyncIOMotorClient(settings.MONGODB_URL) database = client[settings.MONGODB_DATABASE] # Initialize Beanie await init_beanie( database=database, document_models=[ Ticket, TicketFeedback, RemediationLog, RemediationApproval, AutoRemediationPolicy, TicketPattern, DocumentationSection, ChatSession, SystemMetric, AuditLog, ], ) # Store as audit log for now # TODO: Create dedicated collection for infrastructure data audit = AuditLog( action="data_collection", actor="system", resource_type=self.name, resource_id=f"{self.name}_data", details=data, success=True, ) await audit.insert() self.logger.info(f"Data stored successfully for collector: {self.name}") return True except Exception as e: self.logger.error(f"Failed to store data: {e}", exc_info=True) return False async def run(self) -> Dict[str, Any]: """ Execute the full collection workflow Returns: Collected data """ result = { "success": False, "collector": self.name, "error": None, "data": None, } try: # Connect self.logger.info(f"Connecting to {self.name}...") connected = await self.connect() if not connected: result["error"] = "Connection failed" return result # Collect self.logger.info(f"Collecting data from {self.name}...") data = await self.collect() self.collected_at = datetime.now() # Validate self.logger.info(f"Validating data from {self.name}...") valid = await self.validate(data) if not valid: result["error"] = "Data validation failed" return result # Store self.logger.info(f"Storing data from {self.name}...") stored = await self.store(data) if not stored: result["error"] = "Data storage failed" # Continue even if storage fails # Success result["success"] = True result["data"] = data self.logger.info(f"Collection completed successfully for {self.name}") except Exception as e: self.logger.error(f"Collection failed for {self.name}: {e}", exc_info=True) result["error"] = str(e) finally: # Disconnect try: await self.disconnect() except Exception as e: self.logger.error(f"Disconnect failed: {e}", exc_info=True) return result def get_summary(self) -> Dict[str, Any]: """ Get summary of collected data Returns: Summary dict """ return { "collector": self.name, "collected_at": self.collected_at.isoformat() if self.collected_at else None, "data_size": len(str(self.data)), }