llm-automation-docs-and-rem…/src/datacenter_docs/collectors/base.py

"""
Base Collector Class

Defines the interface for all infrastructure data collectors.
"""

import logging
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Dict, Optional

from motor.motor_asyncio import AsyncIOMotorClient

from datacenter_docs.utils.config import get_settings

logger = logging.getLogger(__name__)
settings = get_settings()


class BaseCollector(ABC):
    """
    Abstract base class for all data collectors

    Collectors are responsible for gathering data from infrastructure
    components (VMware, Kubernetes, network devices, etc.) via MCP or
    direct connections.
    """

    def __init__(self, name: str):
        """
        Initialize collector

        Args:
            name: Collector name (e.g., 'vmware', 'kubernetes')
        """
        self.name = name
        self.logger = logging.getLogger(f"{__name__}.{name}")
        self.collected_at: Optional[datetime] = None
        self.data: Dict[str, Any] = {}

    @abstractmethod
    async def connect(self) -> bool:
        """
        Establish connection to the infrastructure component

        Returns:
            True if connection successful, False otherwise
        """
        pass

    @abstractmethod
    async def disconnect(self) -> None:
        """
        Close connection to the infrastructure component
        """
        pass

    @abstractmethod
    async def collect(self) -> Dict[str, Any]:
        """
        Collect all data from the infrastructure component

        Returns:
            Dict containing collected data with structure:
            {
                'metadata': {
                    'collector': str,
                    'collected_at': datetime,
                    'version': str,
                    ...
                },
                'data': {
                    # Component-specific data
                }
            }
        """
        pass

    async def validate(self, data: Dict[str, Any]) -> bool:
        """
        Validate collected data

        Args:
            data: Collected data to validate

        Returns:
            True if data is valid, False otherwise
        """
        # Basic validation
        if not isinstance(data, dict):
            self.logger.error("Data must be a dictionary")
            return False

        if "metadata" not in data:
            self.logger.warning("Data missing 'metadata' field")
            return False

        if "data" not in data:
            self.logger.warning("Data missing 'data' field")
            return False

        return True

    async def store(self, data: Dict[str, Any]) -> bool:
        """
        Store collected data

        This method can be overridden to implement custom storage logic.
        By default, it stores data in MongoDB.

        Args:
            data: Data to store

        Returns:
            True if storage successful, False otherwise
        """
        from beanie import init_beanie

        from datacenter_docs.api.models import (
            AuditLog,
            AutoRemediationPolicy,
            ChatSession,
            DocumentationSection,
            RemediationApproval,
            RemediationLog,
            SystemMetric,
            Ticket,
            TicketFeedback,
            TicketPattern,
        )

        try:
            # Connect to MongoDB
            client: AsyncIOMotorClient = AsyncIOMotorClient(settings.MONGODB_URL)
            database = client[settings.MONGODB_DATABASE]

            # Initialize Beanie
            await init_beanie(
                database=database,
                document_models=[
                    Ticket,
                    TicketFeedback,
                    RemediationLog,
                    RemediationApproval,
                    AutoRemediationPolicy,
                    TicketPattern,
                    DocumentationSection,
                    ChatSession,
                    SystemMetric,
                    AuditLog,
                ],
            )

            # Store as audit log for now
            # TODO: Create dedicated collection for infrastructure data
            audit = AuditLog(
                action="data_collection",
                actor="system",
                resource_type=self.name,
                resource_id=f"{self.name}_data",
                details=data,
                success=True,
            )
            await audit.insert()

            self.logger.info(f"Data stored successfully for collector: {self.name}")
            return True

        except Exception as e:
            self.logger.error(f"Failed to store data: {e}", exc_info=True)
            return False

    async def run(self) -> Dict[str, Any]:
        """
        Execute the full collection workflow

        Returns:
            Collected data
        """
        result = {
            "success": False,
            "collector": self.name,
            "error": None,
            "data": None,
        }

        try:
            # Connect
            self.logger.info(f"Connecting to {self.name}...")
            connected = await self.connect()

            if not connected:
                result["error"] = "Connection failed"
                return result

            # Collect
            self.logger.info(f"Collecting data from {self.name}...")
            data = await self.collect()
            self.collected_at = datetime.now()

            # Validate
            self.logger.info(f"Validating data from {self.name}...")
            valid = await self.validate(data)

            if not valid:
                result["error"] = "Data validation failed"
                return result

            # Store
            self.logger.info(f"Storing data from {self.name}...")
            stored = await self.store(data)

            if not stored:
                result["error"] = "Data storage failed"
                # Continue even if storage fails

            # Success
            result["success"] = True
            result["data"] = data

            self.logger.info(f"Collection completed successfully for {self.name}")

        except Exception as e:
            self.logger.error(f"Collection failed for {self.name}: {e}", exc_info=True)
            result["error"] = str(e)

        finally:
            # Disconnect
            try:
                await self.disconnect()
            except Exception as e:
                self.logger.error(f"Disconnect failed: {e}", exc_info=True)

        return result

    def get_summary(self) -> Dict[str, Any]:
        """
        Get summary of collected data

        Returns:
            Summary dict
        """
        return {
            "collector": self.name,
            "collected_at": self.collected_at.isoformat() if self.collected_at else None,
            "data_size": len(str(self.data)),
        }