#!/usr/bin/env python3
"""
Script to index documentation into ChromaDB vector store.

This script should be run once to initialize the documentation search
capability.
"""

import asyncio
import logging
import sys
from pathlib import Path

# Directory two levels above this file; it holds src/, output/ and data/
# when the script is run outside the Docker container.
PROJECT_ROOT = Path(__file__).parent.parent

# Add src to path so the project package can be imported from a checkout.
sys.path.insert(0, str(PROJECT_ROOT / "src"))

from datacenter_docs.chat.agent import DocumentationAgent  # noqa: E402

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


async def main() -> None:
    """Index all documentation files into the vector store.

    Markdown files are looked up recursively under ``/app/output`` (the
    Docker location) or, when that directory does not exist, under
    ``<project root>/output``. The vector store is placed at
    ``/app/data/chroma_db`` when ``/app/data`` exists, otherwise at
    ``<project root>/data/chroma_db``.

    After indexing, a few sample queries are run through the agent and
    their top results are logged as a smoke test.

    Returns early (without touching the vector store) when no markdown
    files are found. Any exception raised while initializing the agent,
    indexing, or running the sample queries is logged with its traceback
    and the process exits with status 1.
    """
    # Paths
    docs_path = Path("/app/output")  # In Docker container
    if not docs_path.exists():
        # Fallback to local path
        docs_path = PROJECT_ROOT / "output"

    vector_store_path = Path("/app/data/chroma_db")  # In Docker container
    # Probe the parent directory: the chroma_db directory itself may not
    # exist yet on a first run.
    if not vector_store_path.parent.exists():
        # Fallback to local path
        vector_store_path = PROJECT_ROOT / "data" / "chroma_db"

    logger.info(f"Indexing documentation from: {docs_path}")
    logger.info(f"Vector store location: {vector_store_path}")

    # Check if docs exist
    md_files = list(docs_path.glob("**/*.md"))
    if not md_files:
        logger.warning(f"No markdown files found in {docs_path}")
        # Nothing is generated here, so say so instead of claiming that
        # sample documentation is being created.
        logger.info("Nothing to index; exiting.")
        return

    logger.info(f"Found {len(md_files)} markdown files to index")

    try:
        # Initialize agent; no MCP or LLM client is passed because only
        # indexing and search are exercised by this script.
        logger.info("Initializing Documentation Agent...")
        agent = DocumentationAgent(
            mcp_client=None,
            llm_client=None,
            vector_store_path=str(vector_store_path),
        )

        # Index documentation
        logger.info("Starting indexing process...")
        await agent.index_documentation(docs_path)

        logger.info("✓ Documentation indexed successfully!")
        logger.info(f"Vector store saved to: {vector_store_path}")

        # Test search
        logger.info("\nTesting search functionality...")
        test_queries = [
            "How to troubleshoot VLAN connectivity?",
            "What are the backup schedules?",
            "How to check UPS status?",
        ]

        for query in test_queries:
            results = await agent.search_documentation(query, limit=2)
            logger.info(f"\nQuery: {query}")
            logger.info(f"Found {len(results)} results:")
            for i, result in enumerate(results, 1):
                logger.info(
                    f" {i}. {result['section']} "
                    f"(score: {result['relevance_score']:.2f})"
                )

        logger.info("\n✓ Indexing and testing complete!")

    except Exception as e:
        # Top-level boundary of the script: log with traceback and signal
        # failure to the caller through the exit status.
        logger.exception(f"Failed to index documentation: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())