Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
91 lines
2.9 KiB
Python
Executable File
91 lines
2.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Script to index documentation into ChromaDB vector store.
|
|
This script should be run once to initialize the documentation search capability.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add src to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
|
|
from datacenter_docs.chat.agent import DocumentationAgent
|
|
|
|
# Configure root logging for this script: INFO and above, with each record
# rendered as "timestamp - logger name - level - message".
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def main() -> None:
    """Index all documentation files into the vector store, then smoke-test search.

    Steps:
      1. Resolve the docs directory and the vector-store path, preferring the
         Docker container locations under ``/app`` and falling back to paths
         under the directory above this script's directory.
      2. Return early (before any agent is created) when no ``*.md`` files
         are found under the docs directory.
      3. Build a ``DocumentationAgent`` and await ``index_documentation``.
      4. Run a few sample queries through ``search_documentation`` and log
         the section and relevance score of each hit.

    Raises:
        SystemExit: with status 1 when indexing or the search smoke test
            raises an exception.
    """
    project_root = Path(__file__).parent.parent

    # Paths: prefer the Docker container layout, fall back to the local one.
    docs_path = Path("/app/output")  # In Docker container
    if not docs_path.exists():
        docs_path = project_root / "output"

    # Only the parent directory is probed here, not the store path itself
    # (presumably because the store may not have been created yet — confirm).
    vector_store_path = Path("/app/data/chroma_db")  # In Docker container
    if not vector_store_path.parent.exists():
        vector_store_path = project_root / "data" / "chroma_db"

    logger.info(f"Indexing documentation from: {docs_path}")
    logger.info(f"Vector store location: {vector_store_path}")

    # Check if docs exist; only the count is needed afterwards.
    md_files = list(docs_path.glob("**/*.md"))
    if not md_files:
        logger.warning(f"No markdown files found in {docs_path}")
        # Previously this logged "Creating sample documentation..." although
        # nothing is created; report what actually happens instead.
        logger.info("Nothing to index; exiting.")
        return

    logger.info(f"Found {len(md_files)} markdown files to index")

    try:
        # Initialize agent (without MCP/LLM clients, for indexing only)
        logger.info("Initializing Documentation Agent...")
        agent = DocumentationAgent(
            mcp_client=None,
            llm_client=None,
            vector_store_path=str(vector_store_path),
        )

        # Index documentation
        logger.info("Starting indexing process...")
        await agent.index_documentation(docs_path)
    except Exception as e:
        logger.error(f"Failed to index documentation: {e}", exc_info=True)
        sys.exit(1)

    logger.info("✓ Documentation indexed successfully!")
    logger.info(f"Vector store saved to: {vector_store_path}")

    # Test search. Kept in its own try block so that a search failure is not
    # misreported as an indexing failure after indexing already succeeded.
    logger.info("\nTesting search functionality...")
    test_queries = [
        "How to troubleshoot VLAN connectivity?",
        "What are the backup schedules?",
        "How to check UPS status?",
    ]

    try:
        for query in test_queries:
            results = await agent.search_documentation(query, limit=2)
            logger.info(f"\nQuery: {query}")
            logger.info(f"Found {len(results)} results:")
            for i, result in enumerate(results, 1):
                logger.info(f"  {i}. {result['section']} (score: {result['relevance_score']:.2f})")
    except Exception as e:
        logger.error(f"Search test failed after indexing: {e}", exc_info=True)
        sys.exit(1)

    logger.info("\n✓ Indexing and testing complete!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script (not imported): create the main() coroutine and
    # let asyncio drive it to completion on a fresh event loop.
    indexing_job = main()
    asyncio.run(indexing_job)
|