feat: enhance chat service with documentation indexing and improved Docker configuration
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped

This commit is contained in:
2025-10-20 19:15:32 +02:00
parent 6f5deb0879
commit 27dd9e00b6
14 changed files with 784 additions and 94 deletions

90
scripts/index_docs.py Executable file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Script to index documentation into ChromaDB vector store.
This script should be run once to initialize the documentation search capability.
"""
import asyncio
import logging
import sys
from pathlib import Path
# Add src to path: put the "src" directory located one level above this
# script's directory at the front of sys.path. This has to run before the
# datacenter_docs import below (presumably so that import resolves when the
# script is run directly from a checkout), so the statement order matters.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from datacenter_docs.chat.agent import DocumentationAgent
# Configure logging at INFO level; each record is rendered as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger used by main() for all progress and error output.
logger = logging.getLogger(__name__)
async def main() -> None:
    """Index all Markdown documentation into the vector store, then smoke-test search.

    Path resolution:
      * Documentation is read from ``/app/output`` when that path exists,
        otherwise from ``output`` under the directory one level above this
        script's directory.
      * The vector store is placed at ``/app/data/chroma_db`` when
        ``/app/data`` exists, otherwise at ``data/chroma_db`` under that same
        fallback root.

    Behavior:
      * If no ``*.md`` file is found (searched recursively) a warning is
        logged and the function returns without indexing anything.
      * Otherwise a ``DocumentationAgent`` is created without MCP or LLM
        clients, ``index_documentation`` is awaited on the docs directory,
        and a few fixed sample queries are run through
        ``search_documentation`` with their results logged.

    Raises:
        SystemExit: with status 1 when agent construction, indexing, or the
            sample searches raise any ``Exception`` (the traceback is logged
            first).
    """
    project_root = Path(__file__).parent.parent

    # Prefer the container layout; fall back to the checkout-relative path.
    docs_path = Path("/app/output")  # In Docker container
    if not docs_path.exists():
        docs_path = project_root / "output"

    # Only the parent directory is probed here -- presumably because the
    # store directory itself may not exist before the first indexing run.
    vector_store_path = Path("/app/data/chroma_db")  # In Docker container
    if not vector_store_path.parent.exists():
        vector_store_path = project_root / "data" / "chroma_db"

    logger.info(f"Indexing documentation from: {docs_path}")
    logger.info(f"Vector store location: {vector_store_path}")

    # Bail out early when there is nothing to index. The log message states
    # what actually happens: no sample documentation is generated here.
    md_files = list(docs_path.glob("**/*.md"))
    if not md_files:
        logger.warning(f"No markdown files found in {docs_path}")
        logger.info("Nothing to index; exiting.")
        return

    logger.info(f"Found {len(md_files)} markdown files to index")

    try:
        # No MCP or LLM client is passed: this script only indexes and searches.
        logger.info("Initializing Documentation Agent...")
        agent = DocumentationAgent(
            mcp_client=None,
            llm_client=None,
            vector_store_path=str(vector_store_path),
        )

        logger.info("Starting indexing process...")
        await agent.index_documentation(docs_path)

        logger.info("✓ Documentation indexed successfully!")
        logger.info(f"Vector store saved to: {vector_store_path}")

        # Smoke test: run a few fixed queries against the freshly built index.
        logger.info("\nTesting search functionality...")
        test_queries = [
            "How to troubleshoot VLAN connectivity?",
            "What are the backup schedules?",
            "How to check UPS status?",
        ]

        for query in test_queries:
            results = await agent.search_documentation(query, limit=2)
            logger.info(f"\nQuery: {query}")
            logger.info(f"Found {len(results)} results:")
            # Each result is indexed by the 'section' and 'relevance_score' keys.
            for i, result in enumerate(results, 1):
                logger.info(f" {i}. {result['section']} (score: {result['relevance_score']:.2f})")

        logger.info("\n✓ Indexing and testing complete!")

    except Exception as e:
        # Top-level boundary of the script: log the traceback and signal
        # failure to the caller through a non-zero exit status.
        logger.exception(f"Failed to index documentation: {e}")
        sys.exit(1)


if __name__ == "__main__":
    # Run the async entry point to completion when executed as a script.
    asyncio.run(main())