feat: enhance chat service with documentation indexing and improved Docker configuration
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
This commit is contained in:
90
scripts/index_docs.py
Executable file
90
scripts/index_docs.py
Executable file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
"""
Script to index documentation into ChromaDB vector store.

This script should be run once to initialize the documentation search capability.
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from datacenter_docs.chat.agent import DocumentationAgent
|
||||
|
||||
# Configure root logging for this script: INFO and above, with a timestamp,
# logger name and level prefixed to every message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def main() -> None:
    """Index all documentation files into vector store.

    Steps:
      1. Resolve the documentation directory and the vector store location,
         preferring the container paths under ``/app`` and falling back to
         paths relative to the repository root.
      2. Collect every ``*.md`` file below the documentation directory.
      3. Build a ``DocumentationAgent`` and index the documentation.
      4. Run a few sample queries and log the results as a smoke test.

    Returns early, without indexing anything, when no Markdown files are
    found. Calls ``sys.exit(1)`` if indexing or the sample searches raise.
    """
    # Repository root: this script lives one directory below it.
    project_root = Path(__file__).parent.parent

    docs_path = Path("/app/output")  # In Docker container
    if not docs_path.exists():
        # Fallback to local path
        docs_path = project_root / "output"

    vector_store_path = Path("/app/data/chroma_db")  # In Docker container
    # Only the parent directory is checked: the store directory itself may
    # not exist yet on a first run.
    if not vector_store_path.parent.exists():
        # Fallback to local path
        vector_store_path = project_root / "data" / "chroma_db"

    logger.info(f"Indexing documentation from: {docs_path}")
    logger.info(f"Vector store location: {vector_store_path}")

    # Check if docs exist
    md_files = list(docs_path.glob("**/*.md"))
    if not md_files:
        logger.warning(f"No markdown files found in {docs_path}")
        # Nothing is generated here, so say so instead of claiming that
        # sample documentation is being created.
        logger.info("Nothing to index; generate the documentation and re-run this script.")
        return

    logger.info(f"Found {len(md_files)} markdown files to index")

    try:
        # Initialize agent (without MCP client for indexing only)
        logger.info("Initializing Documentation Agent...")
        agent = DocumentationAgent(
            mcp_client=None,
            llm_client=None,
            vector_store_path=str(vector_store_path),
        )

        # Index documentation
        logger.info("Starting indexing process...")
        await agent.index_documentation(docs_path)

        logger.info("✓ Documentation indexed successfully!")
        logger.info(f"Vector store saved to: {vector_store_path}")

        # Test search
        logger.info("\nTesting search functionality...")
        test_queries = [
            "How to troubleshoot VLAN connectivity?",
            "What are the backup schedules?",
            "How to check UPS status?",
        ]

        for query in test_queries:
            results = await agent.search_documentation(query, limit=2)
            logger.info(f"\nQuery: {query}")
            logger.info(f"Found {len(results)} results:")
            for i, result in enumerate(results, 1):
                logger.info(f"  {i}. {result['section']} (score: {result['relevance_score']:.2f})")

        logger.info("\n✓ Indexing and testing complete!")

    except Exception as e:
        # Top-level boundary of the script: log with traceback and report
        # failure to the caller through the exit status.
        logger.exception(f"Failed to index documentation: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, not on import.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user