feat: enhance chat service with documentation indexing and improved Docker configuration
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped

This commit is contained in:
2025-10-20 19:15:32 +02:00
parent 6f5deb0879
commit 27dd9e00b6
14 changed files with 784 additions and 94 deletions

90
scripts/index_docs.py Executable file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Script to index documentation into ChromaDB vector store.
This script should be run once to initialize the documentation search capability.
"""
import asyncio
import logging
import sys
from pathlib import Path
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from datacenter_docs.chat.agent import DocumentationAgent
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def main() -> None:
"""Index all documentation files into vector store."""
# Paths
docs_path = Path("/app/output") # In Docker container
if not docs_path.exists():
# Fallback to local path
docs_path = Path(__file__).parent.parent / "output"
vector_store_path = Path("/app/data/chroma_db") # In Docker container
if not vector_store_path.parent.exists():
# Fallback to local path
vector_store_path = Path(__file__).parent.parent / "data" / "chroma_db"
logger.info(f"Indexing documentation from: {docs_path}")
logger.info(f"Vector store location: {vector_store_path}")
# Check if docs exist
md_files = list(docs_path.glob("**/*.md"))
if not md_files:
logger.warning(f"No markdown files found in {docs_path}")
logger.info("Creating sample documentation...")
# Could optionally create sample docs here
return
logger.info(f"Found {len(md_files)} markdown files to index")
try:
# Initialize agent (without MCP client for indexing only)
logger.info("Initializing Documentation Agent...")
agent = DocumentationAgent(
mcp_client=None,
llm_client=None,
vector_store_path=str(vector_store_path)
)
# Index documentation
logger.info("Starting indexing process...")
await agent.index_documentation(docs_path)
logger.info("✓ Documentation indexed successfully!")
logger.info(f"Vector store saved to: {vector_store_path}")
# Test search
logger.info("\nTesting search functionality...")
test_queries = [
"How to troubleshoot VLAN connectivity?",
"What are the backup schedules?",
"How to check UPS status?"
]
for query in test_queries:
results = await agent.search_documentation(query, limit=2)
logger.info(f"\nQuery: {query}")
logger.info(f"Found {len(results)} results:")
for i, result in enumerate(results, 1):
logger.info(f" {i}. {result['section']} (score: {result['relevance_score']:.2f})")
logger.info("\n✓ Indexing and testing complete!")
except Exception as e:
logger.error(f"Failed to index documentation: {e}", exc_info=True)
sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())

90
scripts/start_chat.py Executable file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Startup script for chat service with documentation indexing.
Runs indexing if needed, then starts the chat server.
"""
import asyncio
import logging
import os
import subprocess
import sys
from pathlib import Path
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def main() -> None:
"""Main startup routine"""
logger.info("=" * 50)
logger.info("Datacenter Documentation Chat Service")
logger.info("Starting initialization...")
logger.info("=" * 50)
# Check if vector store needs initialization
vector_store_path = Path("/app/data/chroma_db")
index_marker = vector_store_path / ".indexed"
if not index_marker.exists():
logger.info("")
logger.info("=" * 50)
logger.info("First Time Setup")
logger.info("=" * 50)
logger.info("Indexing documentation into vector store...")
logger.info("This may take a few minutes...")
logger.info("")
# Run indexing script
try:
result = subprocess.run(
[sys.executable, "/app/scripts/index_docs.py"],
check=True,
capture_output=True,
text=True
)
logger.info(result.stdout)
# Create marker file
vector_store_path.mkdir(parents=True, exist_ok=True)
index_marker.touch()
logger.info("")
logger.info("✓ Documentation indexed successfully!")
except subprocess.CalledProcessError as e:
logger.error("")
logger.error(f"⚠ Warning: Documentation indexing failed: {e}")
logger.error(e.stdout)
logger.error(e.stderr)
logger.error(" The chat service will still start but won't have access to indexed documentation.")
else:
logger.info(f"✓ Vector store already initialized (marker: {index_marker})")
logger.info(" To re-index, delete the volume: docker volume rm datacenter-docs-chat-data-dev")
logger.info("")
logger.info("=" * 50)
logger.info("Starting Chat Server")
logger.info("=" * 50)
logger.info("Listening on port 8001...")
logger.info("")
# Start the chat server by importing and running it
# This keeps everything in the same process
os.chdir("/app")
sys.path.insert(0, "/app/src")
from datacenter_docs.chat import main as chat_main
# Run the chat server
import uvicorn
from datacenter_docs.chat.main import socket_app
uvicorn.run(socket_app, host="0.0.0.0", port=8001)
if __name__ == "__main__":
asyncio.run(main())

45
scripts/start_chat.sh Executable file
View File

@@ -0,0 +1,45 @@
#!/bin/bash
# Startup script for chat service with documentation indexing
set -e
echo "=== Datacenter Documentation Chat Service ==="
echo "Starting initialization..."
# Check if vector store needs initialization
VECTOR_STORE_PATH="/app/data/chroma_db"
INDEX_MARKER="$VECTOR_STORE_PATH/.indexed"
if [ ! -f "$INDEX_MARKER" ]; then
echo ""
echo "=== First Time Setup ==="
echo "Indexing documentation into vector store..."
echo "This may take a few minutes..."
echo ""
# Run indexing script
python /app/scripts/index_docs.py
# Create marker file to prevent re-indexing
if [ $? -eq 0 ]; then
mkdir -p "$VECTOR_STORE_PATH"
touch "$INDEX_MARKER"
echo ""
echo "✓ Documentation indexed successfully!"
else
echo ""
echo "⚠ Warning: Documentation indexing failed. Chat will work with limited functionality."
echo " The chat service will still start but won't have access to indexed documentation."
fi
else
echo "✓ Vector store already initialized (found marker: $INDEX_MARKER)"
echo " To re-index, delete the volume: docker volume rm datacenter-docs-chat-data-dev"
fi
echo ""
echo "=== Starting Chat Server ==="
echo "Listening on port 8001..."
echo ""
# Start the chat server
exec python -m datacenter_docs.chat.main