feat: enhance chat service with documentation indexing and improved Docker configuration
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Failing after 7m41s
CI/CD Pipeline / Lint Code (push) Failing after 7m44s
CI/CD Pipeline / Run Tests (push) Has been skipped
CI/CD Pipeline / Security Scanning (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (api) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Has been skipped
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
This commit is contained in:
90
scripts/index_docs.py
Executable file
90
scripts/index_docs.py
Executable file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to index documentation into ChromaDB vector store.
|
||||
This script should be run once to initialize the documentation search capability.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from datacenter_docs.chat.agent import DocumentationAgent
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Index all documentation files into vector store."""
|
||||
|
||||
# Paths
|
||||
docs_path = Path("/app/output") # In Docker container
|
||||
if not docs_path.exists():
|
||||
# Fallback to local path
|
||||
docs_path = Path(__file__).parent.parent / "output"
|
||||
|
||||
vector_store_path = Path("/app/data/chroma_db") # In Docker container
|
||||
if not vector_store_path.parent.exists():
|
||||
# Fallback to local path
|
||||
vector_store_path = Path(__file__).parent.parent / "data" / "chroma_db"
|
||||
|
||||
logger.info(f"Indexing documentation from: {docs_path}")
|
||||
logger.info(f"Vector store location: {vector_store_path}")
|
||||
|
||||
# Check if docs exist
|
||||
md_files = list(docs_path.glob("**/*.md"))
|
||||
if not md_files:
|
||||
logger.warning(f"No markdown files found in {docs_path}")
|
||||
logger.info("Creating sample documentation...")
|
||||
# Could optionally create sample docs here
|
||||
return
|
||||
|
||||
logger.info(f"Found {len(md_files)} markdown files to index")
|
||||
|
||||
try:
|
||||
# Initialize agent (without MCP client for indexing only)
|
||||
logger.info("Initializing Documentation Agent...")
|
||||
agent = DocumentationAgent(
|
||||
mcp_client=None,
|
||||
llm_client=None,
|
||||
vector_store_path=str(vector_store_path)
|
||||
)
|
||||
|
||||
# Index documentation
|
||||
logger.info("Starting indexing process...")
|
||||
await agent.index_documentation(docs_path)
|
||||
|
||||
logger.info("✓ Documentation indexed successfully!")
|
||||
logger.info(f"Vector store saved to: {vector_store_path}")
|
||||
|
||||
# Test search
|
||||
logger.info("\nTesting search functionality...")
|
||||
test_queries = [
|
||||
"How to troubleshoot VLAN connectivity?",
|
||||
"What are the backup schedules?",
|
||||
"How to check UPS status?"
|
||||
]
|
||||
|
||||
for query in test_queries:
|
||||
results = await agent.search_documentation(query, limit=2)
|
||||
logger.info(f"\nQuery: {query}")
|
||||
logger.info(f"Found {len(results)} results:")
|
||||
for i, result in enumerate(results, 1):
|
||||
logger.info(f" {i}. {result['section']} (score: {result['relevance_score']:.2f})")
|
||||
|
||||
logger.info("\n✓ Indexing and testing complete!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to index documentation: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
90
scripts/start_chat.py
Executable file
90
scripts/start_chat.py
Executable file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Startup script for chat service with documentation indexing.
|
||||
Runs indexing if needed, then starts the chat server.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Main startup routine"""
|
||||
|
||||
logger.info("=" * 50)
|
||||
logger.info("Datacenter Documentation Chat Service")
|
||||
logger.info("Starting initialization...")
|
||||
logger.info("=" * 50)
|
||||
|
||||
# Check if vector store needs initialization
|
||||
vector_store_path = Path("/app/data/chroma_db")
|
||||
index_marker = vector_store_path / ".indexed"
|
||||
|
||||
if not index_marker.exists():
|
||||
logger.info("")
|
||||
logger.info("=" * 50)
|
||||
logger.info("First Time Setup")
|
||||
logger.info("=" * 50)
|
||||
logger.info("Indexing documentation into vector store...")
|
||||
logger.info("This may take a few minutes...")
|
||||
logger.info("")
|
||||
|
||||
# Run indexing script
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, "/app/scripts/index_docs.py"],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
logger.info(result.stdout)
|
||||
|
||||
# Create marker file
|
||||
vector_store_path.mkdir(parents=True, exist_ok=True)
|
||||
index_marker.touch()
|
||||
logger.info("")
|
||||
logger.info("✓ Documentation indexed successfully!")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error("")
|
||||
logger.error(f"⚠ Warning: Documentation indexing failed: {e}")
|
||||
logger.error(e.stdout)
|
||||
logger.error(e.stderr)
|
||||
logger.error(" The chat service will still start but won't have access to indexed documentation.")
|
||||
else:
|
||||
logger.info(f"✓ Vector store already initialized (marker: {index_marker})")
|
||||
logger.info(" To re-index, delete the volume: docker volume rm datacenter-docs-chat-data-dev")
|
||||
|
||||
logger.info("")
|
||||
logger.info("=" * 50)
|
||||
logger.info("Starting Chat Server")
|
||||
logger.info("=" * 50)
|
||||
logger.info("Listening on port 8001...")
|
||||
logger.info("")
|
||||
|
||||
# Start the chat server by importing and running it
|
||||
# This keeps everything in the same process
|
||||
os.chdir("/app")
|
||||
sys.path.insert(0, "/app/src")
|
||||
|
||||
from datacenter_docs.chat import main as chat_main
|
||||
|
||||
# Run the chat server
|
||||
import uvicorn
|
||||
from datacenter_docs.chat.main import socket_app
|
||||
|
||||
uvicorn.run(socket_app, host="0.0.0.0", port=8001)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
45
scripts/start_chat.sh
Executable file
45
scripts/start_chat.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
|
||||
# Startup script for chat service with documentation indexing
|
||||
|
||||
set -e
|
||||
|
||||
echo "=== Datacenter Documentation Chat Service ==="
|
||||
echo "Starting initialization..."
|
||||
|
||||
# Check if vector store needs initialization
|
||||
VECTOR_STORE_PATH="/app/data/chroma_db"
|
||||
INDEX_MARKER="$VECTOR_STORE_PATH/.indexed"
|
||||
|
||||
if [ ! -f "$INDEX_MARKER" ]; then
|
||||
echo ""
|
||||
echo "=== First Time Setup ==="
|
||||
echo "Indexing documentation into vector store..."
|
||||
echo "This may take a few minutes..."
|
||||
echo ""
|
||||
|
||||
# Run indexing script
|
||||
python /app/scripts/index_docs.py
|
||||
|
||||
# Create marker file to prevent re-indexing
|
||||
if [ $? -eq 0 ]; then
|
||||
mkdir -p "$VECTOR_STORE_PATH"
|
||||
touch "$INDEX_MARKER"
|
||||
echo ""
|
||||
echo "✓ Documentation indexed successfully!"
|
||||
else
|
||||
echo ""
|
||||
echo "⚠ Warning: Documentation indexing failed. Chat will work with limited functionality."
|
||||
echo " The chat service will still start but won't have access to indexed documentation."
|
||||
fi
|
||||
else
|
||||
echo "✓ Vector store already initialized (found marker: $INDEX_MARKER)"
|
||||
echo " To re-index, delete the volume: docker volume rm datacenter-docs-chat-data-dev"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting Chat Server ==="
|
||||
echo "Listening on port 8001..."
|
||||
echo ""
|
||||
|
||||
# Start the chat server
|
||||
exec python -m datacenter_docs.chat.main
|
||||
Reference in New Issue
Block a user