diff --git a/deploy/docker/docker-compose.dev.yml b/deploy/docker/compose.yml
similarity index 78%
rename from deploy/docker/docker-compose.dev.yml
rename to deploy/docker/compose.yml
index 7807219..2edbc7d 100644
--- a/deploy/docker/docker-compose.dev.yml
+++ b/deploy/docker/compose.yml
@@ -2,8 +2,9 @@ services:
   # MongoDB Database
   mongodb:
     image: docker.io/library/mongo:7-jammy
-    container_name: datacenter-docs-mongodb-dev
+    container_name: datacenter-docs-mongodb
     hostname: mongodb
+    restart: always
     ports:
       - "${MONGODB_PORT}:27017"
     env_file:
@@ -26,8 +27,9 @@ services:
   # Redis Cache & Message Broker
   redis:
     image: docker.io/library/redis:7-alpine
-    container_name: datacenter-docs-redis-dev
+    container_name: datacenter-docs-redis
     hostname: redis
+    restart: always
     ports:
       - "${REDIS_PORT}:6379"
     env_file:
@@ -48,8 +50,9 @@ services:
     build:
       context: ../..
       dockerfile: deploy/docker/Dockerfile.api
-    container_name: datacenter-docs-api-dev
+    container_name: datacenter-docs-api
     hostname: api
+    restart: always
     ports:
       - "${API_PORT}:8000"
     env_file:
@@ -67,15 +70,15 @@ services:
         condition: service_healthy
     networks:
       - datacenter-network
-    restart: unless-stopped
 
   # Chat Service
   chat:
     build:
       context: ../..
       dockerfile: deploy/docker/Dockerfile.chat
-    container_name: datacenter-docs-chat-dev
+    container_name: datacenter-docs-chat
     hostname: chat
+    restart: always
     ports:
       - "${CHAT_PORT}:8001"
     env_file:
@@ -94,15 +97,15 @@ services:
         condition: service_healthy
     networks:
       - datacenter-network
-    restart: unless-stopped
 
   # Celery Worker
   worker:
     build:
       context: ../..
       dockerfile: deploy/docker/Dockerfile.worker
-    container_name: datacenter-docs-worker-dev
+    container_name: datacenter-docs-worker
     hostname: worker
+    restart: always
     env_file:
       - ../../.env
     volumes:
@@ -118,13 +121,19 @@ services:
         condition: service_healthy
     networks:
       - datacenter-network
-    restart: unless-stopped
+    healthcheck:
+      test: ["CMD-SHELL", "celery -A datacenter_docs.workers.celery_app inspect ping -d celery@worker || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
 
   # Flower - Celery Monitoring
   flower:
     image: docker.io/mher/flower:2.0
-    container_name: datacenter-docs-flower-dev
+    container_name: datacenter-docs-flower
     hostname: flower
+    restart: always
     ports:
       - "${FLOWER_PORT}:5555"
     env_file:
@@ -136,15 +145,15 @@ services:
         condition: service_healthy
     networks:
       - datacenter-network
-    restart: unless-stopped
 
   # Frontend
   frontend:
     build:
       context: ../..
       dockerfile: deploy/docker/Dockerfile.frontend
-    container_name: datacenter-docs-frontend-dev
+    container_name: datacenter-docs-frontend
     hostname: frontend
+    restart: always
     ports:
       - "${FRONTEND_PORT}:80"
     env_file:
@@ -154,27 +163,26 @@ services:
       - chat
     networks:
       - datacenter-network
-    restart: unless-stopped
 
 volumes:
   mongodb-data:
-    name: datacenter-docs-mongodb-data-dev
+    name: datacenter-docs-mongodb-data
   mongodb-config:
-    name: datacenter-docs-mongodb-config-dev
+    name: datacenter-docs-mongodb-config
   redis-data:
-    name: datacenter-docs-redis-data-dev
+    name: datacenter-docs-redis-data
   api-logs:
-    name: datacenter-docs-api-logs-dev
+    name: datacenter-docs-api-logs
   chat-logs:
-    name: datacenter-docs-chat-logs-dev
+    name: datacenter-docs-chat-logs
   chat-data:
-    name: datacenter-docs-chat-data-dev
+    name: datacenter-docs-chat-data
   worker-logs:
-    name: datacenter-docs-worker-logs-dev
+    name: datacenter-docs-worker-logs
   worker-output:
-    name: datacenter-docs-worker-output-dev
+    name: datacenter-docs-worker-output
 
 networks:
   datacenter-network:
-    name: datacenter-docs-network-dev
+    name: datacenter-docs-network
     driver: bridge
diff --git a/scripts/test_chat_rag.py b/scripts/test_chat_rag.py
new file mode 100644
index 0000000..e021e20
--- /dev/null
+++ b/scripts/test_chat_rag.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+Test script for RAG system in chat service
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from datacenter_docs.chat.agent import DocumentationAgent
+from datacenter_docs.utils.llm_client import get_llm_client
+
+
+async def test_rag_search():
+    """Test RAG search and retrieval"""
+    print("🧪 Testing RAG system...\n")
+
+    # Initialize agent
+    print("1️⃣ Initializing DocumentationAgent...")
+    agent = DocumentationAgent(vector_store_path="./data/chroma_db")
+    print("✅ Agent initialized\n")
+
+    # Test queries
+    test_queries = [
+        "Come è configurato smarthome-services?",
+        "Quali VM sono in esecuzione?",
+        "Come faccio il backup dei container?",
+        "Configurazione di storage su Proxmox",
+    ]
+
+    for i, query in enumerate(test_queries, 1):
+        print(f"\n{'='*60}")
+        print(f"Query {i}: {query}")
+        print('='*60)
+
+        # Search documentation
+        results = await agent.search_documentation(query, limit=3)
+
+        if results:
+            print(f"\n📚 Found {len(results)} results:\n")
+            for j, result in enumerate(results, 1):
+                print(f" {j}. Section: {result['section']}")
+                print(f" Relevance: {result['relevance_score']:.3f} ({result['relevance_score']*100:.1f}%)")
+                print(f" Source: {Path(result['source']).name}")
+                print(f" Content preview: {result['content'][:100]}...")
+                print()
+        else:
+            print("❌ No results found")
+
+    print("\n" + "="*60)
+    print("✅ RAG test completed successfully!")
+    print("="*60)
+
+
+if __name__ == "__main__":
+    asyncio.run(test_rag_search())
diff --git a/src/datacenter_docs/chat/agent.py b/src/datacenter_docs/chat/agent.py
index b721ee4..e6b0fb6 100644
--- a/src/datacenter_docs/chat/agent.py
+++ b/src/datacenter_docs/chat/agent.py
@@ -177,16 +177,21 @@ class DocumentationAgent:
             # Format results
             formatted_results = []
             for doc, score in results:
+                # ChromaDB returns distance scores (lower is better).
+                # Map to a similarity in (0, 1] via 1/(1+distance); clamp at 0 because
+                # some metrics (e.g. inner product) can yield negative distances.
+                relevance_score = 1.0 / (1.0 + max(score, 0.0))
+
                 formatted_results.append(
                     {
                         "content": doc.page_content,
                         "section": doc.metadata.get("section", "unknown"),
                         "source": doc.metadata.get("source", ""),
-                        "relevance_score": float(1 - score),  # Convert distance to similarity
+                        "relevance_score": float(relevance_score),
                         "last_updated": doc.metadata.get("indexed_at", ""),
                     }
                 )
-                logger.info(f" ✓ Section: {doc.metadata.get('section')} (relevance: {(1-score)*100:.1f}%)")
+                logger.info(f" ✓ Section: {doc.metadata.get('section')} (relevance: {relevance_score*100:.1f}%, distance: {score:.3f})")
 
             return formatted_results
 