chore: remove deprecated Docker Compose and entrypoint scripts; add Bandit for security checks
Some checks failed
CI/CD Pipeline / Generate Documentation (push) Successful in 10m58s
CI/CD Pipeline / Lint Code (push) Successful in 11m10s
CI/CD Pipeline / Security Scanning (push) Successful in 11m57s
CI/CD Pipeline / Run Tests (push) Successful in 12m2s
CI/CD Pipeline / Build and Push Docker Images (frontend) (push) Failing after 3m20s
CI/CD Pipeline / Build and Push Docker Images (chat) (push) Failing after 3m24s
CI/CD Pipeline / Build and Push Docker Images (worker) (push) Failing after 3m29s
CI/CD Pipeline / Build and Push Docker Images (api) (push) Failing after 3m35s
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped

2025-10-21 11:55:24 +02:00
parent 73c352128b
commit 12baaa93c4
12 changed files with 70 additions and 872 deletions

View File

@@ -348,6 +348,7 @@ async def trigger_proxmox_documentation() -> Dict[str, Any]:
Returns job_id for status monitoring
"""
from celery import Celery
from datacenter_docs.utils.config import get_settings
try:
@@ -388,6 +389,7 @@ async def get_job_status(job_id: str) -> Dict[str, Any]:
Returns current status and progress information
"""
from celery.result import AsyncResult
from datacenter_docs.workers.celery_app import celery_app
try:
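For readers following the job flow: the status returned by this endpoint comes from Celery's AsyncResult, per the imports above. The snippet below is a minimal sketch of that lookup only; it is not the endpoint's actual body.

from typing import Any, Dict
from celery.result import AsyncResult
from datacenter_docs.workers.celery_app import celery_app

def read_job_status(job_id: str) -> Dict[str, Any]:
    # AsyncResult exposes the task state; custom PROGRESS updates surface their meta dict via .info
    result = AsyncResult(job_id, app=celery_app)
    payload: Dict[str, Any] = {"job_id": job_id, "status": result.state}
    if result.state == "PROGRESS" and isinstance(result.info, dict):
        payload.update(result.info)  # e.g. {"current": 2, "total": 6, "status": "..."}
    elif result.state == "FAILURE":
        payload["error"] = str(result.info)
    return payload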
@@ -441,24 +443,25 @@ async def list_documentation_files() -> Dict[str, Any]:
for file_path in category_dir.glob("*.md"):
stat = file_path.stat()
- files.append({
- "filename": file_path.name,
- "size": stat.st_size,
- "modified": stat.st_mtime,
- "path": f"{category}/{file_path.name}"
- })
+ files.append(
+ {
+ "filename": file_path.name,
+ "size": stat.st_size,
+ "modified": stat.st_mtime,
+ "path": f"{category}/{file_path.name}",
+ }
+ )
if files:
- files_by_category[category] = sorted(files, key=lambda x: x["modified"], reverse=True)
+ files_by_category[category] = sorted(
+ files, key=lambda x: x["modified"], reverse=True
+ )
total_files = sum(len(files) for files in files_by_category.values())
return {
"categories": [
{"name": cat, "files": files}
for cat, files in files_by_category.items()
],
"total": total_files
"categories": [{"name": cat, "files": files} for cat, files in files_by_category.items()],
"total": total_files,
}
@@ -495,7 +498,7 @@ async def get_documentation_content(category: str, filename: str) -> Dict[str, Any]:
"category": category,
"content": content,
"size": stat.st_size,
"modified": stat.st_mtime
"modified": stat.st_mtime,
}
except Exception as e:
logger.error(f"Failed to read file {file_path}: {e}")
@@ -513,38 +516,21 @@ async def get_job_logs(job_id: str, tail: int = 100) -> Dict[str, Any]:
try:
# Get worker logs and filter for job_id
- cmd = [
- "docker",
- "logs",
- "datacenter-docs-worker-dev",
- "--tail",
- str(tail)
- ]
+ cmd = ["docker", "logs", "datacenter-docs-worker-dev", "--tail", str(tail)]
- result = subprocess.run(
- cmd,
- capture_output=True,
- text=True,
- timeout=5
- )
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
all_logs = result.stdout + result.stderr
# Filter logs containing the job_id
job_logs = [
- line
- for line in all_logs.split("\n")
- if job_id in line or "ForkPoolWorker" in line
+ line for line in all_logs.split("\n") if job_id in line or "ForkPoolWorker" in line
]
# Take last 50 relevant lines
relevant_logs = job_logs[-50:] if len(job_logs) > 50 else job_logs
- return {
- "job_id": job_id,
- "logs": relevant_logs,
- "total_lines": len(relevant_logs)
- }
+ return {"job_id": job_id, "logs": relevant_logs, "total_lines": len(relevant_logs)}
except subprocess.TimeoutExpired:
raise HTTPException(status_code=504, detail="Timeout getting logs")

View File

@@ -8,10 +8,10 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
from ..mcp.client import MCPClient
from ..utils.llm_client import LLMClient
@@ -48,7 +48,9 @@ class DocumentationAgent:
self.vector_store: Optional[Chroma] = None
try:
- self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+ self.embeddings = HuggingFaceEmbeddings(
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
+ )
self._load_vector_store()
logger.info("Vector store initialized successfully")
except Exception as e:
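The _load_vector_store helper called above is not part of this diff. Purely as a sketch, reopening a persisted Chroma collection with the embeddings configured here usually looks something like the following; the attribute names are assumptions, not the repository's code.

def _load_vector_store(self) -> None:
    # Sketch only: reuse the collection persisted on disk with the same embedding function
    self.vector_store = Chroma(
        persist_directory=str(self.vector_store_path),  # assumed attribute holding the store path
        embedding_function=self.embeddings,
    )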
@@ -136,7 +138,9 @@ class DocumentationAgent:
logger.info(f"💾 Adding {len(documents)} chunks to vector store...")
self.vector_store.add_documents(documents)
self.vector_store.persist()
logger.info(f"✅ Indexed {files_processed} files ({len(documents)} chunks) from documentation")
logger.info(
f"✅ Indexed {files_processed} files ({len(documents)} chunks) from documentation"
)
else:
logger.warning("⚠️ No documents to index")
@@ -191,7 +195,9 @@ class DocumentationAgent:
"last_updated": doc.metadata.get("indexed_at", ""),
}
)
logger.info(f" ✓ Section: {doc.metadata.get('section')} (relevance: {relevance_score*100:.1f}%, distance: {score:.3f})")
logger.info(
f" ✓ Section: {doc.metadata.get('section')} (relevance: {relevance_score*100:.1f}%, distance: {score:.3f})"
)
return formatted_results
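The log line above reports a relevance percentage derived from the Chroma distance, but the retrieval call itself falls outside this hunk. A minimal sketch of that pattern, with a hypothetical helper name and an assumed distance-to-relevance conversion:

def _search_documentation(self, query: str, k: int = 5) -> list:
    # Chroma returns (Document, distance) pairs; smaller distance means a closer match
    results = self.vector_store.similarity_search_with_score(query, k=k)
    formatted_results = []
    for doc, score in results:
        relevance_score = 1.0 / (1.0 + score)  # assumed conversion, for illustration only
        formatted_results.append(
            {"section": doc.metadata.get("section"), "relevance": relevance_score}
        )
    return formatted_results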
@@ -342,7 +348,7 @@ Respond in JSON format:
"de": "German",
"pt": "Portuguese",
"zh": "Chinese",
"ja": "Japanese"
"ja": "Japanese",
}
language_instruction = ""

View File

@@ -2,7 +2,6 @@
Chat server with Socket.IO support for real-time communication.
"""
- import asyncio
import logging
from pathlib import Path
from typing import Any, Dict
@@ -36,9 +35,7 @@ async def index_documentation_if_needed() -> None:
try:
# Create temporary agent for indexing
temp_agent = DocumentationAgent(
- mcp_client=None,
- llm_client=None,
- vector_store_path=str(vector_store_path)
+ mcp_client=None, llm_client=None, vector_store_path=str(vector_store_path)
)
# Index documentation
@@ -77,6 +74,7 @@ async def initialize_agent() -> None:
logger.warning(f"Failed to initialize Documentation Agent: {e}")
agent = None
# Create Socket.IO server
# Using async_mode="asgi" for FastAPI integration
# python-socketio 5.x automatically supports Engine.IO v4 for socket.io-client 4.x compatibility
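A minimal sketch of the wiring those comments describe, assuming a FastAPI instance named fastapi_app (the names are illustrative, not taken from this diff):

import socketio
from fastapi import FastAPI

fastapi_app = FastAPI()
sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*")
# Wrap the FastAPI app so Socket.IO traffic and HTTP routes share one ASGI application
app = socketio.ASGIApp(sio, other_asgi_app=fastapi_app)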
@@ -202,7 +200,9 @@ async def chat(sid: str, data: Dict[str, Any]) -> None:
)
response = {
"message": ai_response.get("message", "I apologize, I couldn't generate a response."),
"message": ai_response.get(
"message", "I apologize, I couldn't generate a response."
),
"type": "bot",
"timestamp": data.get("timestamp"),
"related_docs": ai_response.get("related_docs", []),

View File

@@ -50,9 +50,7 @@ class ProxmoxCollector(BaseCollector):
# Check if we have real Proxmox credentials configured
if not settings.PROXMOX_HOST or settings.PROXMOX_HOST == "proxmox.example.com":
- self.logger.warning(
- "Proxmox host not configured, using mock data for development"
- )
+ self.logger.warning("Proxmox host not configured, using mock data for development")
self.connected = True
return True
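The branch that connects to a real cluster sits outside this hunk. With proxmoxer it would typically look roughly like the sketch below; the helper name and every setting other than PROXMOX_HOST are assumptions.

from proxmoxer import ProxmoxAPI

def _connect_real_cluster(self) -> bool:
    # Hypothetical real-connection path (credential settings are assumed names)
    self.proxmox_client = ProxmoxAPI(
        settings.PROXMOX_HOST,
        user=settings.PROXMOX_USER,
        password=settings.PROXMOX_PASSWORD,
        verify_ssl=False,
    )
    self.connected = True
    return True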
@@ -312,10 +310,14 @@ class ProxmoxCollector(BaseCollector):
nodes = self.proxmox_client.nodes.get()
if nodes:
node_name = nodes[0]["node"] # Use first node for storage info
- status = self.proxmox_client.nodes(node_name).storage(storage_id).status.get()
+ status = (
+ self.proxmox_client.nodes(node_name).storage(storage_id).status.get()
+ )
store.update(status)
except Exception as e:
self.logger.warning(f"Failed to get detailed info for storage {storage_id}: {e}")
self.logger.warning(
f"Failed to get detailed info for storage {storage_id}: {e}"
)
self.logger.info(f"Collected {len(storage)} storage pools")
return storage
@@ -354,7 +356,9 @@ class ProxmoxCollector(BaseCollector):
self.logger.warning(f"Failed to get networks from node {node_name}: {e}")
continue
self.logger.info(f"Collected {len(networks)} network interfaces from {len(nodes)} nodes")
self.logger.info(
f"Collected {len(networks)} network interfaces from {len(nodes)} nodes"
)
return networks
except Exception as e:

View File

@@ -88,9 +88,7 @@ class TemplateBasedGenerator(BaseGenerator):
template_path: Path to YAML template file
"""
self.template = DocumentationTemplate(Path(template_path))
- super().__init__(
- name=self.template.collector, section=f"{self.template.collector}_docs"
- )
+ super().__init__(name=self.template.collector, section=f"{self.template.collector}_docs")
async def generate(self, data: Dict[str, Any]) -> str:
"""
@@ -104,9 +102,7 @@ class TemplateBasedGenerator(BaseGenerator):
Returns:
Combined documentation (all sections)
"""
- self.logger.info(
- f"Generating documentation for {self.template.name} using template"
- )
+ self.logger.info(f"Generating documentation for {self.template.name} using template")
# Validate data matches template collector
collector_name = data.get("metadata", {}).get("collector", "")
@@ -140,7 +136,6 @@ class TemplateBasedGenerator(BaseGenerator):
Returns:
Generated section content in Markdown
"""
- section_id = section_def.get("id", "unknown")
section_title = section_def.get("title", "Untitled Section")
data_requirements = section_def.get("data_requirements", [])
prompt_template = section_def.get("prompt_template", "")
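For orientation, a section definition carrying the fields read above might look like the following; the values are purely illustrative and not taken from the project's templates.

section_def = {
    "id": "cluster_overview",
    "title": "Cluster Overview",
    "data_requirements": ["nodes", "storage"],
    "prompt_template": "Summarise the cluster described by {nodes} and {storage}.",
}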
@@ -549,13 +544,15 @@ Guidelines:
except Exception as e:
self.logger.error(f"Failed to generate section for {loop_over} item {idx}: {e}")
- results.append({
- "section_id": f"{section_id}_item_{idx}",
- "success": False,
- "error": str(e),
- "item_index": idx,
- "total_items": total_items,
- })
+ results.append(
+ {
+ "section_id": f"{section_id}_item_{idx}",
+ "success": False,
+ "error": str(e),
+ "item_index": idx,
+ "total_items": total_items,
+ }
+ )
return results

View File

@@ -84,9 +84,7 @@ class LLMClient:
# Increased timeout to 120s for documentation generation (large prompts)
http_client = httpx.AsyncClient(verify=False, timeout=120.0)
self.client = AsyncOpenAI(
- base_url=self.base_url,
- api_key=self.api_key,
- http_client=http_client
+ base_url=self.base_url, api_key=self.api_key, http_client=http_client
)
logger.info(f"Initialized LLM client: base_url={self.base_url}, model={self.model}")

View File

@@ -8,7 +8,7 @@ from infrastructure systems (Proxmox, VMware, Kubernetes, etc.)
import logging
from datetime import datetime
from pathlib import Path
- from typing import Any, Dict, List
+ from typing import Any, Dict
from celery import group
@@ -18,9 +18,7 @@ logger = logging.getLogger(__name__)
@celery_app.task(name="collect_and_generate_docs", bind=True)
- def collect_and_generate_docs(
- self, collector_name: str, template_path: str
- ) -> Dict[str, Any]:
+ def collect_and_generate_docs(self, collector_name: str, template_path: str) -> Dict[str, Any]:
"""
Collect data from infrastructure and generate documentation
@@ -80,9 +78,7 @@ def collect_and_generate_docs(
return result
- async def _async_collect_and_generate(
- collector_name: str, template_path: str
- ) -> Dict[str, Any]:
+ async def _async_collect_and_generate(collector_name: str, template_path: str) -> Dict[str, Any]:
"""
Async implementation of collect and generate workflow
@@ -93,8 +89,8 @@ async def _async_collect_and_generate(
Returns:
Generation result
"""
- from datacenter_docs.generators.template_generator import TemplateBasedGenerator
from datacenter_docs.chat.agent import DocumentationAgent
+ from datacenter_docs.generators.template_generator import TemplateBasedGenerator
# Import appropriate collector
collector = await _get_collector(collector_name)
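The synchronous Celery task above hands off to this async helper; the bridge between the two is not visible in this excerpt, but a common shape for it is the following (an assumption with a hypothetical function name, not the project's actual call site):

import asyncio
from typing import Any, Dict

def _run_collect_and_generate(collector_name: str, template_path: str) -> Dict[str, Any]:
    # Hypothetical bridge: drive the async workflow to completion from the synchronous task body
    return asyncio.run(_async_collect_and_generate(collector_name, template_path))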
@@ -190,8 +186,8 @@ def generate_proxmox_docs(self) -> Dict[str, Any]:
# Update task state
self.update_state(
- state='PROGRESS',
- meta={'current': 0, 'total': 6, 'status': 'Starting Proxmox documentation generation...'}
+ state="PROGRESS",
+ meta={"current": 0, "total": 6, "status": "Starting Proxmox documentation generation..."},
)
template_path = "templates/documentation/proxmox.yaml"
@@ -237,10 +233,7 @@ def generate_proxmox_docs(self) -> Dict[str, Any]:
result["completed_at"] = datetime.now().isoformat()
logger.error(f"[{task_id}] Proxmox documentation generation failed: {e}", exc_info=True)
- self.update_state(
- state='FAILURE',
- meta={'error': str(e), 'status': f'Failed: {str(e)}'}
- )
+ self.update_state(state="FAILURE", meta={"error": str(e), "status": f"Failed: {str(e)}"})
raise
@@ -267,10 +260,7 @@ def generate_all_docs() -> Dict[str, Any]:
# Create parallel tasks
task_group = group(
- [
- collect_and_generate_docs.s(system["collector"], system["template"])
- for system in systems
- ]
+ [collect_and_generate_docs.s(system["collector"], system["template"]) for system in systems]
)
# Execute group
@@ -377,11 +367,8 @@ def full_docs_pipeline() -> Dict[str, Any]:
"""
logger.info("Starting full documentation pipeline")
- # Step 1: Generate all documentation
- generate_result = generate_all_docs()
- # Step 2: Wait a bit for generation to complete, then index
- # (In production, this would use Celery chains/chords for better coordination)
+ # Step 1: Generate all documentation, then index
+ # Use Celery chain for proper task coordination
+ from celery import chain
+ pipeline = chain(