diff --git a/deploy/docker/Dockerfile.worker b/deploy/docker/Dockerfile.worker
index d718d28..656453b 100644
--- a/deploy/docker/Dockerfile.worker
+++ b/deploy/docker/Dockerfile.worker
@@ -39,6 +39,7 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy application code and package definition
COPY src/ /app/src/
COPY config/ /app/config/
+COPY templates/ /app/templates/
COPY pyproject.toml README.md /app/
# Install poetry-core (required for install with pyproject.toml)
@@ -59,5 +60,5 @@ RUN useradd -m -u 1000 appuser && \
USER appuser
-# Run the Celery worker
-CMD ["celery", "-A", "datacenter_docs.workers.celery_app", "worker", "--loglevel=info", "--concurrency=4"]
+# Run the Celery worker with specific queues
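+# NOTE: this queue list is assumed to mirror the task routing configured in datacenter_docs.workers.celery_app;
+# at minimum "documentation" must be present, since the API submits generation jobs to that queue.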
+CMD ["celery", "-A", "datacenter_docs.workers.celery_app", "worker", "--loglevel=info", "--concurrency=4", "-Q", "documentation,auto_remediation,data_collection,maintenance,celery"]
diff --git a/deploy/docker/docker-compose.dev.yml b/deploy/docker/docker-compose.dev.yml
index 8eea032..7807219 100644
--- a/deploy/docker/docker-compose.dev.yml
+++ b/deploy/docker/docker-compose.dev.yml
@@ -58,7 +58,8 @@ services:
- ../../src:/app/src
- ../../config:/app/config
- api-logs:/app/logs
- - api-output:/app/output
+ - worker-output:/app/output # Shared with worker for documentation files
+ - /var/run/docker.sock:/var/run/docker.sock:ro # For accessing worker logs
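+ # Read-only socket is what allows the API's /documentation/jobs/{job_id}/logs endpoint to run "docker logs" against the worker container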
depends_on:
mongodb:
condition: service_healthy
@@ -82,7 +83,7 @@ services:
volumes:
- ../../src:/app/src:z
- ../../config:/app/config:z
- - ../../output:/app/output:z # Documentation files
+ - worker-output:/app/output # Shared documentation files with worker
- ../../scripts:/app/scripts:z # Indexing scripts
- chat-logs:/app/logs
- chat-data:/app/data # Vector store persistence
@@ -109,6 +110,7 @@ services:
- ../../config:/app/config
- worker-logs:/app/logs
- worker-output:/app/output
+ - chat-data:/app/data # Shared ChromaDB vector store with chat
depends_on:
mongodb:
condition: service_healthy
@@ -163,8 +165,6 @@ volumes:
name: datacenter-docs-redis-data-dev
api-logs:
name: datacenter-docs-api-logs-dev
- api-output:
- name: datacenter-docs-api-output-dev
chat-logs:
name: datacenter-docs-chat-logs-dev
chat-data:
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index 7a3d45d..06e3c37 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -21,7 +21,7 @@ import remarkGfm from 'remark-gfm';
// Use relative URLs to work with nginx proxy in production
const API_URL = import.meta.env.VITE_API_URL || (typeof window !== 'undefined' ? window.location.origin + '/api' : 'http://localhost:8000');
-const CHAT_URL = import.meta.env.VITE_CHAT_URL || (typeof window !== 'undefined' ? window.location.origin : 'http://localhost:8001');
+const CHAT_URL = import.meta.env.VITE_CHAT_URL || 'http://localhost:8001';
function App() {
const [activeTab, setActiveTab] = useState(0);
@@ -43,13 +43,15 @@ function App() {
+
-
+
{activeTab === 0 && }
{activeTab === 1 && }
{activeTab === 2 && }
+ {activeTab === 3 && <DocumentationInterface />}
);
@@ -380,7 +382,7 @@ function SearchInterface() {
const [query, setQuery] = useState('');
const [results, setResults] = useState([]);
const [loading, setLoading] = useState(false);
-
+
const search = async () => {
setLoading(true);
try {
@@ -394,7 +396,7 @@ function SearchInterface() {
}
setLoading(false);
};
-
+
return (
@@ -416,9 +418,9 @@ function SearchInterface() {
-
+
{loading && }
-
+
{results.map((result, idx) => (
@@ -441,4 +443,302 @@ function SearchInterface() {
);
}
+// Documentation Management Interface
+function DocumentationInterface() {
+ const [jobStatus, setJobStatus] = useState(null);
+ const [generatingJob, setGeneratingJob] = useState(null);
+ const [files, setFiles] = useState([]);
+ const [selectedFile, setSelectedFile] = useState(null);
+ const [fileContent, setFileContent] = useState(null);
+ const [loading, setLoading] = useState(false);
+ const [showLogs, setShowLogs] = useState(false);
+ const [jobLogs, setJobLogs] = useState([]);
+
+ // Load available files on mount
+ useEffect(() => {
+ loadFiles();
+ }, []);
+
+ // Poll job status when generating
+ useEffect(() => {
+ if (!generatingJob) return;
+
+ const pollInterval = setInterval(async () => {
+ try {
+ const response = await axios.get(`${API_URL}/api/v1/documentation/jobs/${generatingJob}/status`);
+ setJobStatus(response.data);
+
+ if (response.data.completed) {
+ clearInterval(pollInterval);
+ setGeneratingJob(null);
+ loadFiles(); // Refresh file list
+ }
+ } catch (error) {
+ console.error('Error polling job status:', error);
+ clearInterval(pollInterval);
+ setGeneratingJob(null);
+ }
+ }, 2000);
+
+ return () => clearInterval(pollInterval);
+ }, [generatingJob]);
+
+ // Poll logs when log viewer is open
+ useEffect(() => {
+ if (!showLogs || !jobStatus?.job_id) return;
+
+ const loadLogs = async () => {
+ try {
+ const response = await axios.get(`${API_URL}/api/v1/documentation/jobs/${jobStatus.job_id}/logs`);
+ setJobLogs(response.data.logs || []);
+ } catch (error) {
+ console.error('Error loading logs:', error);
+ }
+ };
+
+ loadLogs(); // Initial load
+ const logInterval = setInterval(loadLogs, 3000); // Poll every 3 seconds
+
+ return () => clearInterval(logInterval);
+ }, [showLogs, jobStatus?.job_id]);
+
+ const loadFiles = async () => {
+ try {
+ const response = await axios.get(`${API_URL}/api/v1/documentation/files`);
+ setFiles(response.data.categories || []);
+ } catch (error) {
+ console.error('Error loading files:', error);
+ }
+ };
+
+ const triggerGeneration = async () => {
+ setLoading(true);
+ setJobStatus(null);
+ try {
+ const response = await axios.post(`${API_URL}/api/v1/documentation/jobs/proxmox`);
+ setGeneratingJob(response.data.job_id);
+ setJobStatus({
+ job_id: response.data.job_id,
+ status: response.data.status,
+ completed: false
+ });
+ } catch (error) {
+ console.error('Error triggering generation:', error);
+ alert('Failed to start documentation generation: ' + error.message);
+ }
+ setLoading(false);
+ };
+
+ const viewFile = async (category, filename) => {
+ setLoading(true);
+ try {
+ const response = await axios.get(`${API_URL}/api/v1/documentation/files/${category}/${filename}`);
+ setFileContent(response.data);
+ setSelectedFile({ category, filename });
+ } catch (error) {
+ console.error('Error loading file:', error);
+ alert('Failed to load file: ' + error.message);
+ }
+ setLoading(false);
+ };
+
+ const formatBytes = (bytes) => {
+ if (bytes < 1024) return bytes + ' B';
+ if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB';
+ return (bytes / (1024 * 1024)).toFixed(1) + ' MB';
+ };
+
+ const formatDate = (timestamp) => {
+ return new Date(timestamp * 1000).toLocaleString();
+ };
+
+ return (
+
+ {/* Control Panel */}
+
+
+ Generate Documentation
+
+ Trigger manual generation of Proxmox infrastructure documentation
+
+ }
+ sx={{ mb: 2 }}
+ >
+ {generatingJob ? 'Generating...' : 'Generate Proxmox Docs'}
+
+
+ {jobStatus && (
+
+ Job Status
+
+ {!jobStatus.completed && }
+
+
+
+ Job ID: {jobStatus.job_id}
+
+
+ {jobStatus.error && (
+
+ Error: {jobStatus.error}
+
+ )}
+
+ )}
+
+
+ {/* Log Viewer */}
+ {showLogs && jobStatus && (
+
+
+ Job Logs (Last 50 lines)
+
+ {jobLogs.length === 0 ? (
+
+ No logs available yet...
+
+ ) : (
+
+ {jobLogs.join('\n')}
+
+ )}
+
+ )}
+
+ {/* File List */}
+
+ Available Documentation
+ {files.length === 0 ? (
+
+ No documentation files found. Generate some first!
+
+ ) : (
+ files.map((category) => (
+
+
+ {category.name.toUpperCase()}
+
+
+ {category.files.map((file) => (
+ viewFile(category.name, file.filename)}
+ selected={selectedFile?.filename === file.filename}
+ >
+
+
+ ))}
+
+
+ ))
+ )}
+
+
+
+ {/* Markdown Viewer */}
+
+ {fileContent ? (
+
+
+ {fileContent.filename}
+
+
+
+
+
+ {fileContent.content}
+
+
+
+
+ Last modified: {formatDate(fileContent.modified)} • Size: {formatBytes(fileContent.size)}
+
+
+ ) : (
+
+
+
+ Select a documentation file to view
+
+
+ )}
+
+
+ );
+}
+
export default App;
diff --git a/src/datacenter_docs/api/main.py b/src/datacenter_docs/api/main.py
index 7e88640..50e65e2 100644
--- a/src/datacenter_docs/api/main.py
+++ b/src/datacenter_docs/api/main.py
@@ -339,6 +339,220 @@ async def list_sections() -> Dict[str, Any]:
}
+# Documentation Jobs API
+@app.post("/api/v1/documentation/jobs/proxmox")
+async def trigger_proxmox_documentation() -> Dict[str, Any]:
+ """
+ Trigger Proxmox documentation generation job
+
+ Returns job_id for status monitoring
+ """
+ from celery import Celery
+ from datacenter_docs.utils.config import get_settings
+
+ try:
+ settings = get_settings()
+
+ # Create minimal Celery client for sending tasks
+ celery_client = Celery(
+ "datacenter_docs",
+ broker=settings.CELERY_BROKER_URL,
+ backend=settings.CELERY_RESULT_BACKEND,
+ )
+
+ # Send task by name to the documentation queue
+ task = celery_client.send_task(
+ "generate_proxmox_docs",
+ kwargs={},
+ queue="documentation",
+ )
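+ # "documentation" matches one of the queues the worker consumes (see -Q in Dockerfile.worker), so the task is routed there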
+
+ logger.info(f"Triggered Proxmox docs generation job: {task.id}")
+
+ return {
+ "success": True,
+ "job_id": task.id,
+ "status": "queued",
+ "message": "Proxmox documentation generation job started",
+ }
+ except Exception as e:
+ logger.error(f"Failed to trigger Proxmox docs job: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail=f"Failed to start job: {str(e)}")
+
+
+@app.get("/api/v1/documentation/jobs/{job_id}/status")
+async def get_job_status(job_id: str) -> Dict[str, Any]:
+ """
+ Get status of documentation generation job
+
+ Returns current status and progress information
+ """
+ from celery.result import AsyncResult
+ from datacenter_docs.workers.celery_app import celery_app
+
+ try:
+ result = AsyncResult(job_id, app=celery_app)
+
+ response = {
+ "job_id": job_id,
+ "status": result.state, # PENDING, STARTED, SUCCESS, FAILURE, RETRY
+ }
+
+ if result.state == "SUCCESS":
+ response["result"] = result.result
+ response["completed"] = True
+ elif result.state == "FAILURE":
+ response["error"] = str(result.info)
+ response["completed"] = True
+ elif result.state == "PROGRESS":
+ response["progress"] = result.info
+ response["completed"] = False
+ else:
+ response["completed"] = False
+
+ return response
+
+ except Exception as e:
+ logger.error(f"Failed to get job status: {e}")
+ raise HTTPException(status_code=500, detail=f"Failed to get status: {str(e)}")
+
+
+@app.get("/api/v1/documentation/files")
+async def list_documentation_files() -> Dict[str, Any]:
+ """
+ List all generated documentation files
+
+ Returns available markdown files organized by category
+ """
+ from pathlib import Path
+
+ output_dir = Path("/app/output")
+
+ if not output_dir.exists():
+ return {"categories": [], "total": 0}
+
+ files_by_category = {}
+
+ # Scan output directory
+ for category_dir in output_dir.iterdir():
+ if category_dir.is_dir():
+ category = category_dir.name
+ files = []
+
+ for file_path in category_dir.glob("*.md"):
+ stat = file_path.stat()
+ files.append({
+ "filename": file_path.name,
+ "size": stat.st_size,
+ "modified": stat.st_mtime,
+ "path": f"{category}/{file_path.name}"
+ })
+
+ if files:
+ files_by_category[category] = sorted(files, key=lambda x: x["modified"], reverse=True)
+
+ total_files = sum(len(files) for files in files_by_category.values())
+
+ return {
+ "categories": [
+ {"name": cat, "files": files}
+ for cat, files in files_by_category.items()
+ ],
+ "total": total_files
+ }
+
+
+@app.get("/api/v1/documentation/files/{category}/{filename}")
+async def get_documentation_content(category: str, filename: str) -> Dict[str, Any]:
+ """
+ Retrieve content of a specific documentation file
+
+ Returns markdown content
+ """
+ from pathlib import Path
+
+ # Validate filename to prevent directory traversal
+ if ".." in filename or "/" in filename:
+ raise HTTPException(status_code=400, detail="Invalid filename")
+
+ if ".." in category or "/" in category:
+ raise HTTPException(status_code=400, detail="Invalid category")
+
+ file_path = Path(f"/app/output/{category}/{filename}")
+
+ if not file_path.exists():
+ raise HTTPException(status_code=404, detail="Documentation file not found")
+
+ if not file_path.is_file() or file_path.suffix != ".md":
+ raise HTTPException(status_code=400, detail="Invalid file type")
+
+ try:
+ content = file_path.read_text(encoding="utf-8")
+ stat = file_path.stat()
+
+ return {
+ "filename": filename,
+ "category": category,
+ "content": content,
+ "size": stat.st_size,
+ "modified": stat.st_mtime
+ }
+ except Exception as e:
+ logger.error(f"Failed to read file {file_path}: {e}")
+ raise HTTPException(status_code=500, detail=f"Failed to read file: {str(e)}")
+
+
+@app.get("/api/v1/documentation/jobs/{job_id}/logs")
+async def get_job_logs(job_id: str, tail: int = 100) -> Dict[str, Any]:
+ """
+ Get logs for a specific documentation generation job
+
+ Returns recent log lines related to the job
+ """
+ import subprocess
+
+ try:
+ # Get worker logs and filter for job_id
+ cmd = [
+ "docker",
+ "logs",
+ "datacenter-docs-worker-dev",
+ "--tail",
+ str(tail)
+ ]
+
+ result = subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ timeout=5
+ )
+
+ all_logs = result.stdout + result.stderr
+
+ # Filter logs containing the job_id
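+ # ForkPoolWorker lines are kept as well, since Celery pool-worker output does not always include the task id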
+ job_logs = [
+ line
+ for line in all_logs.split("\n")
+ if job_id in line or "ForkPoolWorker" in line
+ ]
+
+ # Take last 50 relevant lines
+ relevant_logs = job_logs[-50:] if len(job_logs) > 50 else job_logs
+
+ return {
+ "job_id": job_id,
+ "logs": relevant_logs,
+ "total_lines": len(relevant_logs)
+ }
+
+ except subprocess.TimeoutExpired:
+ raise HTTPException(status_code=504, detail="Timeout getting logs")
+ except Exception as e:
+ logger.error(f"Failed to get job logs: {e}")
+ raise HTTPException(status_code=500, detail=f"Failed to get logs: {str(e)}")
+
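+# Illustrative usage against the dev stack (assumes the API is exposed on localhost:8000):
+#   curl -X POST http://localhost:8000/api/v1/documentation/jobs/proxmox   -> {"job_id": "..."}
+#   curl http://localhost:8000/api/v1/documentation/jobs/<job_id>/status   -> {"status": "PROGRESS", ...}
+#   curl http://localhost:8000/api/v1/documentation/files                  -> {"categories": [...], "total": N}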
+
# Stats and Metrics
@app.get("/api/v1/stats/tickets")
async def get_ticket_stats() -> Dict[str, Any]:
diff --git a/src/datacenter_docs/chat/agent.py b/src/datacenter_docs/chat/agent.py
index b11275b..b721ee4 100644
--- a/src/datacenter_docs/chat/agent.py
+++ b/src/datacenter_docs/chat/agent.py
@@ -76,14 +76,35 @@ class DocumentationAgent:
async def index_documentation(self, docs_path: Path) -> None:
"""Index all documentation files into vector store"""
- logger.info("Indexing documentation...")
+ logger.info("📚 Indexing documentation...")
+
+ # Clear existing documents to avoid duplicates
+ if self.vector_store is not None:
+ try:
+ # Delete the collection and recreate it
+ logger.info("🗑️ Clearing old documentation from vector store...")
+ self.vector_store.delete_collection()
+ self.vector_store = Chroma(
+ persist_directory=str(self.vector_store_path),
+ embedding_function=self.embeddings,
+ )
+ logger.info("✅ Vector store cleared and recreated")
+ except Exception as e:
+ logger.warning(f"⚠️ Could not clear vector store (might be first run): {e}")
documents = []
+ files_processed = 0
# Read all markdown files
for md_file in docs_path.glob("**/*.md"):
- with open(md_file, "r", encoding="utf-8") as f:
- content = f.read()
+ try:
+ with open(md_file, "r", encoding="utf-8") as f:
+ content = f.read()
+
+ # Skip empty files
+ if not content.strip():
+ logger.warning(f"⚠️ Skipping empty file: {md_file}")
+ continue
# Split into chunks
splitter = RecursiveCharacterTextSplitter(
@@ -91,6 +112,7 @@ class DocumentationAgent:
)
chunks = splitter.split_text(content)
+ files_processed += 1
for i, chunk in enumerate(chunks):
doc = Document(
@@ -104,12 +126,19 @@ class DocumentationAgent:
)
documents.append(doc)
+ logger.info(f" ✓ Indexed {md_file.name} ({len(chunks)} chunks)")
+
+ except Exception as e:
+ logger.error(f"❌ Failed to index {md_file}: {e}")
+
# Add to vector store
- if self.vector_store is not None:
+ if self.vector_store is not None and documents:
+ logger.info(f"💾 Adding {len(documents)} chunks to vector store...")
self.vector_store.add_documents(documents)
self.vector_store.persist()
-
- logger.info(f"Indexed {len(documents)} chunks from documentation")
+ logger.info(f"✅ Indexed {files_processed} files ({len(documents)} chunks) from documentation")
+ else:
+ logger.warning("⚠️ No documents to index")
async def search_documentation(
self, query: str, sections: Optional[List[str]] = None, limit: int = 5
diff --git a/src/datacenter_docs/generators/template_generator.py b/src/datacenter_docs/generators/template_generator.py
index 01f3c1f..1a4652b 100644
--- a/src/datacenter_docs/generators/template_generator.py
+++ b/src/datacenter_docs/generators/template_generator.py
@@ -240,12 +240,15 @@ Guidelines:
return prompt
- def _format_data_for_prompt(self, data: Any) -> str:
+ def _format_data_for_prompt(self, data: Any, max_items: int = 10) -> str:
"""
Format data for inclusion in LLM prompt
+ For large datasets, limits the number of items to prevent overwhelming small LLMs.
+
Args:
data: Data to format (dict, list, str, etc.)
+ max_items: Maximum number of items to include for lists (default: 10)
Returns:
Formatted string representation
@@ -253,7 +256,24 @@ Guidelines:
if data is None:
return "No data available"
- if isinstance(data, (dict, list)):
+ if isinstance(data, list):
+ # Limit list size for small LLMs
+ total_count = len(data)
+ if total_count > max_items:
+ limited_data = data[:max_items]
+ summary = f"\n\n**Note: Showing {max_items} of {total_count} items.**\n"
+ try:
+ formatted = json.dumps(limited_data, indent=2, default=str)
+ return formatted + summary
+ except Exception:
+ # summary is defined before the try block, so this fallback cannot hit an unbound variable
+ return str(limited_data) + summary
+ # Small list, show all
+ try:
+ return json.dumps(data, indent=2, default=str)
+ except Exception:
+ return str(data)
+
+ if isinstance(data, dict):
# Pretty print JSON for structured data
try:
return json.dumps(data, indent=2, default=str)
@@ -305,12 +325,15 @@ Guidelines:
This is useful for very large documentation where you want each
section as a separate file.
+ Supports looped sections where one section definition generates
+ multiple output files (e.g., one file per VM).
+
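+ Example section definition that triggers the loop (mirrors
+ templates/documentation/proxmox.yaml):
+
+ - id: "vm"
+ title: "VM: {vm_name}"
+ loop_over: "vms"
+ loop_item_name: "vm"
+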
Args:
data: Collected infrastructure data
save_individually: Save each section as separate file
Returns:
- List of results for each section
+ List of results for each section (may be multiple for looped sections)
"""
results = []
output_config = self.template.output_config
@@ -322,57 +345,251 @@ Guidelines:
section_id = section_def.get("id")
section_title = section_def.get("title")
- # Generate section
- content = await self.generate_section(section_def, data)
+ # Check if this section should loop over items
+ loop_over = section_def.get("loop_over")
- if not content:
- results.append(
- {
- "section_id": section_id,
- "success": False,
- "error": "Generation failed",
- }
+ if loop_over:
+ # Generate one section per item in the loop
+ loop_results = await self._generate_looped_sections(
+ section_def, data, output_dir, save_to_db, save_to_file, save_individually
)
- continue
+ results.extend(loop_results)
+ else:
+ # Generate a single section (original behavior)
+ content = await self.generate_section(section_def, data)
- result = {
- "section_id": section_id,
- "title": section_title,
- "success": True,
- "content": content,
- }
+ if not content:
+ results.append(
+ {
+ "section_id": section_id,
+ "success": False,
+ "error": "Generation failed",
+ }
+ )
+ continue
- # Save section if requested
- if save_individually:
- if save_to_file:
- # Save to file
- output_path = Path(output_dir)
- output_path.mkdir(parents=True, exist_ok=True)
- filename = f"{section_id}.md"
- file_path = output_path / filename
- file_path.write_text(content, encoding="utf-8")
- result["file_path"] = str(file_path)
- self.logger.info(f"Saved section to: {file_path}")
+ result = {
+ "section_id": section_id,
+ "title": section_title,
+ "success": True,
+ "content": content,
+ }
- if save_to_db:
- # Save to database
- metadata = {
- "section_id": section_id,
- "template": str(self.template.path),
- "category": section_def.get("category", ""),
- }
- # Create temporary generator for this section
- temp_gen = BaseGenerator.__new__(BaseGenerator)
- temp_gen.name = self.name
- temp_gen.section = section_id
- temp_gen.logger = self.logger
- temp_gen.llm = self.llm
- await temp_gen.save_to_database(content, metadata)
+ # Save section if requested
+ if save_individually:
+ if save_to_file:
+ # Save to file
+ output_path = Path(output_dir)
+ output_path.mkdir(parents=True, exist_ok=True)
+ filename = f"{section_id}.md"
+ file_path = output_path / filename
+ file_path.write_text(content, encoding="utf-8")
+ result["file_path"] = str(file_path)
+ self.logger.info(f"Saved section to: {file_path}")
- results.append(result)
+ if save_to_db:
+ # Save to database using our own instance
+ metadata = {
+ "section_id": section_id,
+ "template": str(self.template.path),
+ "category": section_def.get("category", ""),
+ }
+ # Use current instance with modified section name
+ original_section = self.section
+ self.section = section_id
+ try:
+ await self.save_to_database(content, metadata)
+ finally:
+ # Restore original section name
+ self.section = original_section
+
+ results.append(result)
return results
+ async def _generate_looped_sections(
+ self,
+ section_def: Dict[str, Any],
+ full_data: Dict[str, Any],
+ output_dir: str,
+ save_to_db: bool,
+ save_to_file: bool,
+ save_individually: bool,
+ ) -> List[Dict[str, Any]]:
+ """
+ Generate multiple sections by looping over items
+
+ Args:
+ section_def: Section definition with loop_over key
+ full_data: Complete collected data
+ output_dir: Output directory
+ save_to_db: Whether to save to database
+ save_to_file: Whether to save to file
+ save_individually: Whether to save each item individually
+
+ Returns:
+ List of results, one per looped item
+ """
+ results = []
+ section_id = section_def.get("id")
+ loop_over = section_def.get("loop_over")
+ loop_item_name = section_def.get("loop_item_name", "item")
+
+ # Get the data to loop over
+ data_section = full_data.get("data", {})
+ items = data_section.get(loop_over, [])
+
+ if not isinstance(items, list):
+ self.logger.warning(f"loop_over '{loop_over}' is not a list, skipping")
+ return []
+
+ total_items = len(items)
+ self.logger.info(f"Looping over {total_items} {loop_over} to generate individual sections")
+
+ # Generate one section per item
+ for idx, item in enumerate(items, 1):
+ try:
+ # Create item-specific section definition
+ item_section_def = section_def.copy()
+
+ # Get item identifier for filename
+ item_id = item.get("vmid") or item.get("id") or item.get("name") or f"item_{idx}"
+ item_name = item.get("name", f"{loop_over}_{item_id}")
+
+ # Build item-specific data context
+ item_data = self._build_item_context(section_def, full_data, item, loop_item_name)
+
+ # Build prompt with item context
+ prompt_template = item_section_def.get("prompt_template", "")
+ prompt = self._build_prompt(prompt_template, item_data)
+
+ # Get generation config
+ gen_config = self.template.generation_config
+ temperature = gen_config.get("temperature", 0.7)
+ max_tokens = gen_config.get("max_tokens", 4000)
+
+ # System prompt
+ system_prompt = """You are a technical documentation expert specializing in datacenter infrastructure.
+Generate clear, accurate, and well-structured documentation in Markdown format.
+
+Guidelines:
+- Use proper Markdown formatting (headers, tables, lists, code blocks)
+- Be precise and factual based on provided data
+- Include practical examples and recommendations
+- Use tables for structured data
+- Use bullet points for lists
+- Use code blocks for commands/configurations
+- Organize content with clear sections
+- Write in a professional but accessible tone
+"""
+
+ # Generate content
+ content = await self.generate_with_llm(
+ system_prompt=system_prompt,
+ user_prompt=prompt,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ # Create title with item name
+ # Title placeholders are derived from loop_item_name, e.g. {vm}, {vm_name} and {vm_id}
+ title_template = item_section_def.get("title", "")
+ title_context = {
+ loop_item_name: item_name,
+ f"{loop_item_name}_name": item_name,
+ f"{loop_item_name}_id": str(item_id),
+ }
+ section_title = title_template.format(**title_context)
+ section_content = f"# {section_title}\n\n{content}\n\n"
+
+ result = {
+ "section_id": f"{section_id}_{item_id}",
+ "title": section_title,
+ "success": True,
+ "content": section_content,
+ "item_index": idx,
+ "total_items": total_items,
+ }
+
+ # Save if requested
+ if save_individually:
+ if save_to_file:
+ output_path = Path(output_dir)
+ output_path.mkdir(parents=True, exist_ok=True)
+ # Use category subdirectory if specified
+ category = section_def.get("category", "")
+ if category:
+ output_path = output_path / category
+ output_path.mkdir(parents=True, exist_ok=True)
+
+ filename = f"{section_id}_{item_id}.md"
+ file_path = output_path / filename
+ file_path.write_text(section_content, encoding="utf-8")
+ result["file_path"] = str(file_path)
+ self.logger.info(f"[{idx}/{total_items}] Saved: {file_path}")
+
+ if save_to_db:
+ metadata = {
+ "section_id": f"{section_id}_{item_id}",
+ "template": str(self.template.path),
+ "category": section_def.get("category", ""),
+ "item_id": str(item_id),
+ "item_name": item_name,
+ }
+ original_section = self.section
+ self.section = f"{section_id}_{item_id}"
+ try:
+ await self.save_to_database(section_content, metadata)
+ finally:
+ self.section = original_section
+
+ results.append(result)
+ self.logger.info(f"✓ [{idx}/{total_items}] Generated: {section_title}")
+
+ except Exception as e:
+ self.logger.error(f"Failed to generate section for {loop_over} item {idx}: {e}")
+ results.append({
+ "section_id": f"{section_id}_item_{idx}",
+ "success": False,
+ "error": str(e),
+ "item_index": idx,
+ "total_items": total_items,
+ })
+
+ return results
+
+ def _build_item_context(
+ self,
+ section_def: Dict[str, Any],
+ full_data: Dict[str, Any],
+ item: Dict[str, Any],
+ loop_item_name: str,
+ ) -> Dict[str, Any]:
+ """
+ Build data context for a looped item
+
+ Args:
+ section_def: Section definition
+ full_data: Complete collected data
+ item: Current loop item
+ loop_item_name: Variable name for the item in prompt
+
+ Returns:
+ Dictionary with item and any additional required data
+ """
+ context = {loop_item_name: item}
+
+ # Add any additional required data from section definition
+ data_requirements = section_def.get("data_requirements", [])
+ data_section = full_data.get("data", {})
+
+ for req in data_requirements:
+ if req in data_section:
+ context[req] = data_section[req]
+
+ return context
+
async def example_usage() -> None:
"""Example of using template-based generator"""
diff --git a/src/datacenter_docs/utils/llm_client.py b/src/datacenter_docs/utils/llm_client.py
index 433ad7d..520a573 100644
--- a/src/datacenter_docs/utils/llm_client.py
+++ b/src/datacenter_docs/utils/llm_client.py
@@ -81,7 +81,8 @@ class LLMClient:
self.max_tokens = max_tokens or settings.LLM_MAX_TOKENS
# Initialize AsyncOpenAI client with custom HTTP client (disable SSL verification for self-signed certs)
- http_client = httpx.AsyncClient(verify=False, timeout=30.0)
+ # Increased timeout to 120s for documentation generation (large prompts)
+ http_client = httpx.AsyncClient(verify=False, timeout=120.0)
self.client = AsyncOpenAI(
base_url=self.base_url,
api_key=self.api_key,
@@ -129,6 +130,13 @@ class LLMClient:
# Type guard: we know it's ChatCompletion when stream=False
response = cast(ChatCompletion, response)
+ # Check for None response or empty choices
+ if response is None:
+ raise ValueError("LLM returned None response")
+
+ if not response.choices or len(response.choices) == 0:
+ raise ValueError("LLM returned empty choices")
+
# Extract text from first choice
message = response.choices[0].message
content = message.content or ""
diff --git a/src/datacenter_docs/workers/documentation_tasks.py b/src/datacenter_docs/workers/documentation_tasks.py
index d28243b..e46f099 100644
--- a/src/datacenter_docs/workers/documentation_tasks.py
+++ b/src/datacenter_docs/workers/documentation_tasks.py
@@ -94,6 +94,7 @@ async def _async_collect_and_generate(
Generation result
"""
from datacenter_docs.generators.template_generator import TemplateBasedGenerator
+ from datacenter_docs.chat.agent import DocumentationAgent
# Import appropriate collector
collector = await _get_collector(collector_name)
@@ -119,6 +120,23 @@ async def _async_collect_and_generate(
sections_generated = sum(1 for r in sections_results if r.get("success"))
sections_failed = sum(1 for r in sections_results if not r.get("success"))
+ # Index documentation into ChromaDB for RAG
+ logger.info("📚 Indexing generated documentation into ChromaDB...")
+ try:
+ # Initialize agent with vector store
+ agent = DocumentationAgent(vector_store_path="./data/chroma_db")
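+ # "./data/chroma_db" resolves to /app/data/chroma_db (assuming the worker's WORKDIR is /app),
+ # i.e. the chat-data volume that docker-compose.dev.yml now shares with the chat service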
+
+ # Index all documentation from output directory
+ output_dir = Path("output")
+ if output_dir.exists():
+ await agent.index_documentation(output_dir)
+ logger.info("✅ Documentation indexed successfully into ChromaDB")
+ else:
+ logger.warning("⚠️ Output directory not found, skipping indexing")
+ except Exception as e:
+ logger.error(f"❌ Failed to index documentation: {e}", exc_info=True)
+ # Don't fail the whole task if indexing fails
+
return {
"sections_generated": sections_generated,
"sections_failed": sections_failed,
@@ -155,8 +173,8 @@ async def _get_collector(collector_name: str) -> Any:
return collectors[collector_name]()
-@celery_app.task(name="generate_proxmox_docs")
-def generate_proxmox_docs() -> Dict[str, Any]:
+@celery_app.task(name="generate_proxmox_docs", bind=True)
+def generate_proxmox_docs(self) -> Dict[str, Any]:
"""
Scheduled task to generate Proxmox documentation
@@ -165,11 +183,66 @@ def generate_proxmox_docs() -> Dict[str, Any]:
Returns:
Task result
"""
- logger.info("Scheduled Proxmox documentation generation started")
+ import asyncio
+
+ task_id = self.request.id
+ logger.info(f"[{task_id}] Proxmox documentation generation started")
+
+ # Update task state
+ self.update_state(
+ state='PROGRESS',
+ meta={'current': 0, 'total': 6, 'status': 'Starting Proxmox documentation generation...'}
+ )
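+ # get_job_status() in the API surfaces this meta dict as "progress" while the task state is PROGRESS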
template_path = "templates/documentation/proxmox.yaml"
- return collect_and_generate_docs(collector_name="proxmox", template_path=template_path)
+ result = {
+ "task_id": task_id,
+ "collector": "proxmox",
+ "template": template_path,
+ "success": False,
+ "started_at": datetime.now().isoformat(),
+ "completed_at": None,
+ "error": None,
+ "sections_generated": 0,
+ "sections_failed": 0,
+ }
+
+ try:
+ # Run async collection and generation directly (don't call another Celery task)
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+
+ generation_result = loop.run_until_complete(
+ _async_collect_and_generate("proxmox", template_path)
+ )
+
+ loop.close()
+
+ # Update result
+ result.update(generation_result)
+ result["success"] = True
+ result["completed_at"] = datetime.now().isoformat()
+
+ logger.info(
+ f"[{task_id}] Proxmox documentation generation completed: "
+ f"{result['sections_generated']} sections generated, "
+ f"{result['sections_failed']} failed"
+ )
+
+ return result
+
+ except Exception as e:
+ result["error"] = str(e)
+ result["completed_at"] = datetime.now().isoformat()
+ logger.error(f"[{task_id}] Proxmox documentation generation failed: {e}", exc_info=True)
+
+ self.update_state(
+ state='FAILURE',
+ meta={'error': str(e), 'status': f'Failed: {str(e)}'}
+ )
+
+ raise
@celery_app.task(name="generate_all_docs")
diff --git a/templates/documentation/proxmox.yaml b/templates/documentation/proxmox.yaml
index 2ab580c..cc17301 100644
--- a/templates/documentation/proxmox.yaml
+++ b/templates/documentation/proxmox.yaml
@@ -40,59 +40,107 @@ sections:
Use tables, bullet points, and clear sections. Include actual values from the data.
- - id: "proxmox_vms"
- title: "Virtual Machines Inventory"
- category: "virtualization"
+ - id: "vm"
+ title: "VM: {vm_name}"
+ category: "vms"
priority: 2
- description: "Complete inventory of QEMU virtual machines"
+ description: "Individual VM documentation"
+ loop_over: "vms"
+ loop_item_name: "vm"
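+ # loop_over/loop_item_name are consumed by TemplateBasedGenerator._generate_looped_sections:
+ # one markdown file is generated per entry in the collected "vms" list, exposed to prompt_template as {vm}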
data_requirements:
- - "vms"
- "nodes"
prompt_template: |
- Generate detailed documentation for all virtual machines in the Proxmox cluster.
+ Generate detailed technical documentation for this virtual machine.
- **Virtual Machines:**
- {vms}
+ **VM Details:**
+ {vm}
- **Nodes:**
+ **Available Nodes:**
{nodes}
- Create documentation that includes:
- 1. VM inventory table (VMID, Name, Node, Status, vCPU, RAM, Disk)
- 2. VMs grouped by node
- 3. VMs grouped by status (running/stopped)
- 4. Resource allocation per VM
- 5. Naming conventions and patterns observed
- 6. Recommendations for VM placement and balancing
+ Create comprehensive documentation that includes:
- Use markdown tables and organize information clearly.
+ ## VM Configuration
+ - VMID and Name
+ - Current Status (running/stopped)
+ - Host Node location
- - id: "proxmox_containers"
- title: "LXC Containers Inventory"
- category: "virtualization"
+ ## Resource Allocation
+ - vCPU cores allocated
+ - RAM allocated (in GB)
+ - Disk space and storage location
+ - Network interfaces
+
+ ## Current State
+ - Uptime (if running)
+ - Resource usage (CPU%, RAM%)
+ - IP addresses (if available)
+
+ ## Management
+ - Configuration highlights
+ - Backup status and schedule (if available)
+ - Snapshot information (if available)
+ - High availability configuration (if applicable)
+
+ ## Recommendations
+ - Resource optimization suggestions
+ - Security considerations
+ - Best practices for this VM
+
+ **IMPORTANT**: Document ALL details available in the VM data. Do not skip any information.
+ Use markdown tables, bullet points, and clear sections.
+
+ - id: "container"
+ title: "Container: {container_name}"
+ category: "containers"
priority: 3
- description: "Complete inventory of LXC containers"
+ description: "Individual LXC container documentation"
+ loop_over: "containers"
+ loop_item_name: "container"
data_requirements:
- - "containers"
- "nodes"
prompt_template: |
- Generate detailed documentation for all LXC containers in the Proxmox cluster.
+ Generate detailed technical documentation for this LXC container.
- **Containers:**
- {containers}
+ **Container Details:**
+ {container}
- **Nodes:**
+ **Available Nodes:**
{nodes}
- Create documentation that includes:
- 1. Container inventory table (VMID, Name, Node, Status, vCPU, RAM, Disk)
- 2. Containers grouped by node
- 3. Containers grouped by status (running/stopped)
- 4. Resource allocation per container
- 5. Use cases and patterns for containers vs VMs
- 6. Recommendations for container management
+ Create comprehensive documentation that includes:
- Use markdown tables and clear organization.
+ ## Container Configuration
+ - VMID and Name
+ - Container Type (LXC)
+ - Current Status (running/stopped)
+ - Host Node location
+
+ ## Resource Allocation
+ - vCPU cores allocated
+ - RAM allocated (in GB)
+ - Disk space and storage location
+ - Network interfaces
+
+ ## Current State
+ - Uptime (if running)
+ - Resource usage (CPU%, RAM%)
+ - IP addresses (if available)
+ - Template used (if available)
+
+ ## Management
+ - Configuration highlights
+ - Backup status and schedule (if available)
+ - Snapshot information (if available)
+ - Privileges and security settings
+
+ ## Recommendations
+ - Resource optimization suggestions
+ - Security hardening
+ - Best practices for this container
+
+ **IMPORTANT**: Document ALL details available in the container data. Do not skip any information.
+ Use markdown tables, bullet points, and clear sections.
- id: "proxmox_storage"
title: "Storage Configuration"
@@ -203,8 +251,8 @@ sections:
# Generation settings
generation:
- max_tokens: 4000
- temperature: 0.7
+ max_tokens: 2000 # Reduced for small models
+ temperature: 0.5 # Lower for more focused responses
language: "en" # Default language, can be overridden
# Output configuration