# llm-automation-docs-and-rem…/deploy/helm/datacenter-docs/values-production.yaml

# Production values for datacenter-docs
# This is an example configuration for production deployment
# Copy this file and customize it for your environment

# Global values: default storage class and image pull policy
global:
  storageClass: 'standard'  # Replace with the storage class available in your cluster
  imagePullPolicy: Always

# MongoDB - production credentials, sizing and storage
mongodb:
  enabled: true
  auth:
    database: datacenter_docs
    rootUsername: admin
    rootPassword: 'CHANGE-THIS-IN-PRODUCTION'  # Placeholder: replace with a strong password
  resources:
    requests:
      cpu: 1000m
      memory: 2Gi
    limits:
      cpu: 2000m
      memory: 4Gi
  persistence:
    enabled: true
    storageClass: 'fast-ssd'  # Prefer an SSD-backed storage class for better performance
    size: 50Gi  # Size according to the expected data volume

# Redis - production resource requests and limits
redis:
  enabled: true
  resources:
    requests:
      cpu: 200m
      memory: 256Mi
    limits:
      cpu: 1000m
      memory: 1Gi

# API service - production scale
api:
  enabled: true
  image:
    repository: your-registry.io/datacenter-docs-api
    tag: 'v1.0.0'  # Pin a specific version; do not use "latest"
    pullPolicy: Always
  service:
    type: ClusterIP
    port: 8000
  replicaCount: 5
  # Autoscaling bounds and utilization targets (CPU and memory)
  autoscaling:
    enabled: true
    minReplicas: 5
    maxReplicas: 20
    targetCPUUtilizationPercentage: 70
    targetMemoryUtilizationPercentage: 80
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 2000m
      memory: 4Gi

# Chat service - enabled for production
chat:
  enabled: true
  image:
    repository: your-registry.io/datacenter-docs-chat
    tag: 'v1.0.0'
    pullPolicy: Always
  replicaCount: 3
  resources:
    requests:
      cpu: 250m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 2Gi

# Worker service - enabled for production
worker:
  enabled: true
  image:
    repository: your-registry.io/datacenter-docs-worker
    tag: 'v1.0.0'
    pullPolicy: Always
  # NOTE(review): replicaCount (5) differs from autoscaling.minReplicas (3);
  # confirm which value the chart applies while autoscaling is enabled.
  replicaCount: 5
  # Autoscaling bounds and CPU utilization target
  autoscaling:
    enabled: true
    minReplicas: 3
    maxReplicas: 20
    targetCPUUtilizationPercentage: 75
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 2000m
      memory: 4Gi

# Frontend - production scale
frontend:
  enabled: true
  image:
    repository: your-registry.io/datacenter-docs-frontend
    tag: 'v1.0.0'
    pullPolicy: Always
  replicaCount: 3
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 512Mi

# Ingress - production configuration (NGINX ingress class, TLS via cert-manager)
ingress:
  enabled: true
  className: "nginx"
  annotations:
    # Certificate issuer used for the TLS secret listed under `tls`
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # Redirect plain HTTP requests to HTTPS
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # Maximum accepted request body size
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    # Rate limiting: requests per second accepted from a single client IP.
    # ingress-nginx does not define a "rate-limit" annotation (an unknown
    # annotation is ignored), so only documented limit-* keys are used here.
    nginx.ingress.kubernetes.io/limit-rps: "50"
    # Optional additional limits (uncomment and tune as needed):
    # nginx.ingress.kubernetes.io/limit-rpm: "3000"
    # nginx.ingress.kubernetes.io/limit-connections: "100"
  hosts:
    - host: datacenter-docs.yourdomain.com
      paths:
        - path: /
          pathType: Prefix
          service: frontend
        - path: /api
          pathType: Prefix
          service: api
        - path: /ws
          pathType: Prefix
          service: chat
  tls:
    - secretName: datacenter-docs-tls
      hosts:
        - datacenter-docs.yourdomain.com

# Application configuration for production
config:
  # MongoDB connection string (change this when using an external MongoDB).
  # The embedded password placeholder is the same one used in
  # secrets.mongodbPassword and mongodb.auth.rootPassword; keep them in sync.
  # NOTE(review): the {{ include ... }} expression is only expanded if the
  # chart renders this value through `tpl` - confirm against the templates.
  mongodbUrl: 'mongodb://admin:CHANGE-THIS-IN-PRODUCTION@{{ include "datacenter-docs.mongodb.fullname" . }}:27017/datacenter_docs?authSource=admin'

  # Redis connection string (same templating caveat as mongodbUrl)
  redisUrl: 'redis://{{ include "datacenter-docs.redis.fullname" . }}:6379/0'

  # LLM provider settings
  llm:
    # OpenAI (active)
    baseUrl: 'https://api.openai.com/v1'
    model: 'gpt-4-turbo-preview'

    # Anthropic Claude (alternative)
    # baseUrl: "https://api.anthropic.com/v1"
    # model: "claude-3-opus-20240229"

    # Azure OpenAI (alternative)
    # baseUrl: "https://your-resource.openai.azure.com"
    # model: "gpt-4"

    temperature: 0.7
    maxTokens: 4096

  # MCP server endpoint and request timeout
  mcp:
    baseUrl: 'http://mcp-server:8080'
    timeout: 30

  # Auto-remediation settings
  autoRemediation:
    enabled: true
    dryRun: false  # Set to true for the initial deployment
    minReliabilityScore: 90.0  # Higher threshold for production
    requireApprovalThreshold: 95.0
    maxActionsPerHour: 50  # Conservative limit

  # Security
  apiKeyEnabled: true
  corsOrigins:
    - 'https://datacenter-docs.yourdomain.com'
    - 'https://admin.yourdomain.com'

  # Logging
  logFormat: 'json'
  logLevel: 'INFO'  # Switch to "DEBUG" when troubleshooting

# Secrets - every value below is a placeholder and MUST BE CHANGED IN PRODUCTION
secrets:
  # Key used to authenticate against the LLM provider
  llmApiKey: 'CHANGE-THIS-TO-YOUR-ACTUAL-API-KEY'

  # Secret key for API authentication
  apiSecretKey: 'CHANGE-THIS-TO-A-STRONG-RANDOM-KEY'

  # MongoDB credentials
  mongodbUsername: 'admin'
  mongodbPassword: 'CHANGE-THIS-IN-PRODUCTION'

# ServiceAccount
serviceAccount:
  create: true
  # Explicit empty map. A bare `annotations:` followed only by comments parses
  # as null, and a null in a Helm values file removes the chart's default for
  # that key instead of yielding an empty mapping.
  # To add cloud provider annotations, replace `{}` with a mapping, e.g.:
  #   annotations:
  #     eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT-ID:role/IAM-ROLE-NAME
  annotations: {}
  name: ""

# Pod-level security context: non-root UID 1000, fsGroup 1000, default seccomp
podSecurityContext:
  runAsNonRoot: true
  runAsUser: 1000
  fsGroup: 1000
  seccompProfile:
    type: RuntimeDefault

# Container-level security context: non-root, no privilege escalation,
# all Linux capabilities dropped; the root filesystem stays writable
securityContext:
  runAsNonRoot: true
  runAsUser: 1000
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: false
  capabilities:
    drop: [ALL]

# Node selector - schedule workloads only on nodes labelled workload-type=application
nodeSelector:
  workload-type: 'application'
  # kubernetes.io/arch: amd64

# Tolerations - permit scheduling onto nodes tainted workload-type=application:NoSchedule
tolerations:
  - key: 'workload-type'
    value: 'application'
    operator: Equal
    effect: NoSchedule

# Affinity rules - soft (preferred) anti-affinity to spread pods across nodes and zones
affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      # Prefer not to share a node with other datacenter-docs pods
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/name
                operator: In
                values: [datacenter-docs]
      # Prefer not to share a zone with pods of the api component
      - weight: 100
        podAffinityTerm:
          topologyKey: topology.kubernetes.io/zone
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/component
                operator: In
                values: [api]

# Priority class - lets critical pods be scheduled ahead of lower-priority ones
priorityClassName: 'high-priority'

# Additional production recommendations:
#
# 1. Use external secret management:
#    - HashiCorp Vault
#    - AWS Secrets Manager
#    - Azure Key Vault
#    - Google Secret Manager
#
# 2. Enable monitoring:
#    - Prometheus metrics
#    - Grafana dashboards
#    - AlertManager alerts
#
# 3. Enable logging:
#    - ELK Stack
#    - Loki
#    - CloudWatch
#
# 4. Enable tracing:
#    - Jaeger
#    - OpenTelemetry
#
# 5. Backup strategy:
#    - MongoDB backups (Velero, native tools)
#    - Disaster recovery plan
#
# 6. Network policies:
#    - Restrict pod-to-pod communication
#    - Isolate database access
#
# 7. Pod disruption budgets:
#    - Ensure high availability during updates
#
# 8. Regular security scans:
#    - Container image scanning
#    - Dependency vulnerability scanning