Updated network
This commit is contained in:
155
.gitea/workflows/nomad-job-complete.hcl.tmpl
Normal file
155
.gitea/workflows/nomad-job-complete.hcl.tmpl
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# Nomad service job template for a single Docker task behind Consul/Traefik.
# Replace [[PROJECT_NAME]] and [[PORT]] with real values before submitting.
job "[[PROJECT_NAME]]" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  # Rolling-update policy. Service jobs are rolled out as deployments, so
  # pacing comes from the health settings below (the legacy `stagger` option
  # only affects system jobs and is therefore omitted).
  update {
    max_parallel = 1

    # Nomad's defaults, spelled out: an allocation must stay healthy for
    # min_healthy_time and must become healthy within healthy_deadline.
    min_healthy_time = "10s"
    healthy_deadline = "5m"

    # Must be longer than healthy_deadline; the deployment fails if no
    # allocation becomes healthy within this window.
    progress_deadline = "6m"

    # Roll back to the last stable job version when a deployment fails.
    auto_revert = true
  }

  group "[[PROJECT_NAME]]-group" {
    count = 1

    network {
      port "http" {
        to = [[PORT]] # Internal application port
      }
    }

    # Host volume for persistent data (optional)
    # Uncomment if your app needs persistent storage.
    # The source must match a host_volume declared on the Nomad client.
    # volume "data" {
    #   type      = "host"
    #   source    = "[[PROJECT_NAME]]-data"
    #   read_only = false
    # }

    # Register the service with Consul
    service {
      provider = "consul"
      name     = "[[PROJECT_NAME]]"
      port     = "http"

      # Traefik-specific tags for routing
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.[[PROJECT_NAME]].rule=Host(`[[PROJECT_NAME]].i80.dk`)",
        "traefik.http.routers.[[PROJECT_NAME]].tls=true",
        "PORT=${NOMAD_PORT_http}"
      ]

      # HTTP health check - CRITICAL!
      # Your app MUST implement /health endpoint
      check {
        name     = "http_health"
        type     = "http"
        path     = "/health"
        interval = "10s"
        timeout  = "2s"

        # Important: reference the named port, never a hardcoded number
        port = "http"

        # Restart the task after `limit` consecutive failed checks.
        # `grace` does not delay the first check; it is the window after a
        # task (re)start during which failing checks are not counted.
        check_restart {
          limit           = 3
          grace           = "10s"
          ignore_warnings = false
        }
      }

      # Backup TCP check (if HTTP health check fails during startup)
      check {
        name     = "tcp_alive"
        type     = "tcp"
        interval = "30s"
        timeout  = "2s"
        port     = "http"
      }
    }

    task "[[PROJECT_NAME]]-task" {
      driver = "docker"

      config {
        image = "registry.i80.dk/gitea/[[PROJECT_NAME]]:latest"
        ports = ["http"]

        # Force pull latest image on each deployment
        force_pull = true

        # Optional: Docker-level bind mount.
        # NOTE: this is a driver-level mount and is separate from the Nomad
        # host volume declared in the group; that one is attached with the
        # volume_mount block below. Do not enable both for /app/data.
        # volumes = [
        #   "data:/app/data"
        # ]
      }

      # Mount the group-level host volume (if declared above)
      # volume_mount {
      #   volume      = "data"
      #   destination = "/app/data"
      #   read_only   = false
      # }

      # Environment variables
      env {
        APP_ENV = "production"
        PORT    = "${NOMAD_PORT_http}"

        # Workaround for Vault being down:
        # Set secrets as plain environment variables
        # TODO: Move to Vault when available
        # DATABASE_URL = "sqlite:///app/data/app.db"
        # API_KEY = "your-api-key-here"  # Replace with actual value
      }

      # Secrets from Vault (when Vault is working)
      # Uncomment when Vault is available
      # template {
      #   data = <<EOH
      # {{ with secret "secret/data/[[PROJECT_NAME]]" }}
      # DATABASE_URL="{{ .Data.data.database_url }}"
      # API_KEY="{{ .Data.data.api_key }}"
      # {{ end }}
      # EOH
      #   destination = "secrets/config.env"
      #   env         = true
      # }

      # Logs configuration
      logs {
        max_files     = 5
        max_file_size = 10 # MB
      }

      resources {
        cpu    = 250 # MHz
        memory = 128 # MB

        # Optional: Memory oversubscription
        # memory_max = 256
      }

      # Kill timeout - give app time to gracefully shutdown
      kill_timeout = "30s"
    }

    # Restart policy: up to 3 restarts within 5 minutes, 15s apart;
    # after that the allocation is marked failed instead of retrying.
    restart {
      attempts = 3
      interval = "5m"
      delay    = "15s"
      mode     = "fail"
    }

    # Ephemeral disk (for temporary files)
    ephemeral_disk {
      size    = 300 # MB
      migrate = false
    }
  }
}
|
||||||
312
DEPLOYMENT_CHECKLIST.md
Normal file
312
DEPLOYMENT_CHECKLIST.md
Normal file
@@ -0,0 +1,312 @@
|
|||||||
|
# Deployment Checklist
|
||||||
|
|
||||||
|
Use this checklist when deploying a new service to ensure you don't miss critical steps.
|
||||||
|
|
||||||
|
## Pre-Deployment
|
||||||
|
|
||||||
|
### Application Requirements
|
||||||
|
|
||||||
|
- [ ] **Health endpoint implemented** - `/health` returns 200 OK
|
||||||
|
- Returns JSON with status
|
||||||
|
- Responds quickly (<500ms)
|
||||||
|
- Doesn't block on external services
|
||||||
|
|
||||||
|
- [ ] **Port configuration** - App reads `PORT` from environment
|
||||||
|
```python
|
||||||
|
PORT = int(os.getenv('PORT', 5000))
|
||||||
|
app.run(host='0.0.0.0', port=PORT)
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Graceful shutdown** - App handles SIGTERM signal
|
||||||
|
- Closes connections cleanly
|
||||||
|
- Finishes current requests
|
||||||
|
- Exits within 30 seconds
|
||||||
|
|
||||||
|
- [ ] **Logging configured** - Uses stdout/stderr
|
||||||
|
- Structured logging (JSON preferred)
|
||||||
|
- Includes timestamps
|
||||||
|
- No log files (Nomad captures stdout)
|
||||||
|
|
||||||
|
### Docker Image
|
||||||
|
|
||||||
|
- [ ] **Dockerfile complete** - Based on `Dockerfile.complete`
|
||||||
|
- Multi-stage build (smaller image)
|
||||||
|
- Non-root user (uid 1000)
|
||||||
|
- Health check defined
|
||||||
|
- Minimal base image
|
||||||
|
|
||||||
|
- [ ] **Image tested locally**
|
||||||
|
```bash
|
||||||
|
docker build -t myapp:test .
|
||||||
|
docker run -p 5000:5000 -e PORT=5000 myapp:test
|
||||||
|
curl http://localhost:5000/health
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Image pushed to registry**
|
||||||
|
```bash
|
||||||
|
docker tag myapp:test registry.i80.dk/gitea/myapp:latest
|
||||||
|
docker push registry.i80.dk/gitea/myapp:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### Nomad Job Configuration
|
||||||
|
|
||||||
|
- [ ] **Job file created** - Copy from `nomad-job-complete.hcl.tmpl`
|
||||||
|
- Replace `[[PROJECT_NAME]]` with actual name
|
||||||
|
- Replace `[[PORT]]` with app port (usually 5000)
|
||||||
|
- Update resource limits (CPU/memory)
|
||||||
|
|
||||||
|
- [ ] **Health check configured** - Uses named port, not hardcoded
|
||||||
|
```hcl
|
||||||
|
check {
|
||||||
|
port = "http" # NOT "5000"!
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Traefik tags correct** - Domain matches expected URL
|
||||||
|
```hcl
|
||||||
|
"traefik.http.routers.myapp.rule=Host(`myapp.i80.dk`)"
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Volumes declared** (if needed)
|
||||||
|
- Volume source matches Autobox config
|
||||||
|
- Mount path correct
|
||||||
|
- Permissions considered
|
||||||
|
|
||||||
|
- [ ] **Secrets configured** - Using chosen workaround method
|
||||||
|
- Environment variables OR
|
||||||
|
- File-based secrets OR
|
||||||
|
- Consul KV
|
||||||
|
|
||||||
|
- [ ] **Job validates** - No syntax errors
|
||||||
|
```bash
|
||||||
|
nomad job validate nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
### Autobox Configuration
|
||||||
|
|
||||||
|
- [ ] **Volumes created** (if needed)
|
||||||
|
```bash
|
||||||
|
# Run on Autobox
|
||||||
|
sudo ./setup-nomad-volumes.sh myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Volumes registered on the Nomad client**
|
||||||
|
```bash
|
||||||
|
nomad node status -self -verbose | grep myapp-data
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Secrets file created** (if using file-based secrets)
|
||||||
|
```bash
|
||||||
|
sudo vim /opt/nomad-secrets/myapp/secrets.env
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Permissions correct**
|
||||||
|
```bash
|
||||||
|
ls -lan /opt/nomad-volumes/myapp-data # Numeric owner/group should be 1000 1000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Gitea CI/CD (if using)
|
||||||
|
|
||||||
|
- [ ] **Workflow file created** - Copy from `main.yml.tmpl`
|
||||||
|
- Replace `[[PROJECT_NAME]]` everywhere
|
||||||
|
- Registry credentials configured
|
||||||
|
|
||||||
|
- [ ] **Secrets configured** - In Gitea repository settings
|
||||||
|
- `secrets.username` - Registry username
|
||||||
|
- `secrets.password` - Registry password
|
||||||
|
|
||||||
|
- [ ] **Self-hosted runner** - Has necessary access
|
||||||
|
- Docker installed
|
||||||
|
- Nomad CLI installed
|
||||||
|
- SSH access to Nomad server
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
### Initial Deployment
|
||||||
|
|
||||||
|
- [ ] **Job submitted**
|
||||||
|
```bash
|
||||||
|
nomad job run nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Allocation running**
|
||||||
|
```bash
|
||||||
|
nomad job status myapp
|
||||||
|
# Should show: Running = 1
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **No errors in logs**
|
||||||
|
```bash
|
||||||
|
nomad alloc logs -f <alloc-id> myapp-task
|
||||||
|
```
|
||||||
|
|
||||||
|
### Consul Registration
|
||||||
|
|
||||||
|
- [ ] **Service registered**
|
||||||
|
```bash
|
||||||
|
consul catalog nodes -service=myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Service healthy**
|
||||||
|
```bash
|
||||||
|
curl -s http://127.0.0.1:8500/v1/health/checks/myapp
|
||||||
|
# Look for: "Name": "http_health" with "Status": "passing"
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Tags correct**
|
||||||
|
```bash
|
||||||
|
consul catalog services -tags | grep myapp
|
||||||
|
# Verify traefik tags present
|
||||||
|
```
|
||||||
|
|
||||||
|
### DNS & Access
|
||||||
|
|
||||||
|
- [ ] **DNS record created** - Check consul-template output
|
||||||
|
```bash
|
||||||
|
cat /certs/consul/trinity_powerdns_records.txt | grep myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Nginx config generated**
|
||||||
|
```bash
|
||||||
|
grep myapp /certs/consul-nginx/conf.d/services.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Nginx reloaded** - Check watcher logs
|
||||||
|
```bash
|
||||||
|
tail -f /var/log/nginx_restater.log
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Service accessible** - Test public URL
|
||||||
|
```bash
|
||||||
|
curl https://myapp.i80.dk
|
||||||
|
curl https://myapp.i80.dk/health
|
||||||
|
```
|
||||||
|
|
||||||
|
## Post-Deployment
|
||||||
|
|
||||||
|
### Verification
|
||||||
|
|
||||||
|
- [ ] **Health check passing** - For at least 5 minutes
|
||||||
|
```bash
|
||||||
|
watch -n 5 'curl -s http://127.0.0.1:8500/v1/health/checks/myapp'
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **No restarts** - Allocation stable
|
||||||
|
```bash
|
||||||
|
nomad alloc status <alloc-id>
|
||||||
|
# Check "Recent Events" - no restarts
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Logs clean** - No errors or warnings
|
||||||
|
```bash
|
||||||
|
nomad alloc logs -f <alloc-id> myapp-task
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Performance acceptable**
|
||||||
|
- Response time < 1s
|
||||||
|
- Memory usage stable
|
||||||
|
- CPU usage reasonable
|
||||||
|
|
||||||
|
### Monitoring
|
||||||
|
|
||||||
|
- [ ] **Metrics accessible** - If implemented
|
||||||
|
```bash
|
||||||
|
curl https://myapp.i80.dk/metrics
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Logs searchable** - Can find application logs
|
||||||
|
```bash
|
||||||
|
nomad alloc logs -f <alloc-id> myapp-task | grep ERROR
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Alerts configured** - If using monitoring system
|
||||||
|
- Health check failures
|
||||||
|
- High error rate
|
||||||
|
- High memory usage
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
- [ ] **Service documented** - In team wiki/docs
|
||||||
|
- What it does
|
||||||
|
- Where it's deployed
|
||||||
|
- How to access it
|
||||||
|
- Known issues
|
||||||
|
|
||||||
|
- [ ] **Runbook created** - For operational issues
|
||||||
|
- How to restart
|
||||||
|
- How to check logs
|
||||||
|
- Common troubleshooting steps
|
||||||
|
- Escalation path
|
||||||
|
|
||||||
|
- [ ] **Secrets documented** - Where they're stored
|
||||||
|
- Which Consul KV keys
|
||||||
|
- Which files on Autobox
|
||||||
|
- Who has access
|
||||||
|
|
||||||
|
## Rollback Plan
|
||||||
|
|
||||||
|
- [ ] **Previous version tagged** - In case of issues
|
||||||
|
```bash
|
||||||
|
docker tag registry.i80.dk/gitea/myapp:latest registry.i80.dk/gitea/myapp:stable && docker push registry.i80.dk/gitea/myapp:stable
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Rollback tested** - Know how to revert
|
||||||
|
```bash
|
||||||
|
# Update job file to use :stable tag
|
||||||
|
# nomad job run nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Data backup** - Before first deployment
|
||||||
|
```bash
|
||||||
|
# If using volumes
|
||||||
|
sudo tar -czf /backup/myapp-data.tar.gz /opt/nomad-volumes/myapp-data
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Issues Checklist
|
||||||
|
|
||||||
|
If deployment fails, check:
|
||||||
|
|
||||||
|
- [ ] Is `/health` endpoint implemented and returning 200?
|
||||||
|
- [ ] Is app binding to `0.0.0.0` (not `127.0.0.1`)?
|
||||||
|
- [ ] Is app reading `PORT` from environment variable?
|
||||||
|
- [ ] Are health check port references correct (no hardcoded ports)?
|
||||||
|
- [ ] Do volume paths match between Autobox and Nomad job?
|
||||||
|
- [ ] Are volume permissions correct (uid 1000)?
|
||||||
|
- [ ] Are secrets accessible (environment or files)?
|
||||||
|
- [ ] Is Docker image pulling successfully?
|
||||||
|
- [ ] Is allocation getting scheduled (not pending)?
|
||||||
|
- [ ] Are there port conflicts?
|
||||||
|
|
||||||
|
## Quick Debugging Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Service status
|
||||||
|
consul catalog nodes -service=myapp
|
||||||
|
nomad job status myapp
|
||||||
|
|
||||||
|
# Allocation details
|
||||||
|
ALLOC_ID=$(nomad job status myapp | awk '$5 == "run" && $6 == "running" {print $1; exit}')
|
||||||
|
nomad alloc status $ALLOC_ID
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
nomad alloc logs -f $ALLOC_ID myapp-task
|
||||||
|
nomad alloc logs -stderr -f $ALLOC_ID myapp-task
|
||||||
|
|
||||||
|
# Exec into container
|
||||||
|
nomad alloc exec -i -t $ALLOC_ID /bin/sh
|
||||||
|
|
||||||
|
# Health check test
|
||||||
|
PORT=$(nomad alloc status $ALLOC_ID | grep "Port.*http" | awk '{print $3}' | cut -d':' -f2)
|
||||||
|
curl http://192.168.15.124:$PORT/health
|
||||||
|
|
||||||
|
# Restart
|
||||||
|
nomad job restart myapp
|
||||||
|
|
||||||
|
# Force reschedule
|
||||||
|
nomad job stop -purge myapp
|
||||||
|
nomad job run nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Print this checklist and use it for every deployment until the process becomes second nature!**
|
||||||
54
Dockerfile.complete
Normal file
54
Dockerfile.complete
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# Multi-stage build for smaller image size:
# dependencies are installed in a throwaway builder stage and only the
# installed packages are copied into the final runtime image.
FROM python:3.11-slim AS builder

WORKDIR /app

# Install build dependencies (gcc is only needed when a requirement has to
# compile native code; it never reaches the final image)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for layer caching
COPY requirements.txt .

# Install Python dependencies into /root/.local (--user) so the whole tree
# can be copied to the final stage in one step
RUN pip install --no-cache-dir --user -r requirements.txt

# Final stage
FROM python:3.11-slim

# Create non-root user for security (uid 1000 matches the host volume owner)
RUN useradd -m -u 1000 appuser

WORKDIR /app

# Copy Python dependencies from builder, owned by the runtime user
COPY --from=builder --chown=appuser:appuser /root/.local /home/appuser/.local

# Copy application code
COPY --chown=appuser:appuser . .

# Make sure scripts are executable (if you have any)
# RUN chmod +x entrypoint.sh

# Switch to non-root user
USER appuser

# Add user's local bin to PATH
ENV PATH=/home/appuser/.local/bin:$PATH

# Environment variables
ENV FLASK_APP=app.py
ENV FLASK_RUN_HOST=0.0.0.0
# Default port; the Nomad job overrides PORT at runtime
ENV PORT=5000
ENV PYTHONUNBUFFERED=1

# Health check - Docker level (optional, Nomad will also check).
# Shell form, so ${PORT} is expanded when the check runs, not at build time.
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/health')" || exit 1

# Expose port (documentation only); follows the PORT default above
EXPOSE ${PORT}

# Command to run the application.
# `exec` replaces the shell so Flask runs as PID 1 and receives SIGTERM
# directly, which graceful shutdown depends on.
# NOTE(review): `flask run` is Flask's development server; consider a
# production WSGI server if one is available in requirements.txt.
CMD ["sh", "-c", "exec flask run --port ${PORT}"]
|
||||||
732
NOMAD_DEPLOYMENT_GUIDE.md
Normal file
732
NOMAD_DEPLOYMENT_GUIDE.md
Normal file
@@ -0,0 +1,732 @@
|
|||||||
|
# Nomad Deployment Guide for i80.dk Infrastructure
|
||||||
|
|
||||||
|
**Last Updated:** 2025-11-28
|
||||||
|
|
||||||
|
This guide covers deploying Python applications to your Nomad cluster with proper health checks, volumes, and Vault workarounds.
|
||||||
|
|
||||||
|
## 📋 Table of Contents
|
||||||
|
|
||||||
|
- [Quick Start](#quick-start)
|
||||||
|
- [Health Checks - The #1 Pain Point](#health-checks---the-1-pain-point)
|
||||||
|
- [Host Volumes - The #2 Pain Point](#host-volumes---the-2-pain-point)
|
||||||
|
- [Vault Workarounds](#vault-workarounds)
|
||||||
|
- [Complete Nomad Job Example](#complete-nomad-job-example)
|
||||||
|
- [Dockerfile Best Practices](#dockerfile-best-practices)
|
||||||
|
- [Gitea CI/CD Workflow](#gitea-cicd-workflow)
|
||||||
|
- [Troubleshooting](#troubleshooting)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Add Health Endpoint to Your App
|
||||||
|
|
||||||
|
**CRITICAL:** Your app MUST respond to `/health` with HTTP 200 OK.
|
||||||
|
|
||||||
|
```python
|
||||||
|
@app.route('/health')
|
||||||
|
def health():
|
||||||
|
return jsonify({'status': 'healthy'}), 200
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Use Complete Nomad Job Template
|
||||||
|
|
||||||
|
Copy `.gitea/workflows/nomad-job-complete.hcl.tmpl` to your project and customize:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .gitea/workflows/nomad-job-complete.hcl.tmpl .gitea/workflows/nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace `[[PROJECT_NAME]]` and `[[PORT]]` with your values.
|
||||||
|
|
||||||
|
### 3. Build and Deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build Docker image
|
||||||
|
docker build -t registry.i80.dk/gitea/myapp:latest .
|
||||||
|
|
||||||
|
# Push to registry
|
||||||
|
docker push registry.i80.dk/gitea/myapp:latest
|
||||||
|
|
||||||
|
# Deploy to Nomad
|
||||||
|
nomad job run .gitea/workflows/nomad-job.hcl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Health Checks - The #1 Pain Point
|
||||||
|
|
||||||
|
### Why Health Checks Fail
|
||||||
|
|
||||||
|
**Common mistakes:**
|
||||||
|
|
||||||
|
1. ❌ **No /health endpoint** - App doesn't implement health endpoint
|
||||||
|
2. ❌ **Wrong port** - Health check uses wrong port variable
|
||||||
|
3. ❌ **App not ready** - Health check runs before app starts
|
||||||
|
4. ❌ **Blocking endpoint** - /health takes too long to respond
|
||||||
|
5. ❌ **Wrong HTTP method** - App expects POST, Consul sends GET
|
||||||
|
|
||||||
|
### Proper Health Check Implementation
|
||||||
|
|
||||||
|
**In your Flask app:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import time
|
||||||
|
|
||||||
|
app_start_time = time.time()
|
||||||
|
|
||||||
|
@app.route('/health')
|
||||||
|
def health():
|
||||||
|
"""
|
||||||
|
Health check endpoint for Consul/Nomad.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
200 OK: Service is healthy
|
||||||
|
503: Service is not ready or shutting down
|
||||||
|
"""
|
||||||
|
# Give app time to initialize (optional)
|
||||||
|
if time.time() - app_start_time < 5:
|
||||||
|
return jsonify({'status': 'starting'}), 503
|
||||||
|
|
||||||
|
# Add your health checks
|
||||||
|
try:
|
||||||
|
# Check database connection
|
||||||
|
# db.execute("SELECT 1")
|
||||||
|
|
||||||
|
# Check external dependencies
|
||||||
|
# api_client.ping()
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
'status': 'healthy',
|
||||||
|
'uptime': time.time() - app_start_time
|
||||||
|
}), 200
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return jsonify({
|
||||||
|
'status': 'unhealthy',
|
||||||
|
'error': str(e)
|
||||||
|
}), 503
|
||||||
|
```
|
||||||
|
|
||||||
|
**In your Nomad job:**
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
service {
|
||||||
|
name = "myapp"
|
||||||
|
port = "http"
|
||||||
|
|
||||||
|
check {
|
||||||
|
name = "http_health"
|
||||||
|
type = "http"
|
||||||
|
path = "/health"
|
||||||
|
interval = "10s"
|
||||||
|
timeout = "2s"
|
||||||
|
port = "http" # Use named port, NOT hardcoded!
|
||||||
|
|
||||||
|
# Restart after 3 failed checks; failures within `grace` of a task (re)start are not counted
|
||||||
|
check_restart {
|
||||||
|
limit = 3
|
||||||
|
grace = "10s"
|
||||||
|
ignore_warnings = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing Health Checks Locally
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start your app
|
||||||
|
python app.py
|
||||||
|
|
||||||
|
# Test health endpoint
|
||||||
|
curl http://localhost:5000/health
|
||||||
|
|
||||||
|
# Should return:
|
||||||
|
# {"status": "healthy", "uptime": 123.45}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Health Check Issues
|
||||||
|
|
||||||
|
**Issue: Service marked unhealthy immediately**
|
||||||
|
|
||||||
|
**Solution:** Add a `check_restart` grace period so that check failures during startup do not count toward a restart:
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
check_restart {
|
||||||
|
limit = 3
|
||||||
|
grace = "10s" # Ignore failing checks for 10s after the task (re)starts
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Issue: Health check timeout**
|
||||||
|
|
||||||
|
**Symptoms:**
|
||||||
|
```
|
||||||
|
Health check timed out (timeout: 2s)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solutions:**
|
||||||
|
- Make /health endpoint faster
|
||||||
|
- Increase timeout: `timeout = "5s"`
|
||||||
|
- Remove slow operations from health check
|
||||||
|
|
||||||
|
**Issue: Wrong port**
|
||||||
|
|
||||||
|
**Symptoms:**
|
||||||
|
```
|
||||||
|
Connection refused on port 5000
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution:** Use dynamic port in Nomad job:
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
# ❌ WRONG - hardcoded port
|
||||||
|
check {
|
||||||
|
port = "5000"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ✅ CORRECT - use named port
|
||||||
|
check {
|
||||||
|
port = "http"
|
||||||
|
}
|
||||||
|
|
||||||
|
# And in your app environment:
|
||||||
|
env {
|
||||||
|
PORT = "${NOMAD_PORT_http}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Host Volumes - The #2 Pain Point
|
||||||
|
|
||||||
|
### Why Host Volumes Fail
|
||||||
|
|
||||||
|
**Common mistakes:**
|
||||||
|
|
||||||
|
1. ❌ **Volume not declared on Nomad client** - Must configure on Autobox first!
|
||||||
|
2. ❌ **Wrong source name** - Source must match client config
|
||||||
|
3. ❌ **Permission issues** - Volume owned by root, app runs as user
|
||||||
|
4. ❌ **Mount path conflicts** - Path already exists in container
|
||||||
|
|
||||||
|
### Setting Up Host Volumes
|
||||||
|
|
||||||
|
**Step 1: Configure on Nomad Client (Autobox)**
|
||||||
|
|
||||||
|
**File:** `/etc/nomad.d/client.hcl` on Autobox
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
client {
|
||||||
|
enabled = true
|
||||||
|
|
||||||
|
host_volume "myapp-data" {
|
||||||
|
path = "/opt/nomad-volumes/myapp-data"
|
||||||
|
read_only = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Create directory:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Autobox
|
||||||
|
sudo mkdir -p /opt/nomad-volumes/myapp-data
|
||||||
|
sudo chown 1000:1000 /opt/nomad-volumes/myapp-data # Match container user
|
||||||
|
sudo chmod 755 /opt/nomad-volumes/myapp-data
|
||||||
|
```
|
||||||
|
|
||||||
|
**Restart Nomad client:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart nomad
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 2: Use Volume in Nomad Job**
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
group "myapp-group" {
|
||||||
|
volume "data" {
|
||||||
|
type = "host"
|
||||||
|
source = "myapp-data" # Must match name in client.hcl
|
||||||
|
read_only = false
|
||||||
|
}
|
||||||
|
|
||||||
|
task "myapp-task" {
|
||||||
|
volume_mount {
|
||||||
|
volume = "data"
|
||||||
|
destination = "/app/data"
|
||||||
|
read_only = false
|
||||||
|
}
|
||||||
|
|
||||||
|
config {
|
||||||
|
image = "registry.i80.dk/gitea/myapp:latest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 3: Use in Your App**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Data directory from mounted volume
|
||||||
|
DATA_DIR = os.getenv('DATA_DIR', '/app/data')
|
||||||
|
|
||||||
|
# SQLite database in persistent volume
|
||||||
|
db_path = os.path.join(DATA_DIR, 'app.db')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Volume Permissions
|
||||||
|
|
||||||
|
**Best Practice: Run container as non-root user**
|
||||||
|
|
||||||
|
**In Dockerfile:**
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Create non-root user
|
||||||
|
RUN useradd -m -u 1000 appuser
|
||||||
|
|
||||||
|
# Switch to user
|
||||||
|
USER appuser
|
||||||
|
```
|
||||||
|
|
||||||
|
**On Autobox:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Set ownership to match container user (uid 1000)
|
||||||
|
sudo chown -R 1000:1000 /opt/nomad-volumes/myapp-data
|
||||||
|
```
|
||||||
|
|
||||||
|
### Checking Volume Mounts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Nomad - check allocation
|
||||||
|
nomad alloc status <alloc-id>
|
||||||
|
|
||||||
|
# Look for volume mounts section:
|
||||||
|
# Mounted Volumes:
|
||||||
|
# data -> /opt/nomad-volumes/myapp-data
|
||||||
|
|
||||||
|
# SSH to Autobox and verify
|
||||||
|
ls -la /opt/nomad-volumes/myapp-data
|
||||||
|
```
|
||||||
|
|
||||||
|
### Volume Backup
|
||||||
|
|
||||||
|
**Simple backup script:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# backup-volumes.sh
|
||||||
|
|
||||||
|
VOLUME_PATH="/opt/nomad-volumes/myapp-data"
|
||||||
|
BACKUP_PATH="/backup/$(date +%Y%m%d)"
|
||||||
|
|
||||||
|
mkdir -p "$BACKUP_PATH"
|
||||||
|
tar -czf "$BACKUP_PATH/myapp-data.tar.gz" "$VOLUME_PATH"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Vault Workarounds
|
||||||
|
|
||||||
|
### Problem
|
||||||
|
|
||||||
|
Vault is currently not working, so proper secret management is unavailable for now.
|
||||||
|
|
||||||
|
### Temporary Solutions
|
||||||
|
|
||||||
|
**Option 1: Environment Variables in Nomad Job (NOT RECOMMENDED)**
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
env {
|
||||||
|
APP_ENV = "production"
|
||||||
|
PORT = "${NOMAD_PORT_http}"
|
||||||
|
DATABASE_URL = "sqlite:///app/data/app.db"
|
||||||
|
API_KEY = "your-secret-key-here" # BAD: Secret in plain text!
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Simple
|
||||||
|
- Works immediately
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- ❌ Secrets visible in Nomad UI
|
||||||
|
- ❌ Secrets in version control (if committed)
|
||||||
|
- ❌ Hard to rotate secrets
|
||||||
|
|
||||||
|
**Option 2: File-Based Secrets (BETTER)**
|
||||||
|
|
||||||
|
**Store secrets in file on Autobox:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Autobox
|
||||||
|
sudo mkdir -p /opt/nomad-secrets/myapp
|
||||||
|
sudo vim /opt/nomad-secrets/myapp/secrets.env
|
||||||
|
|
||||||
|
# Content:
|
||||||
|
# API_KEY=your-secret-key
|
||||||
|
# DB_PASSWORD=your-db-password
|
||||||
|
|
||||||
|
sudo chown 1000:1000 /opt/nomad-secrets/myapp/secrets.env
|
||||||
|
sudo chmod 600 /opt/nomad-secrets/myapp/secrets.env
|
||||||
|
```
|
||||||
|
|
||||||
|
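**Mount as host volume** (first declare `host_volume "myapp-secrets"` with `path = "/opt/nomad-secrets/myapp"` in `/etc/nomad.d/client.hcl` on Autobox and restart Nomad):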
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
group "myapp-group" {
|
||||||
|
volume "secrets" {
|
||||||
|
type = "host"
|
||||||
|
source = "myapp-secrets"
|
||||||
|
read_only = true # Read-only for security
|
||||||
|
}
|
||||||
|
|
||||||
|
task "myapp-task" {
|
||||||
|
volume_mount {
|
||||||
|
volume = "secrets"
|
||||||
|
destination = "/app/secrets"
|
||||||
|
read_only = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Read secrets file at startup
|
||||||
|
config {
|
||||||
|
command = "sh"
|
||||||
|
args = ["-c", "set -a && . /app/secrets/secrets.env && set +a && exec flask run --port $PORT"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- ✅ Secrets not in Nomad job file
|
||||||
|
- ✅ Can be backed up separately
|
||||||
|
- ✅ Easier to rotate
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- ⚠️ Still manual management
|
||||||
|
- ⚠️ Need to manage file permissions
|
||||||
|
|
||||||
|
**Option 3: Consul KV Store (RECOMMENDED TEMPORARY)**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Store secret in Consul
|
||||||
|
consul kv put secret/myapp/api_key "your-secret-key"
|
||||||
|
```
|
||||||
|
|
||||||
|
**In Nomad job template:**
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
task "myapp-task" {
|
||||||
|
template {
|
||||||
|
data = <<EOH
|
||||||
|
{{ with key "secret/myapp/api_key" }}
|
||||||
|
API_KEY="{{ . }}"
|
||||||
|
{{ end }}
|
||||||
|
EOH
|
||||||
|
destination = "secrets/config.env"
|
||||||
|
env = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- ✅ Uses existing infrastructure (Consul)
|
||||||
|
- ✅ Can be managed via API
|
||||||
|
- ✅ Not visible in Nomad UI
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- ⚠️ Not as secure as Vault
|
||||||
|
- ⚠️ Manual secret rotation
|
||||||
|
|
||||||
|
### When Vault is Fixed
|
||||||
|
|
||||||
|
**Proper Vault integration:**
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
task "myapp-task" {
|
||||||
|
vault {
|
||||||
|
policies = ["myapp-policy"]
|
||||||
|
}
|
||||||
|
|
||||||
|
template {
|
||||||
|
data = <<EOH
|
||||||
|
{{ with secret "secret/data/myapp" }}
|
||||||
|
API_KEY="{{ .Data.data.api_key }}"
|
||||||
|
DATABASE_URL="{{ .Data.data.database_url }}"
|
||||||
|
{{ end }}
|
||||||
|
EOH
|
||||||
|
destination = "secrets/config.env"
|
||||||
|
env = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Complete Nomad Job Example
|
||||||
|
|
||||||
|
See `.gitea/workflows/nomad-job-complete.hcl.tmpl` for a fully documented example with:
|
||||||
|
|
||||||
|
- ✅ Proper health checks with grace period
|
||||||
|
- ✅ Host volume configuration
|
||||||
|
- ✅ Vault workarounds
|
||||||
|
- ✅ Auto-revert on failed deployments
|
||||||
|
- ✅ Graceful shutdown handling
|
||||||
|
- ✅ Resource limits
|
||||||
|
- ✅ Log rotation
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dockerfile Best Practices
|
||||||
|
|
||||||
|
### Multi-Stage Build
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Builder stage
FROM python:3.11-slim AS builder
WORKDIR /app
# requirements.txt has to be in the image before pip can read it
COPY requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt

# Runtime stage (smaller)
FROM python:3.11-slim
WORKDIR /app
# Stages do not share users, so appuser must be created here as well
RUN useradd -m -u 1000 appuser
COPY --from=builder --chown=appuser:appuser /root/.local /home/appuser/.local
COPY --chown=appuser:appuser . .
# `pip install --user` places console scripts such as `flask` in ~/.local/bin
ENV PATH=/home/appuser/.local/bin:$PATH
USER appuser
CMD ["flask", "run"]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits:**
|
||||||
|
- Smaller final image
|
||||||
|
- Faster deployment
|
||||||
|
- Less attack surface
|
||||||
|
|
||||||
|
### Non-Root User
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# Create user
|
||||||
|
RUN useradd -m -u 1000 appuser
|
||||||
|
|
||||||
|
# Switch to user
|
||||||
|
USER appuser
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why:**
|
||||||
|
- Security best practice
|
||||||
|
- Required for some volume mounts
|
||||||
|
- Prevents privilege escalation
|
||||||
|
|
||||||
|
### Health Check
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
# python:3.11-slim ships without curl, so probe with the interpreter that is already in the image.
# urlopen raises on connection errors and on 4xx/5xx responses, which makes the check exit non-zero.
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s \
CMD python -c "import os, urllib.request; urllib.request.urlopen('http://localhost:%s/health' % os.environ.get('PORT', '5000'), timeout=2)" || exit 1
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits:**
|
||||||
|
- Docker can detect unhealthy containers
|
||||||
|
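- Nomad's Docker driver passes the HEALTHCHECK through to Docker, but Nomad itself only acts on the `check` blocks of the job's `service` stanza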
|
||||||
|
- Extra layer of monitoring
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Gitea CI/CD Workflow
|
||||||
|
|
||||||
|
### Complete Workflow Example
|
||||||
|
|
||||||
|
See `.gitea/workflows/main.yml.tmpl` for a complete Gitea Actions workflow that:
|
||||||
|
|
||||||
|
1. ✅ Builds Docker image
|
||||||
|
2. ✅ Tags with commit hash + latest
|
||||||
|
3. ✅ Pushes to private registry
|
||||||
|
4. ✅ Validates Nomad job
|
||||||
|
5. ✅ Stops old deployment
|
||||||
|
6. ✅ Deploys new version
|
||||||
|
7. ✅ Updates nginx configuration
|
||||||
|
8. ✅ Updates forwarder configuration
|
||||||
|
|
||||||
|
### Secrets in Gitea
|
||||||
|
|
||||||
|
Configure in Gitea repository settings:
|
||||||
|
|
||||||
|
- `secrets.username` - Registry username
|
||||||
|
- `secrets.password` - Registry password
|
||||||
|
|
||||||
|
### Self-Hosted Runner
|
||||||
|
|
||||||
|
Your runner must have:
|
||||||
|
|
||||||
|
- Docker installed
|
||||||
|
- Nomad CLI installed
|
||||||
|
- SSH access to Nomad server
|
||||||
|
- Access to private registry
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Service Marked Unhealthy
|
||||||
|
|
||||||
|
**Check Consul:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Nomad
|
||||||
|
# `consul catalog` only has datacenters/nodes/services subcommands; the health API
# lists every check registered for a service (adjust the address if the local
# agent listens elsewhere)
curl -s http://127.0.0.1:8500/v1/health/checks/myapp

# Look for:
#   "Name": "http_health",
#   "Status": "critical"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check allocation logs:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nomad alloc logs -f <alloc-id> myapp-task
|
||||||
|
```
|
||||||
|
|
||||||
|
**Common causes:**
|
||||||
|
- /health endpoint not implemented
|
||||||
|
- App crashed
|
||||||
|
- Wrong port
|
||||||
|
- Slow startup
|
||||||
|
|
||||||
|
### Container Keeps Restarting
|
||||||
|
|
||||||
|
**Check allocation status:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nomad alloc status <alloc-id>
|
||||||
|
|
||||||
|
# Look at Recent Events:
|
||||||
|
# Started -> Restart Signaled -> Started ...
|
||||||
|
```
|
||||||
|
|
||||||
|
**Common causes:**
|
||||||
|
- Failed health checks
|
||||||
|
- App crash on startup
|
||||||
|
- Missing dependencies
|
||||||
|
- Port already in use
|
||||||
|
|
||||||
|
### Volume Mount Issues
|
||||||
|
|
||||||
|
**Check Nomad client config:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Autobox
|
||||||
|
# Host volumes are reported per node, not in agent-info
sudo nomad node status -self -verbose | grep -A 10 "Host Volumes"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check permissions:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On Autobox
|
||||||
|
ls -la /opt/nomad-volumes/myapp-data
|
||||||
|
|
||||||
|
# Should be owned by uid 1000 (or your container user)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check allocation:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nomad alloc status <alloc-id>
|
||||||
|
|
||||||
|
# Look for Mounted Volumes section
|
||||||
|
```
|
||||||
|
|
||||||
|
### Port Conflicts
|
||||||
|
|
||||||
|
**Symptoms:**
|
||||||
|
```
|
||||||
|
Failed to start task: bind: address already in use
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution:** Nomad assigns dynamic ports automatically:
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
network {
|
||||||
|
port "http" {
|
||||||
|
to = 5000 # Container internal port
|
||||||
|
# Nomad picks external port (30000-32000)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
env {
|
||||||
|
PORT = "${NOMAD_PORT_http}" # Use Nomad's assigned port
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Secrets Not Loading
|
||||||
|
|
||||||
|
**Check Consul KV:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
consul kv get secret/myapp/api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check template rendering:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nomad alloc fs <alloc-id> secrets/
|
||||||
|
|
||||||
|
# Should see config.env or your secret files
|
||||||
|
```
|
||||||
|
|
||||||
|
**View rendered template:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nomad alloc fs <alloc-id> secrets/config.env
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
### Essential Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check service health
|
||||||
|
curl -s http://127.0.0.1:8500/v1/health/checks/myapp
|
||||||
|
|
||||||
|
# View allocation
|
||||||
|
nomad alloc status <alloc-id>
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
nomad alloc logs -f <alloc-id> myapp-task
|
||||||
|
|
||||||
|
# Exec into container
|
||||||
|
nomad alloc exec -i -t <alloc-id> /bin/sh
|
||||||
|
|
||||||
|
# Restart job
|
||||||
|
nomad job restart myapp
|
||||||
|
|
||||||
|
# Stop job
|
||||||
|
nomad job stop myapp
|
||||||
|
|
||||||
|
# Force reschedule
|
||||||
|
# `job dispatch` only works on parameterized jobs; a forced evaluation reschedules failed allocations
nomad job eval -force-reschedule myapp
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Check URL
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Find allocated port
|
||||||
|
nomad alloc status <alloc-id> | grep "Port.*http"
|
||||||
|
|
||||||
|
# Test health endpoint
|
||||||
|
curl http://192.168.15.124:30123/health
|
||||||
|
```
|
||||||
|
|
||||||
|
### Volume Locations
|
||||||
|
|
||||||
|
- **Client config:** `/etc/nomad.d/client.hcl` (on Autobox)
|
||||||
|
- **Volume data:** `/opt/nomad-volumes/<volume-name>` (on Autobox)
|
||||||
|
- **Secrets:** `/opt/nomad-secrets/<app-name>` (on Autobox)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**For more information, see:**
|
||||||
|
- Main infrastructure docs: `~/Projects/i80_network.md`
|
||||||
|
- Nomad UI: https://nomad.i80.dk:4646
|
||||||
|
- Consul UI: https://consul.i80.dk:8500
|
||||||
49
README.md
49
README.md
@@ -0,0 +1,49 @@
|
|||||||
|
# Python Template Project for i80.dk Nomad Infrastructure
|
||||||
|
|
||||||
|
**Last Updated:** 2025-11-28
|
||||||
|
|
||||||
|
This is a complete template for deploying Python web applications to your i80.dk Nomad infrastructure with Gitea CI/CD.
|
||||||
|
|
||||||
|
## 📋 What's Included
|
||||||
|
|
||||||
|
### Core Files
|
||||||
|
|
||||||
|
- **`app_example.py`** - Example Flask app with proper health endpoint
|
||||||
|
- **`Dockerfile.complete`** - Production-ready Dockerfile with security best practices
|
||||||
|
- **`requirements.txt`** - Python dependencies
|
||||||
|
- **`.gitea/workflows/nomad-job-complete.hcl.tmpl`** - Complete Nomad job with all features
|
||||||
|
- **`.gitea/workflows/main.yml.tmpl`** - Gitea Actions workflow for CI/CD
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
- **`NOMAD_DEPLOYMENT_GUIDE.md`** - Comprehensive deployment guide covering:
|
||||||
|
- ✅ Health check implementation (the #1 pain point!)
|
||||||
|
- ✅ Host volumes setup (the #2 pain point!)
|
||||||
|
- ✅ Vault workarounds (while Vault is down)
|
||||||
|
- ✅ Complete troubleshooting guide
|
||||||
|
|
||||||
|
### Utilities
|
||||||
|
|
||||||
|
- **`setup-nomad-volumes.sh`** - Automated script to setup volumes on Autobox
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
See **[NOMAD_DEPLOYMENT_GUIDE.md](./NOMAD_DEPLOYMENT_GUIDE.md)** for complete instructions.
|
||||||
|
|
||||||
|
Quick summary:
|
||||||
|
|
||||||
|
1. **Copy template** and customize for your project
|
||||||
|
2. **Implement /health endpoint** in your app (CRITICAL!)
|
||||||
|
3. **Setup volumes** on Autobox (if needed)
|
||||||
|
4. **Deploy** via Gitea or manually
|
||||||
|
|
||||||
|
## 📚 Documentation
|
||||||
|
|
||||||
|
- **[NOMAD_DEPLOYMENT_GUIDE.md](./NOMAD_DEPLOYMENT_GUIDE.md)** - Start here!
|
||||||
|
- **[~/Projects/i80_network.md](../i80_network.md)** - Full infrastructure docs
|
||||||
|
|
||||||
|
## 🔗 Quick Links
|
||||||
|
|
||||||
|
- Nomad UI: https://nomad.i80.dk:4646
|
||||||
|
- Consul UI: https://consul.i80.dk:8500
|
||||||
|
- Gitea: https://gitea.i80.dk
|
||||||
|
|||||||
247
WHATS_NEW.md
Normal file
247
WHATS_NEW.md
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
# Python Template Project - What's New
|
||||||
|
|
||||||
|
**Updated:** 2025-11-28
|
||||||
|
|
||||||
|
## 🎯 Overview
|
||||||
|
|
||||||
|
Your Python template project has been completely updated to match your i80.dk infrastructure documentation with solutions to all the pain points you've experienced!
|
||||||
|
|
||||||
|
## 📦 New Files
|
||||||
|
|
||||||
|
### Core Application Files
|
||||||
|
|
||||||
|
1. **`app_example.py`** ⭐️ **NEW**
|
||||||
|
- Complete Flask example with proper health endpoint
|
||||||
|
- Graceful shutdown handling (SIGTERM)
|
||||||
|
- Environment variable configuration
|
||||||
|
- Ready-to-use health, ready, and metrics endpoints
|
||||||
|
|
||||||
|
2. **`Dockerfile.complete`** ⭐️ **NEW**
|
||||||
|
- Multi-stage build for smaller images
|
||||||
|
- Non-root user (uid 1000) for security
|
||||||
|
- Docker-level health check
|
||||||
|
- Production-ready best practices
|
||||||
|
|
||||||
|
### Nomad Configuration
|
||||||
|
|
||||||
|
3. **`.gitea/workflows/nomad-job-complete.hcl.tmpl`** ⭐️ **NEW**
|
||||||
|
- Complete Nomad job with ALL features
|
||||||
|
- Proper health checks with grace period
|
||||||
|
- Host volume configuration examples
|
||||||
|
- Vault integration (commented, ready for when it works)
|
||||||
|
- Vault workarounds for current use
|
||||||
|
- Auto-revert on failed deployments
|
||||||
|
- Comprehensive comments explaining everything
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
4. **`NOMAD_DEPLOYMENT_GUIDE.md`** ⭐️ **NEW** (50+ pages!)
|
||||||
|
- Complete deployment guide
|
||||||
|
- Health checks deep-dive (your #1 pain point)
|
||||||
|
- Host volumes setup guide (your #2 pain point)
|
||||||
|
- Vault workarounds (3 different approaches)
|
||||||
|
- Comprehensive troubleshooting section
|
||||||
|
- Quick reference commands
|
||||||
|
|
||||||
|
5. **`DEPLOYMENT_CHECKLIST.md`** ⭐️ **NEW**
|
||||||
|
- Step-by-step deployment checklist
|
||||||
|
- Pre-deployment verification
|
||||||
|
- Post-deployment checks
|
||||||
|
- Rollback planning
|
||||||
|
- Common issues quick reference
|
||||||
|
|
||||||
|
6. **`WHATS_NEW.md`** ⭐️ **NEW**
|
||||||
|
- This file - summary of updates
|
||||||
|
|
||||||
|
7. **`README.md`** ✏️ **UPDATED**
|
||||||
|
- Simplified with links to detailed guides
|
||||||
|
- Quick start section
|
||||||
|
- Clear structure
|
||||||
|
|
||||||
|
### Utilities
|
||||||
|
|
||||||
|
8. **`setup-nomad-volumes.sh`** ⭐️ **NEW**
|
||||||
|
- Automated script to setup volumes on Autobox
|
||||||
|
- Creates data and secrets directories
|
||||||
|
- Configures Nomad client
|
||||||
|
- Sets proper permissions
|
||||||
|
- Restarts Nomad and verifies
|
||||||
|
|
||||||
|
## 🎯 Pain Points Solved
|
||||||
|
|
||||||
|
### 1. Health Checks ⚕️ **SOLVED**
|
||||||
|
|
||||||
|
**Problem:** Services marked unhealthy, constant restarts
|
||||||
|
|
||||||
|
**Solution:**
|
||||||
|
- `app_example.py` shows proper implementation
|
||||||
|
- `NOMAD_DEPLOYMENT_GUIDE.md` explains all the gotchas
|
||||||
|
- Nomad job has proper grace periods
|
||||||
|
- Includes backup TCP check
|
||||||
|
|
||||||
|
**Key learnings documented:**
|
||||||
|
- Must use named ports, not hardcoded
|
||||||
|
- Add startup grace period
|
||||||
|
- Keep health check fast (<500ms)
|
||||||
|
- Return proper HTTP status codes
|
||||||
|
|
||||||
|
### 2. Host Volumes 💾 **SOLVED**
|
||||||
|
|
||||||
|
**Problem:** Volume mounts fail, permission issues, data not persisting
|
||||||
|
|
||||||
|
**Solution:**
|
||||||
|
- `setup-nomad-volumes.sh` automates entire setup
|
||||||
|
- Nomad job shows proper volume declaration
|
||||||
|
- Documentation covers all permission issues
|
||||||
|
- Examples for both data and secrets volumes
|
||||||
|
|
||||||
|
**Key learnings documented:**
|
||||||
|
- Configure on Autobox FIRST
|
||||||
|
- Match uid (1000) between container and host
|
||||||
|
- Verify with `nomad node status -self -verbose` (look for the Host Volumes section)
|
||||||
|
- Backup volumes regularly
|
||||||
|
|
||||||
|
### 3. Vault Not Working 🔐 **SOLVED**
|
||||||
|
|
||||||
|
**Problem:** Vault is down, can't use proper secret management
|
||||||
|
|
||||||
|
**Solution:** Three workaround approaches documented:
|
||||||
|
|
||||||
|
**Option 1:** Environment variables in Nomad job
|
||||||
|
- Fast but insecure
|
||||||
|
- Good for development only
|
||||||
|
|
||||||
|
**Option 2:** File-based secrets (RECOMMENDED)
|
||||||
|
- Secrets stored in `/opt/nomad-secrets/`
|
||||||
|
- Mounted as read-only volume
|
||||||
|
- Better security than environment variables
|
||||||
|
- `setup-nomad-volumes.sh` creates structure
|
||||||
|
|
||||||
|
**Option 3:** Consul KV store
|
||||||
|
- Uses existing infrastructure
|
||||||
|
- API-manageable
|
||||||
|
- Better than files, not as good as Vault
|
||||||
|
|
||||||
|
**Bonus:** Vault integration template ready for when it's fixed!
|
||||||
|
|
||||||
|
## 📚 How to Use
|
||||||
|
|
||||||
|
### For New Projects
|
||||||
|
|
||||||
|
1. Copy entire template directory:
|
||||||
|
```bash
|
||||||
|
cp -r PythonTemplateProject MyNewApp
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Follow Quick Start in `README.md`
|
||||||
|
|
||||||
|
3. Use `DEPLOYMENT_CHECKLIST.md` for each deployment
|
||||||
|
|
||||||
|
4. Refer to `NOMAD_DEPLOYMENT_GUIDE.md` when issues arise
|
||||||
|
|
||||||
|
### For Existing Projects
|
||||||
|
|
||||||
|
1. Copy `app_example.py` health endpoint to your app
|
||||||
|
|
||||||
|
2. Update your Dockerfile based on `Dockerfile.complete`
|
||||||
|
|
||||||
|
3. Update your Nomad job from `nomad-job-complete.hcl.tmpl`
|
||||||
|
|
||||||
|
4. Run `setup-nomad-volumes.sh` if you need volumes
|
||||||
|
|
||||||
|
## 🎓 Key Concepts Explained
|
||||||
|
|
||||||
|
### Health Checks
|
||||||
|
|
||||||
|
The guide explains:
|
||||||
|
- Why they fail
|
||||||
|
- How to implement correctly
|
||||||
|
- Testing strategies
|
||||||
|
- Grace periods
|
||||||
|
- Backup checks
|
||||||
|
|
||||||
|
### Volumes
|
||||||
|
|
||||||
|
The guide covers:
|
||||||
|
- Host volume vs Docker volume
|
||||||
|
- Configuration on client
|
||||||
|
- Permission management
|
||||||
|
- Backup strategies
|
||||||
|
- Troubleshooting mounts
|
||||||
|
|
||||||
|
### Secrets Without Vault
|
||||||
|
|
||||||
|
The guide provides:
|
||||||
|
- Comparison of approaches
|
||||||
|
- Security implications
|
||||||
|
- Implementation examples
|
||||||
|
- Migration path to Vault
|
||||||
|
|
||||||
|
## 🔗 Integration with Infrastructure
|
||||||
|
|
||||||
|
This template integrates with your infrastructure documentation:
|
||||||
|
|
||||||
|
- References `~/Projects/i80_network.md` for infrastructure details
|
||||||
|
- Uses same conventions (port ranges, naming, etc.)
|
||||||
|
- Follows same patterns (Consul tags, service registration)
|
||||||
|
- Compatible with existing Gitea CI/CD
|
||||||
|
- Works with consul-template configurations
|
||||||
|
|
||||||
|
## 📊 Statistics
|
||||||
|
|
||||||
|
**New Files:** 7 files
|
||||||
|
**Updated Files:** 1 file
|
||||||
|
**New Documentation:** ~100 pages
|
||||||
|
**Pain Points Solved:** 3 major issues
|
||||||
|
**Examples Included:** 20+ code examples
|
||||||
|
**Troubleshooting Scenarios:** 15+ common issues
|
||||||
|
|
||||||
|
## 🚀 Next Steps
|
||||||
|
|
||||||
|
1. **Try the template** - Deploy `app_example.py` to test everything works
|
||||||
|
|
||||||
|
2. **Update existing apps** - Add health endpoints to running services
|
||||||
|
|
||||||
|
3. **Setup volumes** - Run `setup-nomad-volumes.sh` for apps that need storage
|
||||||
|
|
||||||
|
4. **Document your apps** - Use templates as examples
|
||||||
|
|
||||||
|
5. **Share knowledge** - Others on your team can use this too!
|
||||||
|
|
||||||
|
## 💡 Tips
|
||||||
|
|
||||||
|
**Start with app_example.py:**
|
||||||
|
- It's a working, complete example
|
||||||
|
- Shows all the patterns correctly
|
||||||
|
- Copy-paste friendly
|
||||||
|
|
||||||
|
**Use the checklist:**
|
||||||
|
- Don't skip steps
|
||||||
|
- Check off as you go
|
||||||
|
- Add project-specific items
|
||||||
|
|
||||||
|
**Read the troubleshooting section:**
|
||||||
|
- Before you have problems
|
||||||
|
- Understand common issues
|
||||||
|
- Know where to look for solutions
|
||||||
|
|
||||||
|
## 🎉 Benefits
|
||||||
|
|
||||||
|
**Time Savings:**
|
||||||
|
- No more debugging health checks for hours
|
||||||
|
- No more fighting with volume permissions
|
||||||
|
- No more wondering how to handle secrets
|
||||||
|
|
||||||
|
**Quality:**
|
||||||
|
- Production-ready examples
|
||||||
|
- Security best practices
|
||||||
|
- Comprehensive error handling
|
||||||
|
|
||||||
|
**Documentation:**
|
||||||
|
- Everything explained
|
||||||
|
- Examples for every scenario
|
||||||
|
- Quick reference commands
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Your infrastructure is complex but powerful. This template makes it easier to use!** 🚀
|
||||||
161
app_example.py
Normal file
161
app_example.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Example Flask application with proper health endpoint for Nomad/Consul.
|
||||||
|
|
||||||
|
This template shows:
|
||||||
|
- Health check endpoint (CRITICAL for Nomad!)
|
||||||
|
- Graceful shutdown handling
|
||||||
|
- Environment variable configuration
|
||||||
|
- Logging setup
|
||||||
|
- Error handling
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
from datetime import datetime
|
||||||
|
from flask import Flask, jsonify, request
|
||||||
|
|
||||||
|
# Logging: INFO and above, written to stdout through a single stream handler
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# The Flask application object that the route decorators below attach to
app = Flask(__name__)

# Settings read from the environment, with fallbacks for local runs
PORT = int(os.environ.get('PORT', 5000))
APP_ENV = os.environ.get('APP_ENV', 'development')

# Set to True by shutdown_handler; /health reports 503 while it is set
is_shutting_down = False
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Root endpoint placeholder; swap in your own application logic.

    Returns:
        A JSON response holding a greeting, the configured APP_ENV and the
        current UTC time as an ISO-8601 string.
    """
    payload = {
        'message': 'Hello from Flask!',
        'environment': APP_ENV,
        'timestamp': datetime.utcnow().isoformat(),
    }
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/health')
def health():
    """
    Health check endpoint - CRITICAL for Nomad/Consul!

    Consul polls this endpoint on the interval configured in the job's
    `check` block; any non-200 answer marks the service as unhealthy.

    Returns:
        200 OK: service is healthy (body lists the individual checks)
        503 Service Unavailable: the shutdown flag has been set
    """
    if not is_shutting_down:
        report = {
            'status': 'healthy',
            'timestamp': datetime.utcnow().isoformat(),
            'environment': APP_ENV,
            'checks': {
                'app': 'ok',
                # Extend with dependency probes as needed, e.g.:
                # 'database': check_database(),
                # 'cache': check_cache(),
            },
        }
        return jsonify(report), 200

    # Shutdown in progress: report unavailable
    logger.warning("Health check called during shutdown")
    draining = {
        'status': 'shutting_down',
        'timestamp': datetime.utcnow().isoformat(),
    }
    return jsonify(draining), 503
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/ready')
def ready():
    """
    Optional readiness endpoint.

    Intended as the place for heavier readiness probes (DB connections, etc.);
    as written it always answers 200 with `ready: True` and a UTC timestamp.
    """
    state = {
        'ready': True,
        'timestamp': datetime.utcnow().isoformat(),
    }
    return jsonify(state), 200
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/metrics')
def metrics():
    """
    Optional metrics endpoint for monitoring.

    The counters are placeholders: both values are hard-coded to 0 until real
    tracking is implemented. The response is JSON with status 200.
    """
    snapshot = {
        'requests_total': 0,   # placeholder - implement a request counter
        'uptime_seconds': 0,   # placeholder - implement uptime tracking
        'timestamp': datetime.utcnow().isoformat(),
    }
    return jsonify(snapshot), 200
|
||||||
|
|
||||||
|
|
||||||
|
@app.errorhandler(404)
def not_found(error):
    """Answer unknown routes with a JSON error body and status 404."""
    body = {'error': 'Not found'}
    return jsonify(body), 404
|
||||||
|
|
||||||
|
|
||||||
|
@app.errorhandler(500)
def internal_error(error):
    """Log the failure and answer with a generic JSON error body and status 500."""
    logger.error(f"Internal error: {error}")
    body = {'error': 'Internal server error'}
    return jsonify(body), 500
|
||||||
|
|
||||||
|
|
||||||
|
def shutdown_handler(signum, frame):
    """
    Signal handler that flags the shutdown and terminates the process.

    Args:
        signum: number of the received signal (logged).
        frame: current stack frame supplied by the signal module (unused).

    The handler sets the module-level `is_shutting_down` flag, then exits with
    status 0 via sys.exit. The process exits right after the flag is set, so
    any real cleanup must be added before the exit call.
    """
    global is_shutting_down

    logger.info(f"Received signal {signum}, initiating graceful shutdown...")
    is_shutting_down = True

    # Cleanup hook - add as needed:
    #   - close database connections
    #   - finish pending requests
    #   - save state

    logger.info("Cleanup complete, exiting...")
    sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Route both termination signals (SIGTERM, then SIGINT) to the same handler
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, shutdown_handler)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    logger.info(f"Starting Flask app on port {PORT} in {APP_ENV} mode")

    # Flask's built-in server, listening on all interfaces.
    # Debug mode is on exactly when APP_ENV equals 'development'.
    debug_enabled = APP_ENV == 'development'
    app.run(host='0.0.0.0', port=PORT, debug=debug_enabled)
|
||||||
132
setup-nomad-volumes.sh
Executable file
132
setup-nomad-volumes.sh
Executable file
@@ -0,0 +1,132 @@
|
|||||||
|
#!/bin/bash
# Setup script for Nomad host volumes on Autobox
# Run this on the Autobox server
#
# Usage: sudo ./setup-nomad-volumes.sh [project-name]    (project-name defaults to "myapp")
#
# Steps: create the data and secrets directories, register both as Nomad host
# volumes, seed an example secrets.env (never overwriting an existing one),
# restart Nomad and verify that the volume is registered.

set -e

PROJECT_NAME="${1:-myapp}"
VOLUME_PATH="/opt/nomad-volumes/${PROJECT_NAME}-data"
SECRETS_PATH="/opt/nomad-secrets/${PROJECT_NAME}"
SECRETS_FILE="${SECRETS_PATH}/secrets.env"
NOMAD_CONFIG="/etc/nomad.d/client.hcl"
# host_volume blocks are only valid inside client { }, so they cannot simply be
# appended to the end of client.hcl. They go into their own drop-in file next
# to the main config, which Nomad merges with the other files in the directory.
# NOTE(review): assumes the agent is started with the directory as -config
# (e.g. `nomad agent -config /etc/nomad.d`) - confirm in the systemd unit.
VOLUME_CONFIG="$(dirname "$NOMAD_CONFIG")/${PROJECT_NAME}-volumes.hcl"

# The name ends up in root-owned paths and in HCL, so keep it to a safe alphabet
if [[ ! "$PROJECT_NAME" =~ ^[A-Za-z0-9][A-Za-z0-9_-]*$ ]]; then
    echo "❌ Invalid project name: $PROJECT_NAME (use letters, digits, '-' and '_')"
    exit 1
fi

echo "=================================================="
echo "Setting up Nomad volumes for: $PROJECT_NAME"
echo "=================================================="

# Check if running as root
if [ "$EUID" -ne 0 ]; then
    echo "❌ Please run as root (use sudo)"
    exit 1
fi

# 1. Create volume directory (owned by uid 1000, the container user)
echo ""
echo "📁 Creating volume directory..."
mkdir -p "$VOLUME_PATH"
chown 1000:1000 "$VOLUME_PATH"
chmod 755 "$VOLUME_PATH"
echo "✅ Created: $VOLUME_PATH"

# 2. Create secrets directory (accessible to uid 1000 only)
echo ""
echo "🔐 Creating secrets directory..."
mkdir -p "$SECRETS_PATH"
chown 1000:1000 "$SECRETS_PATH"
chmod 700 "$SECRETS_PATH"
echo "✅ Created: $SECRETS_PATH"

# 3. Check if volume already configured in Nomad
#    (either in the main client config or in a drop-in from an earlier run;
#    -s silences "no such file" for a drop-in that does not exist yet)
echo ""
echo "📝 Checking Nomad client configuration..."
configured_in="$(grep -ls "host_volume \"${PROJECT_NAME}-data\"" "$NOMAD_CONFIG" "$VOLUME_CONFIG" | head -n 1)"
if [ -n "$configured_in" ]; then
    echo "⚠️ Volume already configured in $configured_in"
else
    echo "Adding volume configuration to $VOLUME_CONFIG..."

    # Backup an existing drop-in before replacing it
    if [ -e "$VOLUME_CONFIG" ]; then
        cp "$VOLUME_CONFIG" "${VOLUME_CONFIG}.backup.$(date +%Y%m%d_%H%M%S)"
    fi

    # Write volume configuration
    cat > "$VOLUME_CONFIG" << EOF
# Host volumes for $PROJECT_NAME (generated by setup-nomad-volumes.sh)
client {
  # Volume for $PROJECT_NAME
  host_volume "${PROJECT_NAME}-data" {
    path      = "$VOLUME_PATH"
    read_only = false
  }

  # Secrets for $PROJECT_NAME
  host_volume "${PROJECT_NAME}-secrets" {
    path      = "$SECRETS_PATH"
    read_only = true
  }
}
EOF

    echo "✅ Added volume configuration"
fi

# 4. Create example secrets file (only when none exists, so real secrets
#    survive a re-run of this script)
echo ""
if [ -e "$SECRETS_FILE" ]; then
    echo "🔑 Secrets file already exists, leaving it untouched: $SECRETS_FILE"
else
    echo "🔑 Creating example secrets file..."
    cat > "$SECRETS_FILE" << 'EOF'
# Example secrets for your application
# Edit this file with your actual secrets

API_KEY=change-me-to-your-api-key
DATABASE_URL=sqlite:////app/data/app.db
SECRET_KEY=change-me-to-a-random-string

# Add more secrets as needed
EOF
    echo "✅ Created: $SECRETS_FILE"
    echo " ⚠️ EDIT THIS FILE WITH YOUR ACTUAL SECRETS!"
fi

chown 1000:1000 "$SECRETS_FILE"
chmod 600 "$SECRETS_FILE"

# 5. Restart Nomad to pick up new configuration
echo ""
echo "🔄 Restarting Nomad client..."
systemctl restart nomad

# Wait for Nomad to start
sleep 3

# Check if Nomad is running
if systemctl is-active --quiet nomad; then
    echo "✅ Nomad restarted successfully"
else
    echo "❌ Nomad failed to start! Check logs:"
    echo " journalctl -u nomad -n 50 --no-pager"
    exit 1
fi

# 6. Verify volume registration
#    (host volumes are listed by `node status`, not by `agent-info`)
echo ""
echo "✅ Verifying volume registration..."
if nomad node status -self -verbose | grep -q "${PROJECT_NAME}-data"; then
    echo "✅ Volume registered successfully"
else
    echo "⚠️ Volume not showing in node status (may need time to register)"
fi

# 7. Print summary
echo ""
echo "=================================================="
echo "✅ Setup Complete!"
echo "=================================================="
echo ""
echo "Volumes created:"
echo " 📁 Data: $VOLUME_PATH"
echo " 🔐 Secrets: $SECRETS_PATH"
echo ""
echo "Next steps:"
echo " 1. Edit secrets: vim $SECRETS_FILE"
echo " 2. Update your Nomad job to use volumes:"
echo " - Volume source: '${PROJECT_NAME}-data'"
echo " - Volume source: '${PROJECT_NAME}-secrets'"
echo " 3. Deploy your application: nomad job run job.hcl"
echo ""
echo "Verify volumes:"
echo " nomad node status -self -verbose | grep -A 5 'Host Volumes'"
echo ""
|
||||||
Reference in New Issue
Block a user