- scripts/taxonomy.py: shared taxonomy with 14 categories, keyword scorer and classify_text() function
- scripts/classify_server.py: FastAPI service — forwards to kreuzberg /extract, applies taxonomy, returns category/subcategory/confidence alongside full kreuzberg response
- Dockerfile.classify: lightweight Python image for classify service
- classify.nomad: Nomad job → classify.i80.dk
- .gitea/workflows/classify.yml: CI/CD pipeline (build + deploy)
- analyse_familie.py: refactored to import from taxonomy.py (no duplication)
- .gitignore: exclude dokumenter_keywords.* and extract_all.log
97 lines
1.8 KiB
HCL
97 lines
1.8 KiB
HCL
# Nomad service job "classify": one Docker task, registered in Consul and
# tagged for Traefik routing on the host classify.i80.dk.
job "classify" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  meta {
    # uuidv4() produces a different value every time this file is evaluated.
    # NOTE(review): presumably here to force a new job version on each
    # deploy even when nothing else changed — confirm.
    uuid = uuidv4()

    # NOTE(review): "[[ ... ]]" looks like a template placeholder that has to
    # be rendered before submission (e.g. by Levant); submitted as-is it is
    # just a literal string — confirm against the CI pipeline.
    deployed_at = "[[ timeNowUTC ]]"
  }

  # Job-wide rollout defaults; the group-level `update` block below sets
  # further fields and overrides progress_deadline / auto_revert.
  update {
    stagger           = "30s"
    max_parallel      = 1
    auto_revert       = true
    progress_deadline = "10m"
  }

  group "classify-group" {
    count = 1

    # Pin the allocation to the node whose unique name is "int".
    constraint {
      attribute = "${node.unique.name}"
      value     = "int"
    }

    # Canary rollout: start one canary, promote it automatically once it is
    # healthy, and revert to the last stable version if the deploy fails.
    update {
      canary            = 1
      auto_promote      = true
      min_healthy_time  = "20s"
      healthy_deadline  = "10m"
      progress_deadline = "15m"
      auto_revert       = true
    }

    network {
      # No static port given, so Nomad assigns a dynamic port labelled "http".
      port "http" {}
    }

    # Placement retries after the allocation has failed: at most 5 attempts
    # per 10m window, with delay doubling from 30s up to a 120s cap.
    reschedule {
      attempts       = 5
      interval       = "10m"
      delay          = "30s"
      delay_function = "exponential"
      max_delay      = "120s"
      unlimited      = false
    }

    service {
      provider = "consul"
      name     = "classify"
      port     = "http"

      # Traefik picks up its router configuration from these service tags.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.classify.rule=Host(`classify.i80.dk`)",
        "traefik.http.routers.classify.tls=true",
      ]

      # HTTP health check against /health on the "http" port.
      check {
        name     = "http_health"
        type     = "http"
        port     = "http"
        path     = "/health"
        interval = "15s"
        timeout  = "5s"
      }
    }

    task "classify-task" {
      driver = "docker"

      config {
        image = "ghcr.io/hjess/kreuzberg-classify:latest"
        ports = ["http"]
        # Always pull, since the mutable ":latest" tag may point to a new image.
        force_pull = true
      }

      env {
        # NOTE(review): presumably the base URL of the upstream kreuzberg
        # extraction service that the classify server forwards to — confirm.
        KREUZBERG_URL = "https://check.i80.dk"
        # Hand the dynamically assigned "http" port to the process.
        PORT = "${NOMAD_PORT_http}"
      }

      # In-place restarts on the same node: up to 5 within 10m, 20s apart;
      # mode "fail" marks the allocation failed once they are exhausted.
      restart {
        attempts = 5
        interval = "10m"
        delay    = "20s"
        mode     = "fail"
      }

      resources {
        cpu    = 200 # MHz
        memory = 256 # MB
      }
    }
  }
}