feat: add taxonomy classify service + /classify endpoint
Some checks failed
Deploy classify service / build-and-deploy (push) Failing after 24s
Deploy fil (kreuzberg) / deploy (push) Successful in 53s

- scripts/taxonomy.py: shared taxonomy with 14 categories, keyword scorer
  and classify_text() function
- scripts/classify_server.py: FastAPI service — forwards to kreuzberg /extract,
  applies taxonomy, returns category/subcategory/confidence alongside full kreuzberg response
- Dockerfile.classify: lightweight Python image for classify service
- classify.nomad: Nomad job → classify.i80.dk
- .gitea/workflows/classify.yml: CI/CD pipeline (build + deploy)
- analyse_familie.py: refactored to import from taxonomy.py (no duplication)
- .gitignore: exclude dokumenter_keywords.* and extract_all.log
This commit is contained in:
Henrik Jess Nielsen
2026-06-05 19:57:39 +02:00
parent f0300b586b
commit 58210207ea
7 changed files with 1027 additions and 0 deletions

96
classify.nomad Normal file
View File

@@ -0,0 +1,96 @@
# Nomad job for the "classify" HTTP service: a single Docker task registered
# in Consul and exposed through Traefik at classify.i80.dk.
job "classify" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service" # long-running service, not batch/system

  meta {
    # uuidv4() yields a fresh value each time the spec is parsed, so every
    # submission differs from the previous one.
    # NOTE(review): presumably intended to force a new deployment even though
    # the image tag stays ":latest" — confirm.
    uuid = uuidv4()

    # NOTE(review): "[[ ... ]]" is not HCL interpolation; this looks like a
    # placeholder for an external templating step (e.g. Levant) run before
    # submission. Without such a step the literal string is stored — confirm.
    deployed_at = "[[ timeNowUTC ]]"
  }

  # Job-wide rollout settings. The group below declares its own update block,
  # whose values take precedence for that group where both set the same key.
  update {
    stagger           = "30s"
    max_parallel      = 1
    auto_revert       = true
    progress_deadline = "10m"
  }

  group "classify-group" {
    count = 1

    # Placement: only the client node whose name equals "int".
    constraint {
      attribute = "${node.unique.name}"
      value     = "int"
    }

    # Canary rollout: start one canary next to the running allocation and
    # promote it automatically once healthy; revert to the last stable
    # version if the deployment fails.
    update {
      canary            = 1
      auto_promote      = true
      min_healthy_time  = "20s"
      healthy_deadline  = "10m"
      progress_deadline = "15m"
      auto_revert       = true
    }

    network {
      # No static value is given, so Nomad assigns the port number.
      port "http" {}
    }

    # Rescheduling after the allocation is marked failed: at most 5 attempts
    # per 10 minutes, with the delay starting at 30s and growing
    # exponentially up to a 120s ceiling.
    reschedule {
      attempts       = 5
      interval       = "10m"
      delay          = "30s"
      delay_function = "exponential"
      max_delay      = "120s"
      unlimited      = false
    }

    service {
      provider = "consul"
      name     = "classify"
      port     = "http"

      # Traefik routing labels: enable routing, match the public hostname,
      # and turn TLS on for the router.
      # NOTE(review): assumes Traefik reads these tags from the Consul
      # catalog — confirm against the Traefik configuration.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.classify.rule=Host(`classify.i80.dk`)",
        "traefik.http.routers.classify.tls=true",
      ]

      # HTTP health probe against /health on the service port, every 15s
      # with a 5s timeout.
      check {
        name     = "http_health"
        type     = "http"
        port     = "http"
        path     = "/health"
        interval = "15s"
        timeout  = "5s"
      }
    }

    task "classify-task" {
      driver = "docker"

      config {
        image = "ghcr.io/hjess/kreuzberg-classify:latest"
        ports = ["http"]

        # Always pull the image on task start rather than reusing a cached
        # copy of the mutable ":latest" tag.
        force_pull = true
      }

      env {
        # Base URL of the upstream kreuzberg service.
        KREUZBERG_URL = "https://check.i80.dk"

        # Hand the Nomad-assigned port to the process.
        # NOTE(review): presumably the server binds to $PORT — confirm
        # against the container's entrypoint.
        PORT = "${NOMAD_PORT_http}"
      }

      # In-place restarts on the same node: up to 5 within 10 minutes, 20s
      # apart; once exhausted, mode "fail" marks the allocation as failed
      # instead of continuing to retry locally.
      restart {
        attempts = 5
        interval = "10m"
        delay    = "20s"
        mode     = "fail"
      }

      resources {
        cpu    = 200 # MHz
        memory = 256 # MB
      }
    }
  }
}