feat: add taxonomy classify service + /classify endpoint
- scripts/taxonomy.py: shared taxonomy with 14 categories, keyword scorer and classify_text() function
- scripts/classify_server.py: FastAPI service — forwards to kreuzberg /extract, applies taxonomy, returns category/subcategory/confidence alongside full kreuzberg response
- Dockerfile.classify: lightweight Python image for classify service
- classify.nomad: Nomad job → classify.i80.dk
- .gitea/workflows/classify.yml: CI/CD pipeline (build + deploy)
- analyse_familie.py: refactored to import from taxonomy.py (no duplication)
- .gitignore: exclude dokumenter_keywords.* and extract_all.log
This commit is contained in:
96
classify.nomad
Normal file
96
classify.nomad
Normal file
@@ -0,0 +1,96 @@
|
||||
# Nomad service job for the "classify" container, registered in Consul and
# exposed through Traefik on the host classify.i80.dk.
job "classify" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  # Per-submission metadata.
  # NOTE(review): uuidv4() yields a new value on every evaluation, presumably
  # so that each submit produces a changed job version — confirm.
  # NOTE(review): "[[ timeNowUTC ]]" looks like a Levant-style template
  # placeholder that has to be rendered before submission — verify against
  # the deploy pipeline.
  meta {
    uuid        = uuidv4()
    deployed_at = "[[ timeNowUTC ]]"
  }

  # Job-level update settings. The group below declares its own update block,
  # which also sets progress_deadline and auto_revert.
  update {
    stagger           = "30s"
    max_parallel      = 1
    auto_revert       = true
    progress_deadline = "10m"
  }

  group "classify-group" {
    count = 1

    # Restrict placement to the client node whose unique name is "int".
    constraint {
      attribute = "${node.unique.name}"
      value     = "int"
    }

    # Canary rollout: one canary, promoted automatically, with automatic
    # revert enabled.
    update {
      canary            = 1
      auto_promote      = true
      min_healthy_time  = "20s"
      healthy_deadline  = "10m"
      progress_deadline = "15m"
      auto_revert       = true
    }

    # Port labelled "http" with no static value configured.
    network {
      port "http" {}
    }

    # Bounded rescheduling: 5 attempts per 10m, exponential delay starting at
    # 30s and capped at 120s.
    reschedule {
      attempts       = 5
      interval       = "10m"
      delay          = "30s"
      delay_function = "exponential"
      max_delay      = "120s"
      unlimited      = false
    }

    service {
      provider = "consul"
      name     = "classify"
      port     = "http"

      # Traefik router configuration carried as service tags.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.classify.rule=Host(`classify.i80.dk`)",
        "traefik.http.routers.classify.tls=true",
      ]

      # HTTP health check against /health on the "http" port.
      check {
        name     = "http_health"
        type     = "http"
        port     = "http"
        path     = "/health"
        interval = "15s"
        timeout  = "5s"
      }
    }

    task "classify-task" {
      driver = "docker"

      # The image uses the mutable "latest" tag, and force_pull is enabled.
      config {
        image      = "ghcr.io/hjess/kreuzberg-classify:latest"
        ports      = ["http"]
        force_pull = true
      }

      env {
        # NOTE(review): presumably the kreuzberg instance this service
        # forwards extraction requests to — confirm.
        KREUZBERG_URL = "https://check.i80.dk"
        # Pass the port allocated for the "http" label to the container.
        PORT = "${NOMAD_PORT_http}"
      }

      # Local restarts: up to 5 within 10m, 20s apart; mode "fail" once the
      # budget is exhausted.
      restart {
        attempts = 5
        interval = "10m"
        delay    = "20s"
        mode     = "fail"
      }

      resources {
        cpu    = 200
        memory = 256
      }
    }
  }
}
|
||||
Reference in New Issue
Block a user