feat: add taxonomy classify service + /classify endpoint
- scripts/taxonomy.py: shared taxonomy with 14 categories, keyword scorer and classify_text() function - scripts/classify_server.py: FastAPI service — forwards to kreuzberg /extract, applies taxonomy, returns category/subcategory/confidence alongside full kreuzberg response - Dockerfile.classify: lightweight Python image for classify service - classify.nomad: Nomad job → classify.i80.dk - .gitea/workflows/classify.yml: CI/CD pipeline (build + deploy) - analyse_familie.py: refactored to import from taxonomy.py (no duplication) - .gitignore: exclude dokumenter_keywords.* and extract_all.log
This commit is contained in:
52
.gitea/workflows/classify.yml
Normal file
52
.gitea/workflows/classify.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
name: Deploy classify service
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "scripts/classify_server.py"
|
||||
- "scripts/taxonomy.py"
|
||||
- "Dockerfile.classify"
|
||||
- "classify.nomad"
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: hjess/kreuzberg-classify
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: debian-host
|
||||
|
||||
env:
|
||||
PATH: /usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/sbin:/bin:/snap/bin
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build -f Dockerfile.classify -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest .
|
||||
|
||||
- name: Push to registry
|
||||
run: |
|
||||
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
|
||||
- name: Validate Nomad job
|
||||
run: nomad job validate classify.nomad
|
||||
env:
|
||||
NOMAD_ADDR: "https://nomad.i80.dk:4646"
|
||||
|
||||
- name: Deploy to Nomad
|
||||
run: nomad job run classify.nomad
|
||||
env:
|
||||
NOMAD_ADDR: "https://nomad.i80.dk:4646"
|
||||
|
||||
- name: Check deployment status
|
||||
run: |
|
||||
sleep 10
|
||||
nomad job status classify
|
||||
env:
|
||||
NOMAD_ADDR: "https://nomad.i80.dk:4646"
|
||||
Reference in New Issue
Block a user