Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Files and directories excluded from the packaged chart archive.
# Version-control metadata
.git/
.gitignore
# OS and IDE droppings
.DS_Store
.idea/
# Editor swap files, backups and merge leftovers
*.swp
*.orig
*.bak
*.tmp

View File

@@ -0,0 +1,31 @@
# Chart metadata for the Kreuzberg Helm chart.
apiVersion: v2
name: kreuzberg
type: application
# Chart version (the packaging), independent of the application version.
version: 0.1.0
# Application version; also used as the default image tag by the templates.
appVersion: "4.8.4"
description: >-
  A Helm chart for Kreuzberg — polyglot document intelligence framework
  with a Rust core. Extract text, metadata, images, and structured
  information from PDFs, Office documents, images, and 91+ formats.
home: https://kreuzberg.dev
icon: https://raw.githubusercontent.com/kreuzberg-dev/kreuzberg/main/docs/assets/logo.svg
sources:
  - https://github.com/kreuzberg-dev/kreuzberg
keywords:
  - document-intelligence
  - text-extraction
  - pdf
  - ocr
  - tesseract
  - rag
  - rust
maintainers:
  - name: kreuzberg-dev
    url: https://github.com/kreuzberg-dev
annotations:
  artifacthub.io/license: Elastic-2.0
  # The value is a YAML document embedded as a literal string.
  artifacthub.io/links: |
    - name: Documentation
      url: https://kreuzberg.dev
    - name: Source
      url: https://github.com/kreuzberg-dev/kreuzberg

View File

@@ -0,0 +1,5 @@
# Artifact Hub repository metadata: repository identifier, owners and license.
repositoryID: 404b1376-7cf3-4b3d-ab22-f121a3050396
license: Elastic-2.0
owners:
  - name: kreuzberg-dev
    email: naaman@kreuzberg.dev

View File

@@ -0,0 +1,67 @@
{{/*
Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "kreuzberg.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified resource name.
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or .Values.nameOverride);
  3. "<release>-<name>" otherwise.
Whatever is chosen is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "kreuzberg.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $full := .Values.fullnameOverride }}
{{- if not $full }}
{{- $full = ternary .Release.Name (printf "%s-%s" .Release.Name $base) (contains $base .Release.Name) }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>" with every
"+" replaced by "_", cut to 63 characters, trailing "-" removed.
*/}}
{{- define "kreuzberg.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- replace "+" "_" $label | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Full label set applied to every resource's metadata: the chart label, the
selector labels, the app version (only when the chart declares one) and the
managing service.
*/}}
{{- define "kreuzberg.labels" -}}
helm.sh/chart: {{ include "kreuzberg.chart" . }}
{{ include "kreuzberg.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Labels used to match pods to this release: chart name plus release name.
Used by the Deployment selector, the pod template, the Service selector and
the PodDisruptionBudget selector.
*/}}
{{- define "kreuzberg.selectorLabels" -}}
app.kubernetes.io/name: {{ template "kreuzberg.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the ServiceAccount the pods run as.
.Values.serviceAccount.name always wins when set. Otherwise the fallback is
the chart fullname when the chart creates the account, or "default" when it
does not.
*/}}
{{- define "kreuzberg.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "kreuzberg.fullname" . }}
{{- end }}
{{- .Values.serviceAccount.name | default $fallback }}
{{- end }}
{{/*
Container image reference: "[<registry>/]<repository>:<tag>".
- The tag falls back to .Chart.AppVersion when .Values.image.tag is empty.
- The tag is converted with toString so an unquoted numeric tag in values
  (e.g. `tag: 4.8`) is rendered as text instead of a printf "%s" type error
  marker.
- The registry prefix is omitted when .Values.image.registry is empty, so the
  result never starts with a stray "/".
*/}}
{{- define "kreuzberg.image" -}}
{{- $tag := .Values.image.tag | default .Chart.AppVersion | toString }}
{{- if .Values.image.registry }}
{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository $tag }}
{{- else }}
{{- printf "%s:%s" .Values.image.repository $tag }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,126 @@
{{- /*
Deployment running the Kreuzberg server (`serve` on container port 8000).

The cache directory /app/.kreuzberg is always backed by a volume: the chart's
PVC when cache.enabled is true, otherwise an emptyDir. The default container
securityContext sets readOnlyRootFilesystem: true, so without a volume the
paths exported through KREUZBERG_CACHE_DIR and HF_HOME would not be writable.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "kreuzberg.fullname" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
spec:
  {{- /* replicas is omitted under autoscaling so the HPA owns the count. */}}
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  {{- with .Values.strategy }}
  strategy:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "kreuzberg.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "kreuzberg.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "kreuzberg.serviceAccountName" . }}
      enableServiceLinks: {{ .Values.enableServiceLinks }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- /* Root init container that hands the PVC contents to UID/GID 1000. */}}
      {{- if and .Values.cache.enabled .Values.cache.initChown }}
      initContainers:
        - name: init-cache
          image: busybox:1.37-glibc
          command: ['sh', '-c', 'mkdir -p /app/.kreuzberg && chown -R 1000:1000 /app/.kreuzberg']
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: false
            capabilities:
              add: ["CHOWN"]
              drop: ["ALL"]
          volumeMounts:
            - name: cache
              mountPath: /app/.kreuzberg
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: {{ include "kreuzberg.image" . }}
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args: ["serve", "--host", "0.0.0.0", "--port", "8000"]
          ports:
            - name: http
              containerPort: 8000
              protocol: TCP
          env:
            - name: RUST_LOG
              value: {{ .Values.kreuzberg.logLevel | quote }}
            - name: TESSDATA_PREFIX
              value: {{ .Values.kreuzberg.tessdataPrefix | quote }}
            - name: KREUZBERG_OCR_LANGUAGE
              value: {{ .Values.kreuzberg.ocrLanguage | quote }}
            - name: KREUZBERG_CACHE_DIR
              value: "/app/.kreuzberg"
            - name: HF_HOME
              value: "/app/.kreuzberg/huggingface"
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          startupProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            failureThreshold: 30
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 2
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - name: cache
              mountPath: /app/.kreuzberg
            - name: tmp
              mountPath: /tmp
      volumes:
        {{- /* PVC when the persistent cache is enabled, scratch space otherwise. */}}
        - name: cache
          {{- if .Values.cache.enabled }}
          persistentVolumeClaim:
            claimName: {{ include "kreuzberg.fullname" . }}-cache
          {{- else }}
          emptyDir: {}
          {{- end }}
        - name: tmp
          emptyDir: {}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@@ -0,0 +1,30 @@
{{- /*
HorizontalPodAutoscaler for the chart's Deployment; rendered only when
autoscaling.enabled is true.

Each metric is emitted only when its target value is set, so an unset
targetCPUUtilizationPercentage no longer renders an empty averageUtilization
field. With neither target set the metrics list renders empty.
*/ -}}
{{- if .Values.autoscaling.enabled -}}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "kreuzberg.fullname" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "kreuzberg.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}

View File

@@ -0,0 +1,41 @@
{{- /*
Ingress routing every configured host/path to the chart's Service on its
"http" port; rendered only when ingress.enabled is true.

User-supplied strings (class name, TLS secret name, path) are quoted so that
unusual values cannot change the YAML structure. pathType falls back to
"Prefix" when a path entry omits it, instead of rendering an empty field.
*/ -}}
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "kreuzberg.fullname" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . | quote }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName | quote }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path | quote }}
            pathType: {{ .pathType | default "Prefix" }}
            backend:
              service:
                {{- /* "$" is the root context; "." is the path entry here. */}}
                name: {{ include "kreuzberg.fullname" $ }}
                port:
                  name: http
          {{- end }}
    {{- end }}
{{- end }}

View File

@@ -0,0 +1,13 @@
{{- /*
PodDisruptionBudget selecting this release's pods; rendered only when
podDisruptionBudget.enabled is true.
*/ -}}
{{- $pdb := .Values.podDisruptionBudget -}}
{{- if $pdb.enabled -}}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: {{ include "kreuzberg.fullname" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "kreuzberg.selectorLabels" . | nindent 6 }}
  minAvailable: {{ $pdb.minAvailable }}
{{- end }}

View File

@@ -0,0 +1,19 @@
{{- /*
PersistentVolumeClaim "<fullname>-cache" mounted by the Deployment at
/app/.kreuzberg; rendered only when cache.enabled is true. It carries the
helm.sh/resource-policy: keep annotation. storageClassName is emitted only
when cache.storageClass is non-empty.
*/ -}}
{{- if .Values.cache.enabled -}}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ printf "%s-cache" (include "kreuzberg.fullname" .) }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
  annotations:
    helm.sh/resource-policy: keep
spec:
  accessModes:
    {{- toYaml .Values.cache.accessModes | nindent 4 }}
  {{- with .Values.cache.storageClass }}
  storageClassName: {{ quote . }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.cache.size }}
{{- end }}

View File

@@ -0,0 +1,13 @@
{{- /*
Service exposing the pods' named "http" container port on service.port.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "kreuzberg.fullname" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector:
    {{- include "kreuzberg.selectorLabels" . | nindent 4 }}
  ports:
    - name: http
      protocol: TCP
      port: {{ .Values.service.port }}
      targetPort: http

View File

@@ -0,0 +1,12 @@
{{- /*
ServiceAccount for the pods; rendered only when serviceAccount.create is
true. Annotations are emitted only when serviceAccount.annotations is
non-empty.
*/ -}}
{{- $sa := .Values.serviceAccount -}}
{{- if $sa.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "kreuzberg.serviceAccountName" . }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
  {{- with $sa.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

View File

@@ -0,0 +1,23 @@
{{- /*
Helm test hook: a one-shot busybox pod that probes the Service's /health
endpoint with `wget --spider` (5 second timeout) and is never restarted.
*/ -}}
apiVersion: v1
kind: Pod
metadata:
  name: {{ printf "%s-test-connection" (include "kreuzberg.fullname" .) | quote }}
  labels:
    {{- include "kreuzberg.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: test
spec:
  restartPolicy: Never
  securityContext:
    runAsNonRoot: true
    runAsUser: 65534
  containers:
    - name: wget
      image: busybox:1.37-glibc
      command:
        - "wget"
      args:
        - "--spider"
        - "--timeout=5"
        - {{ printf "%s:%v/health" (include "kreuzberg.fullname" .) .Values.service.port | quote }}
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - "ALL"

View File

@@ -0,0 +1,163 @@
# -- Number of replicas.
# WARNING: with cache.enabled=true and cache.accessModes=[ReadWriteOnce] the
# PVC can be mounted by only one replica at a time. Either keep replicaCount
# at 1 on RWO storage, or move to ReadWriteMany storage before scaling out.
# RWO combined with multiple replicas requires the Recreate deployment
# strategy (not RollingUpdate).
replicaCount: 1

# -- Deployment strategy. Use Recreate when the cache is enabled on
# ReadWriteOnce storage, to avoid Multi-Attach errors during rolling updates.
strategy:
  type: Recreate
image:
  # -- Container image registry
  registry: ghcr.io
  # -- Container image repository
  repository: kreuzberg-dev/kreuzberg
  # -- Image tag; when empty, Chart.AppVersion is used.
  # "latest" is the full image (Tesseract + PaddleOCR + layout models),
  # "core" is the minimal image (no pre-downloaded models).
  tag: ''
  # -- Image pull policy
  pullPolicy: IfNotPresent

# -- Image pull secrets for private registries
imagePullSecrets: []

# -- Override the chart name
nameOverride: ''

# -- Override the full release name
fullnameOverride: ''
serviceAccount:
  # -- Create a ServiceAccount
  create: true
  # -- Annotations for the ServiceAccount
  annotations: {}
  # -- ServiceAccount name override; the release fullname is used when empty
  name: ''

# -- Pod-level annotations
podAnnotations: {}

# -- Pod-level security context
podSecurityContext:
  runAsNonRoot: true
  runAsUser: 1000
  runAsGroup: 1000
  fsGroup: 1000
  seccompProfile:
    type: RuntimeDefault

# -- Container-level security context
securityContext:
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  capabilities:
    drop:
      - ALL
service:
  # -- Service type
  type: ClusterIP
  # -- Service port
  port: 80

ingress:
  # -- Enable Ingress
  enabled: false
  # -- Ingress class name (e.g. "nginx")
  className: ''
  # -- Ingress annotations
  annotations: {}
  # -- Ingress hosts
  hosts:
    - host: kreuzberg.local
      paths:
        - path: /
          pathType: Prefix
  # -- Ingress TLS configuration
  tls: []
  #   - secretName: kreuzberg-tls
  #     hosts:
  #       - kreuzberg.local

# -- Container resource requests and limits
resources:
  requests:
    cpu: '500m'
    memory: '512Mi'
  limits:
    cpu: '2000m'
    memory: '2Gi'
autoscaling:
  # -- Enable HorizontalPodAutoscaler
  enabled: false
  # -- Minimum replicas. Note: a PodDisruptionBudget only permits voluntary
  # evictions (e.g. node drains) while more than minAvailable pods are
  # healthy. If podDisruptionBudget.minAvailable equals this value, drains
  # are blocked whenever the HPA has scaled down to the minimum. Keep
  # minReplicas higher than minAvailable, or lower minAvailable accordingly.
  minReplicas: 1
  # -- Maximum replicas
  maxReplicas: 10
  # -- Target CPU utilization (percent)
  targetCPUUtilizationPercentage: 80
  # -- Target memory utilization (percent). Leave unset to disable.
  # targetMemoryUtilizationPercentage: 80
# -- Node selector for pod scheduling
nodeSelector: {}

# -- Tolerations for pod scheduling
tolerations: []

# -- Affinity rules for pod scheduling
affinity: {}

# -- Extra environment variables appended to the container's env list
extraEnv: []
#   - name: KREUZBERG_CORS_ORIGINS
#     value: "https://app.example.com"
#   - name: KREUZBERG_MAX_UPLOAD_SIZE_MB
#     value: "500"

# -- Kreuzberg-specific configuration
kreuzberg:
  # -- Log level (exported as RUST_LOG): trace, debug, info, warn, error
  logLevel: 'info'
  # -- Tesseract data prefix path (must match the container image)
  tessdataPrefix: '/usr/share/tesseract-ocr/5/tessdata'
  # -- Default OCR language
  ocrLanguage: 'eng'
cache:
  # -- Enable a persistent cache for embedding models and downloaded assets.
  # Without a PVC the models (90 MB to 1.2 GB each) are downloaded again on
  # every pod restart.
  enabled: true
  # -- Storage size for the cache PVC
  size: 2Gi
  # -- StorageClass for the cache PVC (empty string uses the cluster default)
  storageClass: ''
  # -- Access modes for the cache PVC
  accessModes:
    - ReadWriteOnce
  # -- Run an init container that chowns the cache directory to UID 1000.
  # Set to true for NFS or other storage that does not honour fsGroup on
  # mount. Most block-backed storage classes (EBS, GKE PD, Azure Disk) apply
  # ownership automatically through the pod's fsGroup, so false is fine there.
  initChown: true
# -- Whether Kubernetes injects service-discovery environment variables into
# the pod; disabled by default.
# For every Service in the namespace Kubernetes injects
# {SVCNAME}_PORT=tcp://<clusterIP>:<port>. When the release is named
# "kreuzberg" that produces KREUZBERG_PORT, which the binary parses as a u16
# and panics. CoreDNS makes these variables unnecessary in all modern clusters.
enableServiceLinks: false

podDisruptionBudget:
  # -- Enable PodDisruptionBudget
  enabled: false
  # -- Minimum available pods during disruption
  minAvailable: 1