Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

1244
.github/workflows/benchmarks.yaml vendored Normal file

File diff suppressed because it is too large Load Diff

74
.github/workflows/build-node-native.yml vendored Normal file
View File

@@ -0,0 +1,74 @@
# Builds the kreuzberg-node NAPI binding for every target triple in the matrix.
# Runs on pushes/PRs to main that touch the Node binding, the core crate, or
# the Rust toolchain/lock files, and can also be started manually.
name: Build Node Native
on:
  push:
    branches: [main]
    paths:
      - "crates/kreuzberg-node/**"
      - "crates/kreuzberg/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - ".github/workflows/build-node-native.yml"
  pull_request:
    branches: [main]
    paths:
      - "crates/kreuzberg-node/**"
      - "crates/kreuzberg/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - ".github/workflows/build-node-native.yml"
  workflow_dispatch:
# One run per ref; a newer push cancels the run still in progress.
concurrency:
  group: build-node-native-${{ github.ref }}
  cancel-in-progress: true
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
permissions:
  contents: read
jobs:
  build:
    name: Build ${{ matrix.target }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      # Keep building the remaining targets when one of them fails.
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
          - os: ubuntu-24.04-arm
            target: aarch64-unknown-linux-gnu
          # The macos-13 image has been retired by GitHub; macos-15-intel is
          # the hosted label that still provides an x86_64 macOS runner.
          - os: macos-15-intel
            target: x86_64-apple-darwin
          - os: macos-latest
            target: aarch64-apple-darwin
          - os: windows-latest
            target: x86_64-pc-windows-msvc
    steps:
      - uses: actions/checkout@v6.0.2
        with:
          submodules: recursive
      - uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          target: ${{ matrix.target }}
      - uses: kreuzberg-dev/actions/setup-node-workspace@v1
        with:
          node-version: "24"
      - name: Build NAPI binding
        uses: kreuzberg-dev/actions/build-node-napi@v1
        with:
          crate-dir: crates/kreuzberg-node
          build-command: pnpm exec napi build --release --target ${{ matrix.target }} --platform

79
.github/workflows/ci-docker.yaml vendored Normal file
View File

@@ -0,0 +1,79 @@
# Manually triggered workflow that builds each Docker image variant locally
# (nothing is pushed) and runs the variant-specific test suites against it.
name: CI Docker
on:
  workflow_dispatch:
concurrency:
  cancel-in-progress: true
  group: ci-docker-${{ github.ref }}
env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  ORT_VERSION: "1.24.2"
permissions:
  contents: read
jobs:
  docker:
    name: Docker (${{ matrix.variant }})
    # Only in the upstream repository, and never for Dependabot-triggered runs.
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        variant:
          - core
          - full
          - cli
    steps:
      - uses: actions/checkout@v6

      - name: Free disk space
        uses: kreuzberg-dev/actions/free-disk-space-linux@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4

      # The image is loaded into the local daemon so later steps can run it.
      - name: Build Docker image
        uses: docker/build-push-action@v7
        with:
          context: .
          file: docker/Dockerfile.${{ matrix.variant }}
          build-args: ONNXRUNTIME_VERSION=${{ env.ORT_VERSION }}
          tags: kreuzberg:${{ matrix.variant }}
          load: true
          push: false
          cache-from: type=gha,scope=ci-docker-${{ matrix.variant }}
          cache-to: type=gha,mode=max,scope=ci-docker-${{ matrix.variant }}

      # Writes a gzip-compressed `docker save` archive and prints its size.
      - name: Save Docker image
        shell: bash
        run: |
          mkdir -p /tmp
          docker save kreuzberg:${{ matrix.variant }} | gzip > /tmp/kreuzberg-${{ matrix.variant }}.tar.gz
          ls -lh /tmp/kreuzberg-${{ matrix.variant }}.tar.gz

      # A warning threshold (200) is passed for the cli variant only; the
      # other variants pass an empty value.
      - name: Check image size
        uses: kreuzberg-dev/actions/check-docker-image-size@v1
        with:
          image: kreuzberg:${{ matrix.variant }}
          label: "${{ matrix.variant }} image"
          warn-mb: ${{ matrix.variant == 'cli' && '200' || '' }}

      # Feature, configuration and API-contract tests apply to core/full only.
      - name: Run feature tests
        if: matrix.variant != 'cli'
        run: scripts/ci/docker/run-feature-tests.sh "${{ matrix.variant }}"

      - name: Run configuration tests
        if: matrix.variant != 'cli'
        run: scripts/ci/docker/run-config-tests.sh "${{ matrix.variant }}"

      - name: Run API contract tests with schemathesis
        if: matrix.variant != 'cli'
        uses: kreuzberg-dev/actions/run-api-contract-tests@v1
        with:
          image: kreuzberg:${{ matrix.variant }}
          port: "8000"

      # The cli variant has its own dedicated test script.
      - name: Run CLI tests
        if: matrix.variant == 'cli'
        run: scripts/ci/docker/run-cli-tests.sh

102
.github/workflows/ci-docs.yaml vendored Normal file
View File

@@ -0,0 +1,102 @@
# Lints and builds the documentation site on PRs and on pushes to main, and
# deploys the built site to GitHub Pages for pushes to main.
name: CI Docs
on:
  pull_request:
    paths:
      - "docs/**"
      - "packages/**/README.md"
      - "crates/*/README.md"
      - "packages/python/pyproject.toml"
      - "packages/typescript/package.json"
      - "packages/ruby/kreuzberg.gemspec"
      - "packages/php/composer.json"
      - "packages/go/v5/go.mod"
      - "packages/java/pom.xml"
      - "packages/csharp/**/Kreuzberg.csproj"
      - "packages/elixir/mix.exs"
      - "packages/r/DESCRIPTION"
      - "packages/dart/pubspec.yaml"
      - "zensical.toml"
      - "mkdocs.yml"
      - "alef.toml"
      - ".github/workflows/ci-docs.yaml"
  push:
    branches: [main]
    paths:
      - "docs/**"
      - "packages/**/README.md"
      - "crates/*/README.md"
      - "zensical.toml"
      - "pyproject.toml"
      - "alef.toml"
      - "CHANGELOG.md"
      - ".github/workflows/ci-docs.yaml"
  workflow_dispatch:
# Least privilege by default: lint and build only need to read the repository.
# The Pages-specific write scopes are granted on the deploy job alone.
permissions:
  contents: read
# Runs are grouped per PR (or per ref) and are never cancelled mid-flight.
concurrency:
  group: ci-docs-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: false
jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v6
      - name: Lint documentation + validate snippets
        uses: kreuzberg-dev/actions/lint-docs@v1
        with:
          working-directory: .
          strict: "true"
          validate-snippets: "true"
          alef-ref: v0.19.5
  build:
    name: Build
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Build documentation
        uses: kreuzberg-dev/actions/build-docs@v1
        with:
          working-directory: .
          strict: "true"
      # Hand the built site to the deploy job; it is only needed briefly.
      - name: Upload site artifact
        uses: actions/upload-artifact@v7
        with:
          name: docs-site
          path: site/
          retention-days: 1
  deploy:
    name: Deploy
    needs: [build, lint]
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    runs-on: ubuntu-latest
    # actions/deploy-pages needs these two scopes to create the deployment and
    # to obtain the OIDC token that authenticates it.
    permissions:
      pages: write
      id-token: write
    # A Pages deployment must run inside an environment; without one the
    # deployment request is rejected.
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Download site artifact
        uses: actions/download-artifact@v8
        with:
          name: docs-site
          path: site/
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v5
        with:
          path: site
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v5

345
.github/workflows/ci-e2e.yaml vendored Normal file
View File

@@ -0,0 +1,345 @@
# End-to-end tests for every language binding.
#
# `build-ffi` builds the FFI library and the CLI per target and uploads them as
# artifacts. `e2e-tests` then downloads the aarch64 Linux artifact, stages it
# into the workspace, installs the toolchain for one language per matrix entry
# and runs that entry's `test-cmd`.
name: CI E2E
on:
  push:
    branches: [main]
    paths:
      - "crates/**"
      - "packages/**"
      - "e2e/**"
      - "fixtures/**"
      - "alef.toml"
      - "Cargo.toml"
      - "Cargo.lock"
      - "Taskfile.yml"
      - ".github/workflows/ci-e2e.yaml"
  pull_request:
    branches: [main]
    paths:
      - "crates/**"
      - "packages/**"
      - "e2e/**"
      - "fixtures/**"
      - "alef.toml"
      - "Cargo.toml"
      - "Cargo.lock"
      - "Taskfile.yml"
      - ".github/workflows/ci-e2e.yaml"
  workflow_dispatch:
concurrency:
  group: ci-e2e-${{ github.ref }}
  cancel-in-progress: true
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0
  CARGO_PROFILE_DEV_DEBUG: 0
  RUST_BACKTRACE: short
  RUST_MIN_STACK: 16777216
  ORT_VERSION: "1.24.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  BUILD_PROFILE: "ci"
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
permissions:
  contents: read
jobs:
  build-ffi:
    name: Build FFI (${{ matrix.target }})
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: ${{ matrix.os }}
    # Windows builds get twice the time budget of the other targets.
    timeout-minutes: ${{ matrix.os == 'windows-latest' && 120 || 60 }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-24.04-arm
            target: aarch64-unknown-linux-gnu
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
          - os: macos-latest
            target: aarch64-apple-darwin
          - os: windows-latest
            target: x86_64-pc-windows-msvc
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: build-ffi-${{ matrix.target }}
          target: ${{ matrix.target }}
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Setup OpenSSL
        uses: kreuzberg-dev/actions/setup-openssl@v1
      - name: Build FFI library
        uses: kreuzberg-dev/actions/build-rust-ffi@v1
        with:
          crate-name: kreuzberg-ffi
      - name: Build CLI
        uses: kreuzberg-dev/actions/build-rust-cli@v1
        with:
          package-name: kreuzberg-cli
          binary-name: kreuzberg
          extra-cargo-args: --features all
      # The path list covers the library/binary names of every platform; each
      # target only produces a subset of them.
      - name: Upload FFI artifacts
        uses: actions/upload-artifact@v7
        with:
          name: ffi-${{ matrix.target }}
          path: |
            target/release/libkreuzberg_ffi.*
            target/release/kreuzberg_ffi.*
            crates/kreuzberg-ffi/include/kreuzberg.h
            crates/kreuzberg-ffi/kreuzberg-ffi.pc
            crates/kreuzberg-ffi/cmake/
            target/release/kreuzberg
            target/release/kreuzberg.exe
          retention-days: 7
          if-no-files-found: error
  e2e-tests:
    name: E2E (${{ matrix.lang }})
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    needs: [build-ffi]
    runs-on: ubuntu-24.04-arm
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        # Each entry names a language, the toolchain version(s) that gate the
        # matching setup step below, and the shell command that runs its suite.
        include:
          - lang: python
            python-version: "3.13"
            test-cmd: "pip install maturin && cd packages/python && maturin develop --release && cd ../../e2e/python && python3 -m pytest tests/ -q"
          - lang: node
            node-version: "24"
            test-cmd: "cd crates/kreuzberg-node && npm run build && cd ../../e2e/node && npx vitest run"
          - lang: go
            go-version: "1.26"
            test-cmd: "cd e2e/go && go test ./... -count=1 -v"
          - lang: ruby
            ruby-version: "3.4"
            test-cmd: "cd e2e/ruby && bundle exec rspec"
          - lang: java
            java-version: "25"
            test-cmd: "cd packages/java && mvn -q package -DskipTests && cd ../../e2e/java && mvn test -q"
          - lang: csharp
            dotnet-version: "10.0.x"
            test-cmd: "cd e2e/csharp && dotnet test"
          - lang: php
            php-version: "8.4"
            test-cmd: 'cd crates/kreuzberg-php && cargo build --release && echo "extension=$(pwd)/../../target/release/libkreuzberg_php.so" | sudo tee -a "$(php -r ''echo php_ini_loaded_file();'')" >/dev/null && cd ../../e2e/php && composer install -q && vendor/bin/phpunit'
          - lang: elixir
            elixir-version: "1.19"
            otp-version: "28"
            test-cmd: "cd e2e/elixir && KREUZBERG_BUILD=true mix deps.get && KREUZBERG_BUILD=true mix test"
          - lang: wasm
            node-version: "24"
            test-cmd: 'curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh && export PATH="$HOME/.cargo/bin:$PATH" && export RUSTFLAGS=''--cfg getrandom_backend="wasm_js"'' && cd crates/kreuzberg-wasm && wasm-pack build --release --target web --out-dir ../../packages/wasm/pkg && cd ../../e2e/wasm && npm install && npm test'
          - lang: rust
            test-cmd: "cd e2e/rust && cargo test"
          - lang: r
            r-version: "4.3"
            test-cmd: "cd e2e/r && Rscript run_tests.R"
          - lang: dart
            dart-version: "3.11"
            # The copy is best-effort, so only the `cp` is wrapped in
            # `{ ... || true; }`. Left ungrouped, `|| true` would also swallow
            # a failed `cargo build` or `mkdir` and let the chain carry on.
            test-cmd: "cargo build --release -p kreuzberg-dart && mkdir -p packages/dart/rust/target/release && { cp target/release/libkreuzberg_dart.* packages/dart/rust/target/release/ 2>/dev/null || true; } && cd packages/dart && dart pub get && cd ../../e2e/dart && dart pub get && dart test"
          - lang: kotlin_android
            java-version: "25"
            test-cmd: "cd e2e/kotlin_android && gradle test --no-daemon"
          - lang: swift
            swift-version: "6.0"
            test-cmd: "cd e2e/swift_e2e && swift test"
          - lang: zig
            zig-version: "0.16.0"
            test-cmd: 'FFI_ABS="$PWD/target/release" && cd e2e/zig && zig build test -Dffi_path="$FFI_ABS"'
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: e2e-${{ matrix.lang }}
      # Every e2e leg runs on ubuntu-24.04-arm, so only the aarch64 Linux
      # artifact from build-ffi is consumed here.
      - name: Download FFI artifacts
        uses: actions/download-artifact@v8
        with:
          name: ffi-aarch64-unknown-linux-gnu
          path: ffi-artifacts
      # Copies the downloaded files back to the workspace paths they were built
      # at, then installs the shared library system-wide when it is present.
      - name: Stage FFI artifacts
        shell: bash
        run: |
          mkdir -p target/release crates/kreuzberg-ffi/include crates/kreuzberg-ffi/cmake
          if [ -d ffi-artifacts/target/release ]; then
            cp -r ffi-artifacts/target/release/. target/release/
          fi
          if [ -d ffi-artifacts/crates/kreuzberg-ffi/include ]; then
            cp -r ffi-artifacts/crates/kreuzberg-ffi/include/. crates/kreuzberg-ffi/include/
          fi
          if [ -d ffi-artifacts/crates/kreuzberg-ffi/cmake ]; then
            cp -r ffi-artifacts/crates/kreuzberg-ffi/cmake/. crates/kreuzberg-ffi/cmake/
          fi
          if [ -f ffi-artifacts/crates/kreuzberg-ffi/kreuzberg-ffi.pc ]; then
            cp ffi-artifacts/crates/kreuzberg-ffi/kreuzberg-ffi.pc crates/kreuzberg-ffi/
          fi
          chmod +x target/release/libkreuzberg_ffi.so 2>/dev/null || true
          ls -la target/release/
          if [ -f target/release/libkreuzberg_ffi.so ]; then
            sudo cp target/release/libkreuzberg_ffi.so /usr/local/lib/
            sudo ldconfig
          fi
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Setup OpenSSL
        uses: kreuzberg-dev/actions/setup-openssl@v1
      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}
      - name: Setup Tesseract cache
        uses: ./.github/actions/setup-tesseract-cache
        with:
          label: e2e-${{ matrix.lang }}
      - name: Install WASI SDK
        if: matrix.lang == 'wasm'
        uses: kreuzberg-dev/actions/install-wasi-sdk@v1
      # The language setup steps below run only for matrix entries that define
      # the corresponding version key.
      - name: Setup Python
        if: matrix.python-version
        uses: kreuzberg-dev/actions/setup-python-env@v1
        with:
          python-version: ${{ matrix.python-version }}
          cache-prefix: e2e-py-${{ matrix.python-version }}
      # Pass the matrix version through so the step honours the value that
      # gates it instead of falling back to the action's default.
      - name: Setup Node
        if: matrix.node-version
        uses: kreuzberg-dev/actions/setup-node-workspace@v1
        with:
          node-version: ${{ matrix.node-version }}
      - name: Setup Go
        if: matrix.go-version
        uses: actions/setup-go@v6
        with:
          go-version: ${{ matrix.go-version }}
      - name: Setup Ruby
        if: matrix.ruby-version
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby-version }}
          bundler-cache: true
          working-directory: e2e/ruby
      - name: Setup Java
        if: matrix.java-version
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: ${{ matrix.java-version }}
      - name: Setup Android SDK
        if: matrix.lang == 'kotlin_android'
        uses: android-actions/setup-android@v3
        with:
          api-level: 35
          build-tools-version: "35.0.0"
      - name: Setup Gradle
        if: matrix.lang == 'kotlin_android'
        uses: kreuzberg-dev/actions/setup-gradle@v1
        with:
          gradle-version: "9.1.0"
      - name: Setup .NET
        if: matrix.dotnet-version
        uses: actions/setup-dotnet@v5
        with:
          dotnet-version: ${{ matrix.dotnet-version }}
      - name: Setup PHP
        if: matrix.php-version
        uses: kreuzberg-dev/actions/setup-php@v1
        with:
          php-version: ${{ matrix.php-version }}
          tools: composer
      - name: Setup Elixir
        if: matrix.elixir-version
        uses: kreuzberg-dev/actions/setup-elixir@v1
        with:
          elixir-version: ${{ matrix.elixir-version }}
          otp-version: ${{ matrix.otp-version }}
      - name: Setup R
        if: matrix.r-version
        uses: kreuzberg-dev/actions/setup-r@v1
        with:
          r-version: ${{ matrix.r-version }}
      - name: Install R test packages
        if: matrix.lang == 'r'
        run: R -e 'install.packages(c("testthat","jsonlite","devtools"), repos="https://cloud.r-project.org")'
      - name: Setup Dart
        if: matrix.dart-version
        uses: dart-lang/setup-dart@v1
        with:
          sdk: ${{ matrix.dart-version }}
      - name: Setup Swift
        if: matrix.swift-version
        uses: kreuzberg-dev/actions/setup-swift@v1
        with:
          swift-version: ${{ matrix.swift-version }}
      - name: Setup Zig
        if: matrix.zig-version
        uses: kreuzberg-dev/actions/setup-zig@v1
        with:
          version: ${{ matrix.zig-version }}
      # Exposes the staged pkg-config file and shared library to the languages
      # listed in the condition, for all later steps (via GITHUB_ENV).
      - name: Setup library paths for FFI bindings
        if: |
          matrix.lang == 'go' || matrix.lang == 'java' ||
          matrix.lang == 'csharp' || matrix.lang == 'elixir' ||
          matrix.lang == 'r' || matrix.lang == 'kotlin_android' ||
          matrix.lang == 'swift' || matrix.lang == 'zig'
        shell: bash
        run: |
          # Append the previous value only when it is non-empty. A trailing ':'
          # would add an empty entry, which the dynamic loader treats as the
          # current working directory for LD_LIBRARY_PATH.
          export PKG_CONFIG_PATH="${PWD}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
          export LD_LIBRARY_PATH="${PWD}/target/release${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
          echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
      - name: Install Task
        uses: kreuzberg-dev/actions/install-task@v1
      - name: Compile Ruby native extension
        if: matrix.lang == 'ruby'
        working-directory: packages/ruby
        run: bundle install && bundle exec rake compile
      # Runs the matrix entry's command with the library paths set above.
      - name: Run tests
        run: ${{ matrix.test-cmd }}
        shell: bash
        env:
          PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }}
          LD_LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }}
          DYLD_LIBRARY_PATH: ${{ env.DYLD_LIBRARY_PATH || '' }}
          TESSDATA_PREFIX: "/usr/share/tesseract-ocr/5/tessdata"

112
.github/workflows/ci-gpu.yaml vendored Normal file
View File

@@ -0,0 +1,112 @@
# Manually triggered GPU test run, split in two jobs: the test binary is
# compiled on a regular hosted runner, then executed on the GPU runner.
name: CI GPU
on:
  workflow_dispatch:
concurrency:
  cancel-in-progress: true
  group: ci-gpu-${{ github.ref }}
env:
  CARGO_INCREMENTAL: 0
  CARGO_PROFILE_DEV_DEBUG: 0
  CARGO_TERM_COLOR: always
  ORT_VERSION: "1.24.2"
  RUST_BACKTRACE: short
  RUST_MIN_STACK: 16777216
permissions:
  contents: read
jobs:
  build:
    name: "Build test binary"
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: "1.95"

      # Cargo registry, git checkouts and the target dir, keyed on Cargo.lock.
      - name: Cache Cargo
        uses: actions/cache@v5
        with:
          key: gpu-build-${{ runner.os }}-${{ hashFiles('Cargo.lock') }}
          restore-keys: |
            gpu-build-${{ runner.os }}-
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target

      - name: Build GPU test binary
        uses: kreuzberg-dev/actions/build-gpu-test-binary@v1
        with:
          package: kreuzberg
          test-name: gpu_acceleration
          features: "paddle-ocr,layout-detection,embeddings,pdf,ocr,ort-dynamic"
          output-name: gpu-acceleration-test

      # Handed over to the gpu-tests job; one day of retention.
      - name: Upload test binary
        uses: actions/upload-artifact@v7
        with:
          name: gpu-test-binary
          path: gpu-acceleration-test
          retention-days: 1

  gpu-tests:
    name: "GPU Tests (CUDA)"
    needs: build
    runs-on: runner-gpu-l4
    timeout-minutes: 15
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          submodules: recursive

      # Fail fast when nvidia-smi cannot see a GPU; otherwise print its details.
      - name: Verify GPU
        run: |
          nvidia-smi || {
            echo "ERROR: nvidia-smi failed — no GPU detected"
            exit 1
          }
          echo "GPU detected:"
          nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader

      - name: Download test binary
        uses: actions/download-artifact@v8.0.1
        with:
          name: gpu-test-binary

      - name: Download ONNX Runtime (GPU/CUDA)
        uses: kreuzberg-dev/actions/setup-onnx-runtime-gpu@v1
        with:
          version: ${{ env.ORT_VERSION }}

      - name: Setup PaddleOCR models
        uses: ./.github/actions/setup-paddle-ocr-models

      # Start from an empty layout model cache on every run.
      - name: Clear stale layout model cache (self-hosted runner persistence)
        run: |
          rm -rf "$HOME/.cache/kreuzberg/layout"
          echo "Cleared layout model cache"

      # The downloaded binary is marked executable before it is run with
      # --ignored --nocapture.
      - name: Run GPU tests
        env:
          RUST_LOG: "kreuzberg=debug"
          TEST_DOCUMENTS_DIR: ${{ github.workspace }}/test_documents
        run: |
          chmod +x gpu-acceleration-test
          ./gpu-acceleration-test --ignored --nocapture

107
.github/workflows/ci-lint.yaml vendored Normal file
View File

@@ -0,0 +1,107 @@
# Single lint job: installs every language toolchain the hooks need, runs all
# prek hooks over the whole tree, then checks that the C header exists.
name: CI Lint
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
concurrency:
  cancel-in-progress: true
  group: ci-lint-${{ github.ref }}
permissions:
  contents: read
jobs:
  lint:
    name: Lint
    # Only in the upstream repository, and never for Dependabot-triggered runs.
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: ubuntu-24.04-arm
    timeout-minutes: 60
    env:
      FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
      CARGO_TERM_COLOR: always
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      # --- Language toolchains -------------------------------------------
      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: lint

      - name: Setup Python
        uses: kreuzberg-dev/actions/setup-python-env@v1
        with:
          python-version: "3.13"
          cache-prefix: lint-py
          install-command: "uv sync --group dev --no-install-project --no-install-workspace --frozen"

      - name: Setup Node
        uses: kreuzberg-dev/actions/setup-node-workspace@v1

      - name: Setup Go
        uses: actions/setup-go@v6
        with:
          go-version: "1.26"

      - name: Setup Java
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: "25"

      - name: Setup Elixir
        uses: kreuzberg-dev/actions/setup-elixir@v1

      - name: Setup Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: "3.4"
          bundler-cache: true
          working-directory: packages/ruby

      - name: Setup PHP
        uses: kreuzberg-dev/actions/setup-php@v1

      - name: Setup .NET
        uses: actions/setup-dotnet@v5
        with:
          dotnet-version: "10.0.x"

      - name: Setup R
        uses: kreuzberg-dev/actions/setup-r@v1
        with:
          r-version: "release"
          install-deps: "false"

      # --- Supporting CLIs -----------------------------------------------
      - name: Install Task
        uses: kreuzberg-dev/actions/install-task@v1

      - name: Setup Helm
        uses: azure/setup-helm@v5

      - name: Setup kubeconform
        uses: bmuschko/setup-kubeconform@v1

      - name: Install alef CLI
        uses: kreuzberg-dev/actions/install-alef@v1

      # --- Checks --------------------------------------------------------
      - name: Run all prek hooks
        uses: j178/prek-action@v2
        with:
          extra-args: --all-files
          cache: false

      # Fails with an annotated error when the header file is missing.
      - name: Validate C header
        shell: bash
        run: |
          HEADER="crates/kreuzberg-ffi/include/kreuzberg.h"
          if [ ! -f "$HEADER" ]; then
            echo "::error::C header not found at $HEADER — run 'task alef:generate'"
            exit 1
          fi
          echo "C header verified at $HEADER"

79
.github/workflows/ci-mobile.yaml vendored Normal file
View File

@@ -0,0 +1,79 @@
# Cross-compile checks (`cargo check` only, nothing is built or run) for the
# Android ABIs and iOS targets in the two matrices below.
name: CI Mobile
on:
  push:
    branches:
      - main
    paths:
      - "crates/**"
      - "packages/dart/**"
      - "packages/swift/**"
      - "packages/kotlin-android/**"
      - ".github/workflows/ci-mobile.yaml"
  pull_request:
    branches:
      - main
    paths:
      - "crates/**"
      - "packages/dart/**"
      - "packages/swift/**"
      - "packages/kotlin-android/**"
      - ".github/workflows/ci-mobile.yaml"
  workflow_dispatch:
concurrency:
  cancel-in-progress: true
  group: ci-mobile-${{ github.ref }}
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0
  CARGO_PROFILE_DEV_DEBUG: 0
  RUST_BACKTRACE: short
  # Warnings are errors (-D warnings), with three lints allowed again.
  # dead_code is allowed because the mobile feature subsets (Android drops the
  # ORT-requiring features) leave some functions that are only used in the
  # full-feature graph. unpredictable-function-pointer-comparisons and
  # mismatched-lifetime-syntaxes are allowed as well.
  RUSTFLAGS: "-D warnings -A dead_code -A unpredictable-function-pointer-comparisons -A mismatched-lifetime-syntaxes"
permissions:
  contents: read
jobs:
  android-check:
    name: Android cargo check (${{ matrix.abi }})
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        abi:
          - arm64-v8a
          - x86_64
    steps:
      - uses: actions/checkout@v6
      - uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: ci-mobile-android-${{ matrix.abi }}
      - uses: kreuzberg-dev/actions/setup-android-ndk@v1
      # Both crates are checked through cargo-ndk against platform level 21.
      - name: cargo ndk check kreuzberg-dart
        run: cargo ndk --target ${{ matrix.abi }} --platform 21 -- check -p kreuzberg-dart
      - name: cargo ndk check kreuzberg-ffi
        run: cargo ndk --target ${{ matrix.abi }} --platform 21 -- check -p kreuzberg-ffi

  ios-check:
    name: iOS cargo check (${{ matrix.target }})
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: macos-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        target:
          - aarch64-apple-ios
          - aarch64-apple-ios-sim
    steps:
      - uses: actions/checkout@v6
      - uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: ci-mobile-ios-${{ matrix.target }}
          target: ${{ matrix.target }}
      - name: cargo check kreuzberg-dart
        run: cargo check -p kreuzberg-dart --target ${{ matrix.target }}
      - name: cargo check kreuzberg-swift
        run: cargo check -p kreuzberg-swift --target ${{ matrix.target }}

103
.github/workflows/ci-rust.yaml vendored Normal file
View File

@@ -0,0 +1,103 @@
# Rust CI: clippy, the `rust:test:ci` task and a no-default-features check,
# on Linux (arm64) and macOS.
name: CI Rust
on:
  push:
    branches:
      - main
    paths:
      - "crates/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - ".github/workflows/ci-rust.yaml"
  pull_request:
    branches:
      - main
    paths:
      - "crates/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - ".github/workflows/ci-rust.yaml"
  workflow_dispatch:
concurrency:
  cancel-in-progress: true
  group: ci-rust-${{ github.ref }}
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0
  CARGO_PROFILE_DEV_DEBUG: 0
  RUST_BACKTRACE: short
  RUST_MIN_STACK: 16777216
  ORT_VERSION: "1.24.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  BUILD_PROFILE: "ci"
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
permissions:
  contents: read
jobs:
  rust:
    name: Rust (${{ matrix.os }})
    if: github.repository == 'kreuzberg-dev/kreuzberg' && github.actor != 'dependabot[bot]'
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-24.04-arm
          - macos-latest
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive

      # Linux only; prints the disk usage after the cleanup but not before.
      - name: Free disk space
        if: runner.os == 'Linux'
        uses: kreuzberg-dev/actions/free-disk-space-linux@v1
        with:
          show-initial: "false"
          show-final: "true"

      - name: Setup Rust
        uses: kreuzberg-dev/actions/setup-rust@v1
        with:
          cache-key-prefix: rust-${{ matrix.os }}
          use-sccache: "true"

      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps

      - name: Setup OpenSSL
        uses: kreuzberg-dev/actions/setup-openssl@v1

      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}

      - name: Setup Tesseract cache
        uses: ./.github/actions/setup-tesseract-cache
        with:
          label: ${{ matrix.os }}

      - name: Install Task
        uses: kreuzberg-dev/actions/install-task@v1

      # Lints the workspace minus the language-binding crates; any warning
      # fails the step.
      - name: Run clippy
        shell: bash
        run: >-
          cargo clippy --workspace
          --exclude kreuzberg-ffi
          --exclude kreuzberg-py
          --exclude kreuzberg-php
          --exclude kreuzberg-node
          --exclude kreuzberg-wasm
          --exclude kreuzberg-dart
          --exclude kreuzberg-swift
          --exclude kreuzberg_nif
          -- -D warnings

      # Library search paths are forwarded from the job environment, falling
      # back to an empty string when unset.
      - name: Run tests
        shell: bash
        env:
          LD_LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH || '' }}
          DYLD_LIBRARY_PATH: ${{ env.DYLD_LIBRARY_PATH || '' }}
          DYLD_FALLBACK_LIBRARY_PATH: ${{ env.DYLD_FALLBACK_LIBRARY_PATH || '' }}
        run: task rust:test:ci

      - name: Check no-default-features
        shell: bash
        run: cargo check -p kreuzberg --no-default-features

1303
.github/workflows/profiling.yaml vendored Normal file

File diff suppressed because it is too large Load Diff

262
.github/workflows/publish-docker.yaml vendored Normal file
View File

@@ -0,0 +1,262 @@
# Publishes the core, full and cli Docker images to GHCR.
#
# Jobs:
#   prepare        resolves tag/version/ref metadata for the run.
#   check-docker   reports whether each variant's version tag already exists.
#   publish-docker builds an amd64 test image, tests it, and (unless this is a
#                  dry run or the tag already exists) builds and pushes the
#                  multi-arch image.
name: Publish Docker Images
on:
  workflow_dispatch:
    inputs:
      tag:
        description: "Release tag to build (e.g., v4.3.6)"
        required: true
        type: string
      dry_run:
        description: "Prepare artifacts without publishing"
        required: false
        type: boolean
        default: false
      ref:
        description: "Git ref (branch, tag, or commit) to build; defaults to the tag"
        required: false
        type: string
      force_republish:
        description: "Force re-publish even if artifacts already exist"
        required: false
        type: boolean
        default: false
  release:
    types: [published]
  repository_dispatch:
    types: [publish-docker]
# Serialise runs per dispatched ref/tag (or per git ref); never cancel a
# publish that is already running.
concurrency:
  group: ${{ github.workflow }}-${{ (github.event_name == 'workflow_dispatch' && (github.event.inputs.ref || github.event.inputs.tag)) || github.ref || github.run_id }}
  cancel-in-progress: false
env:
  CARGO_TERM_COLOR: always
  ORT_VERSION: "1.24.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
permissions:
  contents: read
jobs:
  prepare:
    name: Prepare metadata
    # Skip GitHub releases that are marked as pre-releases.
    if: ${{ github.event_name != 'release' || !github.event.release.prerelease }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
    outputs:
      tag: ${{ steps.meta.outputs.tag }}
      version: ${{ steps.meta.outputs.version }}
      ref: ${{ steps.meta.outputs.ref }}
      dry_run: ${{ steps.meta.outputs.dry_run }}
      force_republish: ${{ steps.meta.outputs.force_republish }}
      checkout_ref: ${{ steps.meta.outputs.checkout_ref }}
      target_sha: ${{ steps.meta.outputs.target_sha }}
      is_tag: ${{ steps.meta.outputs.is_tag }}
      release_docker: ${{ steps.meta.outputs.release_docker }}
    steps:
      - name: Checkout code (default)
        uses: actions/checkout@v6
      - name: Resolve release metadata
        id: meta
        uses: kreuzberg-dev/actions/prepare-release-metadata@v1
        with:
          tag: ${{ inputs.tag }}
          ref: ${{ inputs.ref }}
          targets: docker
          dry-run: ${{ inputs.dry_run }}
          force-republish: ${{ inputs.force_republish }}
      - name: Re-checkout at target ref
        if: ${{ steps.meta.outputs.checkout_ref != '' }}
        uses: actions/checkout@v6
        with:
          ref: ${{ steps.meta.outputs.checkout_ref }}
          fetch-depth: 0
          submodules: recursive
      # Values are passed through env so the script never has workflow
      # expressions expanded inside it.
      - name: Show metadata
        env:
          META_TAG: ${{ steps.meta.outputs.tag }}
          META_VERSION: ${{ steps.meta.outputs.version }}
          META_REF: ${{ steps.meta.outputs.ref }}
          META_DRY_RUN: ${{ steps.meta.outputs.dry_run }}
          META_FORCE_REPUBLISH: ${{ steps.meta.outputs.force_republish }}
          META_CHECKOUT_REF: ${{ steps.meta.outputs.checkout_ref }}
          META_TARGET_SHA: ${{ steps.meta.outputs.target_sha }}
          META_IS_TAG: ${{ steps.meta.outputs.is_tag }}
          META_RELEASE_DOCKER: ${{ steps.meta.outputs.release_docker }}
        run: |
          {
            echo "## Release Metadata"
            echo "- **Tag**: \`$META_TAG\`"
            echo "- **Version**: \`$META_VERSION\`"
            echo "- **Ref**: \`$META_REF\`"
            echo "- **Dry Run**: \`$META_DRY_RUN\`"
            echo "- **Force Republish**: \`$META_FORCE_REPUBLISH\`"
            echo "- **Checkout Ref**: \`$META_CHECKOUT_REF\`"
            echo "- **Target SHA**: \`$META_TARGET_SHA\`"
            echo "- **Is Tag**: \`$META_IS_TAG\`"
            echo "- **Release Docker**: \`$META_RELEASE_DOCKER\`"
          } >> "$GITHUB_STEP_SUMMARY"
  check-docker:
    name: Check if Docker image tag exists
    needs: prepare
    if: ${{ needs.prepare.outputs.release_docker == 'true' }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: read
    outputs:
      core_exists: ${{ steps.core.outputs.exists }}
      full_exists: ${{ steps.full.outputs.exists }}
      cli_exists: ${{ steps.cli.outputs.exists }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          ref: ${{ needs.prepare.outputs.tag }}
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # The same script is run once per variant with a different DOCKER_TAG.
      - name: Check core image tag
        id: core
        env:
          DOCKER_TAG: ghcr.io/kreuzberg-dev/kreuzberg:${{ needs.prepare.outputs.version }}-core
          SUMMARY_LABEL: core
        run: scripts/publish/check-docker-tag.sh
      - name: Check full image tag
        id: full
        env:
          DOCKER_TAG: ghcr.io/kreuzberg-dev/kreuzberg:${{ needs.prepare.outputs.version }}
          SUMMARY_LABEL: full
        run: scripts/publish/check-docker-tag.sh
      - name: Check CLI image tag
        id: cli
        env:
          DOCKER_TAG: ghcr.io/kreuzberg-dev/kreuzberg-cli:${{ needs.prepare.outputs.version }}
          SUMMARY_LABEL: cli
        run: scripts/publish/check-docker-tag.sh
  publish-docker:
    name: Publish Docker image (${{ matrix.variant }})
    needs:
      - prepare
      - check-docker
    if: ${{ needs.prepare.outputs.release_docker == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 360
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: false
      matrix:
        # Pushed tags are "<version><tag_suffix>" and "<extra_tag>" on <image>.
        include:
          - variant: core
            dockerfile: docker/Dockerfile.core
            image: ghcr.io/kreuzberg-dev/kreuzberg
            tag_suffix: "-core"
            extra_tag: "core"
          - variant: full
            dockerfile: docker/Dockerfile.full
            image: ghcr.io/kreuzberg-dev/kreuzberg
            tag_suffix: ""
            extra_tag: "latest"
          - variant: cli
            dockerfile: docker/Dockerfile.cli
            image: ghcr.io/kreuzberg-dev/kreuzberg-cli
            tag_suffix: ""
            extra_tag: "latest"
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: ${{ needs.prepare.outputs.checkout_ref }}
          fetch-depth: 0
          submodules: recursive
      - name: Free up disk space
        uses: kreuzberg-dev/actions/free-disk-space-linux@v1
      # The SHA is handed over through env rather than expanded into the
      # script text, so its value can never be interpreted as shell syntax.
      - name: Ensure target commit
        if: ${{ needs.prepare.outputs.target_sha != '' }}
        env:
          TARGET_SHA: ${{ needs.prepare.outputs.target_sha }}
        run: git checkout --progress --force "$TARGET_SHA"
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4
      # From here on, every step is gated on "this variant's tag does not
      # exist yet, or force_republish is set"; push-related steps additionally
      # require that this is not a dry run.
      - name: Skip because tag already exists
        if: ${{ needs.prepare.outputs.force_republish != 'true' && ((matrix.variant == 'core' && needs.check-docker.outputs.core_exists == 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists == 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists == 'true')) }}
        run: echo "Docker tag already exists for variant ${{ matrix.variant }}; skipping publish." >> "$GITHUB_STEP_SUMMARY"
      - name: Build AMD64 test image
        if: ${{ needs.prepare.outputs.force_republish == 'true' || (matrix.variant == 'core' && needs.check-docker.outputs.core_exists != 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists != 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists != 'true') }}
        run: docker build -f ${{ matrix.dockerfile }} --build-arg ONNXRUNTIME_VERSION=${{ env.ORT_VERSION }} -t kreuzberg-publish:${{ matrix.variant }}-test .
      - name: Run Docker tests
        if: ${{ needs.prepare.outputs.force_republish == 'true' || (matrix.variant == 'core' && needs.check-docker.outputs.core_exists != 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists != 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists != 'true') }}
        run: python3 scripts/ci/docker/test_docker.py --image kreuzberg-publish:${{ matrix.variant }}-test --variant ${{ matrix.variant }} --verbose
      - name: Log in to GitHub Container Registry
        if: ${{ needs.prepare.outputs.dry_run != 'true' && (needs.prepare.outputs.force_republish == 'true' || (matrix.variant == 'core' && needs.check-docker.outputs.core_exists != 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists != 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists != 'true')) }}
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract Docker metadata
        if: ${{ needs.prepare.outputs.dry_run != 'true' && (needs.prepare.outputs.force_republish == 'true' || (matrix.variant == 'core' && needs.check-docker.outputs.core_exists != 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists != 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists != 'true')) }}
        id: docker_meta
        uses: docker/metadata-action@v6
        with:
          images: ${{ matrix.image }}
          tags: |
            type=raw,value=${{ needs.prepare.outputs.version }}${{ matrix.tag_suffix }}
            type=raw,value=${{ matrix.extra_tag }}
      - name: Build and push image
        if: ${{ needs.prepare.outputs.dry_run != 'true' && (needs.prepare.outputs.force_republish == 'true' || (matrix.variant == 'core' && needs.check-docker.outputs.core_exists != 'true') || (matrix.variant == 'full' && needs.check-docker.outputs.full_exists != 'true') || (matrix.variant == 'cli' && needs.check-docker.outputs.cli_exists != 'true')) }}
        uses: docker/build-push-action@v7
        with:
          context: .
          file: ${{ matrix.dockerfile }}
          push: true
          build-args: |
            ONNXRUNTIME_VERSION=${{ env.ORT_VERSION }}
          tags: ${{ steps.docker_meta.outputs.tags }}
          labels: |
            ${{ steps.docker_meta.outputs.labels }}
            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
            org.opencontainers.image.description=Kreuzberg document intelligence - ${{ matrix.variant }} variant
            org.opencontainers.image.licenses=MIT
          platforms: linux/amd64,linux/arm64
          # Read from the same scope that cache-to writes. Without a scope,
          # cache-from reads the default scope, which this job never fills.
          cache-from: type=gha,scope=publish-docker-${{ matrix.variant }}
          cache-to: type=gha,mode=max,scope=publish-docker-${{ matrix.variant }}
      - name: Docker dry-run summary
        if: ${{ needs.prepare.outputs.dry_run == 'true' }}
        env:
          IMAGE: ${{ matrix.image }}
          VERSION: ${{ needs.prepare.outputs.version }}
          TAG_SUFFIX: ${{ matrix.tag_suffix }}
        run: scripts/publish/docker/dry-run-summary.sh
      # Always runs; a missing test image is not an error.
      - name: Clean up local Docker images
        if: ${{ always() }}
        run: docker rmi kreuzberg-publish:${{ matrix.variant }}-test || true

108
.github/workflows/publish-helm.yaml vendored Normal file
View File

@@ -0,0 +1,108 @@
# Packages the Helm chart in charts/kreuzberg/ and pushes it to GHCR as an OCI
# artifact. Triggered manually (with an explicit tag), by a published GitHub
# release, or by a `publish-helm` repository_dispatch event.
name: Publish Helm Chart
on:
  workflow_dispatch:
    inputs:
      tag:
        description: "Release tag to build (e.g., v4.3.6)"
        required: true
        type: string
      dry_run:
        description: "Prepare artifacts without publishing"
        required: false
        type: boolean
        default: false
  release:
    types: [published]
  repository_dispatch:
    types: [publish-helm]
# One concurrency group per dispatched tag (falling back to the ref, then the
# run id); an in-progress run in the same group is never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.tag) || github.ref || github.run_id }}
  cancel-in-progress: false
permissions:
  contents: read
jobs:
  publish-helm:
    name: Publish Helm chart to GHCR
    # Release events marked as prerelease are skipped; every other trigger runs.
    if: ${{ github.event_name != 'release' || !github.event.release.prerelease }}
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      # NOTE(review): no `ref` is passed, so the chart sources come from the
      # ref that triggered the run, which is not necessarily the requested
      # tag. Confirm this is intended for workflow_dispatch/repository_dispatch.
      - name: Checkout
        uses: actions/checkout@v6
      # Derives the tag, chart version (tag without a leading "v") and dry-run
      # flag, and exposes them as step outputs `tag`, `version`, `dry_run`.
      - name: Resolve version
        id: meta
        # Event-supplied values are passed through the environment instead of
        # being interpolated into the script text, so a crafted tag cannot
        # inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          TAG_FROM_INPUT: ${{ inputs.tag }}
          TAG_FROM_RELEASE: ${{ github.event.release.tag_name }}
          TAG_FROM_PAYLOAD: ${{ github.event.client_payload.tag }}
          DRY_RUN: ${{ inputs.dry_run || 'false' }}
        run: |
          case "${EVENT_NAME}" in
            workflow_dispatch) TAG="${TAG_FROM_INPUT}" ;;
            release) TAG="${TAG_FROM_RELEASE}" ;;
            repository_dispatch) TAG="${TAG_FROM_PAYLOAD}" ;;
            *) TAG="" ;;
          esac
          # Fail fast rather than writing an empty version into Chart.yaml.
          if [[ -z "${TAG}" ]]; then
            echo "::error::No release tag available for event '${EVENT_NAME}'"
            exit 1
          fi
          VERSION="${TAG#v}"
          # Chart versions must be SemVer; rejecting anything else here also
          # keeps the value safe for the later sed replacement and file path.
          semver_re='^[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.+-]+)?$'
          if [[ ! "${VERSION}" =~ ${semver_re} ]]; then
            echo "::error::Tag does not resolve to a SemVer chart version"
            exit 1
          fi
          {
            echo "tag=${TAG}"
            echo "version=${VERSION}"
            echo "dry_run=${DRY_RUN}"
          } >> "$GITHUB_OUTPUT"
          {
            echo "## Helm Publish Metadata"
            echo "- **Tag**: \`${TAG}\`"
            echo "- **Version**: \`${VERSION}\`"
            echo "- **Dry Run**: \`${DRY_RUN}\`"
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Setup Helm
        uses: azure/setup-helm@v5
      - name: Lint chart
        run: helm lint --strict charts/kreuzberg/
      # Rewrites `version` and `appVersion` in Chart.yaml to the resolved
      # version and echoes the resulting file into the job summary.
      - name: Update Chart.yaml version
        env:
          VERSION: ${{ steps.meta.outputs.version }}
        run: |
          sed -i "s/^version:.*/version: ${VERSION}/" charts/kreuzberg/Chart.yaml
          sed -i "s/^appVersion:.*/appVersion: \"${VERSION}\"/" charts/kreuzberg/Chart.yaml
          {
            echo "### Chart.yaml"
            echo '```yaml'
            cat charts/kreuzberg/Chart.yaml
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Package chart
        run: |
          helm package charts/kreuzberg/ --destination .helm-packages/
          echo "### Packaged" >> "$GITHUB_STEP_SUMMARY"
          ls -lh .helm-packages/ >> "$GITHUB_STEP_SUMMARY"
      # Registry login and push are skipped entirely on dry runs.
      - name: Log in to GitHub Container Registry
        if: ${{ steps.meta.outputs.dry_run != 'true' }}
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Push chart to GHCR
        if: ${{ steps.meta.outputs.dry_run != 'true' }}
        env:
          VERSION: ${{ steps.meta.outputs.version }}
        run: |
          helm push ".helm-packages/kreuzberg-${VERSION}.tgz" oci://ghcr.io/kreuzberg-dev/charts
          echo "### Published" >> "$GITHUB_STEP_SUMMARY"
          echo "Chart pushed to \`oci://ghcr.io/kreuzberg-dev/charts/kreuzberg:${VERSION}\`" >> "$GITHUB_STEP_SUMMARY"
      - name: Dry-run summary
        if: ${{ steps.meta.outputs.dry_run == 'true' }}
        env:
          VERSION: ${{ steps.meta.outputs.version }}
        run: |
          echo "### Dry Run" >> "$GITHUB_STEP_SUMMARY"
          echo "Would have pushed \`kreuzberg-${VERSION}.tgz\` to \`oci://ghcr.io/kreuzberg-dev/charts\`" >> "$GITHUB_STEP_SUMMARY"

46
.github/workflows/publish-pubdev.yaml vendored Normal file
View File

@@ -0,0 +1,46 @@
# Publishes the pre-assembled Dart package to pub.dev.
#
# Why this is a separate, manually dispatched workflow:
#   * pub.dev OIDC trusted publishing rejects tokens that originate from
#     `release` events; only `push` and `workflow_dispatch` are accepted.
#   * The kreuzberg Dart package embeds platform-specific native binaries
#     (Android JNI, iOS XCFramework, server libs for linux/macos/windows),
#     so it cannot simply be rebuilt here. Those artifacts are produced by
#     the main `publish.yaml` workflow.
#
# Flow: the `trigger-pubdev` job of the main workflow dispatches this workflow
# with the main workflow's run_id, and this workflow downloads the
# `dart-package-assembled` artifact from that run before publishing it.
#
# One-time setup: on pub.dev → kreuzberg package → Admin → Automated
# publishing, set the workflow path to `.github/workflows/publish-pubdev.yaml`.
name: Publish pub.dev
on:
  workflow_dispatch:
    inputs:
      run_id:
        description: "GitHub Actions run ID of publish.yaml that produced the dart-package-assembled artifact"
        type: string
        required: true
permissions:
  actions: read
  contents: read
  id-token: write
env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
jobs:
  publish-pub:
    name: Publish pub.dev
    runs-on: ubuntu-latest
    steps:
      # Fetch the assembled package from the run identified by the `run_id`
      # input and unpack it into packages/dart.
      - uses: actions/download-artifact@v8.0.1
        with:
          run-id: ${{ inputs.run_id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
          name: dart-package-assembled
          path: packages/dart
      # Publish the downloaded package directory.
      - uses: kreuzberg-dev/actions/publish-pub@v1
        with:
          package-dir: packages/dart

2345
.github/workflows/publish.yaml vendored Normal file

File diff suppressed because it is too large Load Diff

10
.github/workflows/validate-issues.yml vendored Normal file
View File

@@ -0,0 +1,10 @@
# Runs whenever an issue is opened or edited and delegates all work to the
# shared reusable workflow in kreuzberg-dev/actions (pinned at v1).
name: Validate Issues
on:
  issues:
    types:
      - opened
      - edited
jobs:
  validate:
    # The caller's secrets are forwarded to the reusable workflow.
    secrets: inherit
    uses: kreuzberg-dev/actions/.github/workflows/reusable-validate-issues.yml@v1

10
.github/workflows/validate-pr.yml vendored Normal file
View File

@@ -0,0 +1,10 @@
# Runs whenever a pull request is opened, edited or synchronized and delegates
# all work to the shared reusable workflow in kreuzberg-dev/actions (pinned
# at v1).
name: Validate PR
on:
  pull_request:
    types:
      - opened
      - edited
      - synchronize
jobs:
  validate:
    # The caller's secrets are forwarded to the reusable workflow.
    secrets: inherit
    uses: kreuzberg-dev/actions/.github/workflows/reusable-validate-pr.yml@v1