# =============================================================================
# Minimal CLI-only Docker image for Kreuzberg.
#
# Builds the CLI on Alpine (musl) and copies the resulting binary into a
# minimal Alpine image for shell access and volume mounts. The binary is
# dynamically linked (crt-static is disabled below so dlopen works), which is
# why the runtime stage installs the shared libraries it needs.
#
# Usage:
#   docker build -f docker/Dockerfile.cli -t kreuzberg-cli .
#   docker run -v $(pwd):/data kreuzberg-cli extract /data/document.pdf
# =============================================================================

# Stage 1: Build the musl-linked binary
FROM alpine:3.21 AS builder

ARG RUST_TOOLCHAIN=nightly-2026-03-10

# Make a RUN step fail when any command of a pipeline fails (e.g. the
# `curl | sh` rustup install below), not only the last one.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]

WORKDIR /build

# Install build dependencies + ONNX Runtime from Alpine edge (musl-native).
# ort-sys checks ORT_LIB_LOCATION before attempting download, so this overrides
# the download-binaries feature transparently — no Cargo feature changes needed.
# Edge repos needed because onnxruntime depends on abseil-cpp/protobuf from edge/main.
RUN apk add --no-cache \
        cmake \
        curl \
        file \
        g++ \
        gcc \
        git \
        linux-headers \
        make \
        musl-dev \
        openssl-dev \
        openssl-libs-static \
        perl \
        pkgconf && \
    apk add --no-cache onnxruntime-dev \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

# Install Rust via rustup
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
    | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src

ENV PATH="/root/.cargo/bin:${PATH}"

# Allow dynamic loading (dlopen) on musl targets by disabling crt-static.
ENV RUSTFLAGS="-C target-feature=-crt-static"

# Point ort-sys to Alpine's system ORT library instead of downloading prebuilt binaries.
ENV ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1

# Copy workspace manifests and crates
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/

# Remove workspace members that aren't included.
# One expression per deleted entry; the range expression drops everything from
# the kreuzberg-wasm release-profile section to the end of Cargo.toml.
RUN sed -i \
        -e '/kreuzberg-py/d' \
        -e '/kreuzberg_rb/d' \
        -e '/kreuzberg-node/d' \
        -e '/kreuzberg-ffi/d' \
        -e '/kreuzberg-php/d' \
        -e '/kreuzberg_rustler/d' \
        -e '/kreuzberg_nif/d' \
        -e '/packages\/dart\/rust/d' \
        -e '/packages\/swift\/rust/d' \
        -e '/\"crates\/kreuzberg-wasm\"/d' \
        -e '/^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d' \
        -e '/benchmark-harness/d' \
        -e '/e2e-generator/d' \
        -e '/snippet-runner/d' \
        -e '/e2e\/rust/d' \
        Cargo.toml

RUN cargo build --release --package kreuzberg-cli --features all && \
    cp target/release/kreuzberg /build/kreuzberg && \
    strip /build/kreuzberg

# Verify the binary was built successfully.
# The `|| echo` fallback is grouped so it only covers "grep found no NEEDED
# entries"; a missing or non-executable binary still fails the step.
RUN test -x /build/kreuzberg && \
    file /build/kreuzberg && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg 2>/dev/null | grep NEEDED || echo "No external dependencies"; }

# =============================================================================
# Stage 2: Minimal runtime image
# =============================================================================
FROM alpine:3.21 AS runtime

# Install runtime dependencies needed by dynamically linked binary
RUN apk add --no-cache \
        libgcc \
        libstdc++ && \
    apk add --no-cache onnxruntime \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

COPY --from=builder /build/kreuzberg /usr/local/bin/kreuzberg

# NOTE(review): no USER is set, so the container runs as root. This keeps
# bind-mounted input (see Usage above) readable regardless of host ownership;
# callers can drop privileges with `docker run --user "$(id -u):$(id -g)" ...`.
# Confirm whether a fixed non-root user is acceptable for this image.
ENTRYPOINT ["kreuzberg"]