76 lines
3.3 KiB
Docker
76 lines
3.3 KiB
Docker
|
|
# =============================================================================
# Minimal CLI-only Docker image for Kreuzberg.
#
# Builds the CLI on Alpine as a dynamically linked musl binary (crt-static is
# disabled and ONNX Runtime is linked from Alpine's shared library), then
# copies it into a minimal Alpine image for shell access and volume mounts.
#
# Usage:
#   docker build -f docker/Dockerfile.cli -t kreuzberg-cli .
#   docker run -v $(pwd):/data kreuzberg-cli extract /data/document.pdf
# =============================================================================
|
||
|
|
|
||
|
|
# Stage 1: Compile the kreuzberg CLI on Alpine (musl libc).
FROM alpine:3.21 AS builder

# All build work happens under /build; WORKDIR creates it if missing.
WORKDIR /build

# Rust toolchain handed to rustup; override with --build-arg RUST_TOOLCHAIN=...
ARG RUST_TOOLCHAIN=nightly-2026-03-10
|
||
|
|
|
||
|
|
# Build toolchain from the image's default repositories, plus the musl-native
# ONNX Runtime development package from Alpine edge.
# ort-sys consults ORT_LIB_LOCATION before it tries to download anything, so
# the system library transparently overrides the download-binaries feature
# without touching Cargo features.
# The edge repositories are required because onnxruntime depends on
# abseil-cpp/protobuf builds that live in edge/main.
RUN apk add --no-cache \
        cmake \
        curl \
        file \
        g++ \
        gcc \
        git \
        linux-headers \
        make \
        musl-dev \
        openssl-dev \
        openssl-libs-static \
        perl \
        pkgconf \
    && apk add --no-cache \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main \
        onnxruntime-dev
|
||
|
|
|
||
|
|
# Install Rust via rustup, pinned to ${RUST_TOOLCHAIN}, with the rust-src
# component.
# `set -o pipefail` makes this RUN fail when curl fails; without it the exit
# status of the pipeline is that of `sh` alone, so a failed download would
# still produce a "successful" layer with no toolchain in it.
RUN set -o pipefail && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
    | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src

# rustup installs cargo/rustc under /root/.cargo/bin; expose them to later steps.
ENV PATH="/root/.cargo/bin:${PATH}"
|
||
|
|
|
||
|
|
# Turn off crt-static so the musl build can load shared libraries at runtime
# (dlopen).
ENV RUSTFLAGS="-C target-feature=-crt-static"

# Have ort-sys use Alpine's system ONNX Runtime in /usr/lib, linked
# dynamically, rather than downloading prebuilt binaries.
ENV ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1
|
||
|
|
|
||
|
|
# Workspace manifest and lockfile go to the root of /build.
COPY Cargo.toml Cargo.lock ./

# Only the crates required to build the CLI are brought into the build context.
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
|
||
|
|
|
||
|
|
# Remove workspace members that aren't included
|
||
|
|
# Edits Cargo.toml in place. Each `/pattern/d` expression deletes every line
# matching that pattern (e.g. kreuzberg-py, "crates/kreuzberg-wasm"), so
# entries whose sources were not copied into this stage no longer appear in
# the manifest.
# The range expression `/^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d`
# deletes from that section header through the END of the file.
# NOTE(review): this presumes nothing needed follows that section in
# Cargo.toml — confirm whenever the manifest layout changes.
RUN sed -i '/kreuzberg-py/d; /kreuzberg_rb/d; /kreuzberg-node/d; /kreuzberg-ffi/d; /kreuzberg-php/d; /kreuzberg_rustler/d; /kreuzberg_nif/d; /packages\/dart\/rust/d; /packages\/swift\/rust/d; /\"crates\/kreuzberg-wasm\"/d; /^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d; /benchmark-harness/d; /e2e-generator/d; /snippet-runner/d; /e2e\/rust/d' Cargo.toml
|
||
|
|
|
||
|
|
# Compile the CLI package in release mode with all features, then leave a
# stripped copy of the executable at /build/kreuzberg.
# `set -e` aborts the step at the first failing command.
RUN set -e; \
    cargo build --release --package kreuzberg-cli --features all; \
    cp target/release/kreuzberg /build/kreuzberg; \
    strip /build/kreuzberg
|
||
|
|
|
||
|
|
# Verify the binary was built successfully and print its shared-library
# dependencies.
#   * `test -x` fails the build when the executable is missing; by default
#     `file` reports an unreadable path as ordinary output instead of failing.
#   * The braces confine the `|| echo` fallback to the readelf/grep pipeline.
#     Without them the fallback applies to the whole `&&` chain, so a failure
#     in any earlier command would be swallowed and the step could never fail.
RUN test -x /build/kreuzberg && \
    file /build/kreuzberg && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg 2>/dev/null | grep NEEDED || echo "No external dependencies"; }
|
||
|
|
|
||
|
|
# =============================================================================
# Stage 2: Minimal runtime image
# =============================================================================
FROM alpine:3.21

# Shared libraries required by the dynamically linked binary: the GCC and C++
# runtimes from the image's default repositories, and ONNX Runtime from the
# Alpine edge repositories.
RUN apk add --no-cache \
        libgcc \
        libstdc++ \
    && apk add --no-cache \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main \
        onnxruntime
|
||
|
|
|
||
|
|
# Take only the executable from the builder stage; nothing else from that
# stage is copied into this image.
COPY --from=builder /build/kreuzberg /usr/local/bin/kreuzberg
|
||
|
|
|
||
|
|
# Exec-form entrypoint: arguments given to `docker run <image> ...` are passed
# directly to the kreuzberg binary.
ENTRYPOINT ["kreuzberg"]
|