Files
fil/docker/Dockerfile.cli

76 lines
3.3 KiB
Docker
Raw Normal View History

2026-06-01 23:40:55 +02:00
# =============================================================================
# Minimal CLI-only Docker image for Kreuzberg.
#
# Builds a dynamically linked musl binary on Alpine (crt-static is disabled and
# ONNX Runtime comes from the system package), then copies it into a minimal
# Alpine image for shell access and volume mounts.
#
# Usage:
# docker build -f docker/Dockerfile.cli -t kreuzberg-cli .
# docker run -v $(pwd):/data kreuzberg-cli extract /data/document.pdf
# =============================================================================
# Stage 1 (builder): compile the kreuzberg CLI on Alpine, i.e. against musl.
FROM alpine:3.21 AS builder

# Rust toolchain to install; override with --build-arg RUST_TOOLCHAIN=...
ARG RUST_TOOLCHAIN=nightly-2026-03-10

WORKDIR /build

# Build dependencies come from the image's own repositories; the ONNX Runtime
# development package is taken from Alpine edge (musl-native).
# ort-sys checks ORT_LIB_LOCATION before it attempts a download, so providing
# the system library overrides the download-binaries feature transparently and
# no Cargo feature changes are needed.
# Both edge repositories are listed because onnxruntime depends on
# abseil-cpp/protobuf from edge/main.
RUN apk add --no-cache \
        cmake \
        curl \
        file \
        g++ \
        gcc \
        git \
        linux-headers \
        make \
        musl-dev \
        openssl-dev \
        openssl-libs-static \
        perl \
        pkgconf \
    && apk add --no-cache onnxruntime-dev \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main
# Install Rust via rustup, pinned to the toolchain named by RUST_TOOLCHAIN.
# The installer is piped from curl into sh. Without pipefail the RUN step only
# sees the exit status of `sh`, so a failed download could go unnoticed; enable
# pipefail so a curl failure aborts the build. The SHELL setting stays in
# effect for the remaining RUN instructions of this stage.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
    | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src
# rustup installs cargo/rustc under /root/.cargo/bin; expose them to later steps.
ENV PATH="/root/.cargo/bin:${PATH}"
# Build-time configuration for rustc and ort-sys:
#   RUSTFLAGS               - disables crt-static so dynamic loading (dlopen)
#                             works on musl targets.
#   ORT_LIB_LOCATION        - points ort-sys at Alpine's system ONNX Runtime
#                             library instead of downloading prebuilt binaries.
#   ORT_PREFER_DYNAMIC_LINK - presumably makes ort-sys link that library
#                             dynamically; confirm against the ort-sys docs.
ENV RUSTFLAGS="-C target-feature=-crt-static" \
    ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1
# Copy the workspace manifest, the lockfile, and only the crates that are
# copied below (core library, CLI, and the two OCR crates).
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/
# Remove workspace members that were not copied into the build context above.
# The sed program edits Cargo.toml in place:
#   - deletes every line mentioning one of the listed binding, package, or
#     tooling paths (kreuzberg-py, kreuzberg-node, benchmark-harness, ...);
#   - deletes everything from the line that is exactly
#     [profile.release.package.kreuzberg-wasm] through the end of the file.
# NOTE(review): the range delete assumes that profile section is the last
# thing in Cargo.toml; anything added after it would be dropped as well.
RUN sed -i '/kreuzberg-py/d; /kreuzberg_rb/d; /kreuzberg-node/d; /kreuzberg-ffi/d; /kreuzberg-php/d; /kreuzberg_rustler/d; /kreuzberg_nif/d; /packages\/dart\/rust/d; /packages\/swift\/rust/d; /\"crates\/kreuzberg-wasm\"/d; /^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d; /benchmark-harness/d; /e2e-generator/d; /snippet-runner/d; /e2e\/rust/d' Cargo.toml
# Release build of the CLI with all features enabled. The resulting binary is
# copied to /build/kreuzberg (the path the runtime stage copies from) and
# stripped of symbols.
RUN cargo build --release --package kreuzberg-cli --features all \
    && cp target/release/kreuzberg /build/kreuzberg \
    && strip /build/kreuzberg

# Verify the binary was built successfully and list its shared-library
# dependencies for the build log.
# The fallback echo is grouped with the readelf/grep pipeline only. Without the
# braces, `a && b && c | d || echo ...` lets the echo swallow a failure of any
# earlier command in the chain, so this step could never fail.
RUN file /build/kreuzberg \
    && echo "=== Dynamic dependencies ===" \
    && { readelf -d /build/kreuzberg 2>/dev/null | grep NEEDED || echo "No external dependencies"; }
# =============================================================================
# Stage 2: Minimal runtime image
# =============================================================================
FROM alpine:3.21

# Shared libraries required by the dynamically linked binary at run time:
# the C++ and GCC runtime libraries from the image's own repositories, plus
# ONNX Runtime from Alpine edge (the builder stage takes onnxruntime-dev from
# the same edge repositories).
RUN apk add --no-cache \
        libgcc \
        libstdc++ \
    && apk add --no-cache onnxruntime \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

# Only the compiled CLI is carried over from the builder stage.
# NOTE(review): no USER is set, so the CLI runs as root inside the container.
# Consider a non-root user if bind-mounted input/output permissions allow it.
COPY --from=builder /build/kreuzberg /usr/local/bin/kreuzberg

# Arguments passed to `docker run <image> ...` are forwarded to the CLI.
ENTRYPOINT ["kreuzberg"]