This commit is contained in:
75
docker/Dockerfile.cli
Normal file
75
docker/Dockerfile.cli
Normal file
@@ -0,0 +1,75 @@
|
||||
# =============================================================================
|
||||
# Minimal CLI-only Docker image for Kreuzberg.
|
||||
#
|
||||
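# Builds the CLI on Alpine (musl) in a builder stage, then copies the binary
# into a minimal Alpine image for shell access and volume mounts. The binary is
# dynamically linked (crt-static is disabled so dlopen works), so the runtime
# image also installs the shared libraries it needs.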
|
||||
#
|
||||
# Usage:
|
||||
# docker build -f docker/Dockerfile.cli -t kreuzberg-cli .
|
||||
# docker run -v $(pwd):/data kreuzberg-cli extract /data/document.pdf
|
||||
# =============================================================================
|
||||
|
||||
# Stage 1: Compile the kreuzberg CLI on Alpine (musl).
FROM alpine:3.21 AS builder

# Rust toolchain that rustup installs below; override with
# --build-arg RUST_TOOLCHAIN=<toolchain>.
ARG RUST_TOOLCHAIN=nightly-2026-03-10

# All build steps in this stage run from /build.
WORKDIR /build
|
||||
|
||||
# Build dependencies, installed from the image's default repositories.
#
# onnxruntime-dev is then added from Alpine edge (musl-native). Both edge
# repositories are listed because onnxruntime depends on abseil-cpp/protobuf
# from edge/main. ort-sys checks ORT_LIB_LOCATION before attempting a download,
# so the system library overrides the download-binaries feature transparently —
# no Cargo feature changes needed.
RUN apk add --no-cache \
        cmake \
        curl \
        file \
        g++ \
        gcc \
        git \
        linux-headers \
        make \
        musl-dev \
        openssl-dev \
        openssl-libs-static \
        perl \
        pkgconf \
    && apk add --no-cache \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main \
        onnxruntime-dev
|
||||
|
||||
# Install Rust via rustup, using the RUST_TOOLCHAIN build argument as the
# default toolchain and adding the rust-src component.
# `set -o pipefail` makes this step fail when the curl download fails; without
# it only the exit status of `sh` (the last command in the pipe) is checked, so
# a failed download would go unnoticed here.
RUN set -o pipefail && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
    | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src
# Put the rustup-installed tools (cargo, rustc) on PATH for later build steps.
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Build-time environment for rustc and ort-sys:
#   RUSTFLAGS               - disable crt-static so dynamic loading (dlopen)
#                             works on musl targets
#   ORT_LIB_LOCATION        - point ort-sys at Alpine's system ORT library in
#                             /usr/lib instead of downloading prebuilt binaries
#   ORT_PREFER_DYNAMIC_LINK - NOTE(review): presumably tells ort-sys to link
#                             that library dynamically; confirm in ort docs
ENV RUSTFLAGS="-C target-feature=-crt-static" \
    ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1
|
||||
|
||||
# Workspace manifest and lockfile first, then the subset of workspace crates
# that this image builds. Members that are not copied here are stripped from
# Cargo.toml in the next step.
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
|
||||
|
||||
# Remove workspace members that aren't included in this build context.
# Each expression deletes the Cargo.toml lines that mention one absent member;
# the range expression deletes everything from the
# [profile.release.package.kreuzberg-wasm] header to the end of the file.
# Expressions are applied in the order listed.
RUN sed -i \
        -e '/kreuzberg-py/d' \
        -e '/kreuzberg_rb/d' \
        -e '/kreuzberg-node/d' \
        -e '/kreuzberg-ffi/d' \
        -e '/kreuzberg-php/d' \
        -e '/kreuzberg_rustler/d' \
        -e '/kreuzberg_nif/d' \
        -e '/packages\/dart\/rust/d' \
        -e '/packages\/swift\/rust/d' \
        -e '/\"crates\/kreuzberg-wasm\"/d' \
        -e '/^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d' \
        -e '/benchmark-harness/d' \
        -e '/e2e-generator/d' \
        -e '/snippet-runner/d' \
        -e '/e2e\/rust/d' \
        Cargo.toml
|
||||
|
||||
# Build the CLI package in release mode with all features, copy the resulting
# binary to /build/kreuzberg, and strip that copy. `set -e` aborts the step
# at the first command that fails.
RUN set -e; \
    cargo build --release --package kreuzberg-cli --features all; \
    cp target/release/kreuzberg /build/kreuzberg; \
    strip /build/kreuzberg
|
||||
|
||||
# Verify the binary was built successfully, then print its file type and the
# shared libraries it needs.
# The `grep ... || echo` fallback is grouped in braces so it only covers the
# "no NEEDED entries" case. Ungrouped, `a && b && c | d || echo` runs the echo
# on ANY earlier failure, so a missing binary could never fail this step.
RUN test -x /build/kreuzberg && \
    file /build/kreuzberg && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg 2>/dev/null | grep NEEDED || echo "No external dependencies"; }
|
||||
|
||||
# =============================================================================
# Stage 2: Minimal runtime image
# =============================================================================
FROM alpine:3.21

# Install runtime dependencies needed by the dynamically linked binary.
# onnxruntime comes from Alpine edge, like onnxruntime-dev in the builder stage.
RUN apk add --no-cache libstdc++ libgcc && \
    apk add --no-cache onnxruntime \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

# Dedicated unprivileged account with a fixed UID/GID so the CLI does not run
# as root. Bind-mounted inputs must be readable (and output directories
# writable) by this UID; callers can otherwise pass
# `docker run --user "$(id -u):$(id -g)"`.
RUN addgroup -S -g 10001 kreuzberg && \
    adduser -S -u 10001 -G kreuzberg -h /home/kreuzberg kreuzberg

COPY --from=builder /build/kreuzberg /usr/local/bin/kreuzberg

# NOTE(review): the working directory is moved to the user's home so the
# process has a writable cwd in case the CLI writes cache or output files
# relative to it — confirm. Absolute paths such as /data/document.pdf are
# unaffected.
WORKDIR /home/kreuzberg
USER kreuzberg

# Absolute path so the entrypoint does not depend on PATH.
ENTRYPOINT ["/usr/local/bin/kreuzberg"]
|
||||
Reference in New Issue
Block a user