# =============================================================================
# Alpine-based builder for musl CLI binaries.
#
# Usage:
#   docker build -f docker/Dockerfile.musl-build \
#     --output type=local,dest=./dist \
#     --build-arg TARGETARCH=x86_64 .
#
# Produces:
#   dist/kreuzberg      launcher script (runs the binary via the bundled loader)
#   dist/kreuzberg.bin  the actual ELF binary
#   dist/lib/           bundled runtime libraries
#
# Runtime libraries (musl libc, libstdc++, libgcc) are bundled alongside
# the binary for portability across Linux distros.
# =============================================================================

FROM alpine:3.21 AS builder

# Rust toolchain installed by rustup below; override with --build-arg.
ARG RUST_TOOLCHAIN=nightly-2026-03-10

WORKDIR /build

# Install build dependencies — Alpine's g++ and libstdc++ are musl-native,
# so tesseract C++ compilation works without glibc conflicts.
# onnxruntime-dev from edge provides musl-native ORT for linking.
RUN apk add --no-cache \
        curl gcc g++ musl-dev cmake make pkgconf \
        openssl-dev openssl-libs-static \
        perl linux-headers git file patchelf && \
    apk add --no-cache onnxruntime-dev \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

# Install Rust via rustup (Alpine's packaged Rust may be too old / not nightly).
# pipefail is enabled for this step only so that a failed download aborts the
# build instead of being masked by the exit status of the `sh` on the right-hand
# side of the pipe.
RUN set -o pipefail && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
        | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src && \
    echo "Rust host: $(~/.cargo/bin/rustc -vV | grep host)" && \
    echo "Default target: $(~/.cargo/bin/rustc --print cfg | grep target)"

ENV PATH="/root/.cargo/bin:${PATH}"

# Disable crt-static so the binary can dlopen shared libraries at runtime.
ENV RUSTFLAGS="-C target-feature=-crt-static"

# Point ort-sys to Alpine's system ORT library instead of downloading prebuilt binaries.
# ort-sys checks ORT_LIB_LOCATION before attempting download (build/main.rs line 45).
ENV ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1

# Copy workspace manifests and crates
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/

# Remove workspace members that aren't included
RUN sed -i '/kreuzberg-py/d; /kreuzberg_rb/d; /kreuzberg-node/d; /kreuzberg-ffi/d; /kreuzberg-php/d; /kreuzberg_rustler/d; /kreuzberg_nif/d; /packages\/dart\/rust/d; /packages\/swift\/rust/d; /"crates\/kreuzberg-wasm"/d; /^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d; /benchmark-harness/d; /e2e-generator/d; /snippet-runner/d; /e2e\/rust/d' Cargo.toml

# Build the CLI in release mode, then copy and strip the resulting executable.
RUN cargo build --release --package kreuzberg-cli --features all && \
    cp target/release/kreuzberg /build/kreuzberg && \
    strip /build/kreuzberg

# Set RPATH so the binary finds shared libs relative to itself
RUN patchelf --set-rpath '$ORIGIN/lib' /build/kreuzberg

# Collect runtime libraries.
#
# The launcher (below) invokes the musl loader with `--library-path lib/`,
# which REPLACES the loader's search path. The bundle must therefore be
# self-contained: every transitive dependency of the binary and of every
# shipped .so has to live in /build/lib/ too, otherwise the loader prints
# "Error loading shared library X: No such file or directory" at startup
# (issue #991).
#
# Strategy:
#   1. Copy the well-known runtime bits (musl loader, libstdc++, libgcc, ORT).
#   2. Recursively `ldd`-walk the binary and every .so in the bundle and copy
#      any host lib they resolve to that isn't already present.
#   3. Smoke-test the loader against the binary and each shipped .so so the
#      build FAILS if anything is still missing — better to break the image
#      than to ship a tarball that crashes on first invocation.
RUN set -eu; \
    mkdir -p /build/lib; \
    cp /usr/lib/libstdc++.so.6 /build/lib/; \
    cp /usr/lib/libgcc_s.so.1 /build/lib/; \
    # Bundle ONNX Runtime for embeddings/layout-detection at runtime.
    # Kept best-effort, as before: a missing ORT library does not abort here.
    cp /usr/lib/libonnxruntime.so* /build/lib/ 2>/dev/null || true; \
    # Copy the musl dynamic linker/libc. This one is mandatory: every step
    # below (and the launcher script) needs it, so a failed copy aborts the build.
    cp /lib/ld-musl-*.so.1 /build/lib/; \
    # Recursively resolve transitive deps of the binary and of everything in
    # /build/lib via ldd (alpine's musl ldd resolves against system paths).
    # Re-walk until no new libraries are pulled in to handle multi-level
    # chains (libonnxruntime → libprotobuf-lite → libabsl_* → ...).
    LOADER="$(ls /build/lib/ld-musl-*.so.1 | head -n1)"; \
    while :; do \
        before=$(ls /build/lib | wc -l); \
        for obj in /build/kreuzberg /build/lib/*.so*; do \
            # Skip the loader itself; ldd against it is meaningless.
            case "$obj" in *ld-musl-*) continue ;; esac; \
            "$LOADER" --list "$obj" 2>/dev/null \
                | awk '/=>/ { print $3 }' \
                | grep -E '^/' \
                | while read -r dep; do \
                      base="$(basename "$dep")"; \
                      if [ ! -e "/build/lib/$base" ]; then \
                          cp -L "$dep" /build/lib/; \
                      fi; \
                  done; \
        done; \
        after=$(ls /build/lib | wc -l); \
        [ "$before" = "$after" ] && break; \
    done; \
    # Verify nothing (binary included) has unresolved deps when constrained
    # to lib/. Both the relocation-style "not found" message and the
    # "Error loading shared library" message quoted above count as failures.
    for obj in /build/kreuzberg /build/lib/*.so*; do \
        case "$obj" in *ld-musl-*) continue ;; esac; \
        if "$LOADER" --library-path /build/lib --list "$obj" 2>&1 \
               | grep -Eq 'not found|Error loading shared library'; then \
            echo "FAIL: $obj has unresolved dependencies inside the bundle:" >&2; \
            "$LOADER" --library-path /build/lib --list "$obj" >&2; \
            exit 1; \
        fi; \
    done; \
    echo "OK: every bundled library resolves inside /build/lib/"

# Rename the actual binary and create a wrapper script that invokes it
# via the bundled musl interpreter. This makes the binary work on ANY
# Linux distro (glibc or musl) without system dependencies.
# The wrapper resolves its own directory at run time and execs
# lib/<loader> --library-path lib/ kreuzberg.bin "$@".
# The `test -x` guard fails the build if the loader the wrapper refers to
# is not actually present in the bundle.
RUN mv /build/kreuzberg /build/kreuzberg.bin && \
    INTERP_NAME=$(basename /lib/ld-musl-*.so.1) && \
    test -x "/build/lib/${INTERP_NAME}" && \
    printf '#!/bin/sh\nSCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"\nexec "$SCRIPT_DIR/lib/%s" --library-path "$SCRIPT_DIR/lib" "$SCRIPT_DIR/kreuzberg.bin" "$@"\n' \
        "$INTERP_NAME" > /build/kreuzberg && \
    chmod +x /build/kreuzberg

# Verify the binary was built successfully.
# /build/kreuzberg is now the shell wrapper, so the ELF inspection targets
# /build/kreuzberg.bin. The "No dependencies" fallback is grouped in braces
# so it only covers an empty grep result and cannot mask a failure of the
# commands that precede it.
RUN file /build/kreuzberg /build/kreuzberg.bin && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg.bin 2>/dev/null | grep -E "NEEDED|RPATH|RUNPATH" || echo "No dependencies"; } && \
    echo "=== Bundled libraries ===" && \
    ls -la /build/lib/

# =============================================================================
# Output stage — launcher script, binary and bundled runtime libraries
# =============================================================================
FROM scratch AS output

COPY --from=builder /build/kreuzberg /kreuzberg
COPY --from=builder /build/kreuzberg.bin /kreuzberg.bin
COPY --from=builder /build/lib/ /lib/