This commit is contained in:
142
docker/Dockerfile.musl-build
Normal file
142
docker/Dockerfile.musl-build
Normal file
@@ -0,0 +1,142 @@
|
||||
# =============================================================================
|
||||
# Alpine-based builder for musl CLI binaries.
|
||||
#
|
||||
# Usage:
|
||||
# docker build -f docker/Dockerfile.musl-build \
|
||||
# --output type=local,dest=./dist \
|
||||
# --build-arg TARGETARCH=x86_64 .
|
||||
#
|
||||
# Produces: dist/kreuzberg (launcher script), dist/kreuzberg.bin (the actual
#           binary) and dist/lib/ (musl loader + runtime libraries)
|
||||
#
|
||||
# Runtime libraries (musl libc, libstdc++, libgcc) are bundled alongside
|
||||
# the binary for portability across Linux distros.
|
||||
# =============================================================================
|
||||
# Build stage. Named `builder` so the final output stage can pull artifacts
# from it with `COPY --from=builder`.
FROM alpine:3.21 AS builder
|
||||
|
||||
# Toolchain handed to rustup as `--default-toolchain` below.
# Override with `--build-arg RUST_TOOLCHAIN=<toolchain>`.
ARG RUST_TOOLCHAIN=nightly-2026-03-10
|
||||
|
||||
# All relative paths in the COPY/RUN steps of this stage resolve under /build.
WORKDIR /build
|
||||
|
||||
# Install build dependencies — Alpine's g++ and libstdc++ are musl-native,
|
||||
# so tesseract C++ compilation works without glibc conflicts.
|
||||
# onnxruntime-dev from edge provides musl-native ORT for linking.
|
||||
# Toolchain and build-time packages from the image's own repositories, one per
# line in alphabetical order, followed by onnxruntime-dev pulled from the edge
# repositories in the same layer.
RUN apk add --no-cache \
        cmake \
        curl \
        file \
        g++ \
        gcc \
        git \
        linux-headers \
        make \
        musl-dev \
        openssl-dev \
        openssl-libs-static \
        patchelf \
        perl \
        pkgconf \
    && apk add --no-cache \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
        --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main \
        onnxruntime-dev
|
||||
|
||||
# Install Rust via rustup (Alpine's packaged Rust may be too old / not nightly)
|
||||
# `set -o pipefail` makes a failed download abort this step: without it the
# exit status of `curl | sh` is that of `sh`, which succeeds on empty input,
# and the `echo` lines below succeed even when rustc is missing. The option is
# set inline so it only affects this RUN, not later steps that rely on
# tolerant pipelines.
RUN set -o pipefail && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
        | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src && \
    echo "Rust host: $(~/.cargo/bin/rustc -vV | grep host)" && \
    echo "Default target: $(~/.cargo/bin/rustc --print cfg | grep target)"
|
||||
# rustup installed into root's ~/.cargo/bin; put it on PATH so the later RUN
# steps can invoke `cargo` by name.
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Disable crt-static so the binary can dlopen shared libraries at runtime.
|
||||
# Set as ENV (not inline) so it applies to every later build step in this stage.
ENV RUSTFLAGS="-C target-feature=-crt-static"
|
||||
|
||||
# Point ort-sys to Alpine's system ORT library instead of downloading prebuilt binaries.
|
||||
# ort-sys checks ORT_LIB_LOCATION before attempting download (build/main.rs line 45).
|
||||
# Both ONNX Runtime build settings in a single ENV instruction:
#   ORT_LIB_LOCATION        - directory holding the system ORT library
#   ORT_PREFER_DYNAMIC_LINK - set to 1 to request dynamic linking against it
ENV ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1
|
||||
|
||||
# Copy workspace manifests and crates
|
||||
# Workspace manifest and lockfile first, then only the crates this build
# copies in; other workspace members are stripped from Cargo.toml afterwards.
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/
|
||||
|
||||
# Remove workspace members that aren't included
|
||||
# Edits Cargo.toml in place. Each `/pattern/d` deletes every line containing
# that pattern (the bindings, wasm, benchmark and e2e entries that were not
# copied into this image).
# NOTE: `/^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d` is a range
# delete from that section header through the END of the file, so anything
# placed after that header in Cargo.toml is dropped as well.
# The single quotes keep `$d` / `$` away from shell expansion.
RUN sed -i '/kreuzberg-py/d; /kreuzberg_rb/d; /kreuzberg-node/d; /kreuzberg-ffi/d; /kreuzberg-php/d; /kreuzberg_rustler/d; /kreuzberg_nif/d; /packages\/dart\/rust/d; /packages\/swift\/rust/d; /"crates\/kreuzberg-wasm"/d; /^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d; /benchmark-harness/d; /e2e-generator/d; /snippet-runner/d; /e2e\/rust/d' Cargo.toml
|
||||
|
||||
# Build the CLI in release mode with all features, then place a stripped copy
# of the executable at /build/kreuzberg. `set -e` aborts on the first failing
# command.
RUN set -e; \
    cargo build --release --package kreuzberg-cli --features all; \
    cp target/release/kreuzberg /build/kreuzberg; \
    strip /build/kreuzberg
|
||||
|
||||
# Set RPATH so the binary finds shared libs relative to itself
|
||||
# Single quotes keep `$ORIGIN` literal (no shell expansion) so it is written
# into the ELF as-is.
RUN patchelf --set-rpath '$ORIGIN/lib' /build/kreuzberg
|
||||
|
||||
# Collect runtime libraries.
|
||||
#
|
||||
# The launcher (below) invokes the musl loader with `--library-path lib/`,
|
||||
# which REPLACES the loader's search path. The bundle must therefore be
|
||||
# self-contained: every transitive dependency of every shipped .so has to
|
||||
# live in /build/lib/ too, otherwise the loader prints "Error loading shared
|
||||
# library X: No such file or directory" at startup (issue #991).
|
||||
#
|
||||
# Strategy:
|
||||
# 1. Copy the well-known runtime bits (musl loader, libstdc++, libgcc, ORT).
|
||||
# 2. Recursively `ldd`-walk every .so in the bundle and copy any host lib
|
||||
# they resolve to that isn't already present.
|
||||
# 3. Smoke-test the loader against each shipped .so so the build FAILS if
|
||||
# anything is still missing — better to break the image than to ship a
|
||||
# tarball that crashes on first invocation.
|
||||
# Assemble /build/lib/ and prove it is self-contained.
#
# Changes relative to a plain "copy + ldd-walk + grep" approach:
#   * The musl loader copy is mandatory: the launcher execs it, so a missing
#     loader must fail here instead of leaving $LOADER empty.
#   * /build/kreuzberg (still the real ELF at this point) takes part in the
#     walk and in the verification, so libraries needed only by the
#     executable itself are bundled and checked too.
#   * The verification also matches the loader's "Error loading shared
#     library" message and rejects any dependency that resolves outside
#     /build/lib/.
RUN set -eu; \
    mkdir -p /build/lib; \
    cp /usr/lib/libstdc++.so.6 /build/lib/; \
    cp /usr/lib/libgcc_s.so.1 /build/lib/; \
    # Bundle ONNX Runtime for embeddings/layout-detection at runtime
    # (best-effort: tolerated if no libonnxruntime.so* is present).
    cp /usr/lib/libonnxruntime.so* /build/lib/ 2>/dev/null || true; \
    # Copy the musl dynamic linker/libc. Required, so let a failure abort.
    cp /lib/ld-musl-*.so.1 /build/lib/; \
    LOADER="$(ls /build/lib/ld-musl-*.so.1 | head -n1)"; \
    if [ -z "$LOADER" ] || [ ! -x "$LOADER" ]; then \
        echo "FAIL: musl loader was not copied into /build/lib/" >&2; \
        exit 1; \
    fi; \
    # Resolve transitive deps of the binary and of everything in /build/lib
    # with the loader's --list mode, copying each resolved host library that
    # is not bundled yet. Repeat until a pass adds nothing, to handle
    # multi-level chains (libonnxruntime -> libprotobuf-lite -> libabsl_* ...).
    while :; do \
        before=$(ls /build/lib | wc -l); \
        for so in /build/kreuzberg /build/lib/*.so*; do \
            # Skip the loader itself; listing it is meaningless.
            case "$so" in *ld-musl-*) continue ;; esac; \
            "$LOADER" --list "$so" 2>/dev/null \
                | awk '/=>/ { print $3 }' \
                | grep -E '^/' \
                | while read -r dep; do \
                    base="$(basename "$dep")"; \
                    if [ ! -e "/build/lib/$base" ]; then \
                        cp -L "$dep" /build/lib/; \
                    fi; \
                done; \
        done; \
        after=$(ls /build/lib | wc -l); \
        [ "$before" = "$after" ] && break; \
    done; \
    # Verify the binary and every bundled library against the bundle only.
    # NOTE(review): inside this build image the loader can presumably still
    # fall back to its default system search path, so an error-message grep
    # alone could pass while a library is missing from the bundle. Hence the
    # extra check that every resolved path lives under /build/lib/ (the loader
    # entry itself is exempt).
    for so in /build/kreuzberg /build/lib/*.so*; do \
        case "$so" in *ld-musl-*) continue ;; esac; \
        report="$("$LOADER" --library-path /build/lib --list "$so" 2>&1)" || true; \
        outside="$(printf '%s\n' "$report" \
            | awk '/=>/ && $3 ~ /^\// && $3 !~ /^\/build\/lib\// && $3 !~ /ld-musl-/ { print $3 }')"; \
        if [ -n "$outside" ] \
            || printf '%s\n' "$report" | grep -Eq 'not found|Error loading shared library'; then \
            echo "FAIL: $so has unresolved dependencies inside the bundle:" >&2; \
            printf '%s\n' "$report" >&2; \
            exit 1; \
        fi; \
    done; \
    echo "OK: every bundled library resolves inside /build/lib/"
|
||||
|
||||
# Rename the actual binary and create a wrapper script that invokes it
|
||||
# via the bundled musl interpreter. This makes the binary work on ANY
|
||||
# Linux distro (glibc or musl) without system dependencies.
|
||||
# Swap the ELF out for a launcher: the real executable becomes kreuzberg.bin
# and /build/kreuzberg becomes a /bin/sh script that execs the bundled loader
# (lib/<loader name>) with --library-path pointing at the sibling lib/
# directory, forwarding all arguments.
RUN set -e; \
    loader_name="$(basename /lib/ld-musl-*.so.1)"; \
    mv /build/kreuzberg /build/kreuzberg.bin; \
    printf '#!/bin/sh\nSCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"\nexec "$SCRIPT_DIR/lib/%s" --library-path "$SCRIPT_DIR/lib" "$SCRIPT_DIR/kreuzberg.bin" "$@"\n' \
        "$loader_name" > /build/kreuzberg; \
    chmod +x /build/kreuzberg
|
||||
|
||||
# Verify the binary was built successfully
|
||||
# By this step /build/kreuzberg is the shell launcher and the ELF executable
# is /build/kreuzberg.bin, so the ELF inspection must target the .bin file;
# readelf on the launcher always fails and would report "No dependencies".
# The `grep -q ELF` turns "the binary was built" into an actual check.
RUN file /build/kreuzberg /build/kreuzberg.bin && \
    file /build/kreuzberg.bin | grep -q ELF && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg.bin 2>/dev/null | grep -E "NEEDED|RPATH|RUNPATH" || echo "No dependencies"; } && \
    echo "=== Bundled libraries ===" && \
    ls -la /build/lib/
|
||||
|
||||
# =============================================================================
|
||||
# Output stage — binary + bundled runtime libraries
|
||||
# =============================================================================
|
||||
# Empty final stage holding only the artifacts, intended for export with
# `--output type=local`:
#   /kreuzberg      launcher script
#   /kreuzberg.bin  the executable the launcher runs
#   /lib/           bundled loader and shared libraries
FROM scratch
COPY --from=builder /build/kreuzberg /kreuzberg
COPY --from=builder /build/kreuzberg.bin /kreuzberg.bin
COPY --from=builder /build/lib/ /lib/
|
||||
Reference in New Issue
Block a user