Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,142 @@
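# =============================================================================
# Alpine-based builder for musl CLI binaries.
#
# Usage:
#   docker build -f docker/Dockerfile.musl-build \
#     --output type=local,dest=./dist \
#     --build-arg TARGETARCH=x86_64 .
#
# NOTE: no stage in this file declares `ARG TARGETARCH`, so the --build-arg
# shown above is not consumed here; the build runs `cargo build` without an
# explicit --target, i.e. for the architecture of the build container.
#
# Produces:
#   dist/kreuzberg      launcher script (runs the binary via the bundled loader)
#   dist/kreuzberg.bin  the actual CLI binary
#   dist/lib/           runtime libraries
#
# Runtime libraries (musl loader/libc, libstdc++, libgcc) are bundled
# alongside the binary for portability across Linux distros.
# =============================================================================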
# Builder stage: everything is compiled and assembled on Alpine 3.21 (musl).
FROM alpine:3.21 AS builder
# Rust toolchain passed to rustup as the default toolchain below; override
# with `--build-arg RUST_TOOLCHAIN=<toolchain>`.
ARG RUST_TOOLCHAIN=nightly-2026-03-10
# Relative paths in the COPY/RUN instructions of this stage resolve against /build.
WORKDIR /build
# Build dependencies (one per line, sorted for diffability).
# Alpine's g++ and libstdc++ are musl-native, so the tesseract C++ code
# compiles without glibc conflicts. onnxruntime-dev is pulled from the edge
# repositories to provide a musl-native ONNX Runtime to link against.
RUN apk add --no-cache \
      cmake \
      curl \
      file \
      g++ \
      gcc \
      git \
      linux-headers \
      make \
      musl-dev \
      openssl-dev \
      openssl-libs-static \
      patchelf \
      perl \
      pkgconf \
    && apk add --no-cache \
      --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
      --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main \
      onnxruntime-dev
# Install Rust via rustup (Alpine's packaged Rust may be too old / not nightly).
#
# The installer is downloaded to a file first instead of being piped into
# `sh`: without pipefail a failed `curl` would feed `sh` an empty script,
# the step would "succeed", and the missing toolchain would only surface
# later at `cargo build`. The rustc probes are run as plain assignments so
# that a missing or broken rustc fails this layer immediately.
RUN curl --proto '=https' --tlsv1.2 -sSf -o /tmp/rustup-init.sh https://sh.rustup.rs && \
    sh /tmp/rustup-init.sh -y --default-toolchain "${RUST_TOOLCHAIN}" --component rust-src && \
    rm -f /tmp/rustup-init.sh && \
    rust_host="$(~/.cargo/bin/rustc -vV | grep host)" && \
    rust_target_cfg="$(~/.cargo/bin/rustc --print cfg | grep target)" && \
    echo "Rust host: ${rust_host}" && \
    echo "Default target: ${rust_target_cfg}"
# Make cargo/rustc available to the remaining RUN steps of this stage.
ENV PATH="/root/.cargo/bin:${PATH}"
# Environment for the cargo build:
#   RUSTFLAGS               - disable crt-static so the binary can dlopen
#                             shared libraries at runtime.
#   ORT_LIB_LOCATION        - point ort-sys at Alpine's system ORT library
#                             instead of downloading prebuilt binaries (ort-sys
#                             checks this before attempting a download,
#                             build/main.rs line 45).
#   ORT_PREFER_DYNAMIC_LINK - set to 1 alongside ORT_LIB_LOCATION.
ENV RUSTFLAGS="-C target-feature=-crt-static" \
    ORT_LIB_LOCATION=/usr/lib \
    ORT_PREFER_DYNAMIC_LINK=1
# Bring in the workspace manifests plus only the crates this build needs.
COPY Cargo.toml Cargo.lock ./
COPY crates/kreuzberg/ crates/kreuzberg/
COPY crates/kreuzberg-cli/ crates/kreuzberg-cli/
COPY crates/kreuzberg-tesseract/ crates/kreuzberg-tesseract/
COPY crates/kreuzberg-paddle-ocr/ crates/kreuzberg-paddle-ocr/
# Prune Cargo.toml of every workspace member that was not copied above.
# Expressions run in the order listed; the kreuzberg-wasm profile rule deletes
# from its section header through the end of the file.
RUN sed -i \
      -e '/kreuzberg-py/d' \
      -e '/kreuzberg_rb/d' \
      -e '/kreuzberg-node/d' \
      -e '/kreuzberg-ffi/d' \
      -e '/kreuzberg-php/d' \
      -e '/kreuzberg_rustler/d' \
      -e '/kreuzberg_nif/d' \
      -e '/packages\/dart\/rust/d' \
      -e '/packages\/swift\/rust/d' \
      -e '/"crates\/kreuzberg-wasm"/d' \
      -e '/^\[profile\.release\.package\.kreuzberg-wasm\]$/,$d' \
      -e '/benchmark-harness/d' \
      -e '/e2e-generator/d' \
      -e '/snippet-runner/d' \
      -e '/e2e\/rust/d' \
      Cargo.toml
# Compile the CLI in release mode with all features, then stage a stripped
# copy of the executable at /build/kreuzberg.
RUN set -e; \
    cargo build --release --package kreuzberg-cli --features all; \
    cp /build/target/release/kreuzberg /build/kreuzberg; \
    strip /build/kreuzberg
# Give the staged binary an RPATH of $ORIGIN/lib so it looks for shared
# libraries in the lib/ directory next to itself.
RUN patchelf --set-rpath '$ORIGIN/lib' /build/kreuzberg
# Collect runtime libraries.
#
# The launcher (below) invokes the bundled musl loader with
# `--library-path lib/`. On a target host nothing outside lib/ can be relied
# on, so the bundle must be self-contained: every transitive dependency of the
# binary and of every shipped .so has to live in /build/lib/ too, otherwise the
# loader prints "Error loading shared library X: No such file or directory" at
# startup (issue #991).
#
# Strategy:
#   1. Copy the well-known runtime bits (musl loader, libstdc++, libgcc, ORT).
#      The loader is mandatory, so failing to copy it aborts the build.
#   2. Recursively `ldd`-walk the binary and every .so in the bundle and copy
#      any host lib they resolve to that isn't already present.
#   3. Smoke-test the binary and each shipped .so so the build FAILS if
#      anything is still missing — better to break the image than to ship a
#      tarball that crashes on first invocation. Two things are checked:
#        a. the loader reports no error (musl words a missing library as
#           "Error loading shared library ...", a missing symbol as
#           "... symbol not found");
#        b. every resolved path lies inside /build/lib/.
#      NOTE(review): (b) exists because musl appears to fall back to its
#      default system directories after --library-path, so inside this builder
#      (where /usr/lib has everything) check (a) alone could pass for a bundle
#      that is not self-contained — confirm against the musl loader docs.
RUN set -eu; \
    mkdir -p /build/lib; \
    cp /usr/lib/libstdc++.so.6 /build/lib/; \
    cp /usr/lib/libgcc_s.so.1 /build/lib/; \
    # Bundle ONNX Runtime for embeddings/layout-detection at runtime
    # (best-effort, as before; the dependency walk below picks it up if the
    # binary links against it).
    cp /usr/lib/libonnxruntime.so* /build/lib/ 2>/dev/null || true; \
    # Copy the musl dynamic linker/libc. This one must not be best-effort: the
    # launcher script execs it and LOADER below is derived from it.
    cp /lib/ld-musl-*.so.1 /build/lib/; \
    LOADER="$(ls /build/lib/ld-musl-*.so.1 | head -n1)"; \
    if [ -z "$LOADER" ] || [ ! -x "$LOADER" ]; then \
      echo "FAIL: musl loader was not copied into /build/lib/" >&2; \
      exit 1; \
    fi; \
    # Recursively resolve transitive deps of the binary and of everything in
    # /build/lib via the loader's ldd mode (which resolves against system
    # paths). Re-walk until no new libraries are pulled in, to handle
    # multi-level chains (libonnxruntime -> libprotobuf-lite -> libabsl_* -> ...).
    while :; do \
      before="$(ls /build/lib | wc -l)"; \
      for obj in /build/kreuzberg /build/lib/*.so*; do \
        # Skip the loader itself; ldd against it is meaningless.
        case "$obj" in *ld-musl-*) continue ;; esac; \
        "$LOADER" --list "$obj" 2>/dev/null \
          | awk '/=>/ { print $3 }' \
          | grep -E '^/' \
          | while read -r dep; do \
              base="$(basename "$dep")"; \
              if [ ! -e "/build/lib/$base" ]; then \
                cp -L "$dep" /build/lib/; \
              fi; \
            done; \
      done; \
      after="$(ls /build/lib | wc -l)"; \
      [ "$before" = "$after" ] && break; \
    done; \
    # Verify nothing has unresolved deps, and nothing resolves outside lib/.
    for obj in /build/kreuzberg /build/lib/*.so*; do \
      case "$obj" in *ld-musl-*) continue ;; esac; \
      report="$("$LOADER" --library-path /build/lib --list "$obj" 2>&1 || true)"; \
      if printf '%s\n' "$report" | grep -Eq 'not found|Error loading shared library'; then \
        echo "FAIL: $obj has unresolved dependencies inside the bundle:" >&2; \
        printf '%s\n' "$report" >&2; \
        exit 1; \
      fi; \
      stray="$(printf '%s\n' "$report" | awk '/=>/ { print $3 }' | grep -E '^/' | grep -v '^/build/lib/' || true)"; \
      if [ -n "$stray" ]; then \
        echo "FAIL: $obj resolves libraries from outside /build/lib/:" >&2; \
        printf '%s\n' "$stray" >&2; \
        exit 1; \
      fi; \
    done; \
    echo "OK: every bundled library resolves inside /build/lib/"
# Move the real executable aside as kreuzberg.bin and put a small /bin/sh
# launcher in its place. The launcher locates its own directory and execs the
# bundled musl interpreter from lib/ with --library-path pointing at lib/, so
# the tool is meant to run on any Linux distro (glibc or musl) without relying
# on system libraries.
RUN mv /build/kreuzberg /build/kreuzberg.bin && \
    loader_name="$(basename /lib/ld-musl-*.so.1)" && \
    { \
      printf '%s\n' '#!/bin/sh' && \
      printf '%s\n' 'SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"' && \
      printf 'exec "$SCRIPT_DIR/lib/%s" --library-path "$SCRIPT_DIR/lib" "$SCRIPT_DIR/kreuzberg.bin" "$@"\n' "$loader_name"; \
    } > /build/kreuzberg && \
    chmod +x /build/kreuzberg
# Verify the build outputs.
#
# By this point /build/kreuzberg is the shell launcher and the ELF executable
# lives at /build/kreuzberg.bin, so the ELF inspection must target the .bin
# file (readelf on the launcher would always fall through to "No dependencies").
# The readelf/grep fallback is grouped in braces so that `|| echo` only covers
# an empty dependency list and cannot mask a failure of the earlier checks.
RUN test -x /build/kreuzberg && \
    test -s /build/kreuzberg.bin && \
    file /build/kreuzberg /build/kreuzberg.bin && \
    echo "=== Dynamic dependencies ===" && \
    { readelf -d /build/kreuzberg.bin 2>/dev/null | grep -E "NEEDED|RPATH|RUNPATH" || echo "No dependencies"; } && \
    echo "=== Bundled libraries ===" && \
    ls -la /build/lib/
# =============================================================================
# Export stage — holds only the artifacts: the launcher script, the real
# binary, and the bundled runtime libraries.
# =============================================================================
FROM scratch AS export
COPY --from=builder /build/kreuzberg /build/kreuzberg.bin /
COPY --from=builder /build/lib/ /lib/