Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env bash
set -euo pipefail

# Fetch a prebuilt ONNX Runtime release for Linux (or reuse a previously
# extracted copy) and append the matching build environment to $GITHUB_ENV.
#
# Arguments:
#   $1  ort-version  ONNX Runtime version without the leading "v" (required)
#   $2  dest-dir     directory under $GITHUB_WORKSPACE that receives copies of
#                    the shared libraries (default: crates/kreuzberg-node)
#   $3  arch-id      "x64" or "arm64"; derived from `uname -m` when empty
#   $4  strategy     "bundled" writes only the library search paths; any other
#                    value (default: system) also writes ORT_* and RUSTFLAGS
#
# Required environment: RUNNER_TEMP, GITHUB_WORKSPACE, GITHUB_ENV.

ort_version="${1:?ort-version required}"
dest_dir="${2:-crates/kreuzberg-node}"
arch_id="${3:-}"
strategy="${4:-system}"

# Fail before downloading anything if the runner environment is incomplete.
: "${RUNNER_TEMP:?RUNNER_TEMP must be set}"
: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}"
: "${GITHUB_ENV:?GITHUB_ENV must be set}"

extract_dir="$RUNNER_TEMP/onnxruntime"

if [ -z "$arch_id" ]; then
  case "$(uname -m)" in
    x86_64 | amd64) arch_id="x64" ;;
    arm64 | aarch64) arch_id="arm64" ;;
    *)
      echo "Unsupported Linux architecture: $(uname -m)" >&2
      exit 1
      ;;
  esac
fi

# The release archive and the directory it unpacks to share one base name.
case "$arch_id" in
  x64) ort_dir_name="onnxruntime-linux-x64-${ort_version}" ;;
  arm64) ort_dir_name="onnxruntime-linux-aarch64-${ort_version}" ;;
  *)
    echo "Unsupported Linux arch-id: $arch_id" >&2
    exit 1
    ;;
esac
archive="${ort_dir_name}.tgz"
ort_root="$extract_dir/$ort_dir_name"

if [ ! -d "$ort_root" ]; then
  echo "Cache miss: Downloading ONNX Runtime ${ort_version}"
  curl -fsSL --retry 5 --retry-delay 5 --retry-all-errors -o "$RUNNER_TEMP/$archive" "https://github.com/microsoft/onnxruntime/releases/download/v${ort_version}/$archive"
  mkdir -p "$extract_dir"
  tar -xzf "$RUNNER_TEMP/$archive" -C "$extract_dir"
else
  echo "Cache hit: Using cached ONNX Runtime ${ort_version}"
fi

if [ ! -d "$ort_root/lib" ]; then
  echo "ERROR: ONNX Runtime lib directory missing at $ort_root/lib" >&2
  echo "Available directories:" >&2
  ls -la "$extract_dir" >&2 || true
  exit 1
fi

# Expand the glob into an array instead of parsing `ls` output; nullglob makes
# "no match" an empty array rather than the literal pattern.
shopt -s nullglob
shared_libs=("$ort_root/lib/"*.so*)
shopt -u nullglob
if [ "${#shared_libs[@]}" -eq 0 ]; then
  echo "ERROR: No ONNX Runtime libraries found in $ort_root/lib" >&2
  echo "Directory contents:" >&2
  ls -la "$ort_root/lib" >&2 || true
  exit 1
fi

dest="$GITHUB_WORKSPACE/$dest_dir"
mkdir -p "$dest"
cp -f "${shared_libs[@]}" "$dest/"

rustflags="${RUSTFLAGS:+$RUSTFLAGS }-L $ort_root/lib"

# Append the previous value only when it is non-empty. A dangling ':' is an
# empty search-path entry, which the dynamic loader and the compiler driver
# treat as the current working directory.
search_prefix="$ort_root/lib:$dest"
ld_library_path="${search_prefix}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
library_path="${search_prefix}${LIBRARY_PATH:+:$LIBRARY_PATH}"

if [ "$strategy" = "bundled" ]; then
  echo "Using bundled ORT strategy — letting ort-sys download-binaries handle static linking"
  {
    echo "LD_LIBRARY_PATH=$ld_library_path"
    echo "LIBRARY_PATH=$library_path"
  } >>"$GITHUB_ENV"
else
  # Pick the main runtime library deterministically. A bare
  # "libonnxruntime*.so*" search can also match provider libraries such as
  # libonnxruntime_providers_shared.so, depending on directory order.
  ort_dylib=""
  for candidate in "$ort_root/lib/libonnxruntime.so" "$ort_root/lib/"libonnxruntime.so.*; do
    if [ -e "$candidate" ]; then
      ort_dylib="$candidate"
      break
    fi
  done
  if [ -z "$ort_dylib" ]; then
    # Last resort: accept any libonnxruntime*.so* file, as before.
    ort_dylib="$(find "$ort_root/lib" -name "libonnxruntime*.so*" -print -quit)"
  fi
  if [ -z "$ort_dylib" ]; then
    echo "ERROR: libonnxruntime shared library not found in $ort_root/lib" >&2
    echo "Directory contents:" >&2
    ls -la "$ort_root/lib" >&2 || true
    exit 1
  fi
  {
    echo "ORT_LIB_LOCATION=$ort_root/lib"
    echo "ORT_PREFER_DYNAMIC_LINK=1"
    echo "ORT_SKIP_DOWNLOAD=1"
    echo "ORT_STRATEGY=system"
    echo "ORT_DYLIB_PATH=$ort_dylib"
    echo "LD_LIBRARY_PATH=$ld_library_path"
    echo "LIBRARY_PATH=$library_path"
    echo "RUSTFLAGS=$rustflags"
  } >>"$GITHUB_ENV"
fi

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env bash
set -euo pipefail

# Fetch a prebuilt ONNX Runtime release for macOS (or reuse a previously
# extracted copy) and append the matching build environment to $GITHUB_ENV.
#
# Arguments:
#   $1  ort-version  ONNX Runtime version without the leading "v" (required)
#   $2  dest-dir     directory under $GITHUB_WORKSPACE that receives copies of
#                    the dylibs (default: crates/kreuzberg-node)
#   $3  arch-id      "x64" or "arm64"; derived from `uname -m` when empty
#   $4  strategy     "bundled" writes only the library search paths; any other
#                    value (default: system) also writes ORT_* and RUSTFLAGS
#
# Required environment: RUNNER_TEMP, GITHUB_WORKSPACE, GITHUB_ENV.

ort_version="${1:?ort-version required}"
dest_dir="${2:-crates/kreuzberg-node}"
arch_id="${3:-}"
strategy="${4:-system}"

# Fail before downloading anything if the runner environment is incomplete.
: "${RUNNER_TEMP:?RUNNER_TEMP must be set}"
: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}"
: "${GITHUB_ENV:?GITHUB_ENV must be set}"

extract_dir="$RUNNER_TEMP/onnxruntime"

if [ -z "$arch_id" ]; then
  # Every machine type other than arm64 is treated as x64.
  if [ "$(uname -m)" = "arm64" ]; then
    arch_id="arm64"
  else
    arch_id="x64"
  fi
fi

# Map the arch-id onto the architecture name used in the release file names.
case "$arch_id" in
  arm64) ort_arch="arm64" ;;
  x64) ort_arch="x86_64" ;;
  *)
    echo "Unsupported macOS arch-id: $arch_id" >&2
    exit 1
    ;;
esac
echo "Using macOS ONNX Runtime arch: $ort_arch"

ort_dir_name="onnxruntime-osx-${ort_arch}-${ort_version}"
ort_root="$extract_dir/$ort_dir_name"

if [ ! -d "$ort_root" ]; then
  echo "Cache miss: Downloading ONNX Runtime ${ort_version} for macOS ${ort_arch}"
  archive="${ort_dir_name}.tgz"
  curl -fsSL --retry 5 --retry-delay 5 --retry-all-errors -o "$RUNNER_TEMP/$archive" "https://github.com/microsoft/onnxruntime/releases/download/v${ort_version}/$archive"
  mkdir -p "$extract_dir"
  tar -xzf "$RUNNER_TEMP/$archive" -C "$extract_dir"
else
  echo "Cache hit: Using cached ONNX Runtime ${ort_version}"
fi

if [ ! -d "$ort_root/lib" ]; then
  echo "ERROR: ONNX Runtime lib directory missing at $ort_root/lib" >&2
  echo "Available directories:" >&2
  ls -la "$extract_dir" >&2 || true
  exit 1
fi

# Expand the glob into an array instead of parsing `ls` output; nullglob makes
# "no match" an empty array rather than the literal pattern.
shopt -s nullglob
dylibs=("$ort_root/lib/"libonnxruntime*.dylib)
shopt -u nullglob
if [ "${#dylibs[@]}" -eq 0 ]; then
  echo "ERROR: No ONNX Runtime libraries found in $ort_root/lib" >&2
  echo "Directory contents:" >&2
  ls -la "$ort_root/lib" >&2 || true
  exit 1
fi

dest="$GITHUB_WORKSPACE/$dest_dir"
mkdir -p "$dest"
cp -f "${dylibs[@]}" "$dest/"

rustflags="${RUSTFLAGS:+$RUSTFLAGS }-L $ort_root/lib"

# Append the previous value only when it is non-empty, so the exported lists
# never end in a dangling ':' (an empty search-path entry).
search_prefix="$ort_root/lib:$dest"
dyld_library_path="${search_prefix}${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
dyld_fallback_path="${search_prefix}${DYLD_FALLBACK_LIBRARY_PATH:+:$DYLD_FALLBACK_LIBRARY_PATH}"
library_path="${search_prefix}${LIBRARY_PATH:+:$LIBRARY_PATH}"

if [ "$strategy" = "bundled" ]; then
  echo "Using bundled ORT strategy — letting ort-sys download-binaries handle static linking"
  {
    echo "DYLD_LIBRARY_PATH=$dyld_library_path"
    echo "DYLD_FALLBACK_LIBRARY_PATH=$dyld_fallback_path"
    echo "LIBRARY_PATH=$library_path"
  } >>"$GITHUB_ENV"
else
  # Prefer the unversioned library name so the exported path does not depend
  # on directory iteration order; otherwise use the first sorted glob match.
  ort_dylib="$ort_root/lib/libonnxruntime.dylib"
  if [ ! -e "$ort_dylib" ]; then
    ort_dylib="${dylibs[0]}"
  fi
  {
    echo "ORT_LIB_LOCATION=$ort_root/lib"
    echo "ORT_PREFER_DYNAMIC_LINK=1"
    echo "ORT_SKIP_DOWNLOAD=1"
    echo "ORT_STRATEGY=system"
    echo "ORT_DYLIB_PATH=$ort_dylib"
    echo "DYLD_LIBRARY_PATH=$dyld_library_path"
    echo "DYLD_FALLBACK_LIBRARY_PATH=$dyld_fallback_path"
    echo "LIBRARY_PATH=$library_path"
    echo "RUSTFLAGS=$rustflags"
  } >>"$GITHUB_ENV"
fi

View File

@@ -0,0 +1,100 @@
# Fetch a prebuilt ONNX Runtime release for Windows (or reuse a previously
# extracted copy), copy its libraries into the destination directory and append
# the matching build environment to the file named by $env:GITHUB_ENV.
#
# Usage: windows.ps1 <ortVersion> [destDir] [archId] [strategy]
#   ortVersion  ONNX Runtime version without the leading "v" (required)
#   destDir     directory under $env:GITHUB_WORKSPACE that receives the
#               libraries (default: crates/kreuzberg-node)
#   archId      "arm64" selects the arm64 build; every other value selects x64.
#               Falls back to $env:RUNNER_ARCH when empty.
#   strategy    "bundled" or "system" (default: system)
$OrtVersion = $args[0]
if ([string]::IsNullOrWhiteSpace($OrtVersion)) { throw "Usage: windows.ps1 <ortVersion> [destDir] [archId] [strategy]" }
$DestDir = if ($args.Count -ge 2 -and -not [string]::IsNullOrWhiteSpace($args[1])) { $args[1] } else { "crates/kreuzberg-node" }
$ArchId = if ($args.Count -ge 3) { $args[2] } else { "" }
$Strategy = if ($args.Count -ge 4 -and -not [string]::IsNullOrWhiteSpace($args[3])) { $args[3] } else { "system" }
$ExtractRoot = Join-Path $env:TEMP "onnxruntime"

if ([string]::IsNullOrWhiteSpace($ArchId)) {
    $ArchId = $env:RUNNER_ARCH
}
# Interpolating into a string turns $null (RUNNER_ARCH not set) into "", so the
# method call below cannot fail on a null value; "" then maps to x64.
$ArchId = "$ArchId".ToLowerInvariant()
if ($ArchId -ne "arm64") { $ArchId = "x64" }

$OrtRoot = Join-Path $ExtractRoot "onnxruntime-win-$ArchId-$OrtVersion"
$OrtBin = Join-Path $OrtRoot 'bin'
$OrtLib = Join-Path $OrtRoot 'lib'

if (-Not (Test-Path $OrtRoot)) {
    Write-Host "Cache miss: Downloading ONNX Runtime $OrtVersion"
    $Archive = "onnxruntime-win-$ArchId-$OrtVersion.zip"
    $DownloadPath = Join-Path $env:TEMP $Archive
    Invoke-WebRequest -Uri "https://github.com/microsoft/onnxruntime/releases/download/v$OrtVersion/$Archive" -OutFile $DownloadPath -UseBasicParsing -MaximumRetryCount 5 -RetryIntervalSec 5
    New-Item -ItemType Directory -Path $ExtractRoot -Force | Out-Null
    Expand-Archive -Path $DownloadPath -DestinationPath $ExtractRoot -Force
} else {
    Write-Host "Cache hit: Using cached ONNX Runtime $OrtVersion"
}

# In each failure path below the directory listing is printed BEFORE
# Write-Error: when $ErrorActionPreference is 'Stop', Write-Error terminates
# the script and anything after it would never run.
if (!(Test-Path $OrtLib)) {
    Get-ChildItem -Path $ExtractRoot -Recurse | Write-Host
    Write-Error "ERROR: ONNX Runtime lib directory missing at $OrtLib"
    exit 1
}

$LibFiles = @(Get-ChildItem -Path $OrtLib -Filter "*.lib" -ErrorAction SilentlyContinue)
if ($LibFiles.Count -eq 0) {
    Get-ChildItem -Path $OrtLib | Write-Host
    Write-Error "ERROR: No ONNX Runtime library files found in $OrtLib"
    exit 1
}

# Locate the directories that hold the runtime DLLs: first lib/ and bin/, then
# wherever onnxruntime.dll lives, then wherever any DLL lives.
$DllDirs = @()
foreach ($Candidate in @($OrtLib, $OrtBin)) {
    if (Test-Path $Candidate) {
        $CandidateDlls = @(Get-ChildItem -Path $Candidate -Filter "*.dll" -File -ErrorAction SilentlyContinue)
        if ($CandidateDlls.Count -gt 0) {
            $DllDirs += $Candidate
        }
    }
}
if ($DllDirs.Count -eq 0) {
    $OrtDll = Get-ChildItem -Path $OrtRoot -Recurse -Filter "onnxruntime.dll" -File -ErrorAction SilentlyContinue | Select-Object -First 1
    if ($OrtDll) { $DllDirs += $OrtDll.DirectoryName }
}
if ($DllDirs.Count -eq 0) {
    $AnyDll = Get-ChildItem -Path $OrtRoot -Recurse -Filter "*.dll" -File -ErrorAction SilentlyContinue | Select-Object -First 1
    if ($AnyDll) { $DllDirs += $AnyDll.DirectoryName }
}
# @(...) keeps the result an array even when zero or one directory remains.
$DllDirs = @($DllDirs | Select-Object -Unique)
if ($DllDirs.Count -eq 0) {
    Get-ChildItem -Path $OrtRoot -Recurse | Write-Host
    Write-Error "ERROR: No ONNX Runtime runtime DLLs found under $OrtRoot"
    exit 1
}

$Dest = Join-Path $env:GITHUB_WORKSPACE $DestDir
New-Item -ItemType Directory -Path $Dest -Force | Out-Null
Copy-Item -Path (Join-Path $OrtLib '*') -Destination $Dest -Force
foreach ($Dir in $DllDirs) {
    Copy-Item -Path (Join-Path $Dir '*.dll') -Destination $Dest -Force
}

$RustFlags = if ($env:RUSTFLAGS) { "$env:RUSTFLAGS -L $OrtLib" } else { "-L $OrtLib" }

if ($Strategy -eq "bundled") {
    # ort-sys has no prebuilt static binaries for x86_64-pc-windows-gnu (MSYS2/MinGW).
    # Use the pre-downloaded Microsoft ORT with dynamic linking for Windows GNU targets.
    Write-Host "Using bundled ORT strategy (Windows) - dynamic linking against pre-downloaded ORT (no static binaries for windows-gnu)"
    @(
        "ORT_LIB_LOCATION=$OrtLib"
        "ORT_PREFER_DYNAMIC_LINK=1"
        "RUSTFLAGS=$RustFlags"
        "LIB=$OrtLib;$env:LIB"
        "LIBRARY_PATH=$OrtLib;$env:LIBRARY_PATH"
        "PATH=$Dest;$env:PATH"
    ) | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
} else {
    @(
        "ORT_LIB_LOCATION=$OrtLib"
        "ORT_PREFER_DYNAMIC_LINK=1"
        "ORT_SKIP_DOWNLOAD=1"
        "ORT_STRATEGY=system"
        "ORT_DYLIB_PATH=$Dest\onnxruntime.dll"
        "RUSTFLAGS=$RustFlags"
        "LIB=$OrtLib;$env:LIB"
        "LIBRARY_PATH=$OrtLib;$env:LIBRARY_PATH"
        "PATH=$Dest;$env:PATH"
    ) | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
}

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env bash
set -euo pipefail

# Try to install a prebuilt static ONNX Runtime for a macOS Rust target under
# $GITHUB_WORKSPACE/target/onnxruntime/<target>/onnxruntime/lib and, when one is
# available, append the ORT_* settings that point at it to $GITHUB_ENV.
#
# Arguments:
#   $1  target  Rust target triple; only aarch64-apple-darwin and
#               x86_64-apple-darwin are supported (required)
#
# A failed download is not an error: the script prints a warning, writes
# nothing to $GITHUB_ENV and exits successfully.

target="${1:?target required}"

case "$target" in
  aarch64-apple-darwin)
    ort_url="https://cdn.pyke.io/0/pyke:ort-rs/ms@1.24.1/aarch64-apple-darwin.tgz"
    ;;
  x86_64-apple-darwin)
    ort_url="https://cdn.pyke.io/0/pyke:ort-rs/ms@1.24.1/x86_64-apple-darwin.tgz"
    ;;
  *)
    echo "setup-prebuilt-onnx does not support target $target" >&2
    exit 1
    ;;
esac

# ort_dir is removed with `rm -rf` below, so refuse an empty workspace root.
: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must be set}"
ort_dir="${GITHUB_WORKSPACE}/target/onnxruntime/${target}"
ort_root="${ort_dir}/onnxruntime"
ort_lib="${ort_root}/lib"

# Append the settings that point at the prebuilt library to $GITHUB_ENV.
write_env() {
  {
    echo "ORT_STRATEGY=system"
    echo "ORT_LIB_LOCATION=${ort_lib}"
    echo "ORT_SKIP_DOWNLOAD=1"
    echo "ORT_PREFER_DYNAMIC_LINK=1"
  } >>"${GITHUB_ENV}"
}

if [ -f "${ort_lib}/libonnxruntime.a" ]; then
  echo "Using existing ONNX Runtime at ${ort_lib}" >&2
  write_env
else
  rm -rf "${ort_dir}"
  mkdir -p "${ort_lib}"
  echo "Attempting to download prebuilt ONNX Runtime for ${target}..." >&2

  # Download to a private temp file instead of a fixed, predictable /tmp path;
  # the trap removes it on every exit path, including a failed download.
  ort_archive="$(mktemp)"
  trap 'rm -f "$ort_archive"' EXIT

  # curl's stderr is discarded on purpose: a failed download is reported by
  # the warning below and is not fatal.
  if curl -fsSL --max-time 30 -o "$ort_archive" "${ort_url}" 2>/dev/null; then
    tar -xz -C "${ort_lib}" -f "$ort_archive"
    write_env
  else
    echo "Warning: Prebuilt ONNX Runtime not available for ${target}" >&2
    echo "Will download and build ONNX Runtime during compilation" >&2
  fi
fi

View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -euo pipefail

# Usage: build-with-sccache-fallback.sh <cargo command...>
#
# Runs the given command once with the current environment. If it fails and its
# output contains an sccache-related error, the command is retried once with
# RUSTC_WRAPPER emptied and SCCACHE_GHA_ENABLED=false.
#
# Exit status: 0 when either attempt succeeds, 2 when no command is given,
# otherwise the status of the last failed attempt.

# Without this guard an empty command line "succeeds" and the script would
# report a successful build that never ran.
if [ "$#" -eq 0 ]; then
  echo "Usage: build-with-sccache-fallback.sh <cargo command...>" >&2
  exit 2
fi

log_file=$(mktemp)
trap 'rm -f "$log_file"' EXIT

echo "Building with sccache (fallback on errors)..."

# Attempt with sccache. `|| status=$?` records the failure without tripping
# `set -e`; pipefail makes the pipeline report a failing build even though
# `tee` itself succeeds.
status=0
"$@" 2>&1 | tee "$log_file" || status=$?
if [ "$status" -eq 0 ]; then
  echo "✓ Build succeeded with sccache"
  exit 0
fi

# Check for sccache-related errors
if grep -Eq "sccache.*(error|failed)|cache storage failed|dns error|connection (refused|timed out)" "$log_file"; then
  echo "⚠️ sccache failure detected, retrying without cache..."
  export RUSTC_WRAPPER=""
  export SCCACHE_GHA_ENABLED=false
  status=0
  "$@" || status=$?
  if [ "$status" -eq 0 ]; then
    echo "✓ Build succeeded without sccache (fallback)"
    exit 0
  fi
fi

echo "✗ Build failed"
# Propagate the real failure status instead of collapsing it to 1.
exit "$status"

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

# Delete the per-label cache directories, relative to the current directory.
# Arguments:
#   $1  label  sub-directory name under .tesseract-cache/ and .xdg-cache/
label="${1:?label required}"

for cache_root in .tesseract-cache .xdg-cache; do
  rm -rf "${cache_root}/${label}"
done

View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
set -euo pipefail

# Delete the kreuzberg-tesseract-cache directory of one Rust target, relative
# to the current directory.
# Arguments:
#   $1  rust target  directory name under target/
rust_target="${1:?rust target required}"

stale_cache="target/${rust_target}/kreuzberg-tesseract-cache"
rm -rf "$stale_cache"

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
set -euo pipefail

# Publish the Tesseract cache settings for one labelled build.
# Arguments:
#   $1  label         sub-directory name used under .tesseract-cache/ and
#                     .xdg-cache/
#   $2  enable-cache  the literal "true" enables caching; any other value
#                     disables it
# Appends KEY=value lines to the files named by $GITHUB_ENV and $GITHUB_OUTPUT.
label="${1:?label required}"
enable_cache="${2:?enable-cache required (true/false)}"

if [ "$enable_cache" != "true" ]; then
  # Caching disabled: publish empty values so consumers see every key.
  echo "TESSERACT_RS_CACHE_DIR=" >>"$GITHUB_ENV"
  {
    echo "cache-dir="
    echo "cache-enabled=false"
    echo "docker-options="
  } >>"$GITHUB_OUTPUT"
else
  cache_dir="${GITHUB_WORKSPACE}/.tesseract-cache/${label}"
  {
    echo "TESSERACT_RS_CACHE_DIR=${cache_dir}"
    echo "XDG_CACHE_HOME=${GITHUB_WORKSPACE}/.xdg-cache/${label}"
  } >>"$GITHUB_ENV"
  {
    echo "cache-dir=${cache_dir}"
    echo "cache-enabled=true"
  } >>"$GITHUB_OUTPUT"

  # Variables passed into the container; the cache paths are rooted at /io.
  container_env=(
    "TESSERACT_RS_CACHE_DIR=/io/.tesseract-cache/${label}"
    "XDG_CACHE_HOME=/io/.xdg-cache/${label}"
  )

  # Work out the multiarch triplet: ask dpkg-architecture when it is
  # installed, otherwise map the machine type reported by uname.
  deb_multiarch=""
  if command -v dpkg-architecture >/dev/null 2>&1; then
    deb_multiarch="$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null || true)"
  fi
  if [ -z "$deb_multiarch" ]; then
    case "$(uname -m)" in
      x86_64) deb_multiarch="x86_64-linux-gnu" ;;
      aarch64 | arm64) deb_multiarch="aarch64-linux-gnu" ;;
    esac
  fi

  # With no known triplet the OpenSSL library directory stays at /usr/lib.
  container_env+=(
    "OPENSSL_LIB_DIR=/usr/lib${deb_multiarch:+/${deb_multiarch}}"
    "OPENSSL_INCLUDE_DIR=/usr/include"
  )

  # Join the entries into one space-separated "--env KEY=value" string.
  docker_opts=""
  for entry in "${container_env[@]}"; do
    docker_opts="${docker_opts:+${docker_opts} }--env ${entry}"
  done
  echo "docker-options=${docker_opts}" >>"$GITHUB_OUTPUT"
fi

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

# Create the per-label cache directories, relative to the current directory.
# Arguments:
#   $1  label  sub-directory name under .tesseract-cache/ and .xdg-cache/
label="${1:?label required}"

for cache_root in .tesseract-cache .xdg-cache; do
  mkdir -p "${cache_root}/${label}"
done