Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Determines the major.minor version of the apt install candidate for
# tesseract-ocr, appends `version=<value>` to the file named by
# $GITHUB_OUTPUT and prints a `::notice` workflow command with the result.
set -euo pipefail

# Prints the first "<digits>.<digits>" token found on the "Candidate:" line of
# `apt-cache policy tesseract-ocr`; prints nothing when there is no such token.
candidate_major_minor() {
  apt-cache policy tesseract-ocr 2>/dev/null |
    grep 'Candidate:' |
    grep -Eo '[0-9]+\.[0-9]+' |
    head -1
}

# With pipefail any failing stage fails the pipeline; treat that as
# "no version detected" rather than aborting the script.
version="$(candidate_major_minor || true)"

# Substitute a placeholder when detection produced nothing.
version="${version:-unknown}"

echo "version=${version}" >>"${GITHUB_OUTPUT}"
echo "::notice title=Tesseract Version::Detected version: ${version}"

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Determines the major.minor version of the Homebrew tesseract formula,
# appends `version=<value>` to the file named by $GITHUB_OUTPUT and prints a
# `::notice` workflow command with the result.
set -euo pipefail

# Prints the leading "<digits>.<digits>" of formulae[0].versions.stable from
# `brew info --json=v2 tesseract`. Prints nothing when brew yields no output
# or the field is absent/unparseable; never fails.
version_from_json() {
  local json
  json="$(brew info --json=v2 tesseract 2>/dev/null || true)"
  if [[ -z "${json}" ]]; then
    return 0
  fi
  python3 -c 'import json, re, sys; data = json.loads(sys.argv[1]); stable = (((data.get("formulae") or [{}])[0].get("versions") or {}).get("stable") or ""); m = re.match(r"^(\d+\.\d+)", stable); print(m.group(1) if m else "")' "${json}" || true
}

# Prints the first "<digits>.<digits>" occurring in the first line of the
# plain-text `brew info tesseract` output, or nothing when there is none.
version_from_text() {
  local first_line
  first_line="$(brew info tesseract 2>/dev/null | head -1 || true)"
  if [[ "${first_line}" =~ ([0-9]+\.[0-9]+) ]]; then
    echo "${BASH_REMATCH[1]}"
  fi
}

# Prefer the structured JSON answer; fall back to scraping the text output.
version="$(version_from_json)"
if [[ -z "${version}" ]]; then
  version="$(version_from_text)"
fi

# Substitute a placeholder when neither source produced a version.
version="${version:-unknown}"

echo "version=${version}" >>"${GITHUB_OUTPUT}"
echo "::notice title=Tesseract Version::Detected version: ${version}"

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
# Installs the Linux (apt) dependencies used by the build and tests, then
# verifies that CMake, Tesseract, PHP and the Tesseract language data exist.
#
# Environment:
#   REPO_ROOT   - optional; defaults to three directories above this script.
#   GITHUB_ENV  - optional; when set, `CMAKE=<path to cmake>` is appended.
#   GITHUB_PATH - optional; when set, cmake's directory is appended.
#
# Exits 1 when cmake, php or the tessdata directory cannot be found afterwards.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "$SCRIPT_DIR/../../.." && pwd)}"
# Supplies retry_with_backoff and retry_with_backoff_timeout used below.
source "$REPO_ROOT/scripts/lib/retry.sh"

echo "::group::Installing Linux dependencies"

echo "Updating package index..."
# A stale index is tolerated: the install step below is the real gate.
if ! retry_with_backoff sudo apt-get update; then
  echo "::warning::apt-get update failed after retries, continuing anyway..."
fi

packages=(
  tesseract-ocr
  tesseract-ocr-eng
  tesseract-ocr-tur
  tesseract-ocr-deu
  fonts-liberation
  fonts-dejavu-core
  fonts-noto-core
  libssl-dev
  pkg-config
  build-essential
  cmake
  libmagic-dev
  libuv1-dev
  php-cli
  php-dev
)

echo "Installing dependencies..."
# NOTE(review): the first argument looks like a timeout in seconds (900 matches
# the "15 minutes" message below) - confirm against scripts/lib/retry.sh.
if retry_with_backoff_timeout 900 sudo apt-get install -y "${packages[@]}"; then
  echo "✓ All packages installed successfully"
else
  exit_code=$?
  # NOTE(review): 124 is presumably the timeout(1)-style "timed out" status
  # propagated by the retry helper - confirm against scripts/lib/retry.sh.
  if ((exit_code == 124)); then
    echo "::error::Package installation timed out after 15 minutes"
  else
    echo "::warning::Some packages failed to install, attempting individual installs..."
    # NOTE(review): php-cli is not retried here although the verification
    # below hard-fails when php is missing - confirm this is intended.
    for pkg in tesseract-ocr libssl-dev pkg-config cmake; do
      echo "Installing $pkg..."
      if retry_with_backoff_timeout 300 sudo apt-get install -y "$pkg" 2>&1; then
        echo "$pkg installed"
      else
        echo " ⚠ Failed to install $pkg"
      fi
    done
  fi
fi
echo "::endgroup::"

echo "::group::Verifying Linux installations"

echo "CMake:"
if command -v cmake >/dev/null 2>&1; then
  cmake --version | head -1
  echo "✓ CMake available"
  # Export CMAKE environment variable for immediate availability in build scripts
  CMAKE_FULL_PATH="$(command -v cmake)"
  # ${VAR:-} keeps the "is it set?" guard working under `set -u`; a bare
  # "$GITHUB_ENV" would abort with "unbound variable" outside of CI.
  if [[ -n "${GITHUB_ENV:-}" ]]; then
    echo "CMAKE=$CMAKE_FULL_PATH" >>"$GITHUB_ENV"
    echo "✓ Set CMAKE=$CMAKE_FULL_PATH in GITHUB_ENV"
  fi
  # Also add cmake binary directory to GITHUB_PATH for subsequent steps
  CMAKE_BIN="$(dirname "$CMAKE_FULL_PATH")"
  if [[ -n "${GITHUB_PATH:-}" && -d "$CMAKE_BIN" ]]; then
    echo "$CMAKE_BIN" >>"$GITHUB_PATH"
    echo "✓ Added cmake directory to GITHUB_PATH: $CMAKE_BIN"
  fi
else
  echo "::error::CMake not found after installation"
  exit 1
fi

echo ""
echo "Tesseract:"
# The Tesseract CLI is optional: every failure here is only a warning.
if command -v tesseract >/dev/null 2>&1; then
  if tesseract --version 2>/dev/null | head -1; then
    echo "✓ Tesseract CLI available"
  else
    echo "::warning::Tesseract CLI present but failed to run"
  fi
else
  echo "::warning::Tesseract CLI not found; continuing (OCR will rely on bundled Tesseract)"
fi

echo ""
echo "Available Tesseract languages:"
if command -v tesseract >/dev/null 2>&1; then
  # `|| true`: with pipefail, head closing the pipe early must not abort.
  tesseract --list-langs | head -10 || true
else
  echo "(tesseract CLI not available)"
fi

echo ""
echo "PHP:"
if command -v php >/dev/null 2>&1; then
  php --version | head -1
  echo "✓ PHP available"
else
  echo "::error::PHP not found after installation"
  exit 1
fi

echo ""
echo "Checking Tesseract data path..."
tessdata_found=0
# Report on the first candidate directory that exists, then stop looking.
for tessdata_path in "/usr/share/tesseract-ocr/5/tessdata" "/usr/share/tesseract-ocr/tessdata"; do
  if [[ -d "$tessdata_path" ]]; then
    echo "Found tessdata at: $tessdata_path"
    echo "Required language files:"
    # Missing language files are reported but are not fatal.
    for lang in eng tur deu; do
      if [[ -f "$tessdata_path/${lang}.traineddata" ]]; then
        size=$(stat -c%s "$tessdata_path/${lang}.traineddata" 2>/dev/null || echo "unknown")
        echo "${lang}.traineddata ($size bytes)"
      else
        echo "${lang}.traineddata (missing)"
      fi
    done
    tessdata_found=1
    break
  fi
done
if ((tessdata_found == 0)); then
  echo "::error::Tessdata directory not found in standard locations"
  exit 1
fi
echo "::endgroup::"

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
# Installs the macOS (Homebrew) dependencies used by the build and tests, then
# verifies that CMake, Tesseract and PHP are reachable on PATH.
#
# Environment:
#   REPO_ROOT   - optional; defaults to three directories above this script.
#   GITHUB_ENV  - optional; when set, `CMAKE=<path to cmake>` is appended.
#   GITHUB_PATH - optional; when set, Homebrew and cmake directories are appended.
#
# Exits 1 when cmake, tesseract or php cannot be installed or found.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "$SCRIPT_DIR/../../.." && pwd)}"
# Supplies retry_with_backoff used below.
source "$REPO_ROOT/scripts/lib/retry.sh"

echo "::group::Installing macOS dependencies"

# Put existing Homebrew directories first on PATH for this script and, when
# GITHUB_PATH is available, for later steps as well. ${GITHUB_PATH:-} keeps the
# guard working under `set -u` when the script runs outside of CI.
if [[ -d "/opt/homebrew/bin" ]]; then
  export PATH="/opt/homebrew/bin:/opt/homebrew/sbin:${PATH}"
  if [[ -n "${GITHUB_PATH:-}" ]]; then
    echo "/opt/homebrew/bin" >>"$GITHUB_PATH"
    echo "/opt/homebrew/sbin" >>"$GITHUB_PATH"
  fi
fi
if [[ -d "/usr/local/bin" ]]; then
  export PATH="/usr/local/bin:/usr/local/sbin:${PATH}"
  if [[ -n "${GITHUB_PATH:-}" ]]; then
    echo "/usr/local/bin" >>"$GITHUB_PATH"
    echo "/usr/local/sbin" >>"$GITHUB_PATH"
  fi
fi

# CMake is required: a failed install aborts the script.
if ! brew list cmake &>/dev/null; then
  echo "Installing CMake..."
  retry_with_backoff brew install cmake || {
    echo "::error::Failed to install CMake after retries"
    exit 1
  }
else
  echo "✓ CMake already installed"
fi
if ! command -v cmake >/dev/null 2>&1; then
  echo "CMake not on PATH after install; attempting brew link..."
  # Best effort; the verification section below reports a missing binary.
  brew link --overwrite cmake >/dev/null 2>&1 || true
fi

# Tesseract is required: a failed install aborts the script.
if ! brew list tesseract &>/dev/null; then
  echo "Installing Tesseract..."
  retry_with_backoff brew install tesseract || {
    echo "::error::Failed to install Tesseract after retries"
    exit 1
  }
else
  echo "✓ Tesseract already installed"
fi
if ! command -v tesseract >/dev/null 2>&1; then
  echo "Tesseract not on PATH after install; attempting brew link..."
  brew link --overwrite tesseract >/dev/null 2>&1 || true
fi

# Language packs are optional: failure only produces a warning.
if ! brew list tesseract-lang &>/dev/null; then
  echo "Installing Tesseract language packs..."
  retry_with_backoff brew install tesseract-lang || {
    echo "::warning::Failed to install tesseract-lang, some languages may be unavailable"
  }
else
  echo "✓ Tesseract language packs already installed"
fi

# libmagic is optional: failure only produces a warning.
if ! brew list libmagic &>/dev/null; then
  echo "Installing libmagic..."
  retry_with_backoff brew install libmagic || {
    echo "::warning::Failed to install libmagic after retries"
  }
else
  echo "✓ libmagic already installed"
fi

# PHP is required: a failed install aborts the script.
if ! brew list php &>/dev/null; then
  echo "Installing PHP..."
  retry_with_backoff brew install php || {
    echo "::error::Failed to install PHP after retries"
    exit 1
  }
else
  echo "✓ PHP already installed"
fi
if ! command -v php >/dev/null 2>&1; then
  echo "PHP not on PATH after install; attempting brew link..."
  brew link --overwrite php >/dev/null 2>&1 || true
fi
echo "::endgroup::"

echo "::group::Verifying macOS installations"

echo "CMake:"
if command -v cmake >/dev/null 2>&1; then
  cmake --version | head -1
  # Export CMAKE environment variable for immediate availability in build scripts
  CMAKE_FULL_PATH="$(command -v cmake)"
  if [[ -n "${GITHUB_ENV:-}" ]]; then
    echo "CMAKE=$CMAKE_FULL_PATH" >>"$GITHUB_ENV"
    echo "✓ Set CMAKE=$CMAKE_FULL_PATH in GITHUB_ENV"
  fi
  # Also add cmake binary directory to GITHUB_PATH for subsequent steps
  CMAKE_BIN="$(dirname "$CMAKE_FULL_PATH")"
  if [[ -n "${GITHUB_PATH:-}" && -d "$CMAKE_BIN" ]]; then
    echo "$CMAKE_BIN" >>"$GITHUB_PATH"
    echo "✓ Added cmake directory to GITHUB_PATH: $CMAKE_BIN"
  fi
else
  echo "::error::CMake not found on PATH after installation"
  echo "PATH=$PATH"
  brew --prefix cmake 2>/dev/null || true
  exit 1
fi

echo ""
echo "Tesseract:"
if command -v tesseract >/dev/null 2>&1; then
  tesseract --version | head -1
else
  echo "::error::Tesseract not found on PATH after installation"
  echo "PATH=$PATH"
  brew --prefix tesseract 2>/dev/null || true
  exit 1
fi

echo ""
echo "Available languages:"
# `|| true`: with pipefail, head closing the pipe after five lines can make
# tesseract die from SIGPIPE; this listing is informational only.
tesseract --list-langs | head -5 || true

echo ""
echo "PHP:"
if command -v php >/dev/null 2>&1; then
  php --version | head -1
else
  echo "::error::PHP not found on PATH after installation"
  echo "PATH=$PATH"
  exit 1
fi
echo "::endgroup::"

View File

@@ -0,0 +1,301 @@
#!/usr/bin/env pwsh
# Installs and verifies the Windows dependencies (Tesseract, LLVM/Clang, PHP,
# CMake) and publishes PATH / environment entries through the files named by
# $env:GITHUB_PATH and $env:GITHUB_ENV.

# Strict mode: referencing an unset variable or missing property is an error.
Set-StrictMode -Version Latest
# Turn errors into terminating errors so the try/catch blocks can handle them.
$ErrorActionPreference = 'Stop'
# Opens a log group; the matching "::endgroup::" is written further down.
Write-Host "::group::Installing Windows dependencies"
function Retry-Command {
    <#
    .SYNOPSIS
    Runs a script block, retrying with exponential backoff until it succeeds.

    .DESCRIPTION
    An attempt fails when the script block throws, or when the last native
    command it ran left $LASTEXITCODE outside of SuccessExitCodes (native
    programs such as choco do not throw on a non-zero exit by themselves).
    Everything the script block writes is sent to the host, so the function
    emits exactly one boolean and can be used directly in a condition.

    .PARAMETER Command
    The script block to execute.

    .PARAMETER MaxAttempts
    Maximum number of attempts (default 3).

    .PARAMETER DelaySeconds
    Base delay; the wait before attempt k+1 is DelaySeconds * 2^k seconds.

    .PARAMETER SuccessExitCodes
    Native exit codes treated as success. Defaults to 0 plus the Windows
    Installer "success, reboot initiated/required" codes 1641 and 3010.

    .OUTPUTS
    System.Boolean. $true on success, $false once all attempts have failed.
    #>
    param(
        [scriptblock]$Command,
        [int]$MaxAttempts = 3,
        [int]$DelaySeconds = 5,
        [int[]]$SuccessExitCodes = @(0, 1641, 3010)
    )
    $attempt = 1
    while ($attempt -le $MaxAttempts) {
        try {
            Write-Host "Attempt $attempt of $MaxAttempts..."
            # Reset first: a stale code from an earlier command must not count,
            # and under strict mode the variable has to exist before it is read.
            $global:LASTEXITCODE = 0
            # Out-Host keeps the command's output out of this function's return
            # stream; otherwise callers would receive an array, not a boolean.
            & $Command | Out-Host
            if ($SuccessExitCodes -notcontains $LASTEXITCODE) {
                throw "Command exited with code $LASTEXITCODE"
            }
            return $true
        }
        catch {
            Write-Host " $($_.Exception.Message)"
            $attempt++
            if ($attempt -le $MaxAttempts) {
                $backoffDelay = $DelaySeconds * [Math]::Pow(2, $attempt - 1)
                Write-Host "⚠ Attempt failed, retrying in ${backoffDelay}s..." -ForegroundColor Yellow
                Start-Sleep -Seconds $backoffDelay
            }
            else {
                return $false
            }
        }
    }
    # Reached only when MaxAttempts is less than 1: nothing ran, report failure.
    return $false
}
# Cache-hit flags come from the environment; only the value "true" counts.
$tesseractCacheHit = ($env:TESSERACT_CACHE_HIT -eq "true")
$llvmCacheHit = ($env:LLVM_CACHE_HIT -eq "true")
$cmakeCacheHit = ($env:CMAKE_CACHE_HIT -eq "true")

# Log both the raw environment values and how they were interpreted.
$cacheReport = @(
    "Cache status:",
    " TESSERACT_CACHE_HIT: $env:TESSERACT_CACHE_HIT (evaluated: $tesseractCacheHit)",
    " LLVM_CACHE_HIT: $env:LLVM_CACHE_HIT (evaluated: $llvmCacheHit)",
    " CMAKE_CACHE_HIT: $env:CMAKE_CACHE_HIT (evaluated: $cmakeCacheHit)",
    ""
)
foreach ($reportLine in $cacheReport) {
    Write-Host $reportLine
}

# Probe for an existing cmake; invoking a missing command throws, which lands
# in the catch block.
$cmakeInstalled = $false
try {
    & cmake --version 2>$null
    $cmakeInstalled = $true
    Write-Host "✓ CMake already installed"
}
catch {
    Write-Host "CMake not found, will attempt to install"
}
# Tesseract: nothing to do on a cache hit; otherwise install through
# Chocolatey (best effort) and make sure the eng/osd language data exist.
if ($tesseractCacheHit) {
    Write-Host "✓ Tesseract found in cache"
}
else {
    Write-Host "Tesseract cache miss, installing (optional for build - needed for tests only)..."
    $tesseractInstallOk = Retry-Command { choco install -y tesseract --no-progress } -MaxAttempts 3
    if ($tesseractInstallOk) {
        Write-Host "✓ Tesseract installed"
        # The data directory is only prepared when the install directory exists.
        $tesseractPath = "C:\Program Files\Tesseract-OCR"
        if (Test-Path $tesseractPath) {
            Write-Host " Configuring Tesseract data paths..."
            $tessdataPath = "$tesseractPath\tessdata"
            if (-not (Test-Path $tessdataPath)) {
                Write-Host " Creating tessdata directory at: $tessdataPath"
                New-Item -ItemType Directory -Path $tessdataPath -Force | Out-Null
            }

            # English language data; a failed download is only a warning.
            $engFile = "$tessdataPath\eng.traineddata"
            if (-not (Test-Path $engFile)) {
                Write-Host " Downloading English language data..."
                try {
                    $engUrl = "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata"
                    Invoke-WebRequest -Uri $engUrl -OutFile $engFile -ErrorAction Stop
                    Write-Host " ✓ Downloaded eng.traineddata"
                }
                catch {
                    Write-Host " ::warning::Failed to download eng.traineddata: $($_.Exception.Message)"
                }
            }

            # OSD data (needed for orientation detection); same policy.
            $osdFile = "$tessdataPath\osd.traineddata"
            if (-not (Test-Path $osdFile)) {
                Write-Host " Downloading OSD data..."
                try {
                    $osdUrl = "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata"
                    Invoke-WebRequest -Uri $osdUrl -OutFile $osdFile -ErrorAction Stop
                    Write-Host " ✓ Downloaded osd.traineddata"
                }
                catch {
                    Write-Host " ::warning::Failed to download osd.traineddata: $($_.Exception.Message)"
                }
            }
        }
    }
    else {
        Write-Host "::warning::Failed to install Tesseract (optional dependency - gem build does not require it)"
    }
}
# LLVM/Clang: nothing to do on a cache hit; otherwise install through
# Chocolatey. A failed install is reported as a warning only.
if ($llvmCacheHit) {
    Write-Host "✓ LLVM/Clang found in cache"
}
else {
    Write-Host "LLVM cache miss, installing LLVM/Clang (required for bindgen)..."
    $llvmInstallOk = Retry-Command { choco install -y llvm --no-progress } -MaxAttempts 3
    if ($llvmInstallOk) {
        Write-Host "✓ LLVM/Clang installed"
    }
    else {
        Write-Host "::warning::Failed to install LLVM/Clang via Chocolatey"
    }
}
# PHP: use an existing php when one can be invoked; otherwise try Chocolatey.
# A failed install is only a warning.
Write-Host "Installing PHP..."
$phpInstalled = $false
try {
    # Invoking a missing command throws, which routes to the install path.
    & php --version 2>$null
    $phpInstalled = $true
    Write-Host "✓ PHP already installed"
}
catch {
    Write-Host "PHP not found, installing via Chocolatey..."
    $phpInstallOk = Retry-Command { choco install -y php --no-progress } -MaxAttempts 3
    if ($phpInstallOk) {
        Write-Host "✓ PHP installed via Chocolatey"
        $phpInstalled = $true
    }
    else {
        Write-Host "::warning::Failed to install PHP via Chocolatey, will rely on shivammathur/setup-php action"
    }
}
# CMake: nothing to do on a cache hit; otherwise install through Chocolatey.
# Unlike the other tools, a failed CMake install aborts the script.
Write-Host "Installing CMake..."
if ($cmakeCacheHit) {
    Write-Host "✓ CMake found in cache"
}
else {
    Write-Host "CMake cache miss, installing..."
    $cmakeInstallOk = Retry-Command { choco install -y cmake --no-progress } -MaxAttempts 3
    if (-not $cmakeInstallOk) {
        throw "Failed to install CMake after 3 attempts"
    }
    Write-Host "✓ CMake installed"
}
# Publish the tool directories that exist: append each to the file named by
# GITHUB_PATH and prepend it to PATH for the rest of this script.
Write-Host "Configuring PATH and environment variables..."
$toolDirs = @(
    "C:\Program Files\CMake\bin",
    "C:\Program Files\Tesseract-OCR",
    "C:\Program Files\LLVM\bin",
    "C:\tools\php",
    "C:\Program Files\PHP"
)
foreach ($toolDir in $toolDirs) {
    if (-not (Test-Path $toolDir)) {
        Write-Host " Path not found (skipping): $toolDir"
        continue
    }
    Write-Host " Adding to PATH: $toolDir"
    $toolDir | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
    $env:PATH = "$toolDir;$env:PATH"
}

# Ensure TESSDATA_PREFIX is set for Windows OCR tests, both for later steps
# (via GITHUB_ENV) and for this process, when the default data directory exists.
$tesseractPath = "C:\Program Files\Tesseract-OCR"
$tessdataPath = "$tesseractPath\tessdata"
if ((Test-Path $tesseractPath) -and (Test-Path $tessdataPath)) {
    Write-Host " Setting TESSDATA_PREFIX for tests: $tessdataPath"
    Add-Content -Path $env:GITHUB_ENV -Value "TESSDATA_PREFIX=$tessdataPath"
    $env:TESSDATA_PREFIX = $tessdataPath
}
Write-Host "::endgroup::"
# Verification, part 1: Tesseract. It is optional, so nothing here aborts the
# script; the goal is to log what is available and to publish TESSDATA_PREFIX.
Write-Host "::group::Verifying Windows installations"
Write-Host "Tesseract (optional for build):"
try {
    # Throws when tesseract is not on PATH, which selects the catch branch.
    $tesseractInfo = Get-Command tesseract -ErrorAction Stop
    $tesseractExe = $tesseractInfo.Path
    Write-Host " Found at: $tesseractExe"
    Write-Host " Command type: $($tesseractInfo.CommandType)"

    # Get installation directory
    $tesseractDir = Split-Path -Parent $tesseractExe
    Write-Host " Installation directory: $tesseractDir"

    # Check for tessdata next to the executable
    $tessdataPath = Join-Path $tesseractDir "tessdata"
    if (-not (Test-Path $tessdataPath)) {
        Write-Host " tessdata directory not found at: $tessdataPath"
    }
    else {
        Write-Host " tessdata directory: $tessdataPath"
        Write-Host " Available language files:"
        $trainedFiles = Get-ChildItem "$tessdataPath\*.traineddata" -ErrorAction SilentlyContinue
        foreach ($trainedFile in $trainedFiles) {
            Write-Host " - $($trainedFile.Name)"
        }
    }

    # A binary that is present but cannot run only produces a warning.
    try {
        $versionOutput = & tesseract --version 2>&1
        Write-Host " Version output: $versionOutput"
        Write-Host "✓ Tesseract available and working"
        Write-Host ""
        Write-Host "Available Tesseract languages:"
        & tesseract --list-langs 2>&1 | ForEach-Object { Write-Host " $_" }
    }
    catch {
        Write-Host "⚠ Warning: Tesseract found but failed to run: $($_.Exception.Message)"
    }

    # Set TESSDATA_PREFIX environment variable for tests
    if (Test-Path $tessdataPath) {
        Write-Host ""
        Write-Host "Setting TESSDATA_PREFIX environment variable..."
        Add-Content -Path $env:GITHUB_ENV -Value "TESSDATA_PREFIX=$tessdataPath"
        Write-Host "✓ Set TESSDATA_PREFIX=$tessdataPath in GITHUB_ENV"
        $env:TESSDATA_PREFIX = $tessdataPath
    }
}
catch {
    Write-Host "⚠ Tesseract not found on PATH (not required for build)"
    Write-Host " Error details: $($_.Exception.Message)"
    Write-Host " Searching common installation locations..."
    $knownLocations = @(
        "C:\Program Files\Tesseract-OCR\tesseract.exe",
        "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
        "${env:ProgramFiles}\Tesseract-OCR\tesseract.exe",
        "${env:ProgramFiles(x86)}\Tesseract-OCR\tesseract.exe"
    )
    # Only the first location that exists is used.
    $located = $knownLocations | Where-Object { Test-Path $_ } | Select-Object -First 1
    if ($located) {
        Write-Host " Found Tesseract at: $located (not on PATH)"
        $tesseractDir = Split-Path -Parent $located
        $tessdataPath = Join-Path $tesseractDir "tessdata"
        if (Test-Path $tessdataPath) {
            Write-Host " Found tessdata at: $tessdataPath"
            Add-Content -Path $env:GITHUB_ENV -Value "TESSDATA_PREFIX=$tessdataPath"
            Write-Host "✓ Set TESSDATA_PREFIX=$tessdataPath in GITHUB_ENV"
            $env:TESSDATA_PREFIX = $tessdataPath
        }
    }
    else {
        Write-Host " Tesseract not found in common locations"
    }
}
# Verification, part 2: CMake is mandatory (failure aborts the script), while
# Clang and PHP are optional and only produce a warning when missing.
Write-Host ""
Write-Host "CMake:"
try {
    & cmake --version
    Write-Host "✓ CMake available"
    # Export CMAKE environment variable for immediate availability in build scripts
    $cmakeExe = (Get-Command cmake -ErrorAction Stop).Source
    if ($cmakeExe) {
        Add-Content -Path $env:GITHUB_ENV -Value "CMAKE=$cmakeExe"
        Write-Host "✓ Set CMAKE=$cmakeExe in GITHUB_ENV"
    }
}
catch {
    Write-Host "::error::CMake not found after installation"
    throw "CMake verification failed"
}

# Optional tools share one procedure: print a heading, try to run
# `<tool> --version`, and log either the success line or the warning line.
$optionalTools = @(
    [pscustomobject]@{
        Heading = "Clang:"
        Exe     = "clang"
        Present = "✓ Clang available"
        Absent  = "⚠ Warning: Clang not currently available on PATH"
    },
    [pscustomobject]@{
        Heading = "PHP:"
        Exe     = "php"
        Present = "✓ PHP available"
        Absent  = "⚠ Warning: PHP not currently available on PATH (will be set up by shivammathur/setup-php action)"
    }
)
foreach ($tool in $optionalTools) {
    Write-Host ""
    Write-Host $tool.Heading
    $toolExe = $tool.Exe
    try {
        # Invoking a command that does not exist throws into the catch block.
        & $toolExe --version
        Write-Host $tool.Present
    }
    catch {
        Write-Host $tool.Absent
    }
}
Write-Host "::endgroup::"