This commit is contained in:
295
e2e/java/mvnw
generated
vendored
Executable file
295
e2e/java/mvnw
generated
vendored
Executable file
@@ -0,0 +1,295 @@
|
||||
#!/bin/sh
|
||||
# ----------------------------------------------------------------------------
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Apache Maven Wrapper startup batch script, version 3.3.4
|
||||
#
|
||||
# Optional ENV vars
|
||||
# -----------------
|
||||
# JAVA_HOME - location of a JDK home dir, required when download maven via java source
|
||||
# MVNW_REPOURL - repo url base for downloading maven distribution
|
||||
# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
|
||||
# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
set -euf
|
||||
[ "${MVNW_VERBOSE-}" != debug ] || set -x
|
||||
|
||||
# OS specific support.
|
||||
native_path() { printf %s\\n "$1"; }
|
||||
case "$(uname)" in
|
||||
CYGWIN* | MINGW*)
|
||||
[ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
|
||||
native_path() { cygpath --path --windows "$1"; }
|
||||
;;
|
||||
esac
|
||||
|
||||
# set JAVACMD and JAVACCMD
|
||||
set_java_home() {
  # Resolve the java and javac executables into JAVACMD and JAVACCMD.
  # Returns 0 when both are executable; otherwise prints a diagnostic on
  # stderr and returns 1.

  if [ -z "${JAVA_HOME-}" ]; then
    # JAVA_HOME is unset or empty: look both tools up on PATH. Each lookup runs
    # in a subshell with errexit switched off and any shell function named
    # `command` removed first; `|| :` keeps a failed lookup from aborting the
    # script so the check below can report it.
    JAVACMD="$(
      'set' +e
      'unset' -f command 2>/dev/null
      'command' -v java
    )" || :
    JAVACCMD="$(
      'set' +e
      'unset' -f command 2>/dev/null
      'command' -v javac
    )" || :

    if [ -x "${JAVACMD-}" ] && [ -x "${JAVACCMD-}" ]; then
      return 0
    fi
    echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
    return 1
  fi

  if [ -x "$JAVA_HOME/jre/sh/java" ]; then
    # IBM's JDK on AIX uses strange locations for the executables
    JAVACMD="$JAVA_HOME/jre/sh/java"
    JAVACCMD="$JAVA_HOME/jre/sh/javac"
    return 0
  fi

  # Regular JDK layout: both tools live under $JAVA_HOME/bin.
  JAVACMD="$JAVA_HOME/bin/java"
  JAVACCMD="$JAVA_HOME/bin/javac"
  if [ -x "$JAVACMD" ] && [ -x "$JAVACCMD" ]; then
    return 0
  fi
  echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
  echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
  return 1
}
|
||||
|
||||
# hash string like Java String::hashCode
|
||||
hash_string() {
  # Print, as lowercase hex, a 31-multiplier rolling hash of $1 reduced
  # modulo 2^32 (the same recurrence as Java's String::hashCode).
  # An empty or missing argument prints 0.
  h=0
  str="${1:-}"
  until [ -z "$str" ]; do
    # Peel off the first character: strip "everything but the first char"
    # from the end of the string, then advance past it.
    char="${str%"${str#?}"}"
    str="${str#?}"
    # "'c" makes printf emit the numeric code of c; LC_CTYPE=C is set for
    # that conversion only.
    h=$(((31 * h + $(LC_CTYPE=C printf '%d' "'$char")) % 4294967296))
  done
  printf '%x\n' "$h"
}
|
||||
|
||||
# verbose MESSAGE: print MESSAGE on stdout when MVNW_VERBOSE is exactly
# "true"; for any other value the function does nothing.
if [ "${MVNW_VERBOSE-}" = true ]; then
  verbose() { printf '%s\n' "${1-}"; }
else
  verbose() { :; }
fi
|
||||
|
||||
die() {
  # Write the message passed as $1 to stderr, then end the script with
  # exit status 1.
  printf >&2 '%s\n' "$1"
  exit 1
}
|
||||
|
||||
trim() {
  # MWRAPPER-139:
  #   Prints $1 with every whitespace character removed (spaces, tabs,
  #   carriage returns, linefeeds), including any in the middle of the value,
  #   not only leading and trailing ones. No trailing newline is added.
  #   Needed for removing poorly interpreted newline sequences when running in
  #   more exotic environments such as mingw bash on Windows.
  printf '%s' "$1" | tr -d '[:space:]'
}
|
||||
|
||||
scriptDir="$(dirname "$0")"
|
||||
scriptName="$(basename "$0")"
|
||||
|
||||
# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
|
||||
while IFS="=" read -r key value; do
|
||||
case "${key-}" in
|
||||
distributionUrl) distributionUrl=$(trim "${value-}") ;;
|
||||
distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
|
||||
esac
|
||||
done <"$scriptDir/.mvn/wrapper/maven-wrapper.properties"
|
||||
[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
|
||||
|
||||
case "${distributionUrl##*/}" in
|
||||
maven-mvnd-*bin.*)
|
||||
MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
|
||||
case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
|
||||
*AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
|
||||
:Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
|
||||
:Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
|
||||
:Linux*x86_64*) distributionPlatform=linux-amd64 ;;
|
||||
*)
|
||||
echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
|
||||
distributionPlatform=linux-amd64
|
||||
;;
|
||||
esac
|
||||
distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
|
||||
;;
|
||||
maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
|
||||
*) MVN_CMD="mvn${scriptName#mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
|
||||
esac
|
||||
|
||||
# apply MVNW_REPOURL and calculate MAVEN_HOME
|
||||
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
|
||||
[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
|
||||
distributionUrlName="${distributionUrl##*/}"
|
||||
distributionUrlNameMain="${distributionUrlName%.*}"
|
||||
distributionUrlNameMain="${distributionUrlNameMain%-bin}"
|
||||
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
|
||||
MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
|
||||
|
||||
exec_maven() {
  # Replace this shell with the launcher at $MAVEN_HOME/bin/$MVN_CMD,
  # forwarding every argument unchanged. The wrapper-only MVNW_* variables
  # are unset first; a failure of that unset is ignored.
  unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
  exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" ||
    die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
}
|
||||
|
||||
if [ -d "$MAVEN_HOME" ]; then
|
||||
verbose "found existing MAVEN_HOME at $MAVEN_HOME"
|
||||
exec_maven "$@"
|
||||
fi
|
||||
|
||||
case "${distributionUrl-}" in
|
||||
*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
|
||||
*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
|
||||
esac
|
||||
|
||||
# prepare tmp dir
|
||||
if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
|
||||
clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
|
||||
trap clean HUP INT TERM EXIT
|
||||
else
|
||||
die "cannot create temp dir"
|
||||
fi
|
||||
|
||||
mkdir -p -- "${MAVEN_HOME%/*}"
|
||||
|
||||
# Download and Install Apache Maven
|
||||
verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
|
||||
verbose "Downloading from: $distributionUrl"
|
||||
verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
|
||||
|
||||
# select .zip or .tar.gz
|
||||
if ! command -v unzip >/dev/null; then
|
||||
distributionUrl="${distributionUrl%.zip}.tar.gz"
|
||||
distributionUrlName="${distributionUrl##*/}"
|
||||
fi
|
||||
|
||||
# verbose opt
|
||||
__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
|
||||
[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
|
||||
|
||||
# normalize http auth
|
||||
case "${MVNW_PASSWORD:+has-password}" in
|
||||
'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
|
||||
has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
|
||||
esac
|
||||
|
||||
if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
|
||||
verbose "Found wget ... using wget"
|
||||
wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
|
||||
elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
|
||||
verbose "Found curl ... using curl"
|
||||
curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
|
||||
elif set_java_home; then
|
||||
verbose "Falling back to use Java to download"
|
||||
javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
|
||||
targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
|
||||
cat >"$javaSource" <<-END
|
||||
public class Downloader extends java.net.Authenticator
|
||||
{
|
||||
protected java.net.PasswordAuthentication getPasswordAuthentication()
|
||||
{
|
||||
return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
|
||||
}
|
||||
public static void main( String[] args ) throws Exception
|
||||
{
|
||||
setDefault( new Downloader() );
|
||||
java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
|
||||
}
|
||||
}
|
||||
END
|
||||
# For Cygwin/MinGW, switch paths to Windows format before running javac and java
|
||||
verbose " - Compiling Downloader.java ..."
|
||||
"$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
|
||||
verbose " - Running Downloader.java ..."
|
||||
"$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
|
||||
fi
|
||||
|
||||
# If specified, validate the SHA-256 sum of the Maven distribution zip file
|
||||
if [ -n "${distributionSha256Sum-}" ]; then
  # Verify the downloaded archive against the SHA-256 digest configured in
  # maven-wrapper.properties. Any failure path below ends the script with
  # exit status 1.
  distributionSha256Result=false
  if [ "$MVN_CMD" = mvnd.sh ]; then
    echo "Checksum validation is not supported for maven-mvnd." >&2
    echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
    exit 1
  elif command -v sha256sum >/dev/null; then
    # The check line uses the canonical "<digest><space><mode char><path>"
    # layout, i.e. TWO separator characters (the second space is the text-mode
    # flag). A single space is not accepted by every implementation, so the
    # format string spells both out explicitly.
    if printf '%s  %s\n' "$distributionSha256Sum" "$TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c - >/dev/null 2>&1; then
      distributionSha256Result=true
    fi
  elif command -v shasum >/dev/null; then
    # Same two-separator line format as above, checked with shasum instead.
    if printf '%s  %s\n' "$distributionSha256Sum" "$TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
      distributionSha256Result=true
    fi
  else
    echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
    echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
    exit 1
  fi
  if [ $distributionSha256Result = false ]; then
    echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
    echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
    exit 1
  fi
fi
|
||||
|
||||
# unzip and move
|
||||
if command -v unzip >/dev/null; then
|
||||
unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
|
||||
else
|
||||
tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
|
||||
fi
|
||||
|
||||
# Find the actual extracted directory name (handles snapshots where filename != directory name)
|
||||
actualDistributionDir=""
|
||||
|
||||
# First try the expected directory name (for regular distributions)
|
||||
if [ -d "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" ]; then
|
||||
if [ -f "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/bin/$MVN_CMD" ]; then
|
||||
actualDistributionDir="$distributionUrlNameMain"
|
||||
fi
|
||||
fi
|
||||
|
||||
# If not found, search for any directory with the Maven executable (for snapshots)
|
||||
if [ -z "$actualDistributionDir" ]; then
|
||||
# enable globbing to iterate over items
|
||||
set +f
|
||||
for dir in "$TMP_DOWNLOAD_DIR"/*; do
|
||||
if [ -d "$dir" ]; then
|
||||
if [ -f "$dir/bin/$MVN_CMD" ]; then
|
||||
actualDistributionDir="$(basename "$dir")"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
set -f
|
||||
fi
|
||||
|
||||
if [ -z "$actualDistributionDir" ]; then
|
||||
verbose "Contents of $TMP_DOWNLOAD_DIR:"
|
||||
verbose "$(ls -la "$TMP_DOWNLOAD_DIR")"
|
||||
die "Could not find Maven distribution directory in extracted archive"
|
||||
fi
|
||||
|
||||
verbose "Found extracted Maven distribution directory: $actualDistributionDir"
|
||||
printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$actualDistributionDir/mvnw.url"
|
||||
mv -- "$TMP_DOWNLOAD_DIR/$actualDistributionDir" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
|
||||
|
||||
clean || :
|
||||
exec_maven "$@"
|
||||
189
e2e/java/mvnw.cmd
generated
vendored
Normal file
189
e2e/java/mvnw.cmd
generated
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
<# : batch portion
|
||||
@REM ----------------------------------------------------------------------------
|
||||
@REM Licensed to the Apache Software Foundation (ASF) under one
|
||||
@REM or more contributor license agreements. See the NOTICE file
|
||||
@REM distributed with this work for additional information
|
||||
@REM regarding copyright ownership. The ASF licenses this file
|
||||
@REM to you under the Apache License, Version 2.0 (the
|
||||
@REM "License"); you may not use this file except in compliance
|
||||
@REM with the License. You may obtain a copy of the License at
|
||||
@REM
|
||||
@REM http://www.apache.org/licenses/LICENSE-2.0
|
||||
@REM
|
||||
@REM Unless required by applicable law or agreed to in writing,
|
||||
@REM software distributed under the License is distributed on an
|
||||
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
@REM KIND, either express or implied. See the License for the
|
||||
@REM specific language governing permissions and limitations
|
||||
@REM under the License.
|
||||
@REM ----------------------------------------------------------------------------
|
||||
|
||||
@REM ----------------------------------------------------------------------------
|
||||
@REM Apache Maven Wrapper startup batch script, version 3.3.4
|
||||
@REM
|
||||
@REM Optional ENV vars
|
||||
@REM MVNW_REPOURL - repo url base for downloading maven distribution
|
||||
@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
|
||||
@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
|
||||
@REM ----------------------------------------------------------------------------
|
||||
|
||||
@REM Record the name this script was invoked as, unless it is already set.
@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
@SET __MVNW_CMD__=
@SET __MVNW_ERROR__=
@REM Save PSModulePath and clear it for the duration of the PowerShell call.
@SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
@SET PSModulePath=
@REM Run this same file as a PowerShell script block. Each output line is split
@REM on the equals sign: a line whose key is MVN_CMD sets __MVNW_CMD__, every
@REM other line is echoed back to the console.
@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
  IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
)
@REM Restore PSModulePath, then clear the helper and credential variables.
@REM The save variable is cleared with a trailing equals sign: SET with a bare
@REM name only lists matching variables and would leave the value in place.
@SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
@SET __MVNW_PSMODULEP_SAVE=
@SET __MVNW_ARG0_NAME__=
@SET MVNW_USERNAME=
@SET MVNW_PASSWORD=
@REM Start the resolved Maven command with the original arguments.
@REM NOTE(review): it is started without CALL, so presumably control does not
@REM come back here on success and the error below is only reached when no
@REM command was resolved - confirm.
@IF NOT "%__MVNW_CMD__%"=="" ("%__MVNW_CMD__%" %*)
@echo Cannot start maven from wrapper >&2 && exit /b 1
@GOTO :EOF
|
||||
: end batch / begin powershell #>
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
if ($env:MVNW_VERBOSE -eq "true") {
|
||||
$VerbosePreference = "Continue"
|
||||
}
|
||||
|
||||
# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
|
||||
$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
|
||||
if (!$distributionUrl) {
|
||||
Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
|
||||
}
|
||||
|
||||
switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
|
||||
"maven-mvnd-*" {
|
||||
$USE_MVND = $true
|
||||
$distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
|
||||
$MVN_CMD = "mvnd.cmd"
|
||||
break
|
||||
}
|
||||
default {
|
||||
$USE_MVND = $false
|
||||
$MVN_CMD = $script -replace '^mvnw','mvn'
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
# apply MVNW_REPOURL and calculate MAVEN_HOME
|
||||
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
|
||||
if ($env:MVNW_REPOURL) {
|
||||
$MVNW_REPO_PATTERN = if ($USE_MVND -eq $False) { "/org/apache/maven/" } else { "/maven/mvnd/" }
|
||||
$distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace "^.*$MVNW_REPO_PATTERN",'')"
|
||||
}
|
||||
$distributionUrlName = $distributionUrl -replace '^.*/',''
|
||||
$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
|
||||
|
||||
$MAVEN_M2_PATH = "$HOME/.m2"
|
||||
if ($env:MAVEN_USER_HOME) {
|
||||
$MAVEN_M2_PATH = "$env:MAVEN_USER_HOME"
|
||||
}
|
||||
|
||||
if (-not (Test-Path -Path $MAVEN_M2_PATH)) {
|
||||
New-Item -Path $MAVEN_M2_PATH -ItemType Directory | Out-Null
|
||||
}
|
||||
|
||||
$MAVEN_WRAPPER_DISTS = $null
|
||||
if ((Get-Item $MAVEN_M2_PATH).Target[0] -eq $null) {
|
||||
$MAVEN_WRAPPER_DISTS = "$MAVEN_M2_PATH/wrapper/dists"
|
||||
} else {
|
||||
$MAVEN_WRAPPER_DISTS = (Get-Item $MAVEN_M2_PATH).Target[0] + "/wrapper/dists"
|
||||
}
|
||||
|
||||
$MAVEN_HOME_PARENT = "$MAVEN_WRAPPER_DISTS/$distributionUrlNameMain"
|
||||
$MAVEN_HOME_NAME = ([System.Security.Cryptography.SHA256]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
|
||||
$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
|
||||
|
||||
if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
|
||||
Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
|
||||
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
|
||||
exit $?
|
||||
}
|
||||
|
||||
if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
|
||||
Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
|
||||
}
|
||||
|
||||
# prepare tmp dir
|
||||
$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
|
||||
$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
|
||||
$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
|
||||
trap {
|
||||
if ($TMP_DOWNLOAD_DIR.Exists) {
|
||||
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
|
||||
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
|
||||
}
|
||||
}
|
||||
|
||||
New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
|
||||
|
||||
# Download and Install Apache Maven
|
||||
Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
|
||||
Write-Verbose "Downloading from: $distributionUrl"
|
||||
Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
|
||||
|
||||
$webclient = New-Object System.Net.WebClient
|
||||
if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
|
||||
$webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
|
||||
}
|
||||
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
|
||||
$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
|
||||
|
||||
# If specified, validate the SHA-256 sum of the Maven distribution zip file
|
||||
$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
|
||||
if ($distributionSha256Sum) {
|
||||
if ($USE_MVND) {
|
||||
Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
|
||||
}
|
||||
Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
|
||||
if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
|
||||
Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
|
||||
}
|
||||
}
|
||||
|
||||
# unzip and move
|
||||
Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
|
||||
|
||||
# Find the actual extracted directory name (handles snapshots where filename != directory name)
|
||||
$actualDistributionDir = ""
|
||||
|
||||
# First try the expected directory name (for regular distributions)
|
||||
$expectedPath = Join-Path "$TMP_DOWNLOAD_DIR" "$distributionUrlNameMain"
|
||||
$expectedMvnPath = Join-Path "$expectedPath" "bin/$MVN_CMD"
|
||||
if ((Test-Path -Path $expectedPath -PathType Container) -and (Test-Path -Path $expectedMvnPath -PathType Leaf)) {
|
||||
$actualDistributionDir = $distributionUrlNameMain
|
||||
}
|
||||
|
||||
# If not found, search for any directory with the Maven executable (for snapshots)
|
||||
if (!$actualDistributionDir) {
|
||||
Get-ChildItem -Path "$TMP_DOWNLOAD_DIR" -Directory | ForEach-Object {
|
||||
$testPath = Join-Path $_.FullName "bin/$MVN_CMD"
|
||||
if (Test-Path -Path $testPath -PathType Leaf) {
|
||||
$actualDistributionDir = $_.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!$actualDistributionDir) {
|
||||
Write-Error "Could not find Maven distribution directory in extracted archive"
|
||||
}
|
||||
|
||||
Write-Verbose "Found extracted Maven distribution directory: $actualDistributionDir"
|
||||
Rename-Item -Path "$TMP_DOWNLOAD_DIR/$actualDistributionDir" -NewName $MAVEN_HOME_NAME | Out-Null
|
||||
try {
|
||||
Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
|
||||
} catch {
|
||||
if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
|
||||
Write-Error "fail to move MAVEN_HOME"
|
||||
}
|
||||
} finally {
|
||||
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
|
||||
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
|
||||
}
|
||||
|
||||
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
|
||||
98
e2e/java/pom.xml
generated
Normal file
98
e2e/java/pom.xml
generated
Normal file
@@ -0,0 +1,98 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>dev.kreuzberg</groupId>
|
||||
<artifactId>kreuzberg-e2e-java</artifactId>
|
||||
<version>0.1.0</version>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>25</maven.compiler.source>
|
||||
<maven.compiler.target>25</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<junit.version>6.1.0</junit.version>
|
||||
</properties>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.junit</groupId>
|
||||
<artifactId>junit-bom</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>dev.kreuzberg</groupId>
|
||||
<artifactId>kreuzberg</artifactId>
|
||||
<version>5.0.0-rc.3</version>
|
||||
<scope>system</scope>
|
||||
<systemPath>${project.basedir}/../../packages/java/target/kreuzberg-5.0.0-rc.3.jar</systemPath>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>2.18.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-jdk8</artifactId>
|
||||
<version>2.18.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jetbrains</groupId>
|
||||
<artifactId>annotations</artifactId>
|
||||
<version>24.1.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.platform</groupId>
|
||||
<artifactId>junit-platform-launcher</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>build-helper-maven-plugin</artifactId>
|
||||
<version>3.6.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>add-test-source</id>
|
||||
<phase>generate-test-sources</phase>
|
||||
<goals>
|
||||
<goal>add-test-source</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sources>
|
||||
<source>src/test/java</source>
|
||||
</sources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>3.5.2</version>
|
||||
<configuration>
|
||||
<argLine>--enable-preview --enable-native-access=ALL-UNNAMED -Djava.library.path=${project.basedir}/../../target/release</argLine>
|
||||
<workingDirectory>${project.basedir}/../../test_documents</workingDirectory>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
63
e2e/java/src/test/java/dev/kreuzberg/e2e/AsyncTest.java
generated
Normal file
63
e2e/java/src/test/java/dev/kreuzberg/e2e/AsyncTest.java
generated
Normal file
@@ -0,0 +1,63 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: async. */
|
||||
class AsyncTest {
|
||||
|
||||
@Test
|
||||
void testAsyncExtractBytes() throws Exception {
|
||||
// Async extract_bytes call on PDF document
|
||||
var result = Kreuzberg.extractBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")), "application/pdf", ExtractionConfig.builder().build());
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 50, "expected length >= 50");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testAsyncExtractBytesEmptyMime() throws Exception {
|
||||
// extract_bytes empty MIME async
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/plain.txt")), "", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testAsyncExtractBytesInvalidMime() throws Exception {
|
||||
// extract_bytes unsupported MIME async
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/plain.txt")), "application/x-nonexistent", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
110
e2e/java/src/test/java/dev/kreuzberg/e2e/BatchTest.java
generated
Normal file
110
e2e/java/src/test/java/dev/kreuzberg/e2e/BatchTest.java
generated
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.BatchBytesItem;
|
||||
import dev.kreuzberg.BatchFileItem;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: batch. */
|
||||
class BatchTest {
|
||||
|
||||
@Test
|
||||
void testBatchBytesInvalidMime() throws Exception {
|
||||
// batch_extract_bytes_sync invalid MIME
|
||||
var result = Kreuzberg.batchExtractBytesSync(java.util.Arrays.asList(new BatchBytesItem(new byte[] {(byte) 72, (byte) 101, (byte) 108, (byte) 108, (byte) 111}, "application/x-nonexistent", null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchExtractBytesHappy() throws Exception {
|
||||
// batch_extract_bytes: happy path with mixed inputs
|
||||
var result = Kreuzberg.batchExtractBytes(java.util.Arrays.asList(new BatchBytesItem(new byte[] {(byte) 72, (byte) 101, (byte) 108, (byte) 108, (byte) 111, (byte) 44, (byte) 32, (byte) 119, (byte) 111, (byte) 114, (byte) 108, (byte) 100, (byte) 33}, "text/plain", null), new BatchBytesItem(new byte[] {(byte) 60, (byte) 104, (byte) 116, (byte) 109, (byte) 108, (byte) 62, (byte) 60, (byte) 98, (byte) 111, (byte) 100, (byte) 121, (byte) 62, (byte) 84, (byte) 101, (byte) 115, (byte) 116, (byte) 60, (byte) 47, (byte) 98, (byte) 111, (byte) 100, (byte) 121, (byte) 62, (byte) 60, (byte) 47, (byte) 104, (byte) 116, (byte) 109, (byte) 108, (byte) 62}, "text/html", null)), ExtractionConfig.builder().build());
|
||||
assertTrue(result.size() >= 1, "expected at least 1 elements");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchExtractBytesMixedFormat() throws Exception {
|
||||
// batch_extract_bytes: handles unsupported MIME gracefully
|
||||
var result = Kreuzberg.batchExtractBytes(java.util.Arrays.asList(new BatchBytesItem(new byte[] {(byte) 80, (byte) 68, (byte) 70, (byte) 32, (byte) 112, (byte) 108, (byte) 97, (byte) 99, (byte) 101, (byte) 104, (byte) 111, (byte) 108, (byte) 100, (byte) 101, (byte) 114}, "application/x-unknown", null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchExtractBytesSyncEmptyList() throws Exception {
|
||||
// batch_extract_bytes_sync: empty batch
|
||||
var result = Kreuzberg.batchExtractBytesSync(java.util.Arrays.asList(), ExtractionConfig.builder().build());
|
||||
assertEquals(0, result.size(), "expected exactly 0 elements");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchExtractBytesSyncInvalidMime() throws Exception {
|
||||
// batch_extract_bytes_sync: unsupported MIME
|
||||
var result = Kreuzberg.batchExtractBytesSync(java.util.Arrays.asList(new BatchBytesItem(new byte[] {(byte) 100, (byte) 97, (byte) 116, (byte) 97}, "application/x-unknown", null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchFileAsyncBasic() throws Exception {
|
||||
// Extract text from multiple files asynchronously
|
||||
var result = Kreuzberg.batchExtractFiles(java.util.Arrays.asList(new BatchFileItem(java.nio.file.Paths.get("pdf/fake_memo.pdf"), null), new BatchFileItem(java.nio.file.Paths.get("text/fake_text.txt"), null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchFileAsyncNotFound() throws Exception {
|
||||
// batch_extract_file async nonexistent
|
||||
var result = Kreuzberg.batchExtractFiles(java.util.Arrays.asList(new BatchFileItem(java.nio.file.Paths.get("/nonexistent/a.pdf"), null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchFileNotFound() throws Exception {
|
||||
// batch_extract_file_sync nonexistent
|
||||
var result = Kreuzberg.batchExtractFilesSync(java.util.Arrays.asList(new BatchFileItem(java.nio.file.Paths.get("/nonexistent/a.pdf"), null), new BatchFileItem(java.nio.file.Paths.get("/nonexistent/b.txt"), null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchFilePartial() throws Exception {
|
||||
// batch_extract_file_sync mixed
|
||||
var result = Kreuzberg.batchExtractFilesSync(java.util.Arrays.asList(new BatchFileItem(java.nio.file.Paths.get("text/plain.txt"), null), new BatchFileItem(java.nio.file.Paths.get("/nonexistent/missing.pdf"), null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testBatchFileSyncBasic() throws Exception {
|
||||
// Extract text from multiple files synchronously
|
||||
var result = Kreuzberg.batchExtractFilesSync(java.util.Arrays.asList(new BatchFileItem(java.nio.file.Paths.get("pdf/fake_memo.pdf"), null), new BatchFileItem(java.nio.file.Paths.get("text/fake_text.txt"), null)), ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
27
e2e/java/src/test/java/dev/kreuzberg/e2e/CodeTest.java
generated
Normal file
27
e2e/java/src/test/java/dev/kreuzberg/e2e/CodeTest.java
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: code. */
|
||||
class CodeTest {
|
||||
|
||||
@Test
|
||||
void testCodeShebangDetection() throws Exception {
|
||||
// Test language detection from shebang line via bytes input
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("code/script.sh"), "text/x-source-code", ExtractionConfig.builder().build());
|
||||
assertEquals("text/x-source-code", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("build"), "expected to contain: " + "build");assertTrue(result.content().contains("clean"), "expected to contain: " + "clean");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
197
e2e/java/src/test/java/dev/kreuzberg/e2e/ContractTest.java
generated
Normal file
197
e2e/java/src/test/java/dev/kreuzberg/e2e/ContractTest.java
generated
Normal file
@@ -0,0 +1,197 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: contract. */
|
||||
class ContractTest {
|
||||
|
||||
@Test
|
||||
void testApiBatchBytesAsync() throws Exception {
|
||||
// Tests async batch bytes extraction API (batch_extract_bytes)
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, ExtractionConfig.builder().build());
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("May 5, 2023") || result.content().contains("Mallori"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testApiBatchBytesWithConfigsAsync() throws Exception {
|
||||
// Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
|
||||
var config = JsonUtil.fromJson("{\"output_format\":\"markdown\"}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testApiBatchFileAsync() throws Exception {
|
||||
// Tests async batch file extraction API (batch_extract_file)
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, ExtractionConfig.builder().build());
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("May 5, 2023") || result.content().contains("Mallori"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testApiBatchFileWithConfigsAsync() throws Exception {
|
||||
// Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
|
||||
var config = JsonUtil.fromJson("{\"output_format\":\"markdown\"}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testApiExtractBytesAsync() throws Exception {
|
||||
// Tests async bytes extraction API (extract_bytes)
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, ExtractionConfig.builder().build());
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("May 5, 2023") || result.content().contains("Mallori"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testApiExtractFileAsync() throws Exception {
|
||||
// Tests async file extraction API (extract_file)
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, ExtractionConfig.builder().build());
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("May 5, 2023") || result.content().contains("Mallori"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigChunkingPrependHeadingContext() throws Exception {
|
||||
// Tests markdown chunker prepends heading hierarchy to chunk content
|
||||
var config = JsonUtil.fromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("markdown/extraction_test.md"), null, config);
|
||||
assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'chunks' not available on result type assertTrue(java.util.Optional.ofNullable(result.chunks()).orElse(java.util.List.of()).stream().allMatch(c -> c.content() != null && !c.content().isBlank()), "expected true"); assertTrue(java.util.Optional.ofNullable(result.chunks()).orElse(java.util.List.of()).stream().allMatch(c -> c.metadata().headingContext() != null), "expected true"); assertTrue(java.util.Optional.ofNullable(result.chunks()).orElse(java.util.List.of()).stream().findFirst().map(c -> c.metadata().headingContext() != null).orElse(false), "expected true");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigDocumentStructureWithHeadings() throws Exception {
|
||||
// Tests document structure with DOCX heading-driven nesting
|
||||
var config = JsonUtil.fromJson("{\"include_document_structure\":true}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("docx/fake.docx"), null, config);
|
||||
assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.mimeType().trim()); // skipped: field 'document' not available on result type // skipped: field 'document.nodes' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigElementTypes() throws Exception {
|
||||
// Tests element-based result format with element type assertions on DOCX
|
||||
var config = JsonUtil.fromJson("{\"result_format\":\"element_based\"}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("docx/unit_test_headers.docx"), null, config);
|
||||
assertTrue(result.mimeType().contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"), "expected to contain at least one of the specified values"); // skipped: field 'elements' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigExtractionTimeout() throws Exception {
|
||||
// Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions
|
||||
var config = JsonUtil.fromJson("{\"extraction_timeout_secs\":300}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigKeywords() throws Exception {
|
||||
// Tests keyword extraction via YAKE algorithm
|
||||
var config = JsonUtil.fromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'keywords' not available on Java ExtractionResult // skipped: field 'keywords' not available on Java ExtractionResult
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigPages() throws Exception {
|
||||
// Tests page extraction and page marker configuration
|
||||
var config = JsonUtil.fromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("PAGE"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigQualityEnabled() throws Exception {
|
||||
// Tests quality scoring produces a score value in [0.0, 1.0]
|
||||
var config = JsonUtil.fromJson("{\"enable_quality_processing\":true}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'quality_score' not available on result type // skipped: field 'quality_score' not available on result type // skipped: field 'quality_score' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigSecurityLimits() throws Exception {
|
||||
// Tests archive extraction with custom security limits
|
||||
var config = JsonUtil.fromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("archives/documents.zip"), null, config);
|
||||
assertTrue(result.mimeType().contains("application/zip") || result.mimeType().contains("application/x-zip-compressed"), "expected to contain at least one of the specified values");assertTrue(result.content().length() >= 10, "expected length >= 10");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testConfigTreeSitter() throws Exception {
|
||||
// Tests tree-sitter configuration round-trip
|
||||
var config = JsonUtil.fromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("code/hello.py"), null, config);
|
||||
assertEquals("text/x-source-code", result.mimeType().trim());assertTrue(result.content().length() >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testOutputFormatBytesMarkdown() throws Exception {
|
||||
// Tests markdown output format via bytes extraction API
|
||||
var config = JsonUtil.fromJson("{\"output_format\":\"markdown\"}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")), "application/pdf", config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testOutputFormatMarkdown() throws Exception {
|
||||
// Tests Markdown output format
|
||||
var config = JsonUtil.fromJson("{\"output_format\":\"markdown\"}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pdf/fake_memo.pdf"), null, config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10"); // skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
61
e2e/java/src/test/java/dev/kreuzberg/e2e/DetectionTest.java
generated
Normal file
61
e2e/java/src/test/java/dev/kreuzberg/e2e/DetectionTest.java
generated
Normal file
@@ -0,0 +1,61 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: detection. */
|
||||
class DetectionTest {
|
||||
|
||||
@Test
|
||||
void testDetectMimeBytesHtml() throws Exception {
|
||||
// Detect HTML MIME from bytes
|
||||
var result = Kreuzberg.detectMimeTypeFromBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("html/html.html")));
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testDetectMimeBytesPdf() throws Exception {
|
||||
// Detect PDF MIME type from bytes
|
||||
var result = Kreuzberg.detectMimeTypeFromBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")));
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testDetectMimeBytesPng() throws Exception {
|
||||
// Detect PNG MIME type from bytes
|
||||
var result = Kreuzberg.detectMimeTypeFromBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("images/test_hello_world.png")));
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testGetExtensionsUnknownMime() throws Exception {
|
||||
// get_extensions unknown MIME
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
Kreuzberg.getExtensionsForMime("application/x-totally-unknown");
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
36
e2e/java/src/test/java/dev/kreuzberg/e2e/DocumentExtractorManagementTest.java
generated
Normal file
36
e2e/java/src/test/java/dev/kreuzberg/e2e/DocumentExtractorManagementTest.java
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: document_extractor_management. */
|
||||
class DocumentExtractorManagementTest {
|
||||
|
||||
@Test
|
||||
void testDocumentExtractorsClear() throws Exception {
|
||||
// Clear all document extractors and verify list is empty
|
||||
Kreuzberg.clearDocumentExtractors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testExtractorsList() throws Exception {
|
||||
// List all registered document extractors
|
||||
var result = Kreuzberg.listDocumentExtractors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
48
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbedAsyncPendingTest.java
generated
Normal file
48
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbedAsyncPendingTest.java
generated
Normal file
@@ -0,0 +1,48 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: embed_async_pending. */
|
||||
class EmbedAsyncPendingTest {
|
||||
|
||||
@Test
|
||||
void testEmbedTextsAsyncEmptyInput() throws Exception {
|
||||
// embed_texts_async: empty text list
|
||||
var result = Kreuzberg.embedTextsAsync(java.util.List.of(), EmbeddingConfig.builder().build());
|
||||
// skipped: field 'embeddings' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testEmbedTextsAsyncHappy() throws Exception {
|
||||
// embed_texts_async: basic async embedding
|
||||
var result = Kreuzberg.embedTextsAsync(java.util.Arrays.asList(JsonUtil.fromJson("\"First\"", String.class), JsonUtil.fromJson("\"Second\"", String.class)), EmbeddingConfig.builder().build());
|
||||
// skipped: field 'embeddings' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testEmbedTextsAsyncPresetSwitch() throws Exception {
|
||||
// embed_texts_async: preset override
|
||||
var config = JsonUtil.fromJson("{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}", EmbeddingConfig.class);
|
||||
|
||||
var result = Kreuzberg.embedTextsAsync(java.util.Arrays.asList(JsonUtil.fromJson("\"Text\"", String.class)), config);
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
30
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbedExtraTest.java
generated
Normal file
30
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbedExtraTest.java
generated
Normal file
@@ -0,0 +1,30 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: embed_extra. */
|
||||
class EmbedExtraTest {
|
||||
|
||||
@Test
|
||||
void testEmbedTextsBatch() throws Exception {
|
||||
// Batch embed texts
|
||||
var config = JsonUtil.fromJson("{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}", EmbeddingConfig.class);
|
||||
|
||||
var result = Kreuzberg.embedTexts(java.util.Arrays.asList(JsonUtil.fromJson("\"Hello\"", String.class), JsonUtil.fromJson("\"World\"", String.class)), config);
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
36
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbeddingBackendManagementTest.java
generated
Normal file
36
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbeddingBackendManagementTest.java
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: embedding_backend_management. */
|
||||
class EmbeddingBackendManagementTest {
|
||||
|
||||
@Test
|
||||
void testEmbeddingBackendsClear() throws Exception {
|
||||
// Clear all embedding backends and verify list is empty
|
||||
Kreuzberg.clearEmbeddingBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testEmbeddingBackendsList() throws Exception {
|
||||
// List all registered embedding backends
|
||||
var result = Kreuzberg.listEmbeddingBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
67
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbeddingsTest.java
generated
Normal file
67
e2e/java/src/test/java/dev/kreuzberg/e2e/EmbeddingsTest.java
generated
Normal file
@@ -0,0 +1,67 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: embeddings. */
|
||||
class EmbeddingsTest {
|
||||
|
||||
@Test
|
||||
void testEmbedTextsDifferentPreset() throws Exception {
|
||||
// embed_texts: multilingual preset
|
||||
var config = JsonUtil.fromJson("{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}", EmbeddingConfig.class);
|
||||
|
||||
var result = Kreuzberg.embedTexts(java.util.Arrays.asList(JsonUtil.fromJson("\"Hello world\"", String.class), JsonUtil.fromJson("\"Test\"", String.class)), config);
|
||||
// skipped: field 'embeddings' not available on result type
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testGetEmbeddingPresetKnown() throws Exception {
|
||||
// get_embedding_preset: known preset
|
||||
var result = Kreuzberg.getEmbeddingPreset("balanced");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testGetEmbeddingPresetNominal() throws Exception {
|
||||
// get_embedding_preset: nominal case
|
||||
var result = Kreuzberg.getEmbeddingPreset("balanced");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testGetEmbeddingPresetUnknown() throws Exception {
|
||||
// get_embedding_preset: unknown preset fails
|
||||
var result = Kreuzberg.getEmbeddingPreset("nonexistent-xyz");
|
||||
assertNull(result, "expected empty value");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListEmbeddingPresetsSanity() throws Exception {
|
||||
// list_embedding_presets: returns at least one
|
||||
var result = Kreuzberg.listEmbeddingPresets();
|
||||
assertFalse(result.isEmpty(), "expected non-empty value");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
101
e2e/java/src/test/java/dev/kreuzberg/e2e/ErrorTest.java
generated
Normal file
101
e2e/java/src/test/java/dev/kreuzberg/e2e/ErrorTest.java
generated
Normal file
@@ -0,0 +1,101 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: error. */
|
||||
class ErrorTest {
|
||||
|
||||
@Test
|
||||
void testErrorEmptyBytes() throws Exception {
|
||||
// Graceful handling of empty bytes (should not error)
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/empty.txt")), "text/plain", config);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testErrorEmptyMime() throws Exception {
|
||||
// Error when extracting with empty MIME type
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/plain.txt")), "", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testErrorExtractBytesConflictingOcr() throws Exception {
|
||||
// extract_bytes force+disable OCR
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{\"disable_ocr\":true,\"force_ocr\":true}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/fake_text.txt")), "text/plain", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testErrorInvalidMimeFormat() throws Exception {
|
||||
// Error when extracting with invalid MIME type format
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/plain.txt")), "not-a-mime", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testErrorUnsupportedMime() throws Exception {
|
||||
// Error when extracting with unsupported MIME type
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("text/plain.txt")), "application/x-nonexistent", config);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
63
e2e/java/src/test/java/dev/kreuzberg/e2e/FormatSpecificTest.java
generated
Normal file
63
e2e/java/src/test/java/dev/kreuzberg/e2e/FormatSpecificTest.java
generated
Normal file
@@ -0,0 +1,63 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: format_specific. */
|
||||
class FormatSpecificTest {
|
||||
|
||||
@Test
|
||||
void testFormatDocxStandalone() throws Exception {
|
||||
// Standalone DOCX extraction using extract_bytes_sync
|
||||
var result = Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("docx/fake.docx")), "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ExtractionConfig.builder().build());
|
||||
assertTrue(result.content().length() >= 20, "expected length >= 20");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testFormatHwpxStandalone() throws Exception {
|
||||
// Standalone HWPX extraction using extract_bytes_sync
|
||||
var result = Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("hwpx/simple.hwpx")), "application/haansofthwpx", ExtractionConfig.builder().build());
|
||||
assertTrue(result.content().length() >= 20, "expected length >= 20");assertTrue(result.content().contains("Hello from HWPX"), "expected to contain: " + "Hello from HWPX");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testFormatPdfText() throws Exception {
|
||||
// Standalone PDF text extraction using extract_bytes_sync
|
||||
var result = Kreuzberg.extractBytesSync(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")), "application/pdf", ExtractionConfig.builder().build());
|
||||
assertTrue(result.content().length() >= 50, "expected length >= 50");assertTrue(result.content().contains("Mallori") || result.content().contains("May"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testFormatPptx() throws Exception {
|
||||
// PPTX presentation extraction using extract_file_sync
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("pptx/simple.pptx"), "application/vnd.openxmlformats-officedocument.presentationml.presentation", ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testFormatXlsx() throws Exception {
|
||||
// XLSX spreadsheet extraction using extract_file_sync
|
||||
var result = Kreuzberg.extractFileSync(java.nio.file.Path.of("xlsx/stanley_cups.xlsx"), "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ExtractionConfig.builder().build());
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
45
e2e/java/src/test/java/dev/kreuzberg/e2e/MimeUtilitiesTest.java
generated
Normal file
45
e2e/java/src/test/java/dev/kreuzberg/e2e/MimeUtilitiesTest.java
generated
Normal file
@@ -0,0 +1,45 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: mime_utilities. */
|
||||
class MimeUtilitiesTest {
|
||||
|
||||
@Test
|
||||
void testMimeDetectBytes() throws Exception {
|
||||
// Detect MIME type from file bytes
|
||||
var result = Kreuzberg.detectMimeTypeFromBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")));
|
||||
assertTrue(result.contains("pdf"), "expected to contain: " + "pdf");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testMimeDetectImage() throws Exception {
|
||||
// Detect MIME type from PNG image bytes
|
||||
var result = Kreuzberg.detectMimeTypeFromBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("images/test_hello_world.png")));
|
||||
assertTrue(result.contains("png"), "expected to contain: " + "png");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testMimeGetExtensions() throws Exception {
|
||||
// Get file extensions for a MIME type
|
||||
var result = Kreuzberg.getExtensionsForMime("application/pdf");
|
||||
assertTrue(result.contains("pdf"), "expected to contain: " + "pdf");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
45
e2e/java/src/test/java/dev/kreuzberg/e2e/OcrBackendManagementTest.java
generated
Normal file
45
e2e/java/src/test/java/dev/kreuzberg/e2e/OcrBackendManagementTest.java
generated
Normal file
@@ -0,0 +1,45 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: ocr_backend_management. */
|
||||
class OcrBackendManagementTest {
|
||||
|
||||
@Test
|
||||
void testOcrBackendsClear() throws Exception {
|
||||
// Clear all OCR backends and verify list is empty
|
||||
Kreuzberg.clearOcrBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testOcrBackendsList() throws Exception {
|
||||
// List all registered OCR backends
|
||||
var result = Kreuzberg.listOcrBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testOcrBackendsUnregister() throws Exception {
|
||||
// Unregister nonexistent OCR backend gracefully
|
||||
Kreuzberg.unregisterOcrBackend("nonexistent-backend-xyz");
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
45
e2e/java/src/test/java/dev/kreuzberg/e2e/PdfTest.java
generated
Normal file
45
e2e/java/src/test/java/dev/kreuzberg/e2e/PdfTest.java
generated
Normal file
@@ -0,0 +1,45 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: pdf. */
|
||||
class PdfTest {
|
||||
|
||||
@Test
|
||||
void testRenderPdfPageFirst() throws Exception {
|
||||
// render_pdf_page_to_png: first page
|
||||
var result = Kreuzberg.renderPdfPageToPng(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")), 0, null, null);
|
||||
assertNotNull(result, "expected non-null byte[] response");
|
||||
// skipped: assertion type 'min_length' not supported on byte[] result
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRenderPdfPageOutOfRange() throws Exception {
|
||||
// render_pdf_page_to_png: page out of range
|
||||
// Wrap setup_lines + call_expr inside the lambda so error fixtures
|
||||
// catch failures at *any* step — including `<Type>.fromJson(...)`
|
||||
// calls that throw on malformed JSON (e.g. error fixtures with an
|
||||
// invalid enum value like `"purpose":"invalid-purpose"`). Mirrors
|
||||
// the C# `Assert.ThrowsAnyAsync(() => client.X(Type.FromJson(...)))`
|
||||
// pattern.
|
||||
assertThrows(Exception.class, () -> {
|
||||
Kreuzberg.renderPdfPageToPng(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("pdf/fake_memo.pdf")), 999, null, null);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
293
e2e/java/src/test/java/dev/kreuzberg/e2e/PluginApiTest.java
generated
Normal file
293
e2e/java/src/test/java/dev/kreuzberg/e2e/PluginApiTest.java
generated
Normal file
@@ -0,0 +1,293 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: plugin_api. */
|
||||
class PluginApiTest {
|
||||
|
||||
@Test
|
||||
void testRegisterDocumentExtractorTraitBridge() throws Exception {
|
||||
// register_document_extractor: trait bridge
|
||||
class TestStubRegisterDocumentExtractorTraitBridge implements dev.kreuzberg.IDocumentExtractor {
|
||||
@Override
|
||||
public String extract_bytes(byte[] content, String mimeType, dev.kreuzberg.ExtractionConfig config) {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public String extract_file(java.nio.file.Path path, String mimeType, dev.kreuzberg.ExtractionConfig config) {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public java.util.List<String> supported_mime_types() {
|
||||
return new java.util.ArrayList<>();
|
||||
}
|
||||
@Override
|
||||
public int priority() {
|
||||
return 0;
|
||||
}
|
||||
@Override
|
||||
public boolean can_handle(java.nio.file.Path path, String mimeType) {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-extractor"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerDocumentExtractor(new TestStubRegisterDocumentExtractorTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterDocumentExtractor("test-extractor");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRegisterEmbeddingBackendTraitBridge() throws Exception {
|
||||
// register_embedding_backend: trait bridge
|
||||
class TestStubRegisterEmbeddingBackendTraitBridge implements dev.kreuzberg.IEmbeddingBackend {
|
||||
@Override
|
||||
public long dimensions() {
|
||||
return 0;
|
||||
}
|
||||
@Override
|
||||
public java.util.List<java.util.List<Float>> embed(java.util.List<String> texts) {
|
||||
return new java.util.ArrayList<>();
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-embedding-backend"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerEmbeddingBackend(new TestStubRegisterEmbeddingBackendTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterEmbeddingBackend("test-embedding-backend");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRegisterOcrBackendTraitBridge() throws Exception {
|
||||
// register_ocr_backend: trait bridge
|
||||
class TestStubRegisterOcrBackendTraitBridge implements dev.kreuzberg.IOcrBackend {
|
||||
@Override
|
||||
public dev.kreuzberg.ExtractionResult process_image(byte[] imageBytes, dev.kreuzberg.OcrConfig config) {
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public dev.kreuzberg.ExtractionResult process_image_file(java.nio.file.Path path, dev.kreuzberg.OcrConfig config) {
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public boolean supports_language(String lang) {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public String backend_type() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public java.util.List<String> supported_languages() {
|
||||
return new java.util.ArrayList<>();
|
||||
}
|
||||
@Override
|
||||
public boolean supports_table_detection() {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public boolean supports_document_processing() {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public dev.kreuzberg.ExtractionResult process_document(java.nio.file.Path path, dev.kreuzberg.OcrConfig config) {
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-backend"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerOcrBackend(new TestStubRegisterOcrBackendTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterOcrBackend("test-backend");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRegisterPostProcessorTraitBridge() throws Exception {
|
||||
// register_post_processor: trait bridge
|
||||
class TestStubRegisterPostProcessorTraitBridge implements dev.kreuzberg.IPostProcessor {
|
||||
@Override
|
||||
public void process(dev.kreuzberg.ExtractionResult result, dev.kreuzberg.ExtractionConfig config) {}
|
||||
@Override
|
||||
public String processing_stage() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public boolean should_process(dev.kreuzberg.ExtractionResult result, dev.kreuzberg.ExtractionConfig config) {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public long estimated_duration_ms(dev.kreuzberg.ExtractionResult result) {
|
||||
return 0;
|
||||
}
|
||||
@Override
|
||||
public int priority() {
|
||||
return 0;
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-processor"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerPostProcessor(new TestStubRegisterPostProcessorTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterPostProcessor("test-processor");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRegisterRendererTraitBridge() throws Exception {
|
||||
// register_renderer: trait bridge
|
||||
class TestStubRegisterRendererTraitBridge implements dev.kreuzberg.IRenderer {
|
||||
@Override
|
||||
public String render(String doc) {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-renderer"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerRenderer(new TestStubRegisterRendererTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterRenderer("test-renderer");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRegisterValidatorTraitBridge() throws Exception {
|
||||
// register_validator: trait bridge
|
||||
class TestStubRegisterValidatorTraitBridge implements dev.kreuzberg.IValidator {
|
||||
@Override
|
||||
public void validate(dev.kreuzberg.ExtractionResult result, dev.kreuzberg.ExtractionConfig config) {}
|
||||
@Override
|
||||
public boolean should_validate(dev.kreuzberg.ExtractionResult result, dev.kreuzberg.ExtractionConfig config) {
|
||||
return false;
|
||||
}
|
||||
@Override
|
||||
public int priority() {
|
||||
return 0;
|
||||
}
|
||||
@Override
|
||||
public String name() { return "test-validator"; }
|
||||
@Override
|
||||
public String version() {
|
||||
return "";
|
||||
}
|
||||
@Override
|
||||
public void initialize() {}
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
|
||||
Kreuzberg.registerValidator(new TestStubRegisterValidatorTraitBridge());
|
||||
|
||||
Kreuzberg.unregisterValidator("test-validator");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testUnregisterDocumentExtractorAfterRegister() throws Exception {
|
||||
// unregister_document_extractor
|
||||
Kreuzberg.unregisterDocumentExtractor("test-extractor");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testUnregisterEmbeddingBackendAfterRegister() throws Exception {
|
||||
// unregister_embedding_backend
|
||||
Kreuzberg.unregisterEmbeddingBackend("test-embedding-backend");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testUnregisterPostProcessorAfterRegister() throws Exception {
|
||||
// unregister_post_processor
|
||||
Kreuzberg.unregisterPostProcessor("test-processor");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testUnregisterRendererAfterRegister() throws Exception {
|
||||
// unregister_renderer
|
||||
Kreuzberg.unregisterRenderer("test-renderer");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testUnregisterValidatorAfterRegister() throws Exception {
|
||||
// unregister_validator
|
||||
Kreuzberg.unregisterValidator("test-validator");
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
36
e2e/java/src/test/java/dev/kreuzberg/e2e/PostProcessorManagementTest.java
generated
Normal file
36
e2e/java/src/test/java/dev/kreuzberg/e2e/PostProcessorManagementTest.java
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: post_processor_management. */
|
||||
class PostProcessorManagementTest {
|
||||
|
||||
@Test
|
||||
void testPostProcessorsClear() throws Exception {
|
||||
// Clear all post-processors and verify list is empty
|
||||
Kreuzberg.clearPostProcessors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testPostProcessorsList() throws Exception {
|
||||
// List all registered post-processors
|
||||
var result = Kreuzberg.listPostProcessors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
45
e2e/java/src/test/java/dev/kreuzberg/e2e/RegistryOperationsTest.java
generated
Normal file
45
e2e/java/src/test/java/dev/kreuzberg/e2e/RegistryOperationsTest.java
generated
Normal file
@@ -0,0 +1,45 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: registry_operations. */
|
||||
class RegistryOperationsTest {
|
||||
|
||||
@Test
|
||||
void testExtensionsDocx() throws Exception {
|
||||
// Get file extensions for DOCX MIME type
|
||||
var result = Kreuzberg.getExtensionsForMime("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testExtensionsHtml() throws Exception {
|
||||
// Get file extensions for HTML MIME type
|
||||
var result = Kreuzberg.getExtensionsForMime("text/html");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testExtensionsPdf() throws Exception {
|
||||
// Get file extensions for PDF MIME type
|
||||
var result = Kreuzberg.getExtensionsForMime("application/pdf");
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
72
e2e/java/src/test/java/dev/kreuzberg/e2e/RegistryTest.java
generated
Normal file
72
e2e/java/src/test/java/dev/kreuzberg/e2e/RegistryTest.java
generated
Normal file
@@ -0,0 +1,72 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: registry. */
|
||||
class RegistryTest {
|
||||
|
||||
@Test
|
||||
void testListDocumentExtractors() throws Exception {
|
||||
// List document extractors
|
||||
var result = Kreuzberg.listDocumentExtractors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListEmbeddingBackends() throws Exception {
|
||||
// List embedding backends
|
||||
var result = Kreuzberg.listEmbeddingBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListOcrBackends() throws Exception {
|
||||
// List OCR backends
|
||||
var result = Kreuzberg.listOcrBackends();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListPostProcessors() throws Exception {
|
||||
// List post-processors
|
||||
var result = Kreuzberg.listPostProcessors();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListRenderers() throws Exception {
|
||||
// List renderers
|
||||
var result = Kreuzberg.listRenderers();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testListValidators() throws Exception {
|
||||
// List validators
|
||||
var result = Kreuzberg.listValidators();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
36
e2e/java/src/test/java/dev/kreuzberg/e2e/RendererManagementTest.java
generated
Normal file
36
e2e/java/src/test/java/dev/kreuzberg/e2e/RendererManagementTest.java
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: renderer_management. */
|
||||
class RendererManagementTest {
|
||||
|
||||
@Test
|
||||
void testRenderersClear() throws Exception {
|
||||
// Clear all renderers and verify list is empty
|
||||
Kreuzberg.clearRenderers();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testRenderersList() throws Exception {
|
||||
// List all registered renderers
|
||||
var result = Kreuzberg.listRenderers();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
106
e2e/java/src/test/java/dev/kreuzberg/e2e/SmokeTest.java
generated
Normal file
106
e2e/java/src/test/java/dev/kreuzberg/e2e/SmokeTest.java
generated
Normal file
@@ -0,0 +1,106 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: smoke. */
|
||||
class SmokeTest {
|
||||
|
||||
@Test
|
||||
void testOcrImagePng() throws Exception {
|
||||
// OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractBytes(java.nio.file.Files.readAllBytes(java.nio.file.Path.of("images/test_hello_world.png")), "image/png", config);
|
||||
assertEquals("image/png", result.mimeType().trim());assertTrue(result.content().length() >= 1, "expected length >= 1");assertTrue(result.content().contains("Hello") || result.content().contains("World") || result.content().contains("hello") || result.content().contains("world"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeDocxBasic() throws Exception {
|
||||
// Smoke test: DOCX with formatted text
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("docx/fake.docx"), "application/vnd.openxmlformats-officedocument.wordprocessingml.document", config);
|
||||
assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.mimeType().trim());assertTrue(result.content().length() >= 20, "expected length >= 20");assertTrue(result.content().contains("Lorem") || result.content().contains("ipsum") || result.content().contains("document") || result.content().contains("text"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeHtmlBasic() throws Exception {
|
||||
// Smoke test: HTML table extraction
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("html/simple_table.html"), "text/html", config);
|
||||
assertEquals("text/html", result.mimeType().trim());assertTrue(result.content().length() >= 10, "expected length >= 10");assertTrue(result.content().contains("Sample Data Table") || result.content().contains("Laptop") || result.content().contains("Electronics") || result.content().contains("Product"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeImagePng() throws Exception {
|
||||
// Smoke test: PNG image (without OCR, metadata only)
|
||||
var config = JsonUtil.fromJson("{\"disable_ocr\":true}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("images/sample.png"), null, config);
|
||||
assertEquals("image/png", result.mimeType().trim());
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeJsonBasic() throws Exception {
|
||||
// Smoke test: JSON file extraction
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("json/simple.json"), "application/json", config);
|
||||
assertEquals("application/json", result.mimeType().trim());assertTrue(result.content().length() >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokePdfBasic() throws Exception {
|
||||
// Smoke test: PDF with simple text extraction
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("pdf/fake_memo.pdf"), "application/pdf", config);
|
||||
assertEquals("application/pdf", result.mimeType().trim());assertTrue(result.content().length() >= 50, "expected length >= 50");assertTrue(result.content().contains("May 5, 2023") || result.content().contains("To Whom it May Concern"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeTxtBasic() throws Exception {
|
||||
// Smoke test: Plain text file
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("text/report.txt"), "text/plain", config);
|
||||
assertEquals("text/plain", result.mimeType().trim());assertTrue(result.content().length() >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testSmokeXlsxBasic() throws Exception {
|
||||
// Smoke test: XLSX with basic spreadsheet data including tables
|
||||
var config = JsonUtil.fromJson("{}", ExtractionConfig.class);
|
||||
|
||||
var result = Kreuzberg.extractFile(java.nio.file.Path.of("xlsx/stanley_cups.xlsx"), "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", config);
|
||||
assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.mimeType().trim());assertTrue(result.content().length() >= 100, "expected length >= 100");assertTrue(result.content().contains("Team"), "expected to contain: " + "Team");assertTrue(result.content().contains("Location"), "expected to contain: " + "Location");assertTrue(result.content().contains("Stanley Cups"), "expected to contain: " + "Stanley Cups");assertTrue(result.content().contains("Blues"), "expected to contain: " + "Blues");assertTrue(result.content().contains("Flyers"), "expected to contain: " + "Flyers");assertTrue(result.content().contains("Maple Leafs"), "expected to contain: " + "Maple Leafs");assertTrue(result.content().contains("STL"), "expected to contain: " + "STL");assertTrue(result.content().contains("PHI"), "expected to contain: " + "PHI");assertTrue(result.content().contains("TOR"), "expected to contain: " + "TOR"); // skipped: field 'tables' not available on result type // skipped: field 'metadata.format.excel.sheet_count' not available on result type // skipped: field 'metadata.format.excel.sheet_names' not available on result type
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
36
e2e/java/src/test/java/dev/kreuzberg/e2e/ValidatorManagementTest.java
generated
Normal file
36
e2e/java/src/test/java/dev/kreuzberg/e2e/ValidatorManagementTest.java
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
package dev.kreuzberg.e2e;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.util.Optional;
|
||||
import dev.kreuzberg.JsonUtil;
|
||||
|
||||
/** E2e tests for category: validator_management. */
|
||||
class ValidatorManagementTest {
|
||||
|
||||
@Test
|
||||
void testValidatorsClear() throws Exception {
|
||||
// Clear all validators and verify list is empty
|
||||
Kreuzberg.clearValidators();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testValidatorsList() throws Exception {
|
||||
// List all registered validators
|
||||
var result = Kreuzberg.listValidators();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user