Files
fil/packages/php/src/Kreuzberg.php
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

588 lines
20 KiB
PHP

<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg;
/**
 * Static facade over the native extension class {@see KreuzbergApi}.
 *
 * Every method forwards its arguments to the identically named static method
 * on KreuzbergApi. Where a config parameter is nullable, null is replaced by a
 * freshly constructed default config object before delegating.
 */
final class Kreuzberg
{
    /**
     * Extract content from an in-memory document.
     *
     * Upstream describes the native pipeline as: validate the MIME type,
     * convert legacy formats when needed, pick an extractor from the registry,
     * extract, then run the post-processing pipeline.
     *
     * @param string $content Raw bytes of the document.
     * @param string $mime_type MIME type of the content.
     * @param ?ExtractionConfig $config Extraction settings; a default ExtractionConfig is used when null.
     * @return ExtractionResult
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function extractBytes(
        string $content,
        string $mime_type,
        ?ExtractionConfig $config = null
    ): ExtractionResult {
        return \Kreuzberg\KreuzbergApi::extractBytes(
            $content,
            $mime_type,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Extract content from a file on disk.
     *
     * Upstream describes the native pipeline as: consult the cache (if caching
     * is enabled), detect or validate the MIME type, pick an extractor from the
     * registry, extract, run the post-processing pipeline, and store the result
     * in the cache (if caching is enabled).
     *
     * @param string $path Path of the file to extract.
     * @param ?string $mime_type MIME type of the file; forwarded unchanged, including null.
     * @param ?ExtractionConfig $config Extraction settings; a default ExtractionConfig is used when null.
     * @return ExtractionResult
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function extractFile(
        string $path,
        ?string $mime_type = null,
        ?ExtractionConfig $config = null
    ): ExtractionResult {
        return \Kreuzberg\KreuzbergApi::extractFile(
            $path,
            $mime_type,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Blocking counterpart of {@see Kreuzberg::extractFile()}.
     *
     * According to the upstream documentation, the native side blocks the
     * current thread until extraction completes and always runs on a shared
     * global Tokio runtime rather than creating a runtime per call. Upstream
     * also notes that the native function requires the `tokio-runtime` feature
     * and is therefore not available on WASM targets.
     *
     * @param string $path Path of the file to extract.
     * @param ?string $mime_type MIME type of the file; forwarded unchanged, including null.
     * @param ?ExtractionConfig $config Extraction settings; a default ExtractionConfig is used when null.
     * @return ExtractionResult
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function extractFileSync(
        string $path,
        ?string $mime_type = null,
        ?ExtractionConfig $config = null
    ): ExtractionResult {
        return \Kreuzberg\KreuzbergApi::extractFileSync(
            $path,
            $mime_type,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Blocking counterpart of {@see Kreuzberg::extractBytes()}.
     *
     * According to the upstream documentation, with the `tokio-runtime`
     * feature the native side blocks the current thread on a shared global
     * Tokio runtime; without it (WASM) a fully synchronous implementation is
     * used instead.
     *
     * @param string $content Raw bytes of the document.
     * @param string $mime_type MIME type of the content.
     * @param ?ExtractionConfig $config Extraction settings; a default ExtractionConfig is used when null.
     * @return ExtractionResult
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function extractBytesSync(
        string $content,
        string $mime_type,
        ?ExtractionConfig $config = null
    ): ExtractionResult {
        return \Kreuzberg\KreuzbergApi::extractBytesSync(
            $content,
            $mime_type,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Blocking counterpart of {@see Kreuzberg::batchExtractFiles()}.
     *
     * According to the upstream documentation, the native side runs on a
     * shared global Tokio runtime and is only available with the
     * `tokio-runtime` feature (WASM has no filesystem).
     *
     * @param array<BatchFileItem> $items Files to extract.
     * @param ?ExtractionConfig $config Batch-level settings; a default ExtractionConfig is used when null.
     * @return array<ExtractionResult>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractFilesSync(
        array $items,
        ?ExtractionConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::batchExtractFilesSync(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Blocking counterpart of {@see Kreuzberg::batchExtractBytes()}.
     *
     * According to the upstream documentation, with the `tokio-runtime`
     * feature the native side blocks the current thread on a shared global
     * Tokio runtime; without it (WASM) it iterates over the items and extracts
     * each one synchronously.
     *
     * @param array<BatchBytesItem> $items In-memory documents to extract.
     * @param ?ExtractionConfig $config Batch-level settings; a default ExtractionConfig is used when null.
     * @return array<ExtractionResult>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractBytesSync(
        array $items,
        ?ExtractionConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::batchExtractBytesSync(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Extract content from several files concurrently.
     *
     * According to the upstream documentation, files are processed in parallel
     * with a concurrency limit that protects against resource exhaustion. The
     * limit comes from `ExtractionConfig::max_concurrent_extractions` and
     * otherwise defaults to `ceil(num_cpus * 1.5)`.
     *
     * Each item may carry a `FileExtractionConfig` that overrides individual
     * fields of the batch-level `$config`; items without one use the batch
     * defaults. Batch-level settings such as `max_concurrent_extractions` and
     * `use_cache` are always taken from the batch-level `$config`.
     *
     * @param array<BatchFileItem> $items Files to extract.
     * @param ?ExtractionConfig $config Batch-level settings; a default ExtractionConfig is used when null.
     * @return array<ExtractionResult>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractFiles(
        array $items,
        ?ExtractionConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::batchExtractFiles(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Extract content from several in-memory documents concurrently.
     *
     * According to the upstream documentation, items are processed in parallel
     * with a concurrency limit that protects against resource exhaustion. The
     * limit comes from `ExtractionConfig::max_concurrent_extractions` and
     * otherwise defaults to `ceil(num_cpus * 1.5)`.
     *
     * Each item may carry a `FileExtractionConfig` that overrides individual
     * fields of the batch-level `$config`; items without one use the batch
     * defaults.
     *
     * @param array<BatchBytesItem> $items In-memory documents to extract.
     * @param ?ExtractionConfig $config Batch-level settings; a default ExtractionConfig is used when null.
     * @return array<ExtractionResult>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractBytes(
        array $items,
        ?ExtractionConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::batchExtractBytes(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Detect a MIME type from raw file bytes.
     *
     * According to the upstream documentation, detection uses magic-byte
     * signatures with the `infer` crate as a fallback, and ZIP-based content is
     * inspected to tell Office Open XML formats (DOCX, XLSX, PPTX) apart from
     * plain ZIP archives.
     *
     * @param string $content Raw bytes to inspect.
     * @return string
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function detectMimeTypeFromBytes(string $content): string
    {
        return \Kreuzberg\KreuzbergApi::detectMimeTypeFromBytes($content);
    }

    /**
     * Look up the file extensions known for a MIME type.
     *
     * @param string $mime_type MIME type to look up.
     * @return array<string> All known extensions that map to the MIME type.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function getExtensionsForMime(string $mime_type): array
    {
        return \Kreuzberg\KreuzbergApi::getExtensionsForMime($mime_type);
    }

    /**
     * List the names of all registered embedding backends.
     *
     * Upstream notes this is used by `kreuzberg-cli`, the api/mcp endpoints,
     * and the generated language bindings.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listEmbeddingBackends(): array
    {
        return \Kreuzberg\KreuzbergApi::listEmbeddingBackends();
    }

    /**
     * List the names of all registered document extractors.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listDocumentExtractors(): array
    {
        return \Kreuzberg\KreuzbergApi::listDocumentExtractors();
    }

    /**
     * List the names of all OCR backends currently registered in the global registry.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listOcrBackends(): array
    {
        return \Kreuzberg\KreuzbergApi::listOcrBackends();
    }

    /**
     * List the names of all post-processors currently registered in the global registry.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listPostProcessors(): array
    {
        return \Kreuzberg\KreuzbergApi::listPostProcessors();
    }

    /**
     * List the names of all registered renderers.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listRenderers(): array
    {
        return \Kreuzberg\KreuzbergApi::listRenderers();
    }

    /**
     * List the names of all registered validators.
     *
     * @return array<string>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function listValidators(): array
    {
        return \Kreuzberg\KreuzbergApi::listValidators();
    }

    /**
     * Compare two extraction results and return a structured diff.
     *
     * Upstream documents the comparison as purely structural (no I/O, no side
     * effects), with the fields of the returned ExtractionDiff populated
     * according to the supplied DiffOptions.
     *
     * @param ExtractionResult $a First result to compare.
     * @param ExtractionResult $b Second result to compare.
     * @param DiffOptions $opts Options controlling the comparison.
     * @return ExtractionDiff
     */
    public static function compare(
        ExtractionResult $a,
        ExtractionResult $b,
        DiffOptions $opts
    ): ExtractionDiff {
        return \Kreuzberg\KreuzbergApi::compare($a, $b, $opts);
    }

    /**
     * Generate embeddings for a list of texts via the native async entry point.
     *
     * Upstream documents this as the async counterpart of
     * {@see Kreuzberg::embedTexts()}: the blocking ONNX inference is offloaded
     * to Tokio's blocking thread pool (`spawn_blocking`) so the async executor
     * stays free. One embedding vector is returned per input text, in input
     * order.
     *
     * @param array<string> $texts Texts to embed.
     * @param ?EmbeddingConfig $config Embedding settings; a default EmbeddingConfig is used when null.
     * @return array<array<float>>
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function embedTextsAsync(
        array $texts,
        ?EmbeddingConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::embedTextsAsync(
            $texts,
            $config ?? new EmbeddingConfig()
        );
    }

    /**
     * Render a single PDF page to PNG bytes.
     *
     * Upstream documents the result as raw PNG-encoded bytes for the requested
     * page at the given DPI, rendered in pure Rust with pdf_oxide and
     * tiny-skia.
     *
     * @param string $pdf_bytes Raw bytes of the PDF document.
     * @param int $page_index Index of the page to render.
     * @param ?int $dpi Rendering resolution; forwarded unchanged, including null.
     * @param ?string $password Document password; forwarded unchanged, including null.
     * @return string
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function renderPdfPageToPng(
        string $pdf_bytes,
        int $page_index,
        ?int $dpi = null,
        ?string $password = null
    ): string {
        return \Kreuzberg\KreuzbergApi::renderPdfPageToPng(
            $pdf_bytes,
            $page_index,
            $dpi,
            $password
        );
    }

    /**
     * Detect the MIME type of the file at a path.
     *
     * Upstream documents detection as based on the file extension and,
     * optionally, the file content.
     *
     * @param string $path Path of the file to inspect.
     * @param bool $check_exists Pass true to verify the file exists before detection.
     * @return string
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function detectMimeType(
        string $path,
        bool $check_exists
    ): string {
        return \Kreuzberg\KreuzbergApi::detectMimeType($path, $check_exists);
    }

    /**
     * Embed a list of texts using the configured embedding model.
     *
     * @param array<string> $texts Texts to embed.
     * @param ?EmbeddingConfig $config Embedding settings; a default EmbeddingConfig is used when null.
     * @return array<array<float>> One embedding per input text, in the corresponding position.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function embedTexts(
        array $texts,
        ?EmbeddingConfig $config = null
    ): array {
        return \Kreuzberg\KreuzbergApi::embedTexts(
            $texts,
            $config ?? new EmbeddingConfig()
        );
    }

    /**
     * Look up an embedding preset by name.
     *
     * @param string $name Preset name to look up.
     * @return ?EmbeddingPreset The preset, or null when no preset with that name exists.
     */
    public static function getEmbeddingPreset(string $name): ?EmbeddingPreset
    {
        return \Kreuzberg\KreuzbergApi::getEmbeddingPreset($name);
    }

    /**
     * List the names of all available embedding presets.
     *
     * @return array<string>
     */
    public static function listEmbeddingPresets(): array
    {
        return \Kreuzberg\KreuzbergApi::listEmbeddingPresets();
    }

    /**
     * Forward an OCR backend to the native registerOcrBackend call.
     *
     * @param OcrBackend $backend Backend instance handed to the native extension.
     * @return void
     */
    public static function registerOcrBackend(OcrBackend $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerOcrBackend($backend);
    }

    /**
     * Forward an unregister request for the named OCR backend to the native extension.
     *
     * @param string $name Name handed to the native unregisterOcrBackend call.
     * @return void
     */
    public static function unregisterOcrBackend(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterOcrBackend($name);
    }

    /**
     * Forward a clear request for OCR backends to the native extension.
     *
     * @return void
     */
    public static function clearOcrBackends(): void
    {
        \Kreuzberg\KreuzbergApi::clearOcrBackends();
    }

    /**
     * Forward a post-processor to the native registerPostProcessor call.
     *
     * @param PostProcessor $backend Post-processor instance handed to the native extension.
     * @return void
     */
    public static function registerPostProcessor(PostProcessor $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerPostProcessor($backend);
    }

    /**
     * Forward an unregister request for the named post-processor to the native extension.
     *
     * @param string $name Name handed to the native unregisterPostProcessor call.
     * @return void
     */
    public static function unregisterPostProcessor(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterPostProcessor($name);
    }

    /**
     * Forward a clear request for post-processors to the native extension.
     *
     * @return void
     */
    public static function clearPostProcessors(): void
    {
        \Kreuzberg\KreuzbergApi::clearPostProcessors();
    }

    /**
     * Forward a validator to the native registerValidator call.
     *
     * @param Validator $backend Validator instance handed to the native extension.
     * @return void
     */
    public static function registerValidator(Validator $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerValidator($backend);
    }

    /**
     * Forward an unregister request for the named validator to the native extension.
     *
     * @param string $name Name handed to the native unregisterValidator call.
     * @return void
     */
    public static function unregisterValidator(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterValidator($name);
    }

    /**
     * Forward a clear request for validators to the native extension.
     *
     * @return void
     */
    public static function clearValidators(): void
    {
        \Kreuzberg\KreuzbergApi::clearValidators();
    }

    /**
     * Forward an embedding backend to the native registerEmbeddingBackend call.
     *
     * @param EmbeddingBackend $backend Backend instance handed to the native extension.
     * @return void
     */
    public static function registerEmbeddingBackend(EmbeddingBackend $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerEmbeddingBackend($backend);
    }

    /**
     * Forward an unregister request for the named embedding backend to the native extension.
     *
     * @param string $name Name handed to the native unregisterEmbeddingBackend call.
     * @return void
     */
    public static function unregisterEmbeddingBackend(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterEmbeddingBackend($name);
    }

    /**
     * Forward a clear request for embedding backends to the native extension.
     *
     * @return void
     */
    public static function clearEmbeddingBackends(): void
    {
        \Kreuzberg\KreuzbergApi::clearEmbeddingBackends();
    }

    /**
     * Forward a document extractor to the native registerDocumentExtractor call.
     *
     * @param DocumentExtractor $backend Extractor instance handed to the native extension.
     * @return void
     */
    public static function registerDocumentExtractor(DocumentExtractor $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerDocumentExtractor($backend);
    }

    /**
     * Forward an unregister request for the named document extractor to the native extension.
     *
     * @param string $name Name handed to the native unregisterDocumentExtractor call.
     * @return void
     */
    public static function unregisterDocumentExtractor(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterDocumentExtractor($name);
    }

    /**
     * Forward a clear request for document extractors to the native extension.
     *
     * @return void
     */
    public static function clearDocumentExtractors(): void
    {
        \Kreuzberg\KreuzbergApi::clearDocumentExtractors();
    }

    /**
     * Forward a renderer to the native registerRenderer call.
     *
     * @param Renderer $backend Renderer instance handed to the native extension.
     * @return void
     */
    public static function registerRenderer(Renderer $backend): void
    {
        \Kreuzberg\KreuzbergApi::registerRenderer($backend);
    }

    /**
     * Forward an unregister request for the named renderer to the native extension.
     *
     * @param string $name Name handed to the native unregisterRenderer call.
     * @return void
     */
    public static function unregisterRenderer(string $name): void
    {
        \Kreuzberg\KreuzbergApi::unregisterRenderer($name);
    }

    /**
     * Forward a clear request for renderers to the native extension.
     *
     * @return void
     */
    public static function clearRenderers(): void
    {
        \Kreuzberg\KreuzbergApi::clearRenderers();
    }
}