$items * @param ExtractionConfig $config * @return array * @throws \Kreuzberg\KreuzbergException */
    public static function batchExtractFilesSync(array $items, ?ExtractionConfig $config = null): array
    {
        // A null $config is replaced by a default ExtractionConfig before the native call.
        return \Kreuzberg\KreuzbergApi::batchExtractFilesSync(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Blocking counterpart of the native `batch_extract_bytes`.
     *
     * According to the native library's documentation, with the `tokio-runtime` feature
     * the call blocks the current thread on the global Tokio runtime; without it (WASM)
     * a truly synchronous implementation walks the items and calls `extract_bytes_sync()`.
     *
     * @param array $items Items to extract; forwarded unchanged to the native extension.
     * @param ExtractionConfig|null $config Batch configuration; a default `ExtractionConfig` is used when null.
     * @return array Whatever the native extension returns for the batch.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractBytesSync(array $items, ?ExtractionConfig $config = null): array
    {
        // A null $config is replaced by a default ExtractionConfig before the native call.
        return \Kreuzberg\KreuzbergApi::batchExtractBytesSync(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Extract content from several files concurrently.
     *
     * The native implementation processes the files in parallel and manages concurrency
     * itself to prevent resource exhaustion. The limit comes from
     * `ExtractionConfig::max_concurrent_extractions` and otherwise defaults to
     * `(num_cpus * 1.5).ceil()`.
     *
     * Each file may carry its own [`FileExtractionConfig`] overriding specific fields of the
     * batch-level `config`; `None` (in the native API's terms) for a file means "use the
     * batch defaults". Batch-level settings such as `max_concurrent_extractions` and
     * `use_cache` are always taken from the batch-level `config`.
     *
     * @param array $items Files to extract; forwarded unchanged to the native extension.
     * @param ExtractionConfig|null $config Batch configuration; a default `ExtractionConfig` is used when null.
     * @return array Whatever the native extension returns for the batch.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractFiles(array $items, ?ExtractionConfig $config = null): array
    {
        // A null $config is replaced by a default ExtractionConfig before the native call.
        return \Kreuzberg\KreuzbergApi::batchExtractFiles(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Extract content from several byte arrays concurrently.
     *
     * The native implementation processes the byte arrays in parallel and manages
     * concurrency itself to prevent resource exhaustion. The limit comes from
     * `ExtractionConfig::max_concurrent_extractions` and otherwise defaults to
     * `(num_cpus * 1.5).ceil()`.
     *
     * Each item may carry its own [`FileExtractionConfig`] overriding specific fields of the
     * batch-level `config`; `None` (in the native API's terms) as an item's config means
     * "use the batch-level defaults for that item".
     *
     * @param array $items Byte-array items to extract; forwarded unchanged to the native extension.
     * @param ExtractionConfig|null $config Batch configuration; a default `ExtractionConfig` is used when null.
     * @return array Whatever the native extension returns for the batch.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function batchExtractBytes(array $items, ?ExtractionConfig $config = null): array
    {
        // A null $config is replaced by a default ExtractionConfig before the native call.
        return \Kreuzberg\KreuzbergApi::batchExtractBytes(
            $items,
            $config ?? new ExtractionConfig()
        );
    }

    /**
     * Detect a MIME type from raw file bytes.
     *
     * Per the native library's documentation, detection relies on magic byte signatures
     * and falls back to the `infer` crate for broader coverage. ZIP-based content is
     * inspected so that Office Open XML formats (DOCX, XLSX, PPTX) are told apart from
     * plain ZIP archives.
     *
     * @param string $content Raw bytes of the file.
     * @return string The detected MIME type.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function detectMimeTypeFromBytes(string $content): string
    {
        // Straight pass-through to the native extension class.
        return \Kreuzberg\KreuzbergApi::detectMimeTypeFromBytes($content);
    }

    /**
     * Look up the file extensions associated with a MIME type.
     *
     * Returns every known file extension that maps to the given MIME type.
     *
     * @param string $mime_type MIME type to look up.
     * @return array Extensions known for that MIME type.
     * @throws \Kreuzberg\KreuzbergException
     */
    public static function getExtensionsForMime(string $mime_type): array
    {
        // Straight pass-through to the native extension class.
        return \Kreuzberg\KreuzbergApi::getExtensionsForMime($mime_type);
    }

    /**
     * List the names of all registered embedding backends.
* * Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language * bindings. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listEmbeddingBackends( ): array { return \Kreuzberg\KreuzbergApi::listEmbeddingBackends(); // delegate to native extension class } /** * List names of all registered document extractors. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listDocumentExtractors( ): array { return \Kreuzberg\KreuzbergApi::listDocumentExtractors(); // delegate to native extension class } /** * List all registered OCR backends. * * Returns the names of all OCR backends currently registered in the global registry. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listOcrBackends( ): array { return \Kreuzberg\KreuzbergApi::listOcrBackends(); // delegate to native extension class } /** * List all registered post-processor names. * * Returns a vector of all post-processor names currently registered in the * global registry. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listPostProcessors( ): array { return \Kreuzberg\KreuzbergApi::listPostProcessors(); // delegate to native extension class } /** * List names of all registered renderers. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listRenderers( ): array { return \Kreuzberg\KreuzbergApi::listRenderers(); // delegate to native extension class } /** * List names of all registered validators. * * @return array * @throws \Kreuzberg\KreuzbergException */ public static function listValidators( ): array { return \Kreuzberg\KreuzbergApi::listValidators(); // delegate to native extension class } /** * Compare two extraction results and return a structured diff. * * The comparison is purely structural — no I/O, no side effects. All fields * of [`ExtractionDiff`] are populated according to the provided [`DiffOptions`]. 
* * @param ExtractionResult $a * @param ExtractionResult $b * @param DiffOptions $opts * @return ExtractionDiff */ public static function compare( ExtractionResult $a, ExtractionResult $b, DiffOptions $opts): ExtractionDiff { return \Kreuzberg\KreuzbergApi::compare($a, $b, $opts); // delegate to native extension class } /** * Generate embeddings asynchronously for a list of text strings. * * This is the async counterpart to [`embed_texts`]. It offloads the blocking * ONNX inference work to a dedicated blocking thread pool via Tokio's * `spawn_blocking`, keeping the async executor free. * * Returns one embedding vector per input text in the same order. * * @param array $texts * @param EmbeddingConfig $config * @return array> * @throws \Kreuzberg\KreuzbergException */ public static function embedTextsAsync( array $texts, ?EmbeddingConfig $config = null): array { return \Kreuzberg\KreuzbergApi::embedTextsAsync($texts, $config ?? new EmbeddingConfig()); // delegate to native extension class } /** * Render a single PDF page to PNG bytes. * * Returns raw PNG-encoded bytes for the specified page at the given DPI. * Uses pdf_oxide with tiny-skia for pure-Rust rendering. * * @param string $pdf_bytes * @param int $page_index * @param ?int $dpi * @param ?string $password * @return string * @throws \Kreuzberg\KreuzbergException */ public static function renderPdfPageToPng( string $pdf_bytes, int $page_index, ?int $dpi = null, ?string $password = null): string { return \Kreuzberg\KreuzbergApi::renderPdfPageToPng($pdf_bytes, $page_index, $dpi, $password); // delegate to native extension class } /** * Detect the MIME type of a file at the given path. * * Uses the file extension and optionally the file content to determine the MIME type. * Set `check_exists` to `true` to verify the file exists before detection. 
* * @param string $path * @param bool $check_exists * @return string * @throws \Kreuzberg\KreuzbergException */ public static function detectMimeType( string $path, bool $check_exists): string { return \Kreuzberg\KreuzbergApi::detectMimeType($path, $check_exists); // delegate to native extension class } /** * Embed a list of texts using the configured embedding model. * * Returns a 2D vector where each inner vector is the embedding for the corresponding text. * * @param array $texts * @param EmbeddingConfig $config * @return array> * @throws \Kreuzberg\KreuzbergException */ public static function embedTexts( array $texts, ?EmbeddingConfig $config = null): array { return \Kreuzberg\KreuzbergApi::embedTexts($texts, $config ?? new EmbeddingConfig()); // delegate to native extension class } /** * Get an embedding preset by name. * * Returns `None` if no preset with the given name exists. Returns an owned * clone so the value is safe to pass across FFI boundaries. * * @param string $name * @return ?EmbeddingPreset */ public static function getEmbeddingPreset( string $name): ?EmbeddingPreset { return \Kreuzberg\KreuzbergApi::getEmbeddingPreset($name); // delegate to native extension class } /** * List the names of all available embedding presets. * * Returns owned `String`s so the values are safe to pass across FFI boundaries. * * @return array */ public static function listEmbeddingPresets( ): array { return \Kreuzberg\KreuzbergApi::listEmbeddingPresets(); // delegate to native extension class } /** * registerOcrBackend. * * @param OcrBackend $backend * @return void */ public static function registerOcrBackend( OcrBackend $backend) : void { \Kreuzberg\KreuzbergApi::registerOcrBackend($backend); // delegate to native extension class } /** * unregisterOcrBackend. 
* * @param string $name * @return void */ public static function unregisterOcrBackend( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterOcrBackend($name); // delegate to native extension class } /** * clearOcrBackends. * * @return void */ public static function clearOcrBackends( ) : void { \Kreuzberg\KreuzbergApi::clearOcrBackends(); // delegate to native extension class } /** * registerPostProcessor. * * @param PostProcessor $backend * @return void */ public static function registerPostProcessor( PostProcessor $backend) : void { \Kreuzberg\KreuzbergApi::registerPostProcessor($backend); // delegate to native extension class } /** * unregisterPostProcessor. * * @param string $name * @return void */ public static function unregisterPostProcessor( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterPostProcessor($name); // delegate to native extension class } /** * clearPostProcessors. * * @return void */ public static function clearPostProcessors( ) : void { \Kreuzberg\KreuzbergApi::clearPostProcessors(); // delegate to native extension class } /** * registerValidator. * * @param Validator $backend * @return void */ public static function registerValidator( Validator $backend) : void { \Kreuzberg\KreuzbergApi::registerValidator($backend); // delegate to native extension class } /** * unregisterValidator. * * @param string $name * @return void */ public static function unregisterValidator( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterValidator($name); // delegate to native extension class } /** * clearValidators. * * @return void */ public static function clearValidators( ) : void { \Kreuzberg\KreuzbergApi::clearValidators(); // delegate to native extension class } /** * registerEmbeddingBackend. 
* * @param EmbeddingBackend $backend * @return void */ public static function registerEmbeddingBackend( EmbeddingBackend $backend) : void { \Kreuzberg\KreuzbergApi::registerEmbeddingBackend($backend); // delegate to native extension class } /** * unregisterEmbeddingBackend. * * @param string $name * @return void */ public static function unregisterEmbeddingBackend( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterEmbeddingBackend($name); // delegate to native extension class } /** * clearEmbeddingBackends. * * @return void */ public static function clearEmbeddingBackends( ) : void { \Kreuzberg\KreuzbergApi::clearEmbeddingBackends(); // delegate to native extension class } /** * registerDocumentExtractor. * * @param DocumentExtractor $backend * @return void */ public static function registerDocumentExtractor( DocumentExtractor $backend) : void { \Kreuzberg\KreuzbergApi::registerDocumentExtractor($backend); // delegate to native extension class } /** * unregisterDocumentExtractor. * * @param string $name * @return void */ public static function unregisterDocumentExtractor( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterDocumentExtractor($name); // delegate to native extension class } /** * clearDocumentExtractors. * * @return void */ public static function clearDocumentExtractors( ) : void { \Kreuzberg\KreuzbergApi::clearDocumentExtractors(); // delegate to native extension class } /** * registerRenderer. * * @param Renderer $backend * @return void */ public static function registerRenderer( Renderer $backend) : void { \Kreuzberg\KreuzbergApi::registerRenderer($backend); // delegate to native extension class } /** * unregisterRenderer. * * @param string $name * @return void */ public static function unregisterRenderer( string $name) : void { \Kreuzberg\KreuzbergApi::unregisterRenderer($name); // delegate to native extension class } /** * clearRenderers. 
* * @return void */ public static function clearRenderers( ) : void { \Kreuzberg\KreuzbergApi::clearRenderers(); // delegate to native extension class } }