packages/kotlin-android/src/main/kotlin/dev/kreuzberg/IDocumentExtractor.kt

// Generated by alef. Do not edit by hand.
@file:Suppress(
    "ktlint:standard:trailing-comma-on-call-site",
    "ktlint:standard:trailing-comma-on-declaration-site",
    "ktlint:standard:spacing-between-declarations-with-comments",
    "ktlint:standard:spacing-between-declarations-with-annotations",
    "ktlint:standard:when-entry-bracing",
    "ktlint:standard:blank-line-between-when-conditions",
    "ktlint:standard:blank-line-before-declaration",
    "ktlint:standard:chain-method-continuation",
    "ktlint:standard:annotation",
    "ktlint:standard:max-line-length",
    "ktlint:standard:no-semi",
    "ktlint:standard:statement-wrapping",
    "MaxLineLength",
    "TooManyFunctions",
    "FunctionParameterNaming",
    "LongParameterList",
    "CyclomaticComplexMethod",
    "LongMethod",
)

package dev.kreuzberg

import java.nio.file.Path

/**
 * Contract for document extractor plugins.
 *
 * Implement this interface to add support for new document formats or to override
 * built-in extraction behavior with custom logic.
 *
 * # Return Type
 *
 * Both extraction methods declared here ([extractBytes] and [extractFile]) return
 * an [ExtractionResult].
 *
 * # Priority System
 *
 * When multiple extractors support the same MIME type, the registry selects
 * the extractor with the highest priority value. Use this to:
 *
 * - Override built-in extractors (priority > 50)
 * - Provide fallback extractors (priority < 50)
 * - Implement specialized extractors for specific use cases
 *
 * Default priority is 50 (see [priority]).
 *
 * # Thread Safety
 *
 * Extractors must be thread-safe to support concurrent extraction.
 */
interface IDocumentExtractor {
    /** Returns the name of this extractor. */
    fun name(): String

    /** Returns the version of this extractor. */
    fun version(): String

    /** Initialization hook. The default implementation does nothing. */
    fun initialize() {}

    /** Shutdown hook. The default implementation does nothing. */
    fun shutdown() {}

    /**
     * Extract content from a byte array.
     *
     * This is the core extraction method that processes in-memory document data.
     *
     * Documented failure modes:
     *
     * - `KreuzbergError.Parsing` - Document parsing failed
     * - `KreuzbergError.Validation` - Invalid document structure
     * - `KreuzbergError.Io` - I/O errors (these always bubble up)
     * - `KreuzbergError.MissingDependency` - Required dependency not available
     *
     * @param content raw bytes of the document to extract
     * @param mimeType MIME type of [content]
     * @param config extraction configuration to apply
     * @return the [ExtractionResult] produced for the document
     */
    suspend fun extractBytes(
        content: ByteArray,
        mimeType: String,
        config: ExtractionConfig,
    ): ExtractionResult

    /**
     * Extract content from a file.
     *
     * This interface does not supply a default implementation, so every
     * implementer must provide one. An implementation that has no need for custom
     * file handling, streaming, or memory optimizations can read the file and
     * delegate to [extractBytes].
     *
     * NOTE(review): the upstream documentation describes a built-in default that
     * reads the file and delegates to the byte-based method; confirm whether this
     * binding is expected to provide it.
     *
     * Documented failure modes: same as [extractBytes], plus file I/O errors.
     *
     * @param path location of the file to extract
     * @param mimeType MIME type of the file at [path]
     * @param config extraction configuration to apply
     * @return the [ExtractionResult] produced for the file
     */
    suspend fun extractFile(
        path: Path,
        mimeType: String,
        config: ExtractionConfig,
    ): ExtractionResult

    /**
     * Get the list of MIME types supported by this extractor.
     *
     * Can include exact MIME types and prefix patterns:
     *
     * - Exact: `"application/pdf"`, `"text/plain"`
     * - Prefix: "image/&#42;", i.e. `image/` followed by `*` (matches any image type)
     *
     * The asterisk above is deliberately written as an HTML entity: Kotlin block
     * comments nest, so a literal slash-asterisk pair inside this KDoc would open
     * an inner comment and swallow the declarations that follow.
     *
     * @return the supported MIME type strings
     */
    fun supportedMimeTypes(): List<String>

    /**
     * Get the priority of this extractor.
     *
     * Higher priority extractors are preferred when multiple extractors
     * support the same MIME type.
     *
     * # Priority Guidelines
     *
     * - **0-25**: Fallback/low-quality extractors
     * - **26-49**: Alternative extractors
     * - **50**: Default priority (built-in extractors)
     * - **51-75**: Premium/enhanced extractors
     * - **76-100**: Specialized/high-priority extractors
     *
     * @return the priority value; 50 unless overridden
     */
    fun priority(): Int = 50

    /**
     * Optional: check if this extractor can handle a specific file.
     *
     * Allows for more sophisticated detection beyond MIME types.
     * The default implementation returns `true`, i.e. it relies on MIME type
     * matching alone.
     *
     * @param path location of the candidate file
     * @param mimeType MIME type of the candidate file
     * @return `true` if the extractor can handle this file, `false` otherwise
     */
    fun canHandle(path: Path, mimeType: String): Boolean = true
}
Nomad changes 2026-06-01 23:40:55 +02:00			`// Generated by alef. Do not edit by hand.`
			`@file:Suppress(`
			`"ktlint:standard:trailing-comma-on-call-site",`
			`"ktlint:standard:trailing-comma-on-declaration-site",`
			`"ktlint:standard:spacing-between-declarations-with-comments",`
			`"ktlint:standard:spacing-between-declarations-with-annotations",`
			`"ktlint:standard:when-entry-bracing",`
			`"ktlint:standard:blank-line-between-when-conditions",`
			`"ktlint:standard:blank-line-before-declaration",`
			`"ktlint:standard:chain-method-continuation",`
			`"ktlint:standard:annotation",`
			`"ktlint:standard:max-line-length",`
			`"ktlint:standard:no-semi",`
			`"ktlint:standard:statement-wrapping",`
			`"MaxLineLength",`
			`"TooManyFunctions",`
			`"FunctionParameterNaming",`
			`"LongParameterList",`
			`"CyclomaticComplexMethod",`
			`"LongMethod",`
			`)`

			`package dev.kreuzberg`

			`import java.nio.file.Path`

			`/**`
			`* Trait for document extractor plugins.`
			`*`
			`* Implement this trait to add support for new document formats or to override`
			`* built-in extraction behavior with custom logic.`
			`*`
			`* # Return Type`
			`*`
			* Extractors return `InternalDocument`, a flat intermediate representation.
			* The pipeline converts this into the public `ExtractionResult` via the
			`* derivation step.`
			`*`
			`* # Priority System`
			`*`
			`* When multiple extractors support the same MIME type, the registry selects`
			`* the extractor with the highest priority value. Use this to:`
			`*`
			`* - Override built-in extractors (priority > 50)`
			`* - Provide fallback extractors (priority < 50)`
			`* - Implement specialized extractors for specific use cases`
			`*`
			`* Default priority is 50.`
			`*`
			`* # Thread Safety`
			`*`
			* Extractors must be thread-safe (`Send + Sync`) to support concurrent extraction.
			`*/`
			`interface IDocumentExtractor {`
			`fun name(): String`
			`fun version(): String`
			`fun initialize() {}`
			`fun shutdown() {}`
			`/**`
			`* Extract content from a byte array.`
			`*`
			`* This is the core extraction method that processes in-memory document data.`
			`*`
			`* Returns:`
			`*`
			* An `InternalDocument` containing the extracted elements, metadata, and tables.
			* The pipeline will convert this into the public `ExtractionResult`.
			`*`
			`* Errors:`
			`*`
			* - `KreuzbergError.Parsing` - Document parsing failed
			* - `KreuzbergError.Validation` - Invalid document structure
			* - `KreuzbergError.Io` - I/O errors (these always bubble up)
			* - `KreuzbergError.MissingDependency` - Required dependency not available
			`*/`
			`suspend fun extractBytes(`
			`content: ByteArray,`
			`mimeType: String,`
			`config: ExtractionConfig,`
			`): ExtractionResult`
			`/**`
			`* Extract content from a file.`
			`*`
			* Default implementation reads the file and calls `extract_bytes`.
			`* Override for custom file handling, streaming, or memory optimizations.`
			`*`
			`* Returns:`
			`*`
			* An `InternalDocument` containing the extracted elements, metadata, and tables.
			`*`
			`* Errors:`
			`*`
			* Same as `extract_bytes`, plus file I/O errors.
			`*/`
			`suspend fun extractFile(`
			`path: java.nio.file.Path,`
			`mimeType: String,`
			`config: ExtractionConfig,`
			`): ExtractionResult`
			`/**`
			`* Get the list of MIME types supported by this extractor.`
			`*`
			`* Can include exact MIME types and prefix patterns:`
			`*`
			* - Exact: `"application/pdf"`, `"text/plain"`
			* - Prefix: `"image/*"` (matches any image type)
			`*`
			`* Returns:`
			`*`
			`* A slice of MIME type strings.`
			`*/`
			`fun supportedMimeTypes(): List<String>`
			`/**`
			`* Get the priority of this extractor.`
			`*`
			`* Higher priority extractors are preferred when multiple extractors`
			`* support the same MIME type.`
			`*`
			`* # Priority Guidelines`
			`*`
			`* - 0-25: Fallback/low-quality extractors`
			`* - 26-49: Alternative extractors`
			`* - 50: Default priority (built-in extractors)`
			`* - 51-75: Premium/enhanced extractors`
			`* - 76-100: Specialized/high-priority extractors`
			`*`
			`* Returns:`
			`*`
			`* Priority value (default: 50)`
			`*/`
			`fun priority(): Int`
			`/**`
			`* Optional: Check if this extractor can handle a specific file.`
			`*`
			`* Allows for more sophisticated detection beyond MIME types.`
			* Defaults to `true` (rely on MIME type matching).
			`*`
			`* Returns:`
			`*`
			* `true` if the extractor can handle this file, `false` otherwise.
			`*/`
			`fun canHandle(path: java.nio.file.Path, mimeType: String): Boolean`
			`}`