Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/packages/kotlin-android/src/main/kotlin/dev/kreuzberg/IDocumentExtractor.kt
+++ b/packages/kotlin-android/src/main/kotlin/dev/kreuzberg/IDocumentExtractor.kt
@@ -0,0 +1,143 @@
+// Generated by alef. Do not edit by hand.
+@file:Suppress(
+    "ktlint:standard:trailing-comma-on-call-site",
+    "ktlint:standard:trailing-comma-on-declaration-site",
+    "ktlint:standard:spacing-between-declarations-with-comments",
+    "ktlint:standard:spacing-between-declarations-with-annotations",
+    "ktlint:standard:when-entry-bracing",
+    "ktlint:standard:blank-line-between-when-conditions",
+    "ktlint:standard:blank-line-before-declaration",
+    "ktlint:standard:chain-method-continuation",
+    "ktlint:standard:annotation",
+    "ktlint:standard:max-line-length",
+    "ktlint:standard:no-semi",
+    "ktlint:standard:statement-wrapping",
+    "MaxLineLength",
+    "TooManyFunctions",
+    "FunctionParameterNaming",
+    "LongParameterList",
+    "CyclomaticComplexMethod",
+    "LongMethod",
+)
+
+package dev.kreuzberg
+
+import java.nio.file.Path
+
+/**
+ * Contract for document extractor plugins.
+ *
+ * Implement this interface to add support for new document formats or to
+ * override built-in extraction behavior with custom logic.
+ *
+ * # Return Type
+ *
+ * Both extraction methods ([extractBytes] and [extractFile]) return an
+ * [ExtractionResult].
+ *
+ * # Priority System
+ *
+ * When multiple extractors support the same MIME type, the registry selects
+ * the extractor with the highest priority value. Use this to:
+ *
+ * - Override built-in extractors (priority > 50)
+ * - Provide fallback extractors (priority < 50)
+ * - Implement specialized extractors for specific use cases
+ *
+ * Default priority is 50 ([DEFAULT_PRIORITY]).
+ *
+ * # Thread Safety
+ *
+ * Implementations must be thread-safe: extraction may run concurrently, so
+ * the same instance can be called from several threads at once.
+ */
+interface IDocumentExtractor {
+    /** Returns the name of this extractor. */
+    fun name(): String
+
+    /** Returns the version string of this extractor. */
+    fun version(): String
+
+    /**
+     * Lifecycle hook; does nothing by default.
+     *
+     * NOTE(review): presumably invoked once before the extractor is first
+     * used - confirm against the plugin registry.
+     */
+    fun initialize() {}
+
+    /**
+     * Lifecycle hook; does nothing by default.
+     *
+     * NOTE(review): presumably invoked when the extractor is unregistered or
+     * the host shuts down - confirm against the plugin registry.
+     */
+    fun shutdown() {}
+
+    /**
+     * Extract content from a byte array.
+     *
+     * This is the core extraction method that processes in-memory document data.
+     *
+     * **Errors:**
+     *
+     * - `KreuzbergError.Parsing` - Document parsing failed
+     * - `KreuzbergError.Validation` - Invalid document structure
+     * - `KreuzbergError.Io` - I/O errors (these always bubble up)
+     * - `KreuzbergError.MissingDependency` - Required dependency not available
+     *
+     * @param content raw bytes of the document.
+     * @param mimeType MIME type of [content].
+     * @param config extraction settings to apply.
+     * @return the extraction result for the document.
+     */
+    suspend fun extractBytes(
+        content: ByteArray,
+        mimeType: String,
+        config: ExtractionConfig,
+    ): ExtractionResult
+
+    /**
+     * Extract content from a file.
+     *
+     * The default implementation reads the whole file into memory and
+     * delegates to [extractBytes]. The read is a blocking call executed on
+     * the caller's coroutine context; override this method for custom file
+     * handling, streaming, memory optimizations, or to move the read onto an
+     * I/O dispatcher.
+     *
+     * **Errors:**
+     *
+     * Same as [extractBytes], plus file I/O errors (the default
+     * implementation propagates whatever `Files.readAllBytes` throws,
+     * e.g. `java.io.IOException`).
+     *
+     * @param path location of the document on disk.
+     * @param mimeType MIME type of the file at [path].
+     * @param config extraction settings to apply.
+     * @return the extraction result for the document.
+     */
+    suspend fun extractFile(
+        path: java.nio.file.Path,
+        mimeType: String,
+        config: ExtractionConfig,
+    ): ExtractionResult = extractBytes(java.nio.file.Files.readAllBytes(path), mimeType, config)
+
+    /**
+     * Get the list of MIME types supported by this extractor.
+     *
+     * Can include exact MIME types and prefix patterns:
+     *
+     * - Exact: `"application/pdf"`, `"text/plain"`
+     * - Prefix: `"image/*"` (matches any image type)
+     *
+     * @return the supported MIME type strings.
+     */
+    fun supportedMimeTypes(): List<String>
+
+    /**
+     * Get the priority of this extractor.
+     *
+     * Higher priority extractors are preferred when multiple extractors
+     * support the same MIME type.
+     *
+     * # Priority Guidelines
+     *
+     * - **0-25**: Fallback/low-quality extractors
+     * - **26-49**: Alternative extractors
+     * - **50**: Default priority (built-in extractors)
+     * - **51-75**: Premium/enhanced extractors
+     * - **76-100**: Specialized/high-priority extractors
+     *
+     * @return the priority value; [DEFAULT_PRIORITY] (50) unless overridden.
+     */
+    fun priority(): Int = DEFAULT_PRIORITY
+
+    /**
+     * Optional: check if this extractor can handle a specific file.
+     *
+     * Allows for more sophisticated detection beyond MIME types.
+     * The default implementation returns `true`, i.e. it relies on MIME type
+     * matching alone.
+     *
+     * @param path location of the candidate file.
+     * @param mimeType MIME type detected for the file at [path].
+     * @return `true` if the extractor can handle this file, `false` otherwise.
+     */
+    fun canHandle(path: java.nio.file.Path, mimeType: String): Boolean = true
+
+    companion object {
+        /** Priority returned by [priority] when an implementation does not override it. */
+        const val DEFAULT_PRIORITY: Int = 50
+    }
+}