// This file is auto-generated by alef. DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// Re-generate with: alef generate
#![allow(dead_code, unused_imports, unused_variables)]
#![allow(
    clippy::too_many_arguments,
    clippy::let_unit_value,
    clippy::needless_borrow,
    clippy::map_identity,
    clippy::just_underscores_and_digits,
    clippy::unused_unit,
    clippy::unnecessary_cast,
    clippy::unwrap_or_default,
    clippy::derivable_impls,
    clippy::needless_borrows_for_generic_args,
    clippy::unnecessary_fallible_conversions,
    clippy::useless_conversion,
    clippy::arc_with_non_send_sync,
    clippy::collapsible_if,
    clippy::clone_on_copy,
    clippy::should_implement_trait,
    clippy::await_holding_refcell_ref
)]

use std::sync::Arc;
use std::sync::Mutex;
use wasm_bindgen::prelude::*;

/// Cache statistics exported to JavaScript through `wasm_bindgen`.
///
/// All fields are private; JavaScript reads and writes them through the
/// camelCase property accessors defined in the `impl` block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCacheStats {
    // NOTE(review): units are presumably those hinted at by the field suffixes
    // (`_mb`, `_days`) — nothing in this file enforces or converts them.
    total_files: usize,
    total_size_mb: f64,
    available_space_mb: f64,
    oldest_file_age_days: f64,
    newest_file_age_days: f64,
}

#[wasm_bindgen]
impl WasmCacheStats {
    /// JS constructor: stores each argument in the matching field unchanged.
    ///
    /// Every argument is required; there are no fallback values here.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        totalFiles: usize,
        totalSizeMb: f64,
        availableSpaceMb: f64,
        oldestFileAgeDays: f64,
        newestFileAgeDays: f64,
    ) -> WasmCacheStats {
        WasmCacheStats {
            total_files: totalFiles,
            total_size_mb: totalSizeMb,
            available_space_mb: availableSpaceMb,
            oldest_file_age_days: oldestFileAgeDays,
            newest_file_age_days: newestFileAgeDays,
        }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCacheStats {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output before relying on it.
        ::default()
    }

    /// Reads `total_files` (JS property `totalFiles`).
    #[wasm_bindgen(getter, js_name = "totalFiles")]
    pub fn total_files(&self) -> usize {
        self.total_files
    }

    /// Overwrites `total_files` (JS property `totalFiles`).
    #[wasm_bindgen(setter, js_name = "totalFiles")]
    pub fn set_total_files(&mut self, value: usize) {
        self.total_files = value;
    }

    /// Reads `total_size_mb` (JS property `totalSizeMb`).
    #[wasm_bindgen(getter, js_name = "totalSizeMb")]
    pub fn total_size_mb(&self) -> f64 {
        self.total_size_mb
    }

    /// Overwrites `total_size_mb` (JS property `totalSizeMb`).
    #[wasm_bindgen(setter, js_name = "totalSizeMb")]
    pub fn set_total_size_mb(&mut self, value: f64) {
        self.total_size_mb = value;
    }

    /// Reads `available_space_mb` (JS property `availableSpaceMb`).
    #[wasm_bindgen(getter, js_name = "availableSpaceMb")]
    pub fn available_space_mb(&self) -> f64 {
        self.available_space_mb
    }

    /// Overwrites `available_space_mb` (JS property `availableSpaceMb`).
    #[wasm_bindgen(setter, js_name = "availableSpaceMb")]
    pub fn set_available_space_mb(&mut self, value: f64) {
        self.available_space_mb = value;
    }

    /// Reads `oldest_file_age_days` (JS property `oldestFileAgeDays`).
    #[wasm_bindgen(getter, js_name = "oldestFileAgeDays")]
    pub fn oldest_file_age_days(&self) -> f64 {
        self.oldest_file_age_days
    }

    /// Overwrites `oldest_file_age_days` (JS property `oldestFileAgeDays`).
    #[wasm_bindgen(setter, js_name = "oldestFileAgeDays")]
    pub fn set_oldest_file_age_days(&mut self, value: f64) {
        self.oldest_file_age_days = value;
    }

    /// Reads `newest_file_age_days` (JS property `newestFileAgeDays`).
    #[wasm_bindgen(getter, js_name = "newestFileAgeDays")]
    pub fn newest_file_age_days(&self) -> f64 {
        self.newest_file_age_days
    }

    /// Overwrites `newest_file_age_days` (JS property `newestFileAgeDays`).
    #[wasm_bindgen(setter, js_name = "newestFileAgeDays")]
    pub fn set_newest_file_age_days(&mut self, value: f64) {
        self.newest_file_age_days = value;
    }
}

/// Hardware acceleration configuration for ONNX Runtime models.
///
/// Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used
/// for inference in layout detection and embedding generation.
///
/// # Example
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmAccelerationConfig {
    provider: WasmExecutionProviderType,
    device_id: u32,
}

#[wasm_bindgen]
impl WasmAccelerationConfig {
    /// JS constructor: both arguments are optional and fall back to the
    /// `Default` value of the corresponding field type when absent.
    // NOTE(review): the type arguments of the `Option` parameters are missing in
    // this view; from the field types they are presumably the provider enum and
    // `u32` — confirm against the generator output.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(provider: Option, deviceId: Option) -> WasmAccelerationConfig {
        WasmAccelerationConfig {
            provider: provider.unwrap_or_default(),
            device_id: deviceId.unwrap_or_default(),
        }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmAccelerationConfig {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output.
        ::default()
    }

    /// Returns the provider as an owned string obtained from `to_api_str()`
    /// (JS property `provider`).
    ///
    /// Note the asymmetry with the setter: reading yields a `String`, while
    /// writing takes a `WasmExecutionProviderType` value.
    #[wasm_bindgen(getter)]
    pub fn provider(&self) -> String {
        self.provider.to_api_str().to_owned()
    }

    /// Overwrites `provider` (JS property `provider`).
    #[wasm_bindgen(setter)]
    pub fn set_provider(&mut self, value: WasmExecutionProviderType) {
        self.provider = value;
    }

    /// Reads `device_id` (JS property `deviceId`).
    #[wasm_bindgen(getter, js_name = "deviceId")]
    pub fn device_id(&self) -> u32 {
        self.device_id
    }

    /// Overwrites `device_id` (JS property `deviceId`).
    #[wasm_bindgen(setter, js_name = "deviceId")]
    pub fn set_device_id(&mut self, value: u32) {
        self.device_id = value;
    }
}

/// Cross-extractor content filtering configuration.
///
/// Controls whether "furniture" content (headers, footers, page numbers,
/// watermarks, repeating text) is included in or stripped from extraction
/// results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.)
/// with format-specific implementation.
///
/// When `None` on `ExtractionConfig`, each extractor uses its current
/// default behavior unchanged.
// NOTE(review): `#[derive(Default)]` yields `false` for every flag below, whereas
// `new()` falls back to `strip_repeating_text = true` and the exported `default()`
// delegates to `kreuzberg::ContentFilterConfig::default()`. Confirm that Rust-side
// uses of the derived `Default` are intended to differ from those two paths.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmContentFilterConfig {
    include_headers: bool,
    include_footers: bool,
    strip_repeating_text: bool,
    include_watermarks: bool,
}

#[wasm_bindgen]
impl WasmContentFilterConfig {
    /// JS constructor with optional flags.
    ///
    /// Fallbacks when an argument is absent: `includeHeaders` = `false`,
    /// `includeFooters` = `false`, `stripRepeatingText` = `true`,
    /// `includeWatermarks` = `false`.
    // NOTE(review): the `Option` type arguments are missing in this view
    // (presumably `bool`, given the `unwrap_or` literals) — confirm.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        includeHeaders: Option,
        includeFooters: Option,
        stripRepeatingText: Option,
        includeWatermarks: Option,
    ) -> WasmContentFilterConfig {
        WasmContentFilterConfig {
            include_headers: includeHeaders.unwrap_or(false),
            include_footers: includeFooters.unwrap_or(false),
            strip_repeating_text: stripRepeatingText.unwrap_or(true),
            include_watermarks: includeWatermarks.unwrap_or(false),
        }
    }

    /// Reads `include_headers` (JS property `includeHeaders`).
    #[wasm_bindgen(getter, js_name = "includeHeaders")]
    pub fn include_headers(&self) -> bool {
        self.include_headers
    }

    /// Overwrites `include_headers` (JS property `includeHeaders`).
    #[wasm_bindgen(setter, js_name = "includeHeaders")]
    pub fn set_include_headers(&mut self, value: bool) {
        self.include_headers = value;
    }

    /// Reads `include_footers` (JS property `includeFooters`).
    #[wasm_bindgen(getter, js_name = "includeFooters")]
    pub fn include_footers(&self) -> bool {
        self.include_footers
    }

    /// Overwrites `include_footers` (JS property `includeFooters`).
    #[wasm_bindgen(setter, js_name = "includeFooters")]
    pub fn set_include_footers(&mut self, value: bool) {
        self.include_footers = value;
    }

    /// Reads `strip_repeating_text` (JS property `stripRepeatingText`).
    #[wasm_bindgen(getter, js_name = "stripRepeatingText")]
    pub fn strip_repeating_text(&self) -> bool {
        self.strip_repeating_text
    }

    /// Overwrites `strip_repeating_text` (JS property `stripRepeatingText`).
    #[wasm_bindgen(setter, js_name = "stripRepeatingText")]
    pub fn set_strip_repeating_text(&mut self, value: bool) {
        self.strip_repeating_text = value;
    }

    /// Reads `include_watermarks` (JS property `includeWatermarks`).
    #[wasm_bindgen(getter, js_name = "includeWatermarks")]
    pub fn include_watermarks(&self) -> bool {
        self.include_watermarks
    }

    /// Overwrites `include_watermarks` (JS property `includeWatermarks`).
    #[wasm_bindgen(setter, js_name = "includeWatermarks")]
    pub fn set_include_watermarks(&mut self, value: bool) {
        self.include_watermarks = value;
    }

    /// Static factory exported to JS: builds the core
    /// `kreuzberg::ContentFilterConfig::default()` and converts it with `.into()`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmContentFilterConfig {
        kreuzberg::ContentFilterConfig::default().into()
    }
}

/// Configuration for email extraction.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailConfig {
    // NOTE(review): the `Option` type argument is missing in this view; the getter
    // returns the field without `.clone()`, so it is presumably a `Copy` type.
    msg_fallback_codepage: Option,
}

#[wasm_bindgen]
impl WasmEmailConfig {
    /// JS constructor: stores the optional codepage as given (no fallback).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(msgFallbackCodepage: Option) -> WasmEmailConfig {
        WasmEmailConfig {
            msg_fallback_codepage: msgFallbackCodepage,
        }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmailConfig {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output.
        ::default()
    }

    /// Reads `msg_fallback_codepage` (JS property `msgFallbackCodepage`).
    #[wasm_bindgen(getter, js_name = "msgFallbackCodepage")]
    pub fn msg_fallback_codepage(&self) -> Option {
        self.msg_fallback_codepage
    }

    /// Overwrites `msg_fallback_codepage` (JS property `msgFallbackCodepage`).
    #[wasm_bindgen(setter, js_name = "msgFallbackCodepage")]
    pub fn set_msg_fallback_codepage(&mut self, value: Option) {
        self.msg_fallback_codepage = value;
    }
}

/// Main extraction configuration.
///
/// This struct contains all configuration options for the extraction process.
/// It can be loaded from TOML, YAML, or JSON files, or created programmatically.
///
/// # Example
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExtractionConfig {
    // NOTE(review): the type arguments of every `Option` below (and the inner
    // collection type of `force_ocr_pages`) are missing in this view — consult
    // the generator output for the concrete types.
    use_cache: bool,
    enable_quality_processing: bool,
    ocr: Option,
    force_ocr: bool,
    force_ocr_pages: Option>,
    disable_ocr: bool,
    chunking: Option,
    content_filter: Option,
    images: Option,
    token_reduction: Option,
    language_detection: Option,
    pages: Option,
    postprocessor: Option,
    html_options: Option,
    extraction_timeout_secs: Option,
    max_concurrent_extractions: Option,
    result_format: WasmResultFormat,
    security_limits: Option,
    max_embedded_file_bytes: Option,
    output_format: WasmOutputFormat,
    use_layout_for_markdown: bool,
    include_document_structure: bool,
    acceleration: Option,
    cache_namespace: Option,
    cache_ttl_secs: Option,
    email: Option,
    concurrency: Option,
    max_archive_depth: usize,
    structured_extraction: Option,
    cancel_token: Option,
}

#[wasm_bindgen]
impl WasmExtractionConfig {
    /// JS constructor; every argument is optional.
    ///
    /// Fallbacks applied when an argument is absent:
    /// - `useCache`, `enableQualityProcessing` → `true`
    /// - `forceOcr`, `disableOcr`, `useLayoutForMarkdown`,
    ///   `includeDocumentStructure` → `false`
    /// - `resultFormat`, `outputFormat`, `maxArchiveDepth` → the `Default` of
    ///   the field type
    ///
    /// All remaining arguments are stored as given. `html_options` has no
    /// constructor parameter and always starts at `Default::default()`; use the
    /// `htmlOptions` setter to change it.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        useCache: Option,
        enableQualityProcessing: Option,
        forceOcr: Option,
        disableOcr: Option,
        resultFormat: Option,
        outputFormat: Option,
        useLayoutForMarkdown: Option,
        includeDocumentStructure: Option,
        maxArchiveDepth: Option,
        ocr: Option,
        forceOcrPages: Option>,
        chunking: Option,
        contentFilter: Option,
        images: Option,
        tokenReduction: Option,
        languageDetection: Option,
        pages: Option,
        postprocessor: Option,
        extractionTimeoutSecs: Option,
        maxConcurrentExtractions: Option,
        securityLimits: Option,
        maxEmbeddedFileBytes: Option,
        acceleration: Option,
        cacheNamespace: Option,
        cacheTtlSecs: Option,
        email: Option,
        concurrency: Option,
        structuredExtraction: Option,
        cancelToken: Option,
    ) -> WasmExtractionConfig {
        WasmExtractionConfig {
            use_cache: useCache.unwrap_or(true),
            enable_quality_processing: enableQualityProcessing.unwrap_or(true),
            ocr,
            force_ocr: forceOcr.unwrap_or(false),
            force_ocr_pages: forceOcrPages,
            disable_ocr: disableOcr.unwrap_or(false),
            chunking,
            content_filter: contentFilter,
            images,
            token_reduction: tokenReduction,
            language_detection: languageDetection,
            pages,
            postprocessor,
            // Not settable through the constructor; see the `htmlOptions` setter.
            html_options: Default::default(),
            extraction_timeout_secs: extractionTimeoutSecs,
            max_concurrent_extractions: maxConcurrentExtractions,
            result_format: resultFormat.unwrap_or_default(),
            security_limits: securityLimits,
            max_embedded_file_bytes: maxEmbeddedFileBytes,
            output_format: outputFormat.unwrap_or_default(),
            use_layout_for_markdown: useLayoutForMarkdown.unwrap_or(false),
            include_document_structure: includeDocumentStructure.unwrap_or(false),
            acceleration,
            cache_namespace: cacheNamespace,
            cache_ttl_secs: cacheTtlSecs,
            email,
            concurrency,
            // NOTE(review): for a `usize` field this falls back to 0. Confirm that 0
            // matches the depth used by `kreuzberg::ExtractionConfig::default()`,
            // which the exported `default()` below delegates to.
            max_archive_depth: maxArchiveDepth.unwrap_or_default(),
            structured_extraction: structuredExtraction,
            cancel_token: cancelToken,
        }
    }

    /// Reads `use_cache` (JS property `useCache`).
    #[wasm_bindgen(getter, js_name = "useCache")]
    pub fn use_cache(&self) -> bool {
        self.use_cache
    }

    /// Overwrites `use_cache` (JS property `useCache`).
    #[wasm_bindgen(setter, js_name = "useCache")]
    pub fn set_use_cache(&mut self, value: bool) {
        self.use_cache = value;
    }

    /// Reads `enable_quality_processing` (JS property `enableQualityProcessing`).
    #[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
    pub fn enable_quality_processing(&self) -> bool {
        self.enable_quality_processing
    }

    /// Overwrites `enable_quality_processing` (JS property `enableQualityProcessing`).
    #[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
    pub fn set_enable_quality_processing(&mut self, value: bool) {
        self.enable_quality_processing = value;
    }

    /// Returns a clone of `ocr` (JS property `ocr`).
    #[wasm_bindgen(getter)]
    pub fn ocr(&self) -> Option {
        self.ocr.clone()
    }

    /// Overwrites `ocr` (JS property `ocr`).
    #[wasm_bindgen(setter)]
    pub fn set_ocr(&mut self, value: Option) {
        self.ocr = value;
    }

    /// Reads `force_ocr` (JS property `forceOcr`).
    #[wasm_bindgen(getter, js_name = "forceOcr")]
    pub fn force_ocr(&self) -> bool {
        self.force_ocr
    }

    /// Overwrites `force_ocr` (JS property `forceOcr`).
    #[wasm_bindgen(setter, js_name = "forceOcr")]
    pub fn set_force_ocr(&mut self, value: bool) {
        self.force_ocr = value;
    }

    /// Returns a clone of `force_ocr_pages` (JS property `forceOcrPages`).
    #[wasm_bindgen(getter, js_name = "forceOcrPages")]
    pub fn force_ocr_pages(&self) -> Option> {
        self.force_ocr_pages.clone()
    }

    /// Overwrites `force_ocr_pages` (JS property `forceOcrPages`).
    #[wasm_bindgen(setter, js_name = "forceOcrPages")]
    pub fn set_force_ocr_pages(&mut self, value: Option>) {
        self.force_ocr_pages = value;
    }

    /// Reads `disable_ocr` (JS property `disableOcr`).
    #[wasm_bindgen(getter, js_name = "disableOcr")]
    pub fn disable_ocr(&self) -> bool {
        self.disable_ocr
    }

    /// Overwrites `disable_ocr` (JS property `disableOcr`).
    #[wasm_bindgen(setter, js_name = "disableOcr")]
    pub fn set_disable_ocr(&mut self, value: bool) {
        self.disable_ocr = value;
    }

    /// Returns a clone of `chunking` (JS property `chunking`).
    #[wasm_bindgen(getter)]
    pub fn chunking(&self) -> Option {
        self.chunking.clone()
    }

    /// Overwrites `chunking` (JS property `chunking`).
    #[wasm_bindgen(setter)]
    pub fn set_chunking(&mut self, value: Option) {
        self.chunking = value;
    }

    /// Returns a clone of `content_filter` (JS property `contentFilter`).
    #[wasm_bindgen(getter, js_name = "contentFilter")]
    pub fn content_filter(&self) -> Option {
        self.content_filter.clone()
    }

    /// Overwrites `content_filter` (JS property `contentFilter`).
    #[wasm_bindgen(setter, js_name = "contentFilter")]
    pub fn set_content_filter(&mut self, value: Option) {
        self.content_filter = value;
    }

    /// Returns a clone of `images` (JS property `images`).
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Option {
        self.images.clone()
    }

    /// Overwrites `images` (JS property `images`).
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Option) {
        self.images = value;
    }

    /// Returns a clone of `token_reduction` (JS property `tokenReduction`).
    #[wasm_bindgen(getter, js_name = "tokenReduction")]
    pub fn token_reduction(&self) -> Option {
        self.token_reduction.clone()
    }

    /// Overwrites `token_reduction` (JS property `tokenReduction`).
    #[wasm_bindgen(setter, js_name = "tokenReduction")]
    pub fn set_token_reduction(&mut self, value: Option) {
        self.token_reduction = value;
    }

    /// Returns a clone of `language_detection` (JS property `languageDetection`).
    #[wasm_bindgen(getter, js_name = "languageDetection")]
    pub fn language_detection(&self) -> Option {
        self.language_detection.clone()
    }

    /// Overwrites `language_detection` (JS property `languageDetection`).
    #[wasm_bindgen(setter, js_name = "languageDetection")]
    pub fn set_language_detection(&mut self, value: Option) {
        self.language_detection = value;
    }

    /// Returns a clone of `pages` (JS property `pages`).
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option {
        self.pages.clone()
    }

    /// Overwrites `pages` (JS property `pages`).
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option) {
        self.pages = value;
    }

    /// Returns a clone of `postprocessor` (JS property `postprocessor`).
    #[wasm_bindgen(getter)]
    pub fn postprocessor(&self) -> Option {
        self.postprocessor.clone()
    }

    /// Overwrites `postprocessor` (JS property `postprocessor`).
    #[wasm_bindgen(setter)]
    pub fn set_postprocessor(&mut self, value: Option) {
        self.postprocessor = value;
    }

    /// Returns a clone of `html_options` (JS property `htmlOptions`).
    #[wasm_bindgen(getter, js_name = "htmlOptions")]
    pub fn html_options(&self) -> Option {
        self.html_options.clone()
    }

    /// Overwrites `html_options` (JS property `htmlOptions`); this is the only
    /// way to set it, since the constructor takes no matching argument.
    #[wasm_bindgen(setter, js_name = "htmlOptions")]
    pub fn set_html_options(&mut self, value: Option) {
        self.html_options = value;
    }

    /// Reads `extraction_timeout_secs` (JS property `extractionTimeoutSecs`).
    #[wasm_bindgen(getter, js_name = "extractionTimeoutSecs")]
    pub fn extraction_timeout_secs(&self) -> Option {
        self.extraction_timeout_secs
    }

    /// Overwrites `extraction_timeout_secs` (JS property `extractionTimeoutSecs`).
    #[wasm_bindgen(setter, js_name = "extractionTimeoutSecs")]
    pub fn set_extraction_timeout_secs(&mut self, value: Option) {
        self.extraction_timeout_secs = value;
    }

    /// Reads `max_concurrent_extractions` (JS property `maxConcurrentExtractions`).
    #[wasm_bindgen(getter, js_name = "maxConcurrentExtractions")]
    pub fn max_concurrent_extractions(&self) -> Option {
        self.max_concurrent_extractions
    }

    /// Overwrites `max_concurrent_extractions` (JS property `maxConcurrentExtractions`).
    #[wasm_bindgen(setter, js_name = "maxConcurrentExtractions")]
    pub fn set_max_concurrent_extractions(&mut self, value: Option) {
        self.max_concurrent_extractions = value;
    }

    /// Returns the result format as an owned string obtained from `to_api_str()`
    /// (JS property `resultFormat`). The setter takes the enum value instead.
    #[wasm_bindgen(getter, js_name = "resultFormat")]
    pub fn result_format(&self) -> String {
        self.result_format.to_api_str().to_owned()
    }

    /// Overwrites `result_format` (JS property `resultFormat`).
    #[wasm_bindgen(setter, js_name = "resultFormat")]
    pub fn set_result_format(&mut self, value: WasmResultFormat) {
        self.result_format = value;
    }

    /// Returns a clone of `security_limits` (JS property `securityLimits`).
    #[wasm_bindgen(getter, js_name = "securityLimits")]
    pub fn security_limits(&self) -> Option {
        self.security_limits.clone()
    }

    /// Overwrites `security_limits` (JS property `securityLimits`).
    #[wasm_bindgen(setter, js_name = "securityLimits")]
    pub fn set_security_limits(&mut self, value: Option) {
        self.security_limits = value;
    }

    /// Reads `max_embedded_file_bytes` (JS property `maxEmbeddedFileBytes`).
    #[wasm_bindgen(getter, js_name = "maxEmbeddedFileBytes")]
    pub fn max_embedded_file_bytes(&self) -> Option {
        self.max_embedded_file_bytes
    }

    /// Overwrites `max_embedded_file_bytes` (JS property `maxEmbeddedFileBytes`).
    #[wasm_bindgen(setter, js_name = "maxEmbeddedFileBytes")]
    pub fn set_max_embedded_file_bytes(&mut self, value: Option) {
        self.max_embedded_file_bytes = value;
    }

    /// Returns the output format as an owned string obtained from `to_api_str()`
    /// (JS property `outputFormat`). The setter takes the enum value instead.
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        self.output_format.to_api_str().to_owned()
    }

    /// Overwrites `output_format` (JS property `outputFormat`).
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: WasmOutputFormat) {
        self.output_format = value;
    }

    /// Reads `use_layout_for_markdown` (JS property `useLayoutForMarkdown`).
    #[wasm_bindgen(getter, js_name = "useLayoutForMarkdown")]
    pub fn use_layout_for_markdown(&self) -> bool {
        self.use_layout_for_markdown
    }

    /// Overwrites `use_layout_for_markdown` (JS property `useLayoutForMarkdown`).
    #[wasm_bindgen(setter, js_name = "useLayoutForMarkdown")]
    pub fn set_use_layout_for_markdown(&mut self, value: bool) {
        self.use_layout_for_markdown = value;
    }

    /// Reads `include_document_structure` (JS property `includeDocumentStructure`).
    #[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
    pub fn include_document_structure(&self) -> bool {
        self.include_document_structure
    }

    /// Overwrites `include_document_structure` (JS property `includeDocumentStructure`).
    #[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
    pub fn set_include_document_structure(&mut self, value: bool) {
        self.include_document_structure = value;
    }

    /// Returns a clone of `acceleration` (JS property `acceleration`).
    #[wasm_bindgen(getter)]
    pub fn acceleration(&self) -> Option {
        self.acceleration.clone()
    }

    /// Overwrites `acceleration` (JS property `acceleration`).
    #[wasm_bindgen(setter)]
    pub fn set_acceleration(&mut self, value: Option) {
        self.acceleration = value;
    }

    /// Returns a clone of `cache_namespace` (JS property `cacheNamespace`).
    #[wasm_bindgen(getter, js_name = "cacheNamespace")]
    pub fn cache_namespace(&self) -> Option {
        self.cache_namespace.clone()
    }

    /// Overwrites `cache_namespace` (JS property `cacheNamespace`).
    #[wasm_bindgen(setter, js_name = "cacheNamespace")]
    pub fn set_cache_namespace(&mut self, value: Option) {
        self.cache_namespace = value;
    }

    /// Reads `cache_ttl_secs` (JS property `cacheTtlSecs`).
    #[wasm_bindgen(getter, js_name = "cacheTtlSecs")]
    pub fn cache_ttl_secs(&self) -> Option {
        self.cache_ttl_secs
    }

    /// Overwrites `cache_ttl_secs` (JS property `cacheTtlSecs`).
    #[wasm_bindgen(setter, js_name = "cacheTtlSecs")]
    pub fn set_cache_ttl_secs(&mut self, value: Option) {
        self.cache_ttl_secs = value;
    }

    /// Returns a clone of `email` (JS property `email`).
    #[wasm_bindgen(getter)]
    pub fn email(&self) -> Option {
        self.email.clone()
    }

    /// Overwrites `email` (JS property `email`).
    #[wasm_bindgen(setter)]
    pub fn set_email(&mut self, value: Option) {
        self.email = value;
    }

    /// Returns a clone of `concurrency` (JS property `concurrency`).
    #[wasm_bindgen(getter)]
    pub fn concurrency(&self) -> Option {
        self.concurrency.clone()
    }

    /// Overwrites `concurrency` (JS property `concurrency`).
    #[wasm_bindgen(setter)]
    pub fn set_concurrency(&mut self, value: Option) {
        self.concurrency = value;
    }

    /// Reads `max_archive_depth` (JS property `maxArchiveDepth`).
    #[wasm_bindgen(getter, js_name = "maxArchiveDepth")]
    pub fn max_archive_depth(&self) -> usize {
        self.max_archive_depth
    }

    /// Overwrites `max_archive_depth` (JS property `maxArchiveDepth`).
    #[wasm_bindgen(setter, js_name = "maxArchiveDepth")]
    pub fn set_max_archive_depth(&mut self, value: usize) {
        self.max_archive_depth = value;
    }

    /// Returns a clone of `structured_extraction` (JS property `structuredExtraction`).
    #[wasm_bindgen(getter, js_name = "structuredExtraction")]
    pub fn structured_extraction(&self) -> Option {
        self.structured_extraction.clone()
    }

    /// Overwrites `structured_extraction` (JS property `structuredExtraction`).
    #[wasm_bindgen(setter, js_name = "structuredExtraction")]
    pub fn set_structured_extraction(&mut self, value: Option) {
        self.structured_extraction = value;
    }

    /// Returns a clone of `cancel_token` (JS property `cancelToken`).
    #[wasm_bindgen(getter, js_name = "cancelToken")]
    pub fn cancel_token(&self) -> Option {
        self.cancel_token.clone()
    }

    /// Overwrites `cancel_token` (JS property `cancelToken`).
    #[wasm_bindgen(setter, js_name = "cancelToken")]
    pub fn set_cancel_token(&mut self, value: Option) {
        self.cancel_token = value;
    }

    /// Static factory exported to JS: builds the core
    /// `kreuzberg::ExtractionConfig::default()` and converts it with `.into()`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmExtractionConfig {
        kreuzberg::ExtractionConfig::default().into()
    }

    /// Check if image processing is needed by examining OCR and image extraction settings.
    ///
    /// Returns `true` if either OCR is enabled or image extraction is configured,
    /// indicating that image decompression and processing should occur.
    /// Returns `false` if both are disabled, allowing optimization to skip unnecessary
    /// image decompression for text-only extraction workflows.
    ///
    /// # Optimization Impact
    /// For text-only extractions (no OCR, no image extraction), skipping image
    /// decompression can improve CPU utilization by 5-10% by avoiding wasteful
    /// image I/O and processing when results won't be used.
    #[wasm_bindgen(js_name = "needsImageProcessing")]
    pub fn needs_image_processing(&self) -> bool {
        // Clones the whole wrapper, converts it into the core config, and asks the
        // core type; the decision logic itself lives in `kreuzberg`, not here.
        kreuzberg::ExtractionConfig::from(self.clone()).needs_image_processing()
    }
}

/// Per-file extraction configuration overrides for batch processing.
///
/// All fields are `Option<T>` — `None` means "use the batch-level default."
/// This type is used with `batch_extract_files` and
/// `batch_extract_bytes` to allow heterogeneous
/// extraction settings within a single batch.
///
/// # Excluded Fields
///
/// The following `ExtractionConfig` fields are batch-level only and
/// cannot be overridden per file:
/// - `max_concurrent_extractions` — controls batch parallelism
/// - `use_cache` — global caching policy
/// - `acceleration` — shared ONNX execution provider
/// - `security_limits` — global archive security policy
///
/// # Example
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFileExtractionConfig {
    // NOTE(review): the type arguments of every `Option` below are missing in this
    // view — consult the generator output for the concrete types.
    enable_quality_processing: Option,
    ocr: Option,
    force_ocr: Option,
    force_ocr_pages: Option>,
    disable_ocr: Option,
    chunking: Option,
    content_filter: Option,
    images: Option,
    token_reduction: Option,
    language_detection: Option,
    pages: Option,
    postprocessor: Option,
    html_options: Option,
    result_format: Option,
    output_format: Option,
    include_document_structure: Option,
    timeout_secs: Option,
    structured_extraction: Option,
}

#[wasm_bindgen]
impl WasmFileExtractionConfig {
    /// JS constructor; every argument is optional and stored as given, with no
    /// fallback values.
    ///
    /// `html_options` has no constructor parameter and always starts at
    /// `Default::default()`; use the `htmlOptions` setter to change it.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enableQualityProcessing: Option,
        ocr: Option,
        forceOcr: Option,
        forceOcrPages: Option>,
        disableOcr: Option,
        chunking: Option,
        contentFilter: Option,
        images: Option,
        tokenReduction: Option,
        languageDetection: Option,
        pages: Option,
        postprocessor: Option,
        resultFormat: Option,
        outputFormat: Option,
        includeDocumentStructure: Option,
        timeoutSecs: Option,
        structuredExtraction: Option,
    ) -> WasmFileExtractionConfig {
        WasmFileExtractionConfig {
            enable_quality_processing: enableQualityProcessing,
            ocr,
            force_ocr: forceOcr,
            force_ocr_pages: forceOcrPages,
            disable_ocr: disableOcr,
            chunking,
            content_filter: contentFilter,
            images,
            token_reduction: tokenReduction,
            language_detection: languageDetection,
            pages,
            postprocessor,
            // Not settable through the constructor; see the `htmlOptions` setter.
            html_options: Default::default(),
            result_format: resultFormat,
            output_format: outputFormat,
            include_document_structure: includeDocumentStructure,
            timeout_secs: timeoutSecs,
            structured_extraction: structuredExtraction,
        }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFileExtractionConfig {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output.
        ::default()
    }

    /// Reads `enable_quality_processing` (JS property `enableQualityProcessing`).
    #[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
    pub fn enable_quality_processing(&self) -> Option {
        self.enable_quality_processing
    }

    /// Overwrites `enable_quality_processing` (JS property `enableQualityProcessing`).
    #[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
    pub fn set_enable_quality_processing(&mut self, value: Option) {
        self.enable_quality_processing = value;
    }

    /// Returns a clone of `ocr` (JS property `ocr`).
    #[wasm_bindgen(getter)]
    pub fn ocr(&self) -> Option {
        self.ocr.clone()
    }

    /// Overwrites `ocr` (JS property `ocr`).
    #[wasm_bindgen(setter)]
    pub fn set_ocr(&mut self, value: Option) {
        self.ocr = value;
    }

    /// Reads `force_ocr` (JS property `forceOcr`).
    #[wasm_bindgen(getter, js_name = "forceOcr")]
    pub fn force_ocr(&self) -> Option {
        self.force_ocr
    }

    /// Overwrites `force_ocr` (JS property `forceOcr`).
    #[wasm_bindgen(setter, js_name = "forceOcr")]
    pub fn set_force_ocr(&mut self, value: Option) {
        self.force_ocr = value;
    }

    /// Returns a clone of `force_ocr_pages` (JS property `forceOcrPages`).
    #[wasm_bindgen(getter, js_name = "forceOcrPages")]
    pub fn force_ocr_pages(&self) -> Option> {
        self.force_ocr_pages.clone()
    }

    /// Overwrites `force_ocr_pages` (JS property `forceOcrPages`).
    #[wasm_bindgen(setter, js_name = "forceOcrPages")]
    pub fn set_force_ocr_pages(&mut self, value: Option>) {
        self.force_ocr_pages = value;
    }

    /// Reads `disable_ocr` (JS property `disableOcr`).
    #[wasm_bindgen(getter, js_name = "disableOcr")]
    pub fn disable_ocr(&self) -> Option {
        self.disable_ocr
    }

    /// Overwrites `disable_ocr` (JS property `disableOcr`).
    #[wasm_bindgen(setter, js_name = "disableOcr")]
    pub fn set_disable_ocr(&mut self, value: Option) {
        self.disable_ocr = value;
    }

    /// Returns a clone of `chunking` (JS property `chunking`).
    #[wasm_bindgen(getter)]
    pub fn chunking(&self) -> Option {
        self.chunking.clone()
    }

    /// Overwrites `chunking` (JS property `chunking`).
    #[wasm_bindgen(setter)]
    pub fn set_chunking(&mut self, value: Option) {
        self.chunking = value;
    }

    /// Returns a clone of `content_filter` (JS property `contentFilter`).
    #[wasm_bindgen(getter, js_name = "contentFilter")]
    pub fn content_filter(&self) -> Option {
        self.content_filter.clone()
    }

    /// Overwrites `content_filter` (JS property `contentFilter`).
    #[wasm_bindgen(setter, js_name = "contentFilter")]
    pub fn set_content_filter(&mut self, value: Option) {
        self.content_filter = value;
    }

    /// Returns a clone of `images` (JS property `images`).
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Option {
        self.images.clone()
    }

    /// Overwrites `images` (JS property `images`).
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Option) {
        self.images = value;
    }

    /// Returns a clone of `token_reduction` (JS property `tokenReduction`).
    #[wasm_bindgen(getter, js_name = "tokenReduction")]
    pub fn token_reduction(&self) -> Option {
        self.token_reduction.clone()
    }

    /// Overwrites `token_reduction` (JS property `tokenReduction`).
    #[wasm_bindgen(setter, js_name = "tokenReduction")]
    pub fn set_token_reduction(&mut self, value: Option) {
        self.token_reduction = value;
    }

    /// Returns a clone of `language_detection` (JS property `languageDetection`).
    #[wasm_bindgen(getter, js_name = "languageDetection")]
    pub fn language_detection(&self) -> Option {
        self.language_detection.clone()
    }

    /// Overwrites `language_detection` (JS property `languageDetection`).
    #[wasm_bindgen(setter, js_name = "languageDetection")]
    pub fn set_language_detection(&mut self, value: Option) {
        self.language_detection = value;
    }

    /// Returns a clone of `pages` (JS property `pages`).
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option {
        self.pages.clone()
    }

    /// Overwrites `pages` (JS property `pages`).
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option) {
        self.pages = value;
    }

    /// Returns a clone of `postprocessor` (JS property `postprocessor`).
    #[wasm_bindgen(getter)]
    pub fn postprocessor(&self) -> Option {
        self.postprocessor.clone()
    }

    /// Overwrites `postprocessor` (JS property `postprocessor`).
    #[wasm_bindgen(setter)]
    pub fn set_postprocessor(&mut self, value: Option) {
        self.postprocessor = value;
    }

    /// Returns a clone of `html_options` (JS property `htmlOptions`).
    #[wasm_bindgen(getter, js_name = "htmlOptions")]
    pub fn html_options(&self) -> Option {
        self.html_options.clone()
    }

    /// Overwrites `html_options` (JS property `htmlOptions`); this is the only
    /// way to set it, since the constructor takes no matching argument.
    #[wasm_bindgen(setter, js_name = "htmlOptions")]
    pub fn set_html_options(&mut self, value: Option) {
        self.html_options = value;
    }

    /// Returns the result format, when set, as an owned string obtained from
    /// `to_api_str()` (JS property `resultFormat`).
    #[wasm_bindgen(getter, js_name = "resultFormat")]
    pub fn result_format(&self) -> Option {
        self.result_format.map(|v| v.to_api_str().to_owned())
    }

    /// Overwrites `result_format` (JS property `resultFormat`).
    #[wasm_bindgen(setter, js_name = "resultFormat")]
    pub fn set_result_format(&mut self, value: Option) {
        self.result_format = value;
    }

    /// Returns the output format, when set, as an owned string obtained from
    /// `to_api_str()` (JS property `outputFormat`).
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> Option {
        self.output_format.map(|v| v.to_api_str().to_owned())
    }

    /// Overwrites `output_format` (JS property `outputFormat`).
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: Option) {
        self.output_format = value;
    }

    /// Reads `include_document_structure` (JS property `includeDocumentStructure`).
    #[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
    pub fn include_document_structure(&self) -> Option {
        self.include_document_structure
    }

    /// Overwrites `include_document_structure` (JS property `includeDocumentStructure`).
    #[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
    pub fn set_include_document_structure(&mut self, value: Option) {
        self.include_document_structure = value;
    }

    /// Reads `timeout_secs` (JS property `timeoutSecs`).
    #[wasm_bindgen(getter, js_name = "timeoutSecs")]
    pub fn timeout_secs(&self) -> Option {
        self.timeout_secs
    }

    /// Overwrites `timeout_secs` (JS property `timeoutSecs`).
    #[wasm_bindgen(setter, js_name = "timeoutSecs")]
    pub fn set_timeout_secs(&mut self, value: Option) {
        self.timeout_secs = value;
    }

    /// Returns a clone of `structured_extraction` (JS property `structuredExtraction`).
    #[wasm_bindgen(getter, js_name = "structuredExtraction")]
    pub fn structured_extraction(&self) -> Option {
        self.structured_extraction.clone()
    }

    /// Overwrites `structured_extraction` (JS property `structuredExtraction`).
    #[wasm_bindgen(setter, js_name = "structuredExtraction")]
    pub fn set_structured_extraction(&mut self, value: Option) {
        self.structured_extraction = value;
    }
}

/// Batch item for byte array extraction.
///
/// Used with `batch_extract_bytes` and `batch_extract_bytes_sync`
/// to represent a single item in a batch extraction job.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBatchBytesItem {
    // NOTE(review): the element type of `Vec` and the type argument of `Option`
    // are missing in this view — consult the generator output.
    content: Vec,
    mime_type: String,
    config: Option,
}

#[wasm_bindgen]
impl WasmBatchBytesItem {
    /// JS constructor: stores the content buffer, MIME type string and optional
    /// per-item config exactly as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(content: Vec, mimeType: String, config: Option) -> WasmBatchBytesItem {
        WasmBatchBytesItem {
            content,
            mime_type: mimeType,
            config,
        }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmBatchBytesItem {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output.
        ::default()
    }

    /// Returns a clone of `content` (JS property `content`); each read copies
    /// the whole buffer.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> Vec {
        self.content.clone()
    }

    /// Overwrites `content` (JS property `content`).
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: Vec) {
        self.content = value;
    }

    /// Returns a clone of `mime_type` (JS property `mimeType`).
    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        self.mime_type.clone()
    }

    /// Overwrites `mime_type` (JS property `mimeType`).
    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }

    /// Returns a clone of `config` (JS property `config`).
    #[wasm_bindgen(getter)]
    pub fn config(&self) -> Option {
        self.config.clone()
    }

    /// Overwrites `config` (JS property `config`).
    #[wasm_bindgen(setter)]
    pub fn set_config(&mut self, value: Option) {
        self.config = value;
    }
}

/// Batch item for file extraction.
///
/// Used with `batch_extract_files` and `batch_extract_files_sync`
/// to represent a single file in a batch extraction job.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBatchFileItem {
    path: String,
    // NOTE(review): the `Option` type argument is missing in this view.
    config: Option,
}

#[wasm_bindgen]
impl WasmBatchFileItem {
    /// JS constructor: stores the path string and optional per-file config
    /// exactly as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(path: String, config: Option) -> WasmBatchFileItem {
        WasmBatchFileItem { path, config }
    }

    /// Static factory exported to JS that returns a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmBatchFileItem {
        // NOTE(review): the qualifier in front of `::default()` is missing in this
        // view (probably an angle-bracketed `<… as Default>` form lost in
        // extraction) — confirm against the generator output.
        ::default()
    }

    /// Returns a clone of `path` (JS property `path`).
    #[wasm_bindgen(getter)]
    pub fn path(&self) -> String {
        self.path.clone()
    }

    /// Overwrites `path` (JS property `path`).
    #[wasm_bindgen(setter)]
    pub fn set_path(&mut self, value: String) {
        self.path = value;
    }

    /// Returns a clone of `config` (JS property `config`).
    #[wasm_bindgen(getter)]
    pub fn config(&self) -> Option {
        self.config.clone()
    }

    /// Overwrites `config` (JS property `config`).
    #[wasm_bindgen(setter)]
    pub fn set_config(&mut self, value: Option) {
        self.config = value;
    }
}

/// Image extraction configuration.
// NOTE(review): `#[derive(Default)]` yields `false`/`0`/`None` for every field,
// which differs from the fallbacks in `new()` (e.g. `target_dpi = 300`) and from
// the exported `default()`, which delegates to
// `kreuzberg::ImageExtractionConfig::default()`. Confirm this is intended.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImageExtractionConfig {
    extract_images: bool,
    target_dpi: i32,
    max_image_dimension: i32,
    inject_placeholders: bool,
    auto_adjust_dpi: bool,
    min_dpi: i32,
    max_dpi: i32,
    // NOTE(review): the `Option` type argument is missing in this view.
    max_images_per_page: Option,
    classify: bool,
    include_page_rasters: bool,
    run_ocr_on_images: bool,
    ocr_text_only: bool,
    append_ocr_text: bool,
}

#[wasm_bindgen]
impl WasmImageExtractionConfig {
    /// JS constructor; every argument is optional.
    ///
    /// Fallbacks applied when an argument is absent:
    /// - `extractImages`, `injectPlaceholders`, `autoAdjustDpi`, `classify`,
    ///   `runOcrOnImages` → `true`
    /// - `includePageRasters`, `ocrTextOnly`, `appendOcrText` → `false`
    /// - `targetDpi` → `300`, `maxImageDimension` → `4096`, `minDpi` → `72`,
    ///   `maxDpi` → `600`
    ///
    /// `maxImagesPerPage` is stored as given. Note that it is the last
    /// parameter even though the field sits mid-struct. No validation is done
    /// here (e.g. that `minDpi` does not exceed `maxDpi`).
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        extractImages: Option,
        targetDpi: Option,
        maxImageDimension: Option,
        injectPlaceholders: Option,
        autoAdjustDpi: Option,
        minDpi: Option,
        maxDpi: Option,
        classify: Option,
        includePageRasters: Option,
        runOcrOnImages: Option,
        ocrTextOnly: Option,
        appendOcrText: Option,
        maxImagesPerPage: Option,
    ) -> WasmImageExtractionConfig {
        WasmImageExtractionConfig {
            extract_images: extractImages.unwrap_or(true),
            target_dpi: targetDpi.unwrap_or(300),
            max_image_dimension: maxImageDimension.unwrap_or(4096),
            inject_placeholders: injectPlaceholders.unwrap_or(true),
            auto_adjust_dpi: autoAdjustDpi.unwrap_or(true),
            min_dpi: minDpi.unwrap_or(72),
            max_dpi: maxDpi.unwrap_or(600),
            max_images_per_page: maxImagesPerPage,
            classify: classify.unwrap_or(true),
            include_page_rasters: includePageRasters.unwrap_or(false),
            run_ocr_on_images: runOcrOnImages.unwrap_or(true),
            ocr_text_only: ocrTextOnly.unwrap_or(false),
            append_ocr_text: appendOcrText.unwrap_or(false),
        }
    }

    /// Reads `extract_images` (JS property `extractImages`).
    #[wasm_bindgen(getter, js_name = "extractImages")]
    pub fn extract_images(&self) -> bool {
        self.extract_images
    }

    /// Overwrites `extract_images` (JS property `extractImages`).
    #[wasm_bindgen(setter, js_name = "extractImages")]
    pub fn set_extract_images(&mut self, value: bool) {
        self.extract_images = value;
    }

    /// Reads `target_dpi` (JS property `targetDpi`).
    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    /// Overwrites `target_dpi` (JS property `targetDpi`).
    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    /// Reads `max_image_dimension` (JS property `maxImageDimension`).
    #[wasm_bindgen(getter, js_name = "maxImageDimension")]
    pub fn max_image_dimension(&self) -> i32 {
        self.max_image_dimension
    }

    /// Overwrites `max_image_dimension` (JS property `maxImageDimension`).
    #[wasm_bindgen(setter, js_name = "maxImageDimension")]
    pub fn set_max_image_dimension(&mut self, value: i32) {
        self.max_image_dimension = value;
    }

    /// Reads `inject_placeholders` (JS property `injectPlaceholders`).
    #[wasm_bindgen(getter, js_name = "injectPlaceholders")]
    pub fn inject_placeholders(&self) -> bool {
        self.inject_placeholders
    }

    /// Overwrites `inject_placeholders` (JS property `injectPlaceholders`).
    #[wasm_bindgen(setter, js_name = "injectPlaceholders")]
    pub fn set_inject_placeholders(&mut self, value: bool) {
        self.inject_placeholders = value;
    }

    /// Reads `auto_adjust_dpi` (JS property `autoAdjustDpi`).
    #[wasm_bindgen(getter, js_name = "autoAdjustDpi")]
    pub fn auto_adjust_dpi(&self) -> bool {
        self.auto_adjust_dpi
    }

    /// Overwrites `auto_adjust_dpi` (JS property `autoAdjustDpi`).
    #[wasm_bindgen(setter, js_name = "autoAdjustDpi")]
    pub fn set_auto_adjust_dpi(&mut self, value: bool) {
        self.auto_adjust_dpi = value;
    }

    /// Reads `min_dpi` (JS property `minDpi`).
    #[wasm_bindgen(getter, js_name = "minDpi")]
    pub fn min_dpi(&self) -> i32 {
        self.min_dpi
    }

    /// Overwrites `min_dpi` (JS property `minDpi`).
    #[wasm_bindgen(setter, js_name = "minDpi")]
    pub fn set_min_dpi(&mut self, value: i32) {
        self.min_dpi = value;
    }

    /// Reads `max_dpi` (JS property `maxDpi`).
    #[wasm_bindgen(getter, js_name = "maxDpi")]
    pub fn max_dpi(&self) -> i32 {
        self.max_dpi
    }

    /// Overwrites `max_dpi` (JS property `maxDpi`).
    #[wasm_bindgen(setter, js_name = "maxDpi")]
    pub fn set_max_dpi(&mut self, value: i32) {
        self.max_dpi = value;
    }

    /// Reads `max_images_per_page` (JS property `maxImagesPerPage`).
    #[wasm_bindgen(getter, js_name = "maxImagesPerPage")]
    pub fn max_images_per_page(&self) -> Option {
        self.max_images_per_page
    }

    /// Overwrites `max_images_per_page` (JS property `maxImagesPerPage`).
    #[wasm_bindgen(setter, js_name = "maxImagesPerPage")]
    pub fn set_max_images_per_page(&mut self, value: Option) {
        self.max_images_per_page = value;
    }

    /// Reads `classify` (JS property `classify`).
    #[wasm_bindgen(getter)]
    pub fn classify(&self) -> bool {
        self.classify
    }

    /// Overwrites `classify` (JS property `classify`).
    #[wasm_bindgen(setter)]
    pub fn set_classify(&mut self, value: bool) {
        self.classify = value;
    }

    /// Reads `include_page_rasters` (JS property `includePageRasters`).
    #[wasm_bindgen(getter, js_name = "includePageRasters")]
    pub fn include_page_rasters(&self) -> bool {
        self.include_page_rasters
    }

    /// Overwrites `include_page_rasters` (JS property `includePageRasters`).
    #[wasm_bindgen(setter, js_name = "includePageRasters")]
    pub fn set_include_page_rasters(&mut self, value: bool) {
        self.include_page_rasters = value;
    }

    /// Reads `run_ocr_on_images` (JS property `runOcrOnImages`).
    #[wasm_bindgen(getter, js_name = "runOcrOnImages")]
    pub fn run_ocr_on_images(&self) -> bool {
        self.run_ocr_on_images
    }

    /// Overwrites `run_ocr_on_images` (JS property `runOcrOnImages`).
    #[wasm_bindgen(setter, js_name = "runOcrOnImages")]
    pub fn set_run_ocr_on_images(&mut self, value: bool) {
        self.run_ocr_on_images = value;
    }

    /// Reads `ocr_text_only` (JS property `ocrTextOnly`).
    #[wasm_bindgen(getter, js_name = "ocrTextOnly")]
    pub fn ocr_text_only(&self) -> bool {
        self.ocr_text_only
    }

    /// Overwrites `ocr_text_only` (JS property `ocrTextOnly`).
    #[wasm_bindgen(setter, js_name = "ocrTextOnly")]
    pub fn set_ocr_text_only(&mut self, value: bool) {
        self.ocr_text_only = value;
    }

    /// Reads `append_ocr_text` (JS property `appendOcrText`).
    #[wasm_bindgen(getter, js_name = "appendOcrText")]
    pub fn append_ocr_text(&self) -> bool {
        self.append_ocr_text
    }

    /// Overwrites `append_ocr_text` (JS property `appendOcrText`).
    #[wasm_bindgen(setter, js_name = "appendOcrText")]
    pub fn set_append_ocr_text(&mut self, value: bool) {
        self.append_ocr_text = value;
    }

    /// Static factory exported to JS: builds the core
    /// `kreuzberg::ImageExtractionConfig::default()` and converts it with `.into()`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmImageExtractionConfig {
        kreuzberg::ImageExtractionConfig::default().into()
    }
}

/// Token reduction configuration.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTokenReductionOptions {
    // Exposed to JS through the `mode` getter/setter pair.
    mode: String,
    // Exposed to JS as `preserveImportantWords`.
    preserve_important_words: bool,
}

#[wasm_bindgen]
impl WasmTokenReductionOptions {
    /// Builds the options from optional JS arguments.
    ///
    /// Omitted arguments fall back to:
    /// - `mode`: the empty string (`String::default()`)
    /// - `preserveImportantWords`: `true`
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        mode: Option<String>,
        preserveImportantWords: Option<bool>,
    ) -> WasmTokenReductionOptions {
        WasmTokenReductionOptions {
            mode: mode.unwrap_or_default(),
            preserve_important_words: preserveImportantWords.unwrap_or(true),
        }
    }

    /// Returns a copy of the stored mode string.
    #[wasm_bindgen(getter)]
    pub fn mode(&self) -> String {
        self.mode.clone()
    }

    /// Replaces the stored mode string.
    #[wasm_bindgen(setter)]
    pub fn set_mode(&mut self, value: String) {
        self.mode = value;
    }

    /// Returns the `preserveImportantWords` flag.
    #[wasm_bindgen(getter, js_name = "preserveImportantWords")]
    pub fn preserve_important_words(&self) -> bool {
        self.preserve_important_words
    }

    /// Sets the `preserveImportantWords` flag.
    #[wasm_bindgen(setter, js_name = "preserveImportantWords")]
    pub fn set_preserve_important_words(&mut self, value: bool) {
        self.preserve_important_words = value;
    }

    /// Returns the wrapper obtained by converting
    /// `kreuzberg::TokenReductionOptions::default()` with `Into`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmTokenReductionOptions {
        kreuzberg::TokenReductionOptions::default().into()
    }
}

/// Language detection configuration.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLanguageDetectionConfig {
    // Exposed to JS as `enabled`.
    enabled: bool,
    // Exposed to JS as `minConfidence`.
    min_confidence: f64,
    // Exposed to JS as `detectMultiple`.
    detect_multiple: bool,
}

#[wasm_bindgen]
impl WasmLanguageDetectionConfig {
    /// Builds the configuration from optional JS arguments.
    ///
    /// Omitted arguments fall back to:
    /// - `enabled`: `true`
    /// - `minConfidence`: `0.8`
    /// - `detectMultiple`: `false`
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enabled: Option<bool>,
        minConfidence: Option<f64>,
        detectMultiple: Option<bool>,
    ) -> WasmLanguageDetectionConfig {
        WasmLanguageDetectionConfig {
            enabled: enabled.unwrap_or(true),
            min_confidence: minConfidence.unwrap_or(0.8),
            detect_multiple: detectMultiple.unwrap_or(false),
        }
    }

    /// Returns the `enabled` flag.
    #[wasm_bindgen(getter)]
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Sets the `enabled` flag.
    #[wasm_bindgen(setter)]
    pub fn set_enabled(&mut self, value: bool) {
        self.enabled = value;
    }

    /// Returns the stored `minConfidence` value.
    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }

    /// Sets the stored `minConfidence` value.
    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }

    /// Returns the `detectMultiple` flag.
    #[wasm_bindgen(getter, js_name = "detectMultiple")]
    pub fn detect_multiple(&self) -> bool {
        self.detect_multiple
    }

    /// Sets the `detectMultiple` flag.
    #[wasm_bindgen(setter, js_name = "detectMultiple")]
    pub fn set_detect_multiple(&mut self, value: bool) {
        self.detect_multiple = value;
    }

    /// Returns the wrapper obtained by converting
    /// `kreuzberg::LanguageDetectionConfig::default()` with `Into`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmLanguageDetectionConfig {
        kreuzberg::LanguageDetectionConfig::default().into()
    }
}

/// Configuration for an LLM provider/model via liter-llm.
///
/// Each feature (VLM OCR, VLM embeddings, structured extraction) carries
/// its own `LlmConfig`, allowing different providers per feature.
///
/// # Example
///
/// ```toml
/// [structured_extraction.llm]
/// model = "openai/gpt-4o"
/// api_key = "sk-..."
// Review notes for the two wasm-bindgen wrappers defined on the following lines.
//
// WasmLlmConfig
//   * `new` receives every argument as an `Option`. A missing `model` falls back
//     to `unwrap_or_default()`; `apiKey`, `baseUrl`, `timeoutSecs`, `maxRetries`,
//     `temperature` and `maxTokens` are stored exactly as passed.
//   * Getters for `model`, `apiKey` and `baseUrl` return clones of the stored
//     value; `timeoutSecs`, `maxRetries`, `temperature` and `maxTokens` are
//     returned without cloning.
//   * `default()` returns a default-constructed value (the struct derives `Default`).
//
// WasmStructuredExtractionConfig
//   * `new` takes `schema`, `schemaName`, `strict` and `llm` first, followed by the
//     optional `schemaDescription` and `prompt`; all six are stored unchanged.
//   * Getters for `schema`, `schemaName`, `schemaDescription`, `prompt` and `llm`
//     return clones; `strict` is returned as a plain `bool`.
//   * `default()` returns a default-constructed value (the struct derives `Default`).
# or use KREUZBERG_LLM_API_KEY env var /// ``` #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmLlmConfig { model: String, api_key: Option, base_url: Option, timeout_secs: Option, max_retries: Option, temperature: Option, max_tokens: Option, } #[wasm_bindgen] impl WasmLlmConfig { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( model: Option, apiKey: Option, baseUrl: Option, timeoutSecs: Option, maxRetries: Option, temperature: Option, maxTokens: Option, ) -> WasmLlmConfig { WasmLlmConfig { model: model.unwrap_or_default(), api_key: apiKey, base_url: baseUrl, timeout_secs: timeoutSecs, max_retries: maxRetries, temperature, max_tokens: maxTokens, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmLlmConfig { ::default() } #[wasm_bindgen(getter)] pub fn model(&self) -> String { self.model.clone() } #[wasm_bindgen(setter)] pub fn set_model(&mut self, value: String) { self.model = value; } #[wasm_bindgen(getter, js_name = "apiKey")] pub fn api_key(&self) -> Option { self.api_key.clone() } #[wasm_bindgen(setter, js_name = "apiKey")] pub fn set_api_key(&mut self, value: Option) { self.api_key = value; } #[wasm_bindgen(getter, js_name = "baseUrl")] pub fn base_url(&self) -> Option { self.base_url.clone() } #[wasm_bindgen(setter, js_name = "baseUrl")] pub fn set_base_url(&mut self, value: Option) { self.base_url = value; } #[wasm_bindgen(getter, js_name = "timeoutSecs")] pub fn timeout_secs(&self) -> Option { self.timeout_secs } #[wasm_bindgen(setter, js_name = "timeoutSecs")] pub fn set_timeout_secs(&mut self, value: Option) { self.timeout_secs = value; } #[wasm_bindgen(getter, js_name = "maxRetries")] pub fn max_retries(&self) -> Option { self.max_retries } #[wasm_bindgen(setter, js_name = "maxRetries")] pub fn set_max_retries(&mut self, value: Option) { self.max_retries = value; } #[wasm_bindgen(getter)] pub fn temperature(&self) -> Option { self.temperature } #[wasm_bindgen(setter)] pub fn set_temperature(&mut
self, value: Option) { self.temperature = value; } #[wasm_bindgen(getter, js_name = "maxTokens")] pub fn max_tokens(&self) -> Option { self.max_tokens } #[wasm_bindgen(setter, js_name = "maxTokens")] pub fn set_max_tokens(&mut self, value: Option) { self.max_tokens = value; } } /// Configuration for LLM-based structured data extraction. /// /// Sends extracted document content to a VLM with a JSON schema, /// returning structured data that conforms to the schema. /// /// # Example /// /// ```toml /// [structured_extraction] /// schema_name = "invoice_data" /// strict = true /// /// [structured_extraction.schema] /// type = "object" /// properties.vendor = { type = "string" } /// properties.total = { type = "number" } /// required = ["vendor", "total"] /// /// [structured_extraction.llm] /// model = "openai/gpt-4o" /// ``` #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmStructuredExtractionConfig { schema: JsValue, schema_name: String, schema_description: Option, strict: bool, prompt: Option, llm: WasmLlmConfig, } #[wasm_bindgen] impl WasmStructuredExtractionConfig { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( schema: JsValue, schemaName: String, strict: bool, llm: WasmLlmConfig, schemaDescription: Option, prompt: Option, ) -> WasmStructuredExtractionConfig { WasmStructuredExtractionConfig { schema, schema_name: schemaName, schema_description: schemaDescription, strict, prompt, llm, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmStructuredExtractionConfig { ::default() } #[wasm_bindgen(getter)] pub fn schema(&self) -> JsValue { self.schema.clone() } #[wasm_bindgen(setter)] pub fn set_schema(&mut self, value: JsValue) { self.schema = value; } #[wasm_bindgen(getter, js_name = "schemaName")] pub fn schema_name(&self) -> String { self.schema_name.clone() } #[wasm_bindgen(setter, js_name = "schemaName")] pub fn set_schema_name(&mut self, value: String) { self.schema_name = value; }
#[wasm_bindgen(getter, js_name = "schemaDescription")] pub fn schema_description(&self) -> Option { self.schema_description.clone() } #[wasm_bindgen(setter, js_name = "schemaDescription")] pub fn set_schema_description(&mut self, value: Option) { self.schema_description = value; } #[wasm_bindgen(getter)] pub fn strict(&self) -> bool { self.strict } #[wasm_bindgen(setter)] pub fn set_strict(&mut self, value: bool) { self.strict = value; } #[wasm_bindgen(getter)] pub fn prompt(&self) -> Option { self.prompt.clone() } #[wasm_bindgen(setter)] pub fn set_prompt(&mut self, value: Option) { self.prompt = value; } #[wasm_bindgen(getter)] pub fn llm(&self) -> WasmLlmConfig { self.llm.clone() } #[wasm_bindgen(setter)] pub fn set_llm(&mut self, value: WasmLlmConfig) { self.llm = value; } } /// Quality thresholds for OCR fallback decisions and pipeline quality gating. /// /// All fields default to the values that match the previous hardcoded behavior, /// so `OcrQualityThresholds.default()` preserves existing semantics exactly.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrQualityThresholds {
    min_total_non_whitespace: usize,
    min_non_whitespace_per_page: f64,
    min_meaningful_word_len: usize,
    min_meaningful_words: usize,
    min_alnum_ratio: f64,
    min_garbage_chars: usize,
    max_fragmented_word_ratio: f64,
    critical_fragmented_word_ratio: f64,
    min_avg_word_length: f64,
    min_words_for_avg_length_check: usize,
    min_consecutive_repeat_ratio: f64,
    min_words_for_repeat_check: usize,
    substantive_min_chars: usize,
    non_text_min_chars: usize,
    alnum_ws_ratio_threshold: f64,
    pipeline_min_quality: f64,
}

#[wasm_bindgen]
impl WasmOcrQualityThresholds {
    /// Builds the thresholds from optional JS arguments.
    ///
    /// Every argument may be omitted; the fallback used for each one is:
    ///
    /// | argument                      | fallback |
    /// |-------------------------------|----------|
    /// | `minTotalNonWhitespace`       | `64`     |
    /// | `minNonWhitespacePerPage`     | `32.0`   |
    /// | `minMeaningfulWordLen`        | `4`      |
    /// | `minMeaningfulWords`          | `3`      |
    /// | `minAlnumRatio`               | `0.3`    |
    /// | `minGarbageChars`             | `5`      |
    /// | `maxFragmentedWordRatio`      | `0.6`    |
    /// | `criticalFragmentedWordRatio` | `0.8`    |
    /// | `minAvgWordLength`            | `2.0`    |
    /// | `minWordsForAvgLengthCheck`   | `50`     |
    /// | `minConsecutiveRepeatRatio`   | `0.08`   |
    /// | `minWordsForRepeatCheck`      | `50`     |
    /// | `substantiveMinChars`         | `100`    |
    /// | `nonTextMinChars`             | `20`     |
    /// | `alnumWsRatioThreshold`       | `0.4`    |
    /// | `pipelineMinQuality`          | `0.5`    |
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        minTotalNonWhitespace: Option<usize>,
        minNonWhitespacePerPage: Option<f64>,
        minMeaningfulWordLen: Option<usize>,
        minMeaningfulWords: Option<usize>,
        minAlnumRatio: Option<f64>,
        minGarbageChars: Option<usize>,
        maxFragmentedWordRatio: Option<f64>,
        criticalFragmentedWordRatio: Option<f64>,
        minAvgWordLength: Option<f64>,
        minWordsForAvgLengthCheck: Option<usize>,
        minConsecutiveRepeatRatio: Option<f64>,
        minWordsForRepeatCheck: Option<usize>,
        substantiveMinChars: Option<usize>,
        nonTextMinChars: Option<usize>,
        alnumWsRatioThreshold: Option<f64>,
        pipelineMinQuality: Option<f64>,
    ) -> WasmOcrQualityThresholds {
        WasmOcrQualityThresholds {
            min_total_non_whitespace: minTotalNonWhitespace.unwrap_or(64),
            min_non_whitespace_per_page: minNonWhitespacePerPage.unwrap_or(32.0),
            min_meaningful_word_len: minMeaningfulWordLen.unwrap_or(4),
            min_meaningful_words: minMeaningfulWords.unwrap_or(3),
            min_alnum_ratio: minAlnumRatio.unwrap_or(0.3),
            min_garbage_chars: minGarbageChars.unwrap_or(5),
            max_fragmented_word_ratio: maxFragmentedWordRatio.unwrap_or(0.6),
            critical_fragmented_word_ratio: criticalFragmentedWordRatio.unwrap_or(0.8),
            min_avg_word_length: minAvgWordLength.unwrap_or(2.0),
            min_words_for_avg_length_check: minWordsForAvgLengthCheck.unwrap_or(50),
            min_consecutive_repeat_ratio: minConsecutiveRepeatRatio.unwrap_or(0.08),
            min_words_for_repeat_check: minWordsForRepeatCheck.unwrap_or(50),
            substantive_min_chars: substantiveMinChars.unwrap_or(100),
            non_text_min_chars: nonTextMinChars.unwrap_or(20),
            alnum_ws_ratio_threshold: alnumWsRatioThreshold.unwrap_or(0.4),
            pipeline_min_quality: pipelineMinQuality.unwrap_or(0.5),
        }
    }

    /// Returns the stored `minTotalNonWhitespace` value.
    #[wasm_bindgen(getter, js_name = "minTotalNonWhitespace")]
    pub fn min_total_non_whitespace(&self) -> usize {
        self.min_total_non_whitespace
    }

    /// Sets the stored `minTotalNonWhitespace` value.
    #[wasm_bindgen(setter, js_name = "minTotalNonWhitespace")]
    pub fn set_min_total_non_whitespace(&mut self, value: usize) {
        self.min_total_non_whitespace = value;
    }

    /// Returns the stored `minNonWhitespacePerPage` value.
    #[wasm_bindgen(getter, js_name = "minNonWhitespacePerPage")]
    pub fn min_non_whitespace_per_page(&self) -> f64 {
        self.min_non_whitespace_per_page
    }

    /// Sets the stored `minNonWhitespacePerPage` value.
    #[wasm_bindgen(setter, js_name = "minNonWhitespacePerPage")]
    pub fn set_min_non_whitespace_per_page(&mut self, value: f64) {
        self.min_non_whitespace_per_page = value;
    }

    /// Returns the stored `minMeaningfulWordLen` value.
    #[wasm_bindgen(getter, js_name = "minMeaningfulWordLen")]
    pub fn min_meaningful_word_len(&self) -> usize {
        self.min_meaningful_word_len
    }

    /// Sets the stored `minMeaningfulWordLen` value.
    #[wasm_bindgen(setter, js_name = "minMeaningfulWordLen")]
    pub fn set_min_meaningful_word_len(&mut self, value: usize) {
        self.min_meaningful_word_len = value;
    }

    /// Returns the stored `minMeaningfulWords` value.
    #[wasm_bindgen(getter, js_name = "minMeaningfulWords")]
    pub fn min_meaningful_words(&self) -> usize {
        self.min_meaningful_words
    }

    /// Sets the stored `minMeaningfulWords` value.
    #[wasm_bindgen(setter, js_name = "minMeaningfulWords")]
    pub fn set_min_meaningful_words(&mut self, value: usize) {
        self.min_meaningful_words = value;
    }

    /// Returns the stored `minAlnumRatio` value.
    #[wasm_bindgen(getter, js_name = "minAlnumRatio")]
    pub fn min_alnum_ratio(&self) -> f64 {
        self.min_alnum_ratio
    }

    /// Sets the stored `minAlnumRatio` value.
    #[wasm_bindgen(setter, js_name = "minAlnumRatio")]
    pub fn set_min_alnum_ratio(&mut self, value: f64) {
        self.min_alnum_ratio = value;
    }

    /// Returns the stored `minGarbageChars` value.
    #[wasm_bindgen(getter, js_name = "minGarbageChars")]
    pub fn min_garbage_chars(&self) -> usize {
        self.min_garbage_chars
    }

    /// Sets the stored `minGarbageChars` value.
    #[wasm_bindgen(setter, js_name = "minGarbageChars")]
    pub fn set_min_garbage_chars(&mut self, value: usize) {
        self.min_garbage_chars = value;
    }

    /// Returns the stored `maxFragmentedWordRatio` value.
    #[wasm_bindgen(getter, js_name = "maxFragmentedWordRatio")]
    pub fn max_fragmented_word_ratio(&self) -> f64 {
        self.max_fragmented_word_ratio
    }

    /// Sets the stored `maxFragmentedWordRatio` value.
    #[wasm_bindgen(setter, js_name = "maxFragmentedWordRatio")]
    pub fn set_max_fragmented_word_ratio(&mut self, value: f64) {
        self.max_fragmented_word_ratio = value;
    }

    /// Returns the stored `criticalFragmentedWordRatio` value.
    #[wasm_bindgen(getter, js_name = "criticalFragmentedWordRatio")]
    pub fn critical_fragmented_word_ratio(&self) -> f64 {
        self.critical_fragmented_word_ratio
    }

    /// Sets the stored `criticalFragmentedWordRatio` value.
    #[wasm_bindgen(setter, js_name = "criticalFragmentedWordRatio")]
    pub fn set_critical_fragmented_word_ratio(&mut self, value: f64) {
        self.critical_fragmented_word_ratio = value;
    }

    /// Returns the stored `minAvgWordLength` value.
    #[wasm_bindgen(getter, js_name = "minAvgWordLength")]
    pub fn min_avg_word_length(&self) -> f64 {
        self.min_avg_word_length
    }

    /// Sets the stored `minAvgWordLength` value.
    #[wasm_bindgen(setter, js_name = "minAvgWordLength")]
    pub fn set_min_avg_word_length(&mut self, value: f64) {
        self.min_avg_word_length = value;
    }

    /// Returns the stored `minWordsForAvgLengthCheck` value.
    #[wasm_bindgen(getter, js_name = "minWordsForAvgLengthCheck")]
    pub fn min_words_for_avg_length_check(&self) -> usize {
        self.min_words_for_avg_length_check
    }

    /// Sets the stored `minWordsForAvgLengthCheck` value.
    #[wasm_bindgen(setter, js_name = "minWordsForAvgLengthCheck")]
    pub fn set_min_words_for_avg_length_check(&mut self, value: usize) {
        self.min_words_for_avg_length_check = value;
    }

    /// Returns the stored `minConsecutiveRepeatRatio` value.
    #[wasm_bindgen(getter, js_name = "minConsecutiveRepeatRatio")]
    pub fn min_consecutive_repeat_ratio(&self) -> f64 {
        self.min_consecutive_repeat_ratio
    }

    /// Sets the stored `minConsecutiveRepeatRatio` value.
    #[wasm_bindgen(setter, js_name = "minConsecutiveRepeatRatio")]
    pub fn set_min_consecutive_repeat_ratio(&mut self, value: f64) {
        self.min_consecutive_repeat_ratio = value;
    }

    /// Returns the stored `minWordsForRepeatCheck` value.
    #[wasm_bindgen(getter, js_name = "minWordsForRepeatCheck")]
    pub fn min_words_for_repeat_check(&self) -> usize {
        self.min_words_for_repeat_check
    }

    /// Sets the stored `minWordsForRepeatCheck` value.
    #[wasm_bindgen(setter, js_name = "minWordsForRepeatCheck")]
    pub fn set_min_words_for_repeat_check(&mut self, value: usize) {
        self.min_words_for_repeat_check = value;
    }

    /// Returns the stored `substantiveMinChars` value.
    #[wasm_bindgen(getter, js_name = "substantiveMinChars")]
    pub fn substantive_min_chars(&self) -> usize {
        self.substantive_min_chars
    }

    /// Sets the stored `substantiveMinChars` value.
    #[wasm_bindgen(setter, js_name = "substantiveMinChars")]
    pub fn set_substantive_min_chars(&mut self, value: usize) {
        self.substantive_min_chars = value;
    }

    /// Returns the stored `nonTextMinChars` value.
    #[wasm_bindgen(getter, js_name = "nonTextMinChars")]
    pub fn non_text_min_chars(&self) -> usize {
        self.non_text_min_chars
    }

    /// Sets the stored `nonTextMinChars` value.
    #[wasm_bindgen(setter, js_name = "nonTextMinChars")]
    pub fn set_non_text_min_chars(&mut self, value: usize) {
        self.non_text_min_chars = value;
    }

    /// Returns the stored `alnumWsRatioThreshold` value.
    #[wasm_bindgen(getter, js_name = "alnumWsRatioThreshold")]
    pub fn alnum_ws_ratio_threshold(&self) -> f64 {
        self.alnum_ws_ratio_threshold
    }

    /// Sets the stored `alnumWsRatioThreshold` value.
    #[wasm_bindgen(setter, js_name = "alnumWsRatioThreshold")]
    pub fn set_alnum_ws_ratio_threshold(&mut self, value: f64) {
        self.alnum_ws_ratio_threshold = value;
    }

    /// Returns the stored `pipelineMinQuality` value.
    #[wasm_bindgen(getter, js_name = "pipelineMinQuality")]
    pub fn pipeline_min_quality(&self) -> f64 {
        self.pipeline_min_quality
    }

    /// Sets the stored `pipelineMinQuality` value.
    #[wasm_bindgen(setter, js_name = "pipelineMinQuality")]
    pub fn set_pipeline_min_quality(&mut self, value: f64) {
        self.pipeline_min_quality = value;
    }

    /// Returns the wrapper obtained by converting
    /// `kreuzberg::OcrQualityThresholds::default()` with `Into`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmOcrQualityThresholds {
        kreuzberg::OcrQualityThresholds::default().into()
    }
}

/// A single backend stage in the OCR pipeline.
// Review notes for the two wasm-bindgen wrappers defined on the following lines.
//
// WasmOcrPipelineStage
//   * `new` takes `backend` and `priority` as required arguments, then the
//     optional `language`, `tesseractConfig`, `paddleOcrConfig`, `vlmConfig`
//     and `backendOptions`; every argument is stored unchanged.
//   * `priority` is returned as a plain `u32`; all other getters return a clone
//     of the stored value.
//   * `default()` returns a default-constructed value (the struct derives `Default`).
//
// WasmOcrPipelineConfig
//   * `new` takes `stages` and `qualityThresholds` and stores both unchanged.
//   * Both getters (`stages`, `qualityThresholds`) return clones, so mutating a
//     returned value does not modify this config; use the setters to write back.
//   * `default()` returns a default-constructed value (the struct derives `Default`).
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmOcrPipelineStage { backend: String, priority: u32, language: Option, tesseract_config: Option, paddle_ocr_config: Option, vlm_config: Option, backend_options: Option, } #[wasm_bindgen] impl WasmOcrPipelineStage { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( backend: String, priority: u32, language: Option, tesseractConfig: Option, paddleOcrConfig: Option, vlmConfig: Option, backendOptions: Option, ) -> WasmOcrPipelineStage { WasmOcrPipelineStage { backend, priority, language, tesseract_config: tesseractConfig, paddle_ocr_config: paddleOcrConfig, vlm_config: vlmConfig, backend_options: backendOptions, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmOcrPipelineStage { ::default() } #[wasm_bindgen(getter)] pub fn backend(&self) -> String { self.backend.clone() } #[wasm_bindgen(setter)] pub fn set_backend(&mut self, value: String) { self.backend = value; } #[wasm_bindgen(getter)] pub fn priority(&self) -> u32 { self.priority } #[wasm_bindgen(setter)] pub fn set_priority(&mut self, value: u32) { self.priority = value; } #[wasm_bindgen(getter)] pub fn language(&self) -> Option { self.language.clone() } #[wasm_bindgen(setter)] pub fn set_language(&mut self, value: Option) { self.language = value; } #[wasm_bindgen(getter, js_name = "tesseractConfig")] pub fn tesseract_config(&self) -> Option { self.tesseract_config.clone() } #[wasm_bindgen(setter, js_name = "tesseractConfig")] pub fn set_tesseract_config(&mut self, value: Option) { self.tesseract_config = value; } #[wasm_bindgen(getter, js_name = "paddleOcrConfig")] pub fn paddle_ocr_config(&self) -> Option { self.paddle_ocr_config.clone() } #[wasm_bindgen(setter, js_name = "paddleOcrConfig")] pub fn set_paddle_ocr_config(&mut self, value: Option) { self.paddle_ocr_config = value; } #[wasm_bindgen(getter, js_name = "vlmConfig")] pub fn vlm_config(&self) -> Option { self.vlm_config.clone() }
#[wasm_bindgen(setter, js_name = "vlmConfig")] pub fn set_vlm_config(&mut self, value: Option) { self.vlm_config = value; } #[wasm_bindgen(getter, js_name = "backendOptions")] pub fn backend_options(&self) -> Option { self.backend_options.clone() } #[wasm_bindgen(setter, js_name = "backendOptions")] pub fn set_backend_options(&mut self, value: Option) { self.backend_options = value; } } /// Multi-backend OCR pipeline with quality-based fallback. /// /// Backends are tried in priority order (highest first). After each backend /// produces output, quality is evaluated. If it meets `quality_thresholds.pipeline_min_quality`, /// the result is accepted. Otherwise the next backend is tried. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmOcrPipelineConfig { stages: Vec, quality_thresholds: WasmOcrQualityThresholds, } #[wasm_bindgen] impl WasmOcrPipelineConfig { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( stages: Vec, qualityThresholds: WasmOcrQualityThresholds, ) -> WasmOcrPipelineConfig { WasmOcrPipelineConfig { stages, quality_thresholds: qualityThresholds, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmOcrPipelineConfig { ::default() } #[wasm_bindgen(getter)] pub fn stages(&self) -> Vec { self.stages.clone() } #[wasm_bindgen(setter)] pub fn set_stages(&mut self, value: Vec) { self.stages = value; } #[wasm_bindgen(getter, js_name = "qualityThresholds")] pub fn quality_thresholds(&self) -> WasmOcrQualityThresholds { self.quality_thresholds.clone() } #[wasm_bindgen(setter, js_name = "qualityThresholds")] pub fn set_quality_thresholds(&mut self, value: WasmOcrQualityThresholds) { self.quality_thresholds = value; } } /// OCR configuration.
// Review notes for WasmOcrConfig, defined on the following lines.
//
//   * `new` receives every argument as an `Option`. Fallbacks applied in the
//     constructor: `enabled` -> `true`, `backend` and `language` ->
//     `unwrap_or_default()`, `autoRotate` -> `false`. All remaining arguments
//     are stored exactly as passed.
//   * Positional order differs from field order: `autoRotate` is the fourth
//     constructor argument although `auto_rotate` is declared after `pipeline`.
//   * The `outputFormat` getter maps the stored value through
//     `to_api_str().to_owned()`, whereas the setter stores its argument as-is.
//   * `enabled` and `autoRotate` are returned as plain `bool`; every other
//     getter except `outputFormat` returns a clone of the stored value.
//   * `default()` converts `kreuzberg::OcrConfig::default()` with `Into`; it does
//     not go through `new`.
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmOcrConfig { enabled: bool, backend: String, language: String, tesseract_config: Option, output_format: Option, paddle_ocr_config: Option, backend_options: Option, element_config: Option, quality_thresholds: Option, pipeline: Option, auto_rotate: bool, vlm_config: Option, vlm_prompt: Option, acceleration: Option, tessdata_bytes: Option, } #[wasm_bindgen] impl WasmOcrConfig { #[allow(clippy::too_many_arguments)] #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( enabled: Option, backend: Option, language: Option, autoRotate: Option, tesseractConfig: Option, outputFormat: Option, paddleOcrConfig: Option, backendOptions: Option, elementConfig: Option, qualityThresholds: Option, pipeline: Option, vlmConfig: Option, vlmPrompt: Option, acceleration: Option, tessdataBytes: Option, ) -> WasmOcrConfig { WasmOcrConfig { enabled: enabled.unwrap_or(true), backend: backend.unwrap_or_default(), language: language.unwrap_or_default(), tesseract_config: tesseractConfig, output_format: outputFormat, paddle_ocr_config: paddleOcrConfig, backend_options: backendOptions, element_config: elementConfig, quality_thresholds: qualityThresholds, pipeline, auto_rotate: autoRotate.unwrap_or(false), vlm_config: vlmConfig, vlm_prompt: vlmPrompt, acceleration, tessdata_bytes: tessdataBytes, } } #[wasm_bindgen(getter)] pub fn enabled(&self) -> bool { self.enabled } #[wasm_bindgen(setter)] pub fn set_enabled(&mut self, value: bool) { self.enabled = value; } #[wasm_bindgen(getter)] pub fn backend(&self) -> String { self.backend.clone() } #[wasm_bindgen(setter)] pub fn set_backend(&mut self, value: String) { self.backend = value; } #[wasm_bindgen(getter)] pub fn language(&self) -> String { self.language.clone() } #[wasm_bindgen(setter)] pub fn set_language(&mut self, value: String) { self.language = value; } #[wasm_bindgen(getter, js_name = "tesseractConfig")] pub fn tesseract_config(&self) -> Option {
self.tesseract_config.clone() } #[wasm_bindgen(setter, js_name = "tesseractConfig")] pub fn set_tesseract_config(&mut self, value: Option) { self.tesseract_config = value; } #[wasm_bindgen(getter, js_name = "outputFormat")] pub fn output_format(&self) -> Option { self.output_format.map(|v| v.to_api_str().to_owned()) } #[wasm_bindgen(setter, js_name = "outputFormat")] pub fn set_output_format(&mut self, value: Option) { self.output_format = value; } #[wasm_bindgen(getter, js_name = "paddleOcrConfig")] pub fn paddle_ocr_config(&self) -> Option { self.paddle_ocr_config.clone() } #[wasm_bindgen(setter, js_name = "paddleOcrConfig")] pub fn set_paddle_ocr_config(&mut self, value: Option) { self.paddle_ocr_config = value; } #[wasm_bindgen(getter, js_name = "backendOptions")] pub fn backend_options(&self) -> Option { self.backend_options.clone() } #[wasm_bindgen(setter, js_name = "backendOptions")] pub fn set_backend_options(&mut self, value: Option) { self.backend_options = value; } #[wasm_bindgen(getter, js_name = "elementConfig")] pub fn element_config(&self) -> Option { self.element_config.clone() } #[wasm_bindgen(setter, js_name = "elementConfig")] pub fn set_element_config(&mut self, value: Option) { self.element_config = value; } #[wasm_bindgen(getter, js_name = "qualityThresholds")] pub fn quality_thresholds(&self) -> Option { self.quality_thresholds.clone() } #[wasm_bindgen(setter, js_name = "qualityThresholds")] pub fn set_quality_thresholds(&mut self, value: Option) { self.quality_thresholds = value; } #[wasm_bindgen(getter)] pub fn pipeline(&self) -> Option { self.pipeline.clone() } #[wasm_bindgen(setter)] pub fn set_pipeline(&mut self, value: Option) { self.pipeline = value; } #[wasm_bindgen(getter, js_name = "autoRotate")] pub fn auto_rotate(&self) -> bool { self.auto_rotate } #[wasm_bindgen(setter, js_name = "autoRotate")] pub fn set_auto_rotate(&mut self, value: bool) { self.auto_rotate = value; } #[wasm_bindgen(getter, js_name = "vlmConfig")] pub fn
vlm_config(&self) -> Option { self.vlm_config.clone() } #[wasm_bindgen(setter, js_name = "vlmConfig")] pub fn set_vlm_config(&mut self, value: Option) { self.vlm_config = value; } #[wasm_bindgen(getter, js_name = "vlmPrompt")] pub fn vlm_prompt(&self) -> Option { self.vlm_prompt.clone() } #[wasm_bindgen(setter, js_name = "vlmPrompt")] pub fn set_vlm_prompt(&mut self, value: Option) { self.vlm_prompt = value; } #[wasm_bindgen(getter)] pub fn acceleration(&self) -> Option { self.acceleration.clone() } #[wasm_bindgen(setter)] pub fn set_acceleration(&mut self, value: Option) { self.acceleration = value; } #[wasm_bindgen(getter, js_name = "tessdataBytes")] pub fn tessdata_bytes(&self) -> Option { self.tessdata_bytes.clone() } #[wasm_bindgen(setter, js_name = "tessdataBytes")] pub fn set_tessdata_bytes(&mut self, value: Option) { self.tessdata_bytes = value; } #[allow(clippy::should_implement_trait)] #[wasm_bindgen] pub fn default() -> WasmOcrConfig { kreuzberg::OcrConfig::default().into() } } /// Page extraction and tracking configuration. /// /// Controls how pages are extracted, tracked, and represented in the extraction results. /// When `None`, page tracking is disabled. /// /// Page range tracking in chunk metadata (first_page/last_page) is automatically enabled /// when page boundaries are available and chunking is configured.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageConfig {
    // Exposed to JS as `extractPages`.
    extract_pages: bool,
    // Exposed to JS as `insertPageMarkers`.
    insert_page_markers: bool,
    // Exposed to JS as `markerFormat`.
    marker_format: String,
}

#[wasm_bindgen]
impl WasmPageConfig {
    /// Builds the page configuration from optional JS arguments.
    ///
    /// Omitted arguments fall back to:
    /// - `extractPages`: `false`
    /// - `insertPageMarkers`: `false`
    /// - `markerFormat`: the literal used in the body below
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        extractPages: Option<bool>,
        insertPageMarkers: Option<bool>,
        markerFormat: Option<String>,
    ) -> WasmPageConfig {
        WasmPageConfig {
            extract_pages: extractPages.unwrap_or(false),
            insert_page_markers: insertPageMarkers.unwrap_or(false),
            // NOTE(review): the fallback marker is four newlines with no page
            // placeholder — confirm it matches `kreuzberg::PageConfig::default()`.
            marker_format: markerFormat.unwrap_or_else(|| "\n\n\n\n".to_string()),
        }
    }

    /// Returns the `extractPages` flag.
    #[wasm_bindgen(getter, js_name = "extractPages")]
    pub fn extract_pages(&self) -> bool {
        self.extract_pages
    }

    /// Sets the `extractPages` flag.
    #[wasm_bindgen(setter, js_name = "extractPages")]
    pub fn set_extract_pages(&mut self, value: bool) {
        self.extract_pages = value;
    }

    /// Returns the `insertPageMarkers` flag.
    #[wasm_bindgen(getter, js_name = "insertPageMarkers")]
    pub fn insert_page_markers(&self) -> bool {
        self.insert_page_markers
    }

    /// Sets the `insertPageMarkers` flag.
    #[wasm_bindgen(setter, js_name = "insertPageMarkers")]
    pub fn set_insert_page_markers(&mut self, value: bool) {
        self.insert_page_markers = value;
    }

    /// Returns a copy of the stored marker format string.
    #[wasm_bindgen(getter, js_name = "markerFormat")]
    pub fn marker_format(&self) -> String {
        self.marker_format.clone()
    }

    /// Replaces the stored marker format string.
    #[wasm_bindgen(setter, js_name = "markerFormat")]
    pub fn set_marker_format(&mut self, value: String) {
        self.marker_format = value;
    }

    /// Returns the wrapper obtained by converting
    /// `kreuzberg::PageConfig::default()` with `Into`.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmPageConfig {
        kreuzberg::PageConfig::default().into()
    }
}

/// Post-processor configuration.
// Review notes for the two wasm-bindgen wrappers defined on the following lines.
//
// WasmPostProcessorConfig
//   * `new` receives every argument as an `Option`. `enabled` falls back to
//     `true`; `enabledProcessors`, `disabledProcessors`, `enabledSet` and
//     `disabledSet` are stored exactly as passed.
//   * `enabled` is returned as a plain `bool`; the four collection getters
//     return clones of the stored value.
//   * `default()` converts `kreuzberg::PostProcessorConfig::default()` with `Into`.
//
// WasmChunkingConfig
//   * Fallbacks applied in `new`: `maxCharacters` -> `1000`, `overlap` -> `200`,
//     `trim` -> `true`, `chunkerType` and `sizing` -> `unwrap_or_default()`,
//     `prependHeadingContext` -> `false`. `embedding`, `preset` and
//     `topicThreshold` are stored exactly as passed.
//   * Positional order differs from field order: `sizing` and
//     `prependHeadingContext` come before `embedding` and `preset`.
//   * The `chunkerType` getter returns `to_api_str().to_owned()` (a `String`),
//     whereas the setter accepts a `WasmChunkerType`.
//   * `default()` converts `kreuzberg::ChunkingConfig::default()` with `Into`.
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmPostProcessorConfig { enabled: bool, enabled_processors: Option>, disabled_processors: Option>, enabled_set: Option>, disabled_set: Option>, } #[wasm_bindgen] impl WasmPostProcessorConfig { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( enabled: Option, enabledProcessors: Option>, disabledProcessors: Option>, enabledSet: Option>, disabledSet: Option>, ) -> WasmPostProcessorConfig { WasmPostProcessorConfig { enabled: enabled.unwrap_or(true), enabled_processors: enabledProcessors, disabled_processors: disabledProcessors, enabled_set: enabledSet, disabled_set: disabledSet, } } #[wasm_bindgen(getter)] pub fn enabled(&self) -> bool { self.enabled } #[wasm_bindgen(setter)] pub fn set_enabled(&mut self, value: bool) { self.enabled = value; } #[wasm_bindgen(getter, js_name = "enabledProcessors")] pub fn enabled_processors(&self) -> Option> { self.enabled_processors.clone() } #[wasm_bindgen(setter, js_name = "enabledProcessors")] pub fn set_enabled_processors(&mut self, value: Option>) { self.enabled_processors = value; } #[wasm_bindgen(getter, js_name = "disabledProcessors")] pub fn disabled_processors(&self) -> Option> { self.disabled_processors.clone() } #[wasm_bindgen(setter, js_name = "disabledProcessors")] pub fn set_disabled_processors(&mut self, value: Option>) { self.disabled_processors = value; } #[wasm_bindgen(getter, js_name = "enabledSet")] pub fn enabled_set(&self) -> Option> { self.enabled_set.clone() } #[wasm_bindgen(setter, js_name = "enabledSet")] pub fn set_enabled_set(&mut self, value: Option>) { self.enabled_set = value; } #[wasm_bindgen(getter, js_name = "disabledSet")] pub fn disabled_set(&self) -> Option> { self.disabled_set.clone() } #[wasm_bindgen(setter, js_name = "disabledSet")] pub fn set_disabled_set(&mut self, value: Option>) { self.disabled_set = value; } #[allow(clippy::should_implement_trait)] #[wasm_bindgen] pub fn default() -> WasmPostProcessorConfig {
kreuzberg::PostProcessorConfig::default().into() } } /// Chunking configuration. /// /// Configures text chunking for document content, including chunk size, /// overlap, trimming behavior, and optional embeddings. /// /// Use `..Default.default()` when constructing to allow for future field additions: #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmChunkingConfig { max_characters: usize, overlap: usize, trim: bool, chunker_type: WasmChunkerType, embedding: Option, preset: Option, sizing: JsValue, prepend_heading_context: bool, topic_threshold: Option, } #[wasm_bindgen] impl WasmChunkingConfig { #[allow(clippy::too_many_arguments)] #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( maxCharacters: Option, overlap: Option, trim: Option, chunkerType: Option, sizing: Option, prependHeadingContext: Option, embedding: Option, preset: Option, topicThreshold: Option, ) -> WasmChunkingConfig { WasmChunkingConfig { max_characters: maxCharacters.unwrap_or(1000), overlap: overlap.unwrap_or(200), trim: trim.unwrap_or(true), chunker_type: chunkerType.unwrap_or_default(), embedding, preset, sizing: sizing.unwrap_or_default(), prepend_heading_context: prependHeadingContext.unwrap_or(false), topic_threshold: topicThreshold, } } #[wasm_bindgen(getter, js_name = "maxCharacters")] pub fn max_characters(&self) -> usize { self.max_characters } #[wasm_bindgen(setter, js_name = "maxCharacters")] pub fn set_max_characters(&mut self, value: usize) { self.max_characters = value; } #[wasm_bindgen(getter)] pub fn overlap(&self) -> usize { self.overlap } #[wasm_bindgen(setter)] pub fn set_overlap(&mut self, value: usize) { self.overlap = value; } #[wasm_bindgen(getter)] pub fn trim(&self) -> bool { self.trim } #[wasm_bindgen(setter)] pub fn set_trim(&mut self, value: bool) { self.trim = value; } #[wasm_bindgen(getter, js_name = "chunkerType")] pub fn chunker_type(&self) -> String { self.chunker_type.to_api_str().to_owned() } #[wasm_bindgen(setter, js_name =
"chunkerType")] pub fn set_chunker_type(&mut self, value: WasmChunkerType) { self.chunker_type = value; } #[wasm_bindgen(getter)] pub fn embedding(&self) -> Option { self.embedding.clone() } #[wasm_bindgen(setter)] pub fn set_embedding(&mut self, value: Option) { self.embedding = value; } #[wasm_bindgen(getter)] pub fn preset(&self) -> Option { self.preset.clone() } #[wasm_bindgen(setter)] pub fn set_preset(&mut self, value: Option) { self.preset = value; } #[wasm_bindgen(getter)] pub fn sizing(&self) -> JsValue { self.sizing.clone() } #[wasm_bindgen(setter)] pub fn set_sizing(&mut self, value: JsValue) { self.sizing = value; } #[wasm_bindgen(getter, js_name = "prependHeadingContext")] pub fn prepend_heading_context(&self) -> bool { self.prepend_heading_context } #[wasm_bindgen(setter, js_name = "prependHeadingContext")] pub fn set_prepend_heading_context(&mut self, value: bool) { self.prepend_heading_context = value; } #[wasm_bindgen(getter, js_name = "topicThreshold")] pub fn topic_threshold(&self) -> Option { self.topic_threshold } #[wasm_bindgen(setter, js_name = "topicThreshold")] pub fn set_topic_threshold(&mut self, value: Option) { self.topic_threshold = value; } #[allow(clippy::should_implement_trait)] #[wasm_bindgen] pub fn default() -> WasmChunkingConfig { kreuzberg::ChunkingConfig::default().into() } } /// Embedding configuration for text chunks. /// /// Configures embedding generation using ONNX models via the vendored embedding engine. /// Requires the `embeddings` feature to be enabled.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmbeddingConfig {
    model: JsValue,
    normalize: bool,
    batch_size: usize,
    show_download_progress: bool,
    cache_dir: Option,
    acceleration: Option,
    max_embed_duration_secs: Option,
}

#[wasm_bindgen]
impl WasmEmbeddingConfig {
    /// Creates an embedding configuration from JavaScript.
    ///
    /// Every argument is optional. Omitted values fall back as follows:
    /// `model` to the default `JsValue`, `normalize` to `true`, `batchSize` to `32`
    /// and `showDownloadProgress` to `false`. `cacheDir`, `acceleration` and
    /// `maxEmbedDurationSecs` are stored exactly as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        model: Option,
        normalize: Option,
        batchSize: Option,
        showDownloadProgress: Option,
        cacheDir: Option,
        acceleration: Option,
        maxEmbedDurationSecs: Option,
    ) -> WasmEmbeddingConfig {
        WasmEmbeddingConfig {
            model: model.unwrap_or_default(),
            normalize: normalize.unwrap_or(true),
            batch_size: batchSize.unwrap_or(32),
            show_download_progress: showDownloadProgress.unwrap_or(false),
            cache_dir: cacheDir,
            acceleration,
            max_embed_duration_secs: maxEmbedDurationSecs,
        }
    }

    /// Returns a clone of `model`.
    #[wasm_bindgen(getter)]
    pub fn model(&self) -> JsValue {
        self.model.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_model(&mut self, value: JsValue) {
        self.model = value;
    }

    /// Returns `normalize`.
    #[wasm_bindgen(getter)]
    pub fn normalize(&self) -> bool {
        self.normalize
    }

    #[wasm_bindgen(setter)]
    pub fn set_normalize(&mut self, value: bool) {
        self.normalize = value;
    }

    /// Returns `batch_size` (JS property `batchSize`).
    #[wasm_bindgen(getter, js_name = "batchSize")]
    pub fn batch_size(&self) -> usize {
        self.batch_size
    }

    #[wasm_bindgen(setter, js_name = "batchSize")]
    pub fn set_batch_size(&mut self, value: usize) {
        self.batch_size = value;
    }

    /// Returns `show_download_progress` (JS property `showDownloadProgress`).
    #[wasm_bindgen(getter, js_name = "showDownloadProgress")]
    pub fn show_download_progress(&self) -> bool {
        self.show_download_progress
    }

    #[wasm_bindgen(setter, js_name = "showDownloadProgress")]
    pub fn set_show_download_progress(&mut self, value: bool) {
        self.show_download_progress = value;
    }

    /// Returns a clone of `cache_dir` (JS property `cacheDir`).
    #[wasm_bindgen(getter, js_name = "cacheDir")]
    pub fn cache_dir(&self) -> Option {
        self.cache_dir.clone()
    }

    #[wasm_bindgen(setter, js_name = "cacheDir")]
    pub fn set_cache_dir(&mut self, value: Option) {
        self.cache_dir = value;
    }

    /// Returns a clone of `acceleration`.
    #[wasm_bindgen(getter)]
    pub fn acceleration(&self) -> Option {
        self.acceleration.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_acceleration(&mut self, value: Option) {
        self.acceleration = value;
    }

    /// Returns `max_embed_duration_secs` (JS property `maxEmbedDurationSecs`).
    #[wasm_bindgen(getter, js_name = "maxEmbedDurationSecs")]
    pub fn max_embed_duration_secs(&self) -> Option {
        self.max_embed_duration_secs
    }

    #[wasm_bindgen(setter, js_name = "maxEmbedDurationSecs")]
    pub fn set_max_embed_duration_secs(&mut self, value: Option) {
        self.max_embed_duration_secs = value;
    }

    /// Returns the defaults of `kreuzberg::EmbeddingConfig`, converted into the wasm wrapper.
    ///
    /// This inherent function is what `WasmEmbeddingConfig::default()` resolves to.
    /// The derived `Default` impl is a different thing: it zero-initializes the fields
    /// (`normalize = false`, `batch_size = 0`), which does not match the constructor fallbacks.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmEmbeddingConfig {
        kreuzberg::EmbeddingConfig::default().into()
    }
}

/// A supported document format entry.
///
/// Represents a file extension and its corresponding MIME type that Kreuzberg can process.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmSupportedFormat {
    extension: String,
    mime_type: String,
}

#[wasm_bindgen]
impl WasmSupportedFormat {
    /// Creates a format entry from an extension and its MIME type; both are required.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(extension: String, mimeType: String) -> WasmSupportedFormat {
        WasmSupportedFormat {
            extension,
            mime_type: mimeType,
        }
    }

    /// Returns a default-initialized `WasmSupportedFormat`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmSupportedFormat {
        ::default()
    }

    /// Returns a clone of `extension`.
    #[wasm_bindgen(getter)]
    pub fn extension(&self) -> String {
        self.extension.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_extension(&mut self, value: String) {
        self.extension = value;
    }

    /// Returns a clone of `mime_type` (JS property `mimeType`).
    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        self.mime_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }
}

/// Wasm-exposed record holding `content`, `format`, `metadata` and `text_fields`.
///
/// NOTE(review): presumably the result of a structured-data extraction; confirm
/// against the corresponding kreuzberg type.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmStructuredDataResult {
    content: String,
    format: String,
    metadata: JsValue,
    text_fields: Vec,
}

#[wasm_bindgen]
impl WasmStructuredDataResult {
    /// Creates a result from its four fields; all arguments are required and stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        format: String,
        metadata: JsValue,
        textFields: Vec,
    ) -> WasmStructuredDataResult {
        WasmStructuredDataResult {
            content,
            format,
            metadata,
            text_fields: textFields,
        }
    }

    /// Returns a default-initialized `WasmStructuredDataResult`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmStructuredDataResult {
        ::default()
    }

    /// Returns a clone of `content`.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a clone of `format`.
    #[wasm_bindgen(getter)]
    pub fn format(&self) -> String {
        self.format.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: String) {
        self.format = value;
    }

    /// Returns a clone of `metadata`.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }

    /// Returns a clone of `text_fields` (JS property `textFields`).
    #[wasm_bindgen(getter, js_name = "textFields")]
    pub fn text_fields(&self) -> Vec {
        self.text_fields.clone()
    }

    #[wasm_bindgen(setter, js_name = "textFields")]
    pub fn set_text_fields(&mut self, value: Vec) {
        self.text_fields = value;
    }
}

/// Application properties from docProps/app.xml for XLSX
///
/// Contains Excel-specific document metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXlsxAppProperties {
    application: Option,
    app_version: Option,
    doc_security: Option,
    scale_crop: Option,
    links_up_to_date: Option,
    shared_doc: Option,
    hyperlinks_changed: Option,
    company: Option,
    worksheet_names: Vec,
}

#[wasm_bindgen]
impl WasmXlsxAppProperties {
    /// Creates XLSX application properties from JavaScript.
    ///
    /// All arguments are optional. `worksheetNames` comes first and falls back to an
    /// empty list when omitted; every other argument is stored as passed.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        worksheetNames: Option>,
        application: Option,
        appVersion: Option,
        docSecurity: Option,
        scaleCrop: Option,
        linksUpToDate: Option,
        sharedDoc: Option,
        hyperlinksChanged: Option,
        company: Option,
    ) -> WasmXlsxAppProperties {
        WasmXlsxAppProperties {
            application,
            app_version: appVersion,
            doc_security: docSecurity,
            scale_crop: scaleCrop,
            links_up_to_date: linksUpToDate,
            shared_doc: sharedDoc,
            hyperlinks_changed: hyperlinksChanged,
            company,
            worksheet_names: worksheetNames.unwrap_or_default(),
        }
    }

    /// Returns a default-initialized `WasmXlsxAppProperties`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmXlsxAppProperties {
        ::default()
    }

    /// Returns a clone of `application`.
    #[wasm_bindgen(getter)]
    pub fn application(&self) -> Option {
        self.application.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_application(&mut self, value: Option) {
        self.application = value;
    }

    /// Returns a clone of `app_version` (JS property `appVersion`).
    #[wasm_bindgen(getter, js_name = "appVersion")]
    pub fn app_version(&self) -> Option {
        self.app_version.clone()
    }

    #[wasm_bindgen(setter, js_name = "appVersion")]
    pub fn set_app_version(&mut self, value: Option) {
        self.app_version = value;
    }

    /// Returns `doc_security` (JS property `docSecurity`).
    #[wasm_bindgen(getter, js_name = "docSecurity")]
    pub fn doc_security(&self) -> Option {
        self.doc_security
    }

    #[wasm_bindgen(setter, js_name = "docSecurity")]
    pub fn set_doc_security(&mut self, value: Option) {
        self.doc_security = value;
    }

    /// Returns `scale_crop` (JS property `scaleCrop`).
    #[wasm_bindgen(getter, js_name = "scaleCrop")]
    pub fn scale_crop(&self) -> Option {
        self.scale_crop
    }

    #[wasm_bindgen(setter, js_name = "scaleCrop")]
    pub fn set_scale_crop(&mut self, value: Option) {
        self.scale_crop = value;
    }

    /// Returns `links_up_to_date` (JS property `linksUpToDate`).
    #[wasm_bindgen(getter, js_name = "linksUpToDate")]
    pub fn links_up_to_date(&self) -> Option {
        self.links_up_to_date
    }

    #[wasm_bindgen(setter, js_name = "linksUpToDate")]
    pub fn set_links_up_to_date(&mut self, value: Option) {
        self.links_up_to_date = value;
    }

    /// Returns `shared_doc` (JS property `sharedDoc`).
    #[wasm_bindgen(getter, js_name = "sharedDoc")]
    pub fn shared_doc(&self) -> Option {
        self.shared_doc
    }

    #[wasm_bindgen(setter, js_name = "sharedDoc")]
    pub fn set_shared_doc(&mut self, value: Option) {
        self.shared_doc = value;
    }

    /// Returns `hyperlinks_changed` (JS property `hyperlinksChanged`).
    #[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
    pub fn hyperlinks_changed(&self) -> Option {
        self.hyperlinks_changed
    }

    #[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
    pub fn set_hyperlinks_changed(&mut self, value: Option) {
        self.hyperlinks_changed = value;
    }

    /// Returns a clone of `company`.
    #[wasm_bindgen(getter)]
    pub fn company(&self) -> Option {
        self.company.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_company(&mut self, value: Option) {
        self.company = value;
    }

    /// Returns a clone of `worksheet_names` (JS property `worksheetNames`).
    #[wasm_bindgen(getter, js_name = "worksheetNames")]
    pub fn worksheet_names(&self) -> Vec {
        self.worksheet_names.clone()
    }

    #[wasm_bindgen(setter, js_name = "worksheetNames")]
    pub fn set_worksheet_names(&mut self, value: Vec) {
        self.worksheet_names = value;
    }
}

/// Application properties from docProps/app.xml for PPTX
///
/// Contains PowerPoint-specific document metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxAppProperties {
    application: Option,
    app_version: Option,
    total_time: Option,
    company: Option,
    doc_security: Option,
    scale_crop: Option,
    links_up_to_date: Option,
    shared_doc: Option,
    hyperlinks_changed: Option,
    slides: Option,
    notes: Option,
    hidden_slides: Option,
    multimedia_clips: Option,
    presentation_format: Option,
    slide_titles: Vec,
}

#[wasm_bindgen]
impl WasmPptxAppProperties {
    /// Creates PPTX application properties from JavaScript.
    ///
    /// All arguments are optional. `slideTitles` comes first and falls back to an
    /// empty list when omitted; every other argument is stored as passed.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        slideTitles: Option>,
        application: Option,
        appVersion: Option,
        totalTime: Option,
        company: Option,
        docSecurity: Option,
        scaleCrop: Option,
        linksUpToDate: Option,
        sharedDoc: Option,
        hyperlinksChanged: Option,
        slides: Option,
        notes: Option,
        hiddenSlides: Option,
        multimediaClips: Option,
        presentationFormat: Option,
    ) -> WasmPptxAppProperties {
        WasmPptxAppProperties {
            application,
            app_version: appVersion,
            total_time: totalTime,
            company,
            doc_security: docSecurity,
            scale_crop: scaleCrop,
            links_up_to_date: linksUpToDate,
            shared_doc: sharedDoc,
            hyperlinks_changed: hyperlinksChanged,
            slides,
            notes,
            hidden_slides: hiddenSlides,
            multimedia_clips: multimediaClips,
            presentation_format: presentationFormat,
            slide_titles: slideTitles.unwrap_or_default(),
        }
    }

    /// Returns a default-initialized `WasmPptxAppProperties`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPptxAppProperties {
        ::default()
    }

    /// Returns a clone of `application`.
    #[wasm_bindgen(getter)]
    pub fn application(&self) -> Option {
        self.application.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_application(&mut self, value: Option) {
        self.application = value;
    }

    /// Returns a clone of `app_version` (JS property `appVersion`).
    #[wasm_bindgen(getter, js_name = "appVersion")]
    pub fn app_version(&self) -> Option {
        self.app_version.clone()
    }

    #[wasm_bindgen(setter, js_name = "appVersion")]
    pub fn set_app_version(&mut self, value: Option) {
        self.app_version = value;
    }

    /// Returns `total_time` (JS property `totalTime`).
    #[wasm_bindgen(getter, js_name = "totalTime")]
    pub fn total_time(&self) -> Option {
        self.total_time
    }

    #[wasm_bindgen(setter, js_name = "totalTime")]
    pub fn set_total_time(&mut self, value: Option) {
        self.total_time = value;
    }

    /// Returns a clone of `company`.
    #[wasm_bindgen(getter)]
    pub fn company(&self) -> Option {
        self.company.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_company(&mut self, value: Option) {
        self.company = value;
    }

    /// Returns `doc_security` (JS property `docSecurity`).
    #[wasm_bindgen(getter, js_name = "docSecurity")]
    pub fn doc_security(&self) -> Option {
        self.doc_security
    }

    #[wasm_bindgen(setter, js_name = "docSecurity")]
    pub fn set_doc_security(&mut self, value: Option) {
        self.doc_security = value;
    }

    /// Returns `scale_crop` (JS property `scaleCrop`).
    #[wasm_bindgen(getter, js_name = "scaleCrop")]
    pub fn scale_crop(&self) -> Option {
        self.scale_crop
    }

    #[wasm_bindgen(setter, js_name = "scaleCrop")]
    pub fn set_scale_crop(&mut self, value: Option) {
        self.scale_crop = value;
    }

    /// Returns `links_up_to_date` (JS property `linksUpToDate`).
    #[wasm_bindgen(getter, js_name = "linksUpToDate")]
    pub fn links_up_to_date(&self) -> Option {
        self.links_up_to_date
    }

    #[wasm_bindgen(setter, js_name = "linksUpToDate")]
    pub fn set_links_up_to_date(&mut self, value: Option) {
        self.links_up_to_date = value;
    }

    /// Returns `shared_doc` (JS property `sharedDoc`).
    #[wasm_bindgen(getter, js_name = "sharedDoc")]
    pub fn shared_doc(&self) -> Option {
        self.shared_doc
    }

    #[wasm_bindgen(setter, js_name = "sharedDoc")]
    pub fn set_shared_doc(&mut self, value: Option) {
        self.shared_doc = value;
    }

    /// Returns `hyperlinks_changed` (JS property `hyperlinksChanged`).
    #[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
    pub fn hyperlinks_changed(&self) -> Option {
        self.hyperlinks_changed
    }

    #[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
    pub fn set_hyperlinks_changed(&mut self, value: Option) {
        self.hyperlinks_changed = value;
    }

    /// Returns `slides`.
    #[wasm_bindgen(getter)]
    pub fn slides(&self) -> Option {
        self.slides
    }

    #[wasm_bindgen(setter)]
    pub fn set_slides(&mut self, value: Option) {
        self.slides = value;
    }

    /// Returns `notes`.
    #[wasm_bindgen(getter)]
    pub fn notes(&self) -> Option {
        self.notes
    }

    #[wasm_bindgen(setter)]
    pub fn set_notes(&mut self, value: Option) {
        self.notes = value;
    }

    /// Returns `hidden_slides` (JS property `hiddenSlides`).
    #[wasm_bindgen(getter, js_name = "hiddenSlides")]
    pub fn hidden_slides(&self) -> Option {
        self.hidden_slides
    }

    #[wasm_bindgen(setter, js_name = "hiddenSlides")]
    pub fn set_hidden_slides(&mut self, value: Option) {
        self.hidden_slides = value;
    }

    /// Returns `multimedia_clips` (JS property `multimediaClips`).
    #[wasm_bindgen(getter, js_name = "multimediaClips")]
    pub fn multimedia_clips(&self) -> Option {
        self.multimedia_clips
    }

    #[wasm_bindgen(setter, js_name = "multimediaClips")]
    pub fn set_multimedia_clips(&mut self, value: Option) {
        self.multimedia_clips = value;
    }

    /// Returns a clone of `presentation_format` (JS property `presentationFormat`).
    #[wasm_bindgen(getter, js_name = "presentationFormat")]
    pub fn presentation_format(&self) -> Option {
        self.presentation_format.clone()
    }

    #[wasm_bindgen(setter, js_name = "presentationFormat")]
    pub fn set_presentation_format(&mut self, value: Option) {
        self.presentation_format = value;
    }

    /// Returns a clone of `slide_titles` (JS property `slideTitles`).
    #[wasm_bindgen(getter, js_name = "slideTitles")]
    pub fn slide_titles(&self) -> Vec {
        self.slide_titles.clone()
    }

    #[wasm_bindgen(setter, js_name = "slideTitles")]
    pub fn set_slide_titles(&mut self, value: Vec) {
        self.slide_titles = value;
    }
}

/// Configuration for security limits across extractors.
///
/// All limits are intentionally conservative to prevent DoS attacks
/// while still supporting legitimate documents.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmSecurityLimits {
    max_archive_size: usize,
    max_compression_ratio: usize,
    max_files_in_archive: usize,
    max_nesting_depth: usize,
    max_entity_length: usize,
    max_content_size: usize,
    max_iterations: usize,
    max_xml_depth: usize,
    max_table_cells: usize,
}

#[wasm_bindgen]
impl WasmSecurityLimits {
    /// Creates a set of security limits from JavaScript.
    ///
    /// Every argument is optional. Omitted values fall back to:
    ///
    /// - `maxArchiveSize`: `524288000` (500 * 1024 * 1024)
    /// - `maxCompressionRatio`: `100`
    /// - `maxFilesInArchive`: `10000`
    /// - `maxNestingDepth`: `1024`
    /// - `maxEntityLength`: `1048576` (1024 * 1024)
    /// - `maxContentSize`: `104857600` (100 * 1024 * 1024)
    /// - `maxIterations`: `10000000`
    /// - `maxXmlDepth`: `1024`
    /// - `maxTableCells`: `100000`
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        maxArchiveSize: Option,
        maxCompressionRatio: Option,
        maxFilesInArchive: Option,
        maxNestingDepth: Option,
        maxEntityLength: Option,
        maxContentSize: Option,
        maxIterations: Option,
        maxXmlDepth: Option,
        maxTableCells: Option,
    ) -> WasmSecurityLimits {
        WasmSecurityLimits {
            max_archive_size: maxArchiveSize.unwrap_or(524288000),
            max_compression_ratio: maxCompressionRatio.unwrap_or(100),
            max_files_in_archive: maxFilesInArchive.unwrap_or(10000),
            max_nesting_depth: maxNestingDepth.unwrap_or(1024),
            max_entity_length: maxEntityLength.unwrap_or(1048576),
            max_content_size: maxContentSize.unwrap_or(104857600),
            max_iterations: maxIterations.unwrap_or(10000000),
            max_xml_depth: maxXmlDepth.unwrap_or(1024),
            max_table_cells: maxTableCells.unwrap_or(100000),
        }
    }

    /// Returns `max_archive_size` (JS property `maxArchiveSize`).
    #[wasm_bindgen(getter, js_name = "maxArchiveSize")]
    pub fn max_archive_size(&self) -> usize {
        self.max_archive_size
    }

    #[wasm_bindgen(setter, js_name = "maxArchiveSize")]
    pub fn set_max_archive_size(&mut self, value: usize) {
        self.max_archive_size = value;
    }

    /// Returns `max_compression_ratio` (JS property `maxCompressionRatio`).
    #[wasm_bindgen(getter, js_name = "maxCompressionRatio")]
    pub fn max_compression_ratio(&self) -> usize {
        self.max_compression_ratio
    }

    #[wasm_bindgen(setter, js_name = "maxCompressionRatio")]
    pub fn set_max_compression_ratio(&mut self, value: usize) {
        self.max_compression_ratio = value;
    }

    /// Returns `max_files_in_archive` (JS property `maxFilesInArchive`).
    #[wasm_bindgen(getter, js_name = "maxFilesInArchive")]
    pub fn max_files_in_archive(&self) -> usize {
        self.max_files_in_archive
    }

    #[wasm_bindgen(setter, js_name = "maxFilesInArchive")]
    pub fn set_max_files_in_archive(&mut self, value: usize) {
        self.max_files_in_archive = value;
    }

    /// Returns `max_nesting_depth` (JS property `maxNestingDepth`).
    #[wasm_bindgen(getter, js_name = "maxNestingDepth")]
    pub fn max_nesting_depth(&self) -> usize {
        self.max_nesting_depth
    }

    #[wasm_bindgen(setter, js_name = "maxNestingDepth")]
    pub fn set_max_nesting_depth(&mut self, value: usize) {
        self.max_nesting_depth = value;
    }

    /// Returns `max_entity_length` (JS property `maxEntityLength`).
    #[wasm_bindgen(getter, js_name = "maxEntityLength")]
    pub fn max_entity_length(&self) -> usize {
        self.max_entity_length
    }

    #[wasm_bindgen(setter, js_name = "maxEntityLength")]
    pub fn set_max_entity_length(&mut self, value: usize) {
        self.max_entity_length = value;
    }

    /// Returns `max_content_size` (JS property `maxContentSize`).
    #[wasm_bindgen(getter, js_name = "maxContentSize")]
    pub fn max_content_size(&self) -> usize {
        self.max_content_size
    }

    #[wasm_bindgen(setter, js_name = "maxContentSize")]
    pub fn set_max_content_size(&mut self, value: usize) {
        self.max_content_size = value;
    }

    /// Returns `max_iterations` (JS property `maxIterations`).
    #[wasm_bindgen(getter, js_name = "maxIterations")]
    pub fn max_iterations(&self) -> usize {
        self.max_iterations
    }

    #[wasm_bindgen(setter, js_name = "maxIterations")]
    pub fn set_max_iterations(&mut self, value: usize) {
        self.max_iterations = value;
    }

    /// Returns `max_xml_depth` (JS property `maxXmlDepth`).
    #[wasm_bindgen(getter, js_name = "maxXmlDepth")]
    pub fn max_xml_depth(&self) -> usize {
        self.max_xml_depth
    }

    #[wasm_bindgen(setter, js_name = "maxXmlDepth")]
    pub fn set_max_xml_depth(&mut self, value: usize) {
        self.max_xml_depth = value;
    }

    /// Returns `max_table_cells` (JS property `maxTableCells`).
    #[wasm_bindgen(getter, js_name = "maxTableCells")]
    pub fn max_table_cells(&self) -> usize {
        self.max_table_cells
    }

    #[wasm_bindgen(setter, js_name = "maxTableCells")]
    pub fn set_max_table_cells(&mut self, value: usize) {
        self.max_table_cells = value;
    }

    /// Returns the defaults of `kreuzberg::SecurityLimits`, converted into the wasm wrapper.
    ///
    /// This inherent function is what `WasmSecurityLimits::default()` resolves to.
    /// NOTE(review): the derived `Default` impl instead sets every limit to `0`, which
    /// differs from the constructor fallbacks; avoid obtaining limits through the trait.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmSecurityLimits {
        kreuzberg::SecurityLimits::default().into()
    }
}

/// A PDF annotation extracted from a document page.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPdfAnnotation {
    annotation_type: WasmPdfAnnotationType,
    content: Option,
    page_number: u32,
    bounding_box: Option,
}

#[wasm_bindgen]
impl WasmPdfAnnotation {
    /// Creates a PDF annotation from JavaScript.
    ///
    /// `annotationType` and `pageNumber` are required and come first; `content` and
    /// `boundingBox` are optional and stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        annotationType: WasmPdfAnnotationType,
        pageNumber: u32,
        content: Option,
        boundingBox: Option,
    ) -> WasmPdfAnnotation {
        WasmPdfAnnotation {
            annotation_type: annotationType,
            content,
            page_number: pageNumber,
            bounding_box: boundingBox,
        }
    }

    /// Returns a default-initialized `WasmPdfAnnotation`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPdfAnnotation {
        ::default()
    }

    /// Returns the annotation type as an owned string produced by `to_api_str()`
    /// (JS property `annotationType`).
    #[wasm_bindgen(getter, js_name = "annotationType")]
    pub fn annotation_type(&self) -> String {
        self.annotation_type.to_api_str().to_owned()
    }

    /// Sets the annotation type. Takes the enum value, whereas the getter returns a string.
    #[wasm_bindgen(setter, js_name = "annotationType")]
    pub fn set_annotation_type(&mut self, value: WasmPdfAnnotationType) {
        self.annotation_type = value;
    }

    /// Returns a clone of `content`.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> Option {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: Option) {
        self.content = value;
    }

    /// Returns `page_number` (JS property `pageNumber`).
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of `bounding_box` (JS property `boundingBox`).
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option {
        self.bounding_box.clone()
    }

    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option) {
        self.bounding_box = value;
    }
}

/// Comprehensive Djot document structure with semantic preservation.
///
/// This type captures the full richness of Djot markup, including:
/// - Block-level structures (headings, lists, blockquotes, code blocks, etc.)
/// - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.)
/// - Attributes (classes, IDs, key-value pairs)
/// - Links, images, footnotes
/// - Math expressions (inline and display)
/// - Tables with full structure
///
/// Available when the `djot` feature is enabled.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotContent {
    plain_text: String,
    blocks: Vec,
    metadata: WasmMetadata,
    tables: Vec,
    images: Vec,
    links: Vec,
    footnotes: Vec,
    attributes: Vec,
}

#[wasm_bindgen]
impl WasmDjotContent {
    /// Creates Djot content from its eight fields; all arguments are required and
    /// stored as passed.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        plainText: String,
        blocks: Vec,
        metadata: WasmMetadata,
        tables: Vec,
        images: Vec,
        links: Vec,
        footnotes: Vec,
        attributes: Vec,
    ) -> WasmDjotContent {
        WasmDjotContent {
            plain_text: plainText,
            blocks,
            metadata,
            tables,
            images,
            links,
            footnotes,
            attributes,
        }
    }

    /// Returns a default-initialized `WasmDjotContent`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDjotContent {
        ::default()
    }

    /// Returns a clone of `plain_text` (JS property `plainText`).
    #[wasm_bindgen(getter, js_name = "plainText")]
    pub fn plain_text(&self) -> String {
        self.plain_text.clone()
    }

    #[wasm_bindgen(setter, js_name = "plainText")]
    pub fn set_plain_text(&mut self, value: String) {
        self.plain_text = value;
    }

    /// Returns a clone of `blocks`.
    #[wasm_bindgen(getter)]
    pub fn blocks(&self) -> Vec {
        self.blocks.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_blocks(&mut self, value: Vec) {
        self.blocks = value;
    }

    /// Returns a clone of `metadata`.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmMetadata {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmMetadata) {
        self.metadata = value;
    }

    /// Returns a clone of `tables`.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec {
        self.tables.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec) {
        self.tables = value;
    }

    /// Returns a clone of `images`.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Vec {
        self.images.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Vec) {
        self.images = value;
    }

    /// Returns a clone of `links`.
    #[wasm_bindgen(getter)]
    pub fn links(&self) -> Vec {
        self.links.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_links(&mut self, value: Vec) {
        self.links = value;
    }

    /// Returns a clone of `footnotes`.
    #[wasm_bindgen(getter)]
    pub fn footnotes(&self) -> Vec {
        self.footnotes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_footnotes(&mut self, value: Vec) {
        self.footnotes = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Vec {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Vec) {
        self.attributes = value;
    }
}

/// Block-level element in a Djot document.
///
/// Represents structural elements like headings, paragraphs, lists, code blocks, etc.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFormattedBlock {
    block_type: WasmBlockType,
    level: Option,
    inline_content: Vec,
    attributes: Option,
    language: Option,
    code: Option,
    children: Vec,
}

#[wasm_bindgen]
impl WasmFormattedBlock {
    /// Creates a formatted block from JavaScript.
    ///
    /// `blockType`, `inlineContent` and `children` are required and come first;
    /// `level`, `attributes`, `language` and `code` are optional. All values are
    /// stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        blockType: WasmBlockType,
        inlineContent: Vec,
        children: Vec,
        level: Option,
        attributes: Option,
        language: Option,
        code: Option,
    ) -> WasmFormattedBlock {
        WasmFormattedBlock {
            block_type: blockType,
            level,
            inline_content: inlineContent,
            attributes,
            language,
            code,
            children,
        }
    }

    /// Returns a default-initialized `WasmFormattedBlock`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFormattedBlock {
        ::default()
    }

    /// Returns the block type as an owned string produced by `to_api_str()`
    /// (JS property `blockType`).
    #[wasm_bindgen(getter, js_name = "blockType")]
    pub fn block_type(&self) -> String {
        self.block_type.to_api_str().to_owned()
    }

    /// Sets the block type. Takes the enum value, whereas the getter returns a string.
    #[wasm_bindgen(setter, js_name = "blockType")]
    pub fn set_block_type(&mut self, value: WasmBlockType) {
        self.block_type = value;
    }

    /// Returns `level`.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> Option {
        self.level
    }

    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: Option) {
        self.level = value;
    }

    /// Returns a clone of `inline_content` (JS property `inlineContent`).
    #[wasm_bindgen(getter, js_name = "inlineContent")]
    pub fn inline_content(&self) -> Vec {
        self.inline_content.clone()
    }

    #[wasm_bindgen(setter, js_name = "inlineContent")]
    pub fn set_inline_content(&mut self, value: Vec) {
        self.inline_content = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option) {
        self.attributes = value;
    }

    /// Returns a clone of `language`.
    #[wasm_bindgen(getter)]
    pub fn language(&self) -> Option {
        self.language.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: Option) {
        self.language = value;
    }

    /// Returns a clone of `code`.
    #[wasm_bindgen(getter)]
    pub fn code(&self) -> Option {
        self.code.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_code(&mut self, value: Option) {
        self.code = value;
    }

    /// Returns a clone of `children`.
    #[wasm_bindgen(getter)]
    pub fn children(&self) -> Vec {
        self.children.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_children(&mut self, value: Vec) {
        self.children = value;
    }
}

/// Inline element within a block.
///
/// Represents text with formatting, links, images, etc.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmInlineElement {
    element_type: WasmInlineType,
    content: String,
    attributes: Option,
    metadata: Option,
}

#[wasm_bindgen]
impl WasmInlineElement {
    /// Creates an inline element from JavaScript.
    ///
    /// `elementType` and `content` are required; `attributes` and `metadata` are
    /// optional. All values are stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        elementType: WasmInlineType,
        content: String,
        attributes: Option,
        metadata: Option,
    ) -> WasmInlineElement {
        WasmInlineElement {
            element_type: elementType,
            content,
            attributes,
            metadata,
        }
    }

    /// Returns a default-initialized `WasmInlineElement`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmInlineElement {
        ::default()
    }

    /// Returns the element type as an owned string produced by `to_api_str()`
    /// (JS property `elementType`).
    #[wasm_bindgen(getter, js_name = "elementType")]
    pub fn element_type(&self) -> String {
        self.element_type.to_api_str().to_owned()
    }

    /// Sets the element type. Takes the enum value, whereas the getter returns a string.
    #[wasm_bindgen(setter, js_name = "elementType")]
    pub fn set_element_type(&mut self, value: WasmInlineType) {
        self.element_type = value;
    }

    /// Returns a clone of `content`.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option) {
        self.attributes = value;
    }

    /// Returns a clone of `metadata`.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> Option {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: Option) {
        self.metadata = value;
    }
}

/// Image element in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotImage {
    src: String,
    alt: String,
    title: Option,
    attributes: Option,
}

#[wasm_bindgen]
impl WasmDjotImage {
    /// Creates an image element; `src` and `alt` are required, `title` and
    /// `attributes` are optional. All values are stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(src: String, alt: String, title: Option, attributes: Option) -> WasmDjotImage {
        WasmDjotImage {
            src,
            alt,
            title,
            attributes,
        }
    }

    /// Returns a default-initialized `WasmDjotImage`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDjotImage {
        ::default()
    }

    /// Returns a clone of `src`.
    #[wasm_bindgen(getter)]
    pub fn src(&self) -> String {
        self.src.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_src(&mut self, value: String) {
        self.src = value;
    }

    /// Returns a clone of `alt`.
    #[wasm_bindgen(getter)]
    pub fn alt(&self) -> String {
        self.alt.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_alt(&mut self, value: String) {
        self.alt = value;
    }

    /// Returns a clone of `title`.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option) {
        self.attributes = value;
    }
}

/// Link element in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotLink {
    url: String,
    text: String,
    title: Option,
    attributes: Option,
}

#[wasm_bindgen]
impl WasmDjotLink {
    /// Creates a link element; `url` and `text` are required, `title` and
    /// `attributes` are optional. All values are stored as passed.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(url: String, text: String, title: Option, attributes: Option) -> WasmDjotLink {
        WasmDjotLink {
            url,
            text,
            title,
            attributes,
        }
    }

    /// Returns a default-initialized `WasmDjotLink`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDjotLink {
        ::default()
    }

    /// Returns a clone of `url`.
    #[wasm_bindgen(getter)]
    pub fn url(&self) -> String {
        self.url.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_url(&mut self, value: String) {
        self.url = value;
    }

    /// Returns a clone of `text`.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    /// Returns a clone of `title`.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option) {
        self.attributes = value;
    }
}

/// Footnote in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFootnote {
    label: String,
    content: Vec,
}

#[wasm_bindgen]
impl WasmFootnote {
    /// Creates a footnote from its label and content; both are required.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(label: String, content: Vec) -> WasmFootnote {
        WasmFootnote { label, content }
    }

    /// Returns a default-initialized `WasmFootnote`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFootnote {
        ::default()
    }

    /// Returns a clone of `label`.
    #[wasm_bindgen(getter)]
    pub fn label(&self) -> String {
        self.label.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_label(&mut self, value: String) {
        self.label = value;
    }

    /// Returns a clone of `content`.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> Vec {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: Vec) {
        self.content = value;
    }
}

/// Top-level structured document representation.
/// /// A flat array of nodes with index-based parent/child references forming a tree. /// Root-level nodes have `parent: None`. Use `body_roots()` and `furniture_roots()` /// to iterate over top-level content by layer. /// /// # Validation /// /// Call `validate()` after construction to verify all node indices are in bounds /// and parent-child relationships are bidirectionally consistent. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmDocumentStructure { nodes: Vec, source_format: Option, relationships: Vec, node_types: Vec, } #[wasm_bindgen] impl WasmDocumentStructure { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( nodes: Option>, relationships: Option>, nodeTypes: Option>, sourceFormat: Option, ) -> WasmDocumentStructure { WasmDocumentStructure { nodes: nodes.unwrap_or_default(), source_format: sourceFormat, relationships: relationships.unwrap_or_default(), node_types: nodeTypes.unwrap_or_default(), } } #[wasm_bindgen(getter)] pub fn nodes(&self) -> Vec { self.nodes.clone() } #[wasm_bindgen(setter)] pub fn set_nodes(&mut self, value: Vec) { self.nodes = value; } #[wasm_bindgen(getter, js_name = "sourceFormat")] pub fn source_format(&self) -> Option { self.source_format.clone() } #[wasm_bindgen(setter, js_name = "sourceFormat")] pub fn set_source_format(&mut self, value: Option) { self.source_format = value; } #[wasm_bindgen(getter)] pub fn relationships(&self) -> Vec { self.relationships.clone() } #[wasm_bindgen(setter)] pub fn set_relationships(&mut self, value: Vec) { self.relationships = value; } #[wasm_bindgen(getter, js_name = "nodeTypes")] pub fn node_types(&self) -> Vec { self.node_types.clone() } #[wasm_bindgen(setter, js_name = "nodeTypes")] pub fn set_node_types(&mut self, value: Vec) { self.node_types = value; } /// Compute and populate the `node_types` field from the current `nodes`. /// /// Call this after all nodes have been added to the structure. 
Internal /// construction paths (builder, derivation) call this automatically. /// /// # Examples #[wasm_bindgen(js_name = "finalizeNodeTypes")] pub fn finalize_node_types(&self) -> () { kreuzberg::DocumentStructure::from(self.clone()).finalize_node_types() } /// Check if the document structure is empty. #[wasm_bindgen(js_name = "isEmpty")] pub fn is_empty(&self) -> bool { kreuzberg::DocumentStructure::from(self.clone()).is_empty() } #[allow(clippy::should_implement_trait)] #[wasm_bindgen] pub fn default() -> WasmDocumentStructure { kreuzberg::DocumentStructure::default().into() } } /// A resolved relationship between two nodes in the document tree. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmDocumentRelationship { source: u32, target: u32, kind: WasmRelationshipKind, } #[wasm_bindgen] impl WasmDocumentRelationship { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new(source: u32, target: u32, kind: WasmRelationshipKind) -> WasmDocumentRelationship { WasmDocumentRelationship { source, target, kind } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmDocumentRelationship { ::default() } #[wasm_bindgen(getter)] pub fn source(&self) -> u32 { self.source } #[wasm_bindgen(setter)] pub fn set_source(&mut self, value: u32) { self.source = value; } #[wasm_bindgen(getter)] pub fn target(&self) -> u32 { self.target } #[wasm_bindgen(setter)] pub fn set_target(&mut self, value: u32) { self.target = value; } #[wasm_bindgen(getter)] pub fn kind(&self) -> String { self.kind.to_api_str().to_owned() } #[wasm_bindgen(setter)] pub fn set_kind(&mut self, value: WasmRelationshipKind) { self.kind = value; } } /// A single node in the document tree. /// /// Each node has deterministic `id`, typed `content`, optional `parent`/`children` /// for tree structure, and metadata like page number, bounding box, and content layer. 
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentNode {
    id: String,
    content: JsValue,
    parent: Option,
    children: Vec,
    content_layer: WasmContentLayer,
    page: Option,
    page_end: Option,
    bbox: Option,
    annotations: Vec,
    attributes: Option,
}

#[wasm_bindgen]
impl WasmDocumentNode {
    /// Creates a document node from JavaScript.
    ///
    /// `id`, `content`, `children`, `contentLayer` and `annotations` are required and
    /// come first; `parent`, `page`, `pageEnd`, `bbox` and `attributes` are optional.
    /// All values are stored as passed.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        id: String,
        content: JsValue,
        children: Vec,
        contentLayer: WasmContentLayer,
        annotations: Vec,
        parent: Option,
        page: Option,
        pageEnd: Option,
        bbox: Option,
        attributes: Option,
    ) -> WasmDocumentNode {
        WasmDocumentNode {
            id,
            content,
            parent,
            children,
            content_layer: contentLayer,
            page,
            page_end: pageEnd,
            bbox,
            annotations,
            attributes,
        }
    }

    /// Returns a default-initialized `WasmDocumentNode`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDocumentNode {
        ::default()
    }

    /// Returns a clone of `id`.
    #[wasm_bindgen(getter)]
    pub fn id(&self) -> String {
        self.id.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_id(&mut self, value: String) {
        self.id = value;
    }

    /// Returns a clone of `content`.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> JsValue {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: JsValue) {
        self.content = value;
    }

    /// Returns `parent`.
    #[wasm_bindgen(getter)]
    pub fn parent(&self) -> Option {
        self.parent
    }

    #[wasm_bindgen(setter)]
    pub fn set_parent(&mut self, value: Option) {
        self.parent = value;
    }

    /// Returns a clone of `children`.
    #[wasm_bindgen(getter)]
    pub fn children(&self) -> Vec {
        self.children.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_children(&mut self, value: Vec) {
        self.children = value;
    }

    /// Returns the content layer as an owned string produced by `to_api_str()`
    /// (JS property `contentLayer`).
    #[wasm_bindgen(getter, js_name = "contentLayer")]
    pub fn content_layer(&self) -> String {
        self.content_layer.to_api_str().to_owned()
    }

    /// Sets the content layer. Takes the enum value, whereas the getter returns a string.
    #[wasm_bindgen(setter, js_name = "contentLayer")]
    pub fn set_content_layer(&mut self, value: WasmContentLayer) {
        self.content_layer = value;
    }

    /// Returns `page`.
    #[wasm_bindgen(getter)]
    pub fn page(&self) -> Option {
        self.page
    }

    #[wasm_bindgen(setter)]
    pub fn set_page(&mut self, value: Option) {
        self.page = value;
    }

    /// Returns `page_end` (JS property `pageEnd`).
    #[wasm_bindgen(getter, js_name = "pageEnd")]
    pub fn page_end(&self) -> Option {
        self.page_end
    }

    #[wasm_bindgen(setter, js_name = "pageEnd")]
    pub fn set_page_end(&mut self, value: Option) {
        self.page_end = value;
    }

    /// Returns a clone of `bbox`.
    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option {
        self.bbox.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option) {
        self.bbox = value;
    }

    /// Returns a clone of `annotations`.
    #[wasm_bindgen(getter)]
    pub fn annotations(&self) -> Vec {
        self.annotations.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_annotations(&mut self, value: Vec) {
        self.annotations = value;
    }

    /// Returns a clone of `attributes`.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option) {
        self.attributes = value;
    }
}

/// Structured table grid with cell-level metadata.
///
/// Stores row/column dimensions and a flat list of cells with position info.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTableGrid {
    rows: u32,
    cols: u32,
    cells: Vec,
}

#[wasm_bindgen]
impl WasmTableGrid {
    /// Creates a table grid from JavaScript.
    ///
    /// All arguments are optional: omitted `rows` and `cols` fall back to `0`, and
    /// omitted `cells` falls back to an empty list.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(rows: Option, cols: Option, cells: Option>) -> WasmTableGrid {
        WasmTableGrid {
            rows: rows.unwrap_or_default(),
            cols: cols.unwrap_or_default(),
            cells: cells.unwrap_or_default(),
        }
    }

    /// Returns a default-initialized `WasmTableGrid`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTableGrid {
        ::default()
    }

    /// Returns `rows`.
    #[wasm_bindgen(getter)]
    pub fn rows(&self) -> u32 {
        self.rows
    }

    #[wasm_bindgen(setter)]
    pub fn set_rows(&mut self, value: u32) {
        self.rows = value;
    }

    /// Returns `cols`.
    #[wasm_bindgen(getter)]
    pub fn cols(&self) -> u32 {
        self.cols
    }

    #[wasm_bindgen(setter)]
    pub fn set_cols(&mut self, value: u32) {
        self.cols = value;
    }

    /// Returns a clone of `cells`.
    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> Vec {
        self.cells.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: Vec) {
        self.cells = value;
    }
}

/// Individual grid cell with position and span metadata.
// NOTE(review): the generic argument of `Option` is not visible in this view of the
// generated file; the type is reproduced exactly as shown — confirm against the generator.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmGridCell {
    content: String,
    row: u32,
    col: u32,
    row_span: u32,
    col_span: u32,
    is_header: bool,
    bbox: Option,
}

#[wasm_bindgen]
impl WasmGridCell {
    /// Builds a cell from its field values.
    ///
    /// Every argument is stored unchanged in the field of the same name
    /// (camelCase argument, snake_case field).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        row: u32,
        col: u32,
        rowSpan: u32,
        colSpan: u32,
        isHeader: bool,
        bbox: Option,
    ) -> WasmGridCell {
        WasmGridCell {
            content,
            row,
            col,
            row_span: rowSpan,
            col_span: colSpan,
            is_header: isHeader,
            bbox,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmGridCell {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmGridCell as Default>::default()
    }

    // Property accessors: each getter hands back a copy or clone of the field,
    // each setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter)]
    pub fn row(&self) -> u32 {
        self.row
    }

    #[wasm_bindgen(setter)]
    pub fn set_row(&mut self, value: u32) {
        self.row = value;
    }

    #[wasm_bindgen(getter)]
    pub fn col(&self) -> u32 {
        self.col
    }

    #[wasm_bindgen(setter)]
    pub fn set_col(&mut self, value: u32) {
        self.col = value;
    }

    #[wasm_bindgen(getter, js_name = "rowSpan")]
    pub fn row_span(&self) -> u32 {
        self.row_span
    }

    #[wasm_bindgen(setter, js_name = "rowSpan")]
    pub fn set_row_span(&mut self, value: u32) {
        self.row_span = value;
    }

    #[wasm_bindgen(getter, js_name = "colSpan")]
    pub fn col_span(&self) -> u32 {
        self.col_span
    }

    #[wasm_bindgen(setter, js_name = "colSpan")]
    pub fn set_col_span(&mut self, value: u32) {
        self.col_span = value;
    }

    #[wasm_bindgen(getter, js_name = "isHeader")]
    pub fn is_header(&self) -> bool {
        self.is_header
    }

    #[wasm_bindgen(setter, js_name = "isHeader")]
    pub fn set_is_header(&mut self, value: bool) {
        self.is_header = value;
    }

    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option {
        self.bbox.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option) {
        self.bbox = value;
    }
}

/// Inline text annotation — byte-range based formatting and links.
///
/// Annotations reference byte offsets into the node's text content,
/// enabling precise identification of formatted regions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextAnnotation {
    start: u32,
    end: u32,
    kind: JsValue,
}

#[wasm_bindgen]
impl WasmTextAnnotation {
    /// Builds an annotation; the three arguments are stored unchanged in the
    /// fields of the same name.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(start: u32, end: u32, kind: JsValue) -> WasmTextAnnotation {
        WasmTextAnnotation { start, end, kind }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTextAnnotation {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmTextAnnotation as Default>::default()
    }

    // Property accessors: each getter hands back a copy or clone of the field,
    // each setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn start(&self) -> u32 {
        self.start
    }

    #[wasm_bindgen(setter)]
    pub fn set_start(&mut self, value: u32) {
        self.start = value;
    }

    #[wasm_bindgen(getter)]
    pub fn end(&self) -> u32 {
        self.end
    }

    #[wasm_bindgen(setter)]
    pub fn set_end(&mut self, value: u32) {
        self.end = value;
    }

    #[wasm_bindgen(getter)]
    pub fn kind(&self) -> JsValue {
        self.kind.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_kind(&mut self, value: JsValue) {
        self.kind = value;
    }
}

/// General extraction result used by the core extraction API.
///
/// This is the main result type returned by all extraction functions.
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmExtractionResult { content: String, mime_type: String, metadata: WasmMetadata, extraction_method: Option, tables: Vec, detected_languages: Option>, chunks: Option>, images: Option>, pages: Option>, elements: Option>, djot_content: Option, ocr_elements: Option>, document: Option, quality_score: Option, processing_warnings: Vec, annotations: Option>, children: Option>, uris: Option>, revisions: Option>, structured_output: Option, code_intelligence: Option, llm_usage: Option>, formatted_content: Option, ocr_internal_document: Option, } #[wasm_bindgen] impl WasmExtractionResult { #[allow(clippy::too_many_arguments)] #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( content: Option, mimeType: Option, metadata: Option, tables: Option>, processingWarnings: Option>, extractionMethod: Option, detectedLanguages: Option>, chunks: Option>, images: Option>, pages: Option>, elements: Option>, djotContent: Option, ocrElements: Option>, document: Option, qualityScore: Option, annotations: Option>, children: Option>, uris: Option>, revisions: Option>, structuredOutput: Option, llmUsage: Option>, formattedContent: Option, ocrInternalDocument: Option, ) -> WasmExtractionResult { WasmExtractionResult { content: content.unwrap_or_default(), mime_type: mimeType.unwrap_or_default(), metadata: metadata.unwrap_or_default(), extraction_method: extractionMethod, tables: tables.unwrap_or_default(), detected_languages: detectedLanguages, chunks, images, pages, elements, djot_content: djotContent, ocr_elements: ocrElements, document, quality_score: qualityScore, processing_warnings: processingWarnings.unwrap_or_default(), annotations, children, uris, revisions, structured_output: structuredOutput, code_intelligence: Default::default(), llm_usage: llmUsage, formatted_content: formattedContent, ocr_internal_document: ocrInternalDocument, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> 
WasmExtractionResult { ::default() } #[wasm_bindgen(getter)] pub fn content(&self) -> String { self.content.clone() } #[wasm_bindgen(setter)] pub fn set_content(&mut self, value: String) { self.content = value; } #[wasm_bindgen(getter, js_name = "mimeType")] pub fn mime_type(&self) -> String { self.mime_type.clone() } #[wasm_bindgen(setter, js_name = "mimeType")] pub fn set_mime_type(&mut self, value: String) { self.mime_type = value; } #[wasm_bindgen(getter)] pub fn metadata(&self) -> WasmMetadata { self.metadata.clone() } #[wasm_bindgen(setter)] pub fn set_metadata(&mut self, value: WasmMetadata) { self.metadata = value; } #[wasm_bindgen(getter, js_name = "extractionMethod")] pub fn extraction_method(&self) -> Option { self.extraction_method.map(|v| v.to_api_str().to_owned()) } #[wasm_bindgen(setter, js_name = "extractionMethod")] pub fn set_extraction_method(&mut self, value: Option) { self.extraction_method = value; } #[wasm_bindgen(getter)] pub fn tables(&self) -> Vec { self.tables.clone() } #[wasm_bindgen(setter)] pub fn set_tables(&mut self, value: Vec) { self.tables = value; } #[wasm_bindgen(getter, js_name = "detectedLanguages")] pub fn detected_languages(&self) -> Option> { self.detected_languages.clone() } #[wasm_bindgen(setter, js_name = "detectedLanguages")] pub fn set_detected_languages(&mut self, value: Option>) { self.detected_languages = value; } #[wasm_bindgen(getter)] pub fn chunks(&self) -> Option { self.chunks.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_chunks(&mut self, value: Option>) { self.chunks = value; } #[wasm_bindgen(getter)] pub fn images(&self) -> Option { self.images.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_images(&mut self, value: Option>) { self.images = value; } #[wasm_bindgen(getter)] pub fn 
pages(&self) -> Option { self.pages.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_pages(&mut self, value: Option>) { self.pages = value; } #[wasm_bindgen(getter)] pub fn elements(&self) -> Option { self.elements.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_elements(&mut self, value: Option>) { self.elements = value; } #[wasm_bindgen(getter, js_name = "djotContent")] pub fn djot_content(&self) -> Option { self.djot_content.clone() } #[wasm_bindgen(setter, js_name = "djotContent")] pub fn set_djot_content(&mut self, value: Option) { self.djot_content = value; } #[wasm_bindgen(getter, js_name = "ocrElements")] pub fn ocr_elements(&self) -> Option { self.ocr_elements.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter, js_name = "ocrElements")] pub fn set_ocr_elements(&mut self, value: Option>) { self.ocr_elements = value; } #[wasm_bindgen(getter)] pub fn document(&self) -> Option { self.document.clone() } #[wasm_bindgen(setter)] pub fn set_document(&mut self, value: Option) { self.document = value; } #[wasm_bindgen(getter, js_name = "qualityScore")] pub fn quality_score(&self) -> Option { self.quality_score } #[wasm_bindgen(setter, js_name = "qualityScore")] pub fn set_quality_score(&mut self, value: Option) { self.quality_score = value; } #[wasm_bindgen(getter, js_name = "processingWarnings")] pub fn processing_warnings(&self) -> Vec { self.processing_warnings.clone() } #[wasm_bindgen(setter, js_name = "processingWarnings")] pub fn set_processing_warnings(&mut self, value: Vec) { self.processing_warnings = value; } #[wasm_bindgen(getter)] pub fn annotations(&self) -> Option { self.annotations.as_ref().map(|items| { let arr = js_sys::Array::new(); 
for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_annotations(&mut self, value: Option>) { self.annotations = value; } #[wasm_bindgen(getter)] pub fn children(&self) -> Option { self.children.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_children(&mut self, value: Option>) { self.children = value; } #[wasm_bindgen(getter)] pub fn uris(&self) -> Option { self.uris.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_uris(&mut self, value: Option>) { self.uris = value; } #[wasm_bindgen(getter)] pub fn revisions(&self) -> Option { self.revisions.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter)] pub fn set_revisions(&mut self, value: Option>) { self.revisions = value; } #[wasm_bindgen(getter, js_name = "structuredOutput")] pub fn structured_output(&self) -> Option { self.structured_output.clone() } #[wasm_bindgen(setter, js_name = "structuredOutput")] pub fn set_structured_output(&mut self, value: Option) { self.structured_output = value; } #[wasm_bindgen(getter, js_name = "codeIntelligence")] pub fn code_intelligence(&self) -> Option { self.code_intelligence.clone() } #[wasm_bindgen(setter, js_name = "codeIntelligence")] pub fn set_code_intelligence(&mut self, value: Option) { self.code_intelligence = value; } #[wasm_bindgen(getter, js_name = "llmUsage")] pub fn llm_usage(&self) -> Option { self.llm_usage.as_ref().map(|items| { let arr = js_sys::Array::new(); for item in items { arr.push(&JsValue::from(item.clone())); } arr }) } #[wasm_bindgen(setter, js_name = "llmUsage")] pub fn set_llm_usage(&mut self, value: Option>) { self.llm_usage = value; } #[wasm_bindgen(getter, js_name = 
"formattedContent")] pub fn formatted_content(&self) -> Option { self.formatted_content.clone() } #[wasm_bindgen(setter, js_name = "formattedContent")] pub fn set_formatted_content(&mut self, value: Option) { self.formatted_content = value; } #[wasm_bindgen(getter, js_name = "ocrInternalDocument")] pub fn ocr_internal_document(&self) -> Option { self.ocr_internal_document.clone() } #[wasm_bindgen(setter, js_name = "ocrInternalDocument")] pub fn set_ocr_internal_document(&mut self, value: Option) { self.ocr_internal_document = value; } /// Convert from an OCR result. #[wasm_bindgen(js_name = "fromOcr")] pub fn from_ocr(ocr: WasmOcrExtractionResult) -> WasmExtractionResult { let ocr_core: kreuzberg::OcrExtractionResult = ocr.into(); kreuzberg::ExtractionResult::from_ocr(ocr_core).into() } } /// A single file extracted from an archive. /// /// When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction /// enabled, each processable file produces its own full `ExtractionResult`. 
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmArchiveEntry {
    path: String,
    mime_type: String,
    result: WasmExtractionResult,
}

#[wasm_bindgen]
impl WasmArchiveEntry {
    /// Builds an entry; each argument is stored unchanged in the field of the
    /// same name (`mimeType` goes to `mime_type`).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(path: String, mimeType: String, result: WasmExtractionResult) -> WasmArchiveEntry {
        WasmArchiveEntry {
            path,
            mime_type: mimeType,
            result,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmArchiveEntry {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmArchiveEntry as Default>::default()
    }

    // Property accessors: each getter hands back a clone of the field, each
    // setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn path(&self) -> String {
        self.path.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_path(&mut self, value: String) {
        self.path = value;
    }

    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        self.mime_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn result(&self) -> WasmExtractionResult {
        self.result.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_result(&mut self, value: WasmExtractionResult) {
        self.result = value;
    }
}

/// A non-fatal warning from a processing pipeline stage.
///
/// Captures errors from optional features that don't prevent extraction
/// but may indicate degraded results.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmProcessingWarning {
    source: String,
    message: String,
}

#[wasm_bindgen]
impl WasmProcessingWarning {
    /// Builds a warning; both arguments are stored unchanged in the fields of
    /// the same name.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(source: String, message: String) -> WasmProcessingWarning {
        WasmProcessingWarning { source, message }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmProcessingWarning {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmProcessingWarning as Default>::default()
    }

    // Property accessors: each getter hands back a clone of the field, each
    // setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn source(&self) -> String {
        self.source.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: String) {
        self.source = value;
    }

    #[wasm_bindgen(getter)]
    pub fn message(&self) -> String {
        self.message.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_message(&mut self, value: String) {
        self.message = value;
    }
}

/// Token usage and cost data for a single LLM call made during extraction.
///
/// Populated when VLM OCR, structured extraction, or LLM-based embeddings
/// are used. Multiple entries may be present when multiple LLM calls occur
/// within one extraction (e.g. VLM OCR + structured extraction).
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmLlmUsage { model: String, source: String, input_tokens: Option, output_tokens: Option, total_tokens: Option, estimated_cost: Option, finish_reason: Option, } #[wasm_bindgen] impl WasmLlmUsage { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( model: Option, source: Option, inputTokens: Option, outputTokens: Option, totalTokens: Option, estimatedCost: Option, finishReason: Option, ) -> WasmLlmUsage { WasmLlmUsage { model: model.unwrap_or_default(), source: source.unwrap_or_default(), input_tokens: inputTokens, output_tokens: outputTokens, total_tokens: totalTokens, estimated_cost: estimatedCost, finish_reason: finishReason, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmLlmUsage { ::default() } #[wasm_bindgen(getter)] pub fn model(&self) -> String { self.model.clone() } #[wasm_bindgen(setter)] pub fn set_model(&mut self, value: String) { self.model = value; } #[wasm_bindgen(getter)] pub fn source(&self) -> String { self.source.clone() } #[wasm_bindgen(setter)] pub fn set_source(&mut self, value: String) { self.source = value; } #[wasm_bindgen(getter, js_name = "inputTokens")] pub fn input_tokens(&self) -> Option { self.input_tokens } #[wasm_bindgen(setter, js_name = "inputTokens")] pub fn set_input_tokens(&mut self, value: Option) { self.input_tokens = value; } #[wasm_bindgen(getter, js_name = "outputTokens")] pub fn output_tokens(&self) -> Option { self.output_tokens } #[wasm_bindgen(setter, js_name = "outputTokens")] pub fn set_output_tokens(&mut self, value: Option) { self.output_tokens = value; } #[wasm_bindgen(getter, js_name = "totalTokens")] pub fn total_tokens(&self) -> Option { self.total_tokens } #[wasm_bindgen(setter, js_name = "totalTokens")] pub fn set_total_tokens(&mut self, value: Option) { self.total_tokens = value; } #[wasm_bindgen(getter, js_name = "estimatedCost")] pub fn estimated_cost(&self) -> Option { self.estimated_cost } 
#[wasm_bindgen(setter, js_name = "estimatedCost")] pub fn set_estimated_cost(&mut self, value: Option) { self.estimated_cost = value; } #[wasm_bindgen(getter, js_name = "finishReason")] pub fn finish_reason(&self) -> Option { self.finish_reason.clone() } #[wasm_bindgen(setter, js_name = "finishReason")] pub fn set_finish_reason(&mut self, value: Option) { self.finish_reason = value; } } /// A text chunk with optional embedding and metadata. /// /// Chunks are created when chunking is enabled in `ExtractionConfig`. Each chunk /// contains the text content, optional embedding vector (if embedding generation /// is configured), and metadata about its position in the document. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmChunk { content: String, chunk_type: WasmChunkType, embedding: Option>, metadata: WasmChunkMetadata, } #[wasm_bindgen] impl WasmChunk { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new( content: String, chunkType: WasmChunkType, metadata: WasmChunkMetadata, embedding: Option>, ) -> WasmChunk { WasmChunk { content, chunk_type: chunkType, embedding, metadata, } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmChunk { ::default() } #[wasm_bindgen(getter)] pub fn content(&self) -> String { self.content.clone() } #[wasm_bindgen(setter)] pub fn set_content(&mut self, value: String) { self.content = value; } #[wasm_bindgen(getter, js_name = "chunkType")] pub fn chunk_type(&self) -> String { self.chunk_type.to_api_str().to_owned() } #[wasm_bindgen(setter, js_name = "chunkType")] pub fn set_chunk_type(&mut self, value: WasmChunkType) { self.chunk_type = value; } #[wasm_bindgen(getter)] pub fn embedding(&self) -> Option> { self.embedding.clone() } #[wasm_bindgen(setter)] pub fn set_embedding(&mut self, value: Option>) { self.embedding = value; } #[wasm_bindgen(getter)] pub fn metadata(&self) -> WasmChunkMetadata { self.metadata.clone() } #[wasm_bindgen(setter)] pub fn set_metadata(&mut self, value: 
WasmChunkMetadata) { self.metadata = value; } } /// Heading context for a chunk within a Markdown document. /// /// Contains the heading hierarchy from document root to this chunk's section. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmHeadingContext { headings: Vec, } #[wasm_bindgen] impl WasmHeadingContext { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new(headings: Vec) -> WasmHeadingContext { WasmHeadingContext { headings } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmHeadingContext { ::default() } #[wasm_bindgen(getter)] pub fn headings(&self) -> Vec { self.headings.clone() } #[wasm_bindgen(setter)] pub fn set_headings(&mut self, value: Vec) { self.headings = value; } } /// A single heading in the hierarchy. #[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmHeadingLevel { level: u8, text: String, } #[wasm_bindgen] impl WasmHeadingLevel { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new(level: u8, text: String) -> WasmHeadingLevel { WasmHeadingLevel { level, text } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmHeadingLevel { ::default() } #[wasm_bindgen(getter)] pub fn level(&self) -> u8 { self.level } #[wasm_bindgen(setter)] pub fn set_level(&mut self, value: u8) { self.level = value; } #[wasm_bindgen(getter)] pub fn text(&self) -> String { self.text.clone() } #[wasm_bindgen(setter)] pub fn set_text(&mut self, value: String) { self.text = value; } } /// Metadata about a chunk's position in the original document. 
// NOTE(review): the generic arguments of `Option`/`Vec` are not visible in this view of the
// generated file; those types are reproduced exactly as shown — confirm against the generator.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmChunkMetadata {
    byte_start: usize,
    byte_end: usize,
    token_count: Option,
    chunk_index: usize,
    total_chunks: usize,
    first_page: Option,
    last_page: Option,
    heading_context: Option,
    image_indices: Vec,
}

#[wasm_bindgen]
impl WasmChunkMetadata {
    /// Builds chunk metadata from its field values.
    ///
    /// Every argument is stored unchanged in the field of the same name
    /// (camelCase argument, snake_case field). The `Option` arguments come last,
    /// so the argument order differs from the field declaration order.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        byteStart: usize,
        byteEnd: usize,
        chunkIndex: usize,
        totalChunks: usize,
        imageIndices: Vec,
        tokenCount: Option,
        firstPage: Option,
        lastPage: Option,
        headingContext: Option,
    ) -> WasmChunkMetadata {
        WasmChunkMetadata {
            byte_start: byteStart,
            byte_end: byteEnd,
            token_count: tokenCount,
            chunk_index: chunkIndex,
            total_chunks: totalChunks,
            first_page: firstPage,
            last_page: lastPage,
            heading_context: headingContext,
            image_indices: imageIndices,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmChunkMetadata {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmChunkMetadata as Default>::default()
    }

    // Property accessors: each getter hands back a copy or clone of the field,
    // each setter overwrites it.

    #[wasm_bindgen(getter, js_name = "byteStart")]
    pub fn byte_start(&self) -> usize {
        self.byte_start
    }

    #[wasm_bindgen(setter, js_name = "byteStart")]
    pub fn set_byte_start(&mut self, value: usize) {
        self.byte_start = value;
    }

    #[wasm_bindgen(getter, js_name = "byteEnd")]
    pub fn byte_end(&self) -> usize {
        self.byte_end
    }

    #[wasm_bindgen(setter, js_name = "byteEnd")]
    pub fn set_byte_end(&mut self, value: usize) {
        self.byte_end = value;
    }

    #[wasm_bindgen(getter, js_name = "tokenCount")]
    pub fn token_count(&self) -> Option {
        self.token_count
    }

    #[wasm_bindgen(setter, js_name = "tokenCount")]
    pub fn set_token_count(&mut self, value: Option) {
        self.token_count = value;
    }

    #[wasm_bindgen(getter, js_name = "chunkIndex")]
    pub fn chunk_index(&self) -> usize {
        self.chunk_index
    }

    #[wasm_bindgen(setter, js_name = "chunkIndex")]
    pub fn set_chunk_index(&mut self, value: usize) {
        self.chunk_index = value;
    }

    #[wasm_bindgen(getter, js_name = "totalChunks")]
    pub fn total_chunks(&self) -> usize {
        self.total_chunks
    }

    #[wasm_bindgen(setter, js_name = "totalChunks")]
    pub fn set_total_chunks(&mut self, value: usize) {
        self.total_chunks = value;
    }

    #[wasm_bindgen(getter, js_name = "firstPage")]
    pub fn first_page(&self) -> Option {
        self.first_page
    }

    #[wasm_bindgen(setter, js_name = "firstPage")]
    pub fn set_first_page(&mut self, value: Option) {
        self.first_page = value;
    }

    #[wasm_bindgen(getter, js_name = "lastPage")]
    pub fn last_page(&self) -> Option {
        self.last_page
    }

    #[wasm_bindgen(setter, js_name = "lastPage")]
    pub fn set_last_page(&mut self, value: Option) {
        self.last_page = value;
    }

    #[wasm_bindgen(getter, js_name = "headingContext")]
    pub fn heading_context(&self) -> Option {
        self.heading_context.clone()
    }

    #[wasm_bindgen(setter, js_name = "headingContext")]
    pub fn set_heading_context(&mut self, value: Option) {
        self.heading_context = value;
    }

    #[wasm_bindgen(getter, js_name = "imageIndices")]
    pub fn image_indices(&self) -> Vec {
        self.image_indices.clone()
    }

    #[wasm_bindgen(setter, js_name = "imageIndices")]
    pub fn set_image_indices(&mut self, value: Vec) {
        self.image_indices = value;
    }
}

/// Extracted image from a document.
///
/// Contains raw image data, metadata, and optional nested OCR results.
/// Raw bytes allow cross-language compatibility - users can convert to
/// PIL.Image (Python), Sharp (Node.js), or other formats as needed.
// NOTE(review): the generic arguments of `Option`/`Vec` are not visible in this view of the
// generated file; those types are reproduced exactly as shown — confirm against the generator.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExtractedImage {
    data: Vec,
    format: String,
    image_index: u32,
    page_number: Option,
    width: Option,
    height: Option,
    colorspace: Option,
    bits_per_component: Option,
    is_mask: bool,
    description: Option,
    ocr_result: Option,
    bounding_box: Option,
    source_path: Option,
    image_kind: Option,
    kind_confidence: Option,
    cluster_id: Option,
}

#[wasm_bindgen]
impl WasmExtractedImage {
    /// Builds an image record from its field values.
    ///
    /// Every argument is stored unchanged in the field of the same name
    /// (camelCase argument, snake_case field). The `Option` arguments come last,
    /// so the argument order differs from the field declaration order.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        data: Vec,
        format: String,
        imageIndex: u32,
        isMask: bool,
        pageNumber: Option,
        width: Option,
        height: Option,
        colorspace: Option,
        bitsPerComponent: Option,
        description: Option,
        ocrResult: Option,
        boundingBox: Option,
        sourcePath: Option,
        imageKind: Option,
        kindConfidence: Option,
        clusterId: Option,
    ) -> WasmExtractedImage {
        WasmExtractedImage {
            data,
            format,
            image_index: imageIndex,
            page_number: pageNumber,
            width,
            height,
            colorspace,
            bits_per_component: bitsPerComponent,
            is_mask: isMask,
            description,
            ocr_result: ocrResult,
            bounding_box: boundingBox,
            source_path: sourcePath,
            image_kind: imageKind,
            kind_confidence: kindConfidence,
            cluster_id: clusterId,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExtractedImage {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmExtractedImage as Default>::default()
    }

    // Property accessors: unless documented otherwise, each getter hands back a
    // copy or clone of the field and each setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn data(&self) -> Vec {
        self.data.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_data(&mut self, value: Vec) {
        self.data = value;
    }

    #[wasm_bindgen(getter)]
    pub fn format(&self) -> String {
        self.format.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: String) {
        self.format = value;
    }

    #[wasm_bindgen(getter, js_name = "imageIndex")]
    pub fn image_index(&self) -> u32 {
        self.image_index
    }

    #[wasm_bindgen(setter, js_name = "imageIndex")]
    pub fn set_image_index(&mut self, value: u32) {
        self.image_index = value;
    }

    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> Option {
        self.page_number
    }

    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: Option) {
        self.page_number = value;
    }

    #[wasm_bindgen(getter)]
    pub fn width(&self) -> Option {
        self.width
    }

    #[wasm_bindgen(setter)]
    pub fn set_width(&mut self, value: Option) {
        self.width = value;
    }

    #[wasm_bindgen(getter)]
    pub fn height(&self) -> Option {
        self.height
    }

    #[wasm_bindgen(setter)]
    pub fn set_height(&mut self, value: Option) {
        self.height = value;
    }

    #[wasm_bindgen(getter)]
    pub fn colorspace(&self) -> Option {
        self.colorspace.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_colorspace(&mut self, value: Option) {
        self.colorspace = value;
    }

    #[wasm_bindgen(getter, js_name = "bitsPerComponent")]
    pub fn bits_per_component(&self) -> Option {
        self.bits_per_component
    }

    #[wasm_bindgen(setter, js_name = "bitsPerComponent")]
    pub fn set_bits_per_component(&mut self, value: Option) {
        self.bits_per_component = value;
    }

    #[wasm_bindgen(getter, js_name = "isMask")]
    pub fn is_mask(&self) -> bool {
        self.is_mask
    }

    #[wasm_bindgen(setter, js_name = "isMask")]
    pub fn set_is_mask(&mut self, value: bool) {
        self.is_mask = value;
    }

    #[wasm_bindgen(getter)]
    pub fn description(&self) -> Option {
        self.description.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_description(&mut self, value: Option) {
        self.description = value;
    }

    #[wasm_bindgen(getter, js_name = "ocrResult")]
    pub fn ocr_result(&self) -> Option {
        self.ocr_result.clone()
    }

    #[wasm_bindgen(setter, js_name = "ocrResult")]
    pub fn set_ocr_result(&mut self, value: Option) {
        self.ocr_result = value;
    }

    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option {
        self.bounding_box.clone()
    }

    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option) {
        self.bounding_box = value;
    }

    #[wasm_bindgen(getter, js_name = "sourcePath")]
    pub fn source_path(&self) -> Option {
        self.source_path.clone()
    }

    #[wasm_bindgen(setter, js_name = "sourcePath")]
    pub fn set_source_path(&mut self, value: Option) {
        self.source_path = value;
    }

    /// Returns the kind as the owned string obtained from `to_api_str()`, or
    /// `None` when the field is unset.
    #[wasm_bindgen(getter, js_name = "imageKind")]
    pub fn image_kind(&self) -> Option {
        self.image_kind.map(|v| v.to_api_str().to_owned())
    }

    #[wasm_bindgen(setter, js_name = "imageKind")]
    pub fn set_image_kind(&mut self, value: Option) {
        self.image_kind = value;
    }

    #[wasm_bindgen(getter, js_name = "kindConfidence")]
    pub fn kind_confidence(&self) -> Option {
        self.kind_confidence
    }

    #[wasm_bindgen(setter, js_name = "kindConfidence")]
    pub fn set_kind_confidence(&mut self, value: Option) {
        self.kind_confidence = value;
    }

    #[wasm_bindgen(getter, js_name = "clusterId")]
    pub fn cluster_id(&self) -> Option {
        self.cluster_id
    }

    #[wasm_bindgen(setter, js_name = "clusterId")]
    pub fn set_cluster_id(&mut self, value: Option) {
        self.cluster_id = value;
    }
}

/// Bounding box coordinates for element positioning.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBoundingBox {
    x0: f64,
    y0: f64,
    x1: f64,
    y1: f64,
}

#[wasm_bindgen]
impl WasmBoundingBox {
    /// Builds a bounding box; every coordinate is optional.
    ///
    /// An omitted coordinate falls back to the `f64` default through
    /// `unwrap_or_default()`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(x0: Option, y0: Option, x1: Option, y1: Option) -> WasmBoundingBox {
        WasmBoundingBox {
            x0: x0.unwrap_or_default(),
            y0: y0.unwrap_or_default(),
            x1: x1.unwrap_or_default(),
            y1: y1.unwrap_or_default(),
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmBoundingBox {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmBoundingBox as Default>::default()
    }

    // Property accessors: each getter hands back a copy of the coordinate, each
    // setter overwrites it.

    #[wasm_bindgen(getter)]
    pub fn x0(&self) -> f64 {
        self.x0
    }

    #[wasm_bindgen(setter)]
    pub fn set_x0(&mut self, value: f64) {
        self.x0 = value;
    }

    #[wasm_bindgen(getter)]
    pub fn y0(&self) -> f64 {
        self.y0
    }

    #[wasm_bindgen(setter)]
    pub fn set_y0(&mut self, value: f64) {
        self.y0 = value;
    }

    #[wasm_bindgen(getter)]
    pub fn x1(&self) -> f64 {
        self.x1
    }

    #[wasm_bindgen(setter)]
    pub fn set_x1(&mut self, value: f64) {
        self.x1 = value;
    }

    #[wasm_bindgen(getter)]
    pub fn y1(&self) -> f64 {
        self.y1
    }

    #[wasm_bindgen(setter)]
    pub fn set_y1(&mut self, value: f64) {
        self.y1 = value;
    }
}

/// Metadata for a semantic element.
// NOTE(review): the generic arguments of `Option` are not visible in this view of the
// generated file; those types are reproduced exactly as shown — confirm against the generator.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmElementMetadata {
    page_number: Option,
    filename: Option,
    coordinates: Option,
    element_index: Option,
    additional: JsValue,
}

#[wasm_bindgen]
impl WasmElementMetadata {
    /// Builds element metadata from its field values.
    ///
    /// Each argument is stored unchanged. `additional` is the only
    /// non-`Option` argument and comes first, so the argument order differs
    /// from the field declaration order.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        additional: JsValue,
        pageNumber: Option,
        filename: Option,
        coordinates: Option,
        elementIndex: Option,
    ) -> WasmElementMetadata {
        WasmElementMetadata {
            page_number: pageNumber,
            filename,
            coordinates,
            element_index: elementIndex,
            additional,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmElementMetadata {
        // Fully qualified on purpose: `Self::default()` would resolve to this
        // inherent method and recurse.
        <WasmElementMetadata as Default>::default()
    }

    // Property accessors: each getter hands back a copy or clone of the field,
    // each setter overwrites it.

    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> Option {
        self.page_number
    }

    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: Option) {
        self.page_number = value;
    }

    #[wasm_bindgen(getter)]
    pub fn filename(&self) -> Option {
        self.filename.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_filename(&mut self, value: Option) {
        self.filename = value;
    }

    #[wasm_bindgen(getter)]
    pub fn coordinates(&self) -> Option {
        self.coordinates.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_coordinates(&mut self, value: Option) {
        self.coordinates = value;
    }

    #[wasm_bindgen(getter, js_name = "elementIndex")]
    pub fn element_index(&self) -> Option {
        self.element_index
    }

    #[wasm_bindgen(setter, js_name = "elementIndex")]
    pub fn set_element_index(&mut self, value: Option) {
        self.element_index = value;
    }

    #[wasm_bindgen(getter)]
    pub fn additional(&self) -> JsValue {
        self.additional.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_additional(&mut self, value: JsValue) {
        self.additional = value;
    }
}

/// Semantic element extracted from document.
///
/// Represents a logical unit of content with semantic classification,
/// unique identifier, and metadata for tracking origin and position.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmElement {
    element_id: String,
    element_type: WasmElementType,
    text: String,
    metadata: WasmElementMetadata,
}

#[wasm_bindgen]
impl WasmElement {
    /// Creates an element from its identifier, type, text and metadata.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        elementId: String,
        elementType: WasmElementType,
        text: String,
        metadata: WasmElementMetadata,
    ) -> WasmElement {
        WasmElement {
            element_id: elementId,
            element_type: elementType,
            text,
            metadata,
        }
    }

    /// Returns the element produced by the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmElement {
        // Call through the trait path: `WasmElement::default()` would resolve to this
        // inherent function and recurse forever.
        Default::default()
    }

    #[wasm_bindgen(getter, js_name = "elementId")]
    pub fn element_id(&self) -> String {
        self.element_id.clone()
    }

    #[wasm_bindgen(setter, js_name = "elementId")]
    pub fn set_element_id(&mut self, value: String) {
        self.element_id = value;
    }

    /// Returns the element type as the string given by `to_api_str()`.
    ///
    /// Note that the matching setter takes a `WasmElementType`, not a string.
    #[wasm_bindgen(getter, js_name = "elementType")]
    pub fn element_type(&self) -> String {
        self.element_type.to_api_str().to_owned()
    }

    #[wasm_bindgen(setter, js_name = "elementType")]
    pub fn set_element_type(&mut self, value: WasmElementType) {
        self.element_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmElementMetadata {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmElementMetadata) {
        self.metadata = value;
    }
}

/// Excel workbook representation.
///
/// Contains all sheets from an Excel file (.xlsx, .xls, etc.) with
/// extracted content and metadata.
// NOTE(review): `Vec`/`Option` are written without type arguments in this block and
// `default()` calls a bare `::default()` path; as written this cannot compile. The code is
// left untouched here - confirm the intended types against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelWorkbook {
    sheets: Vec,
    metadata: JsValue,
    revisions: Option>,
}

#[wasm_bindgen]
impl WasmExcelWorkbook {
    /// Creates a workbook from its sheets, metadata value and optional revisions.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        sheets: Vec,
        metadata: JsValue,
        revisions: Option>,
    ) -> WasmExcelWorkbook {
        WasmExcelWorkbook {
            sheets,
            metadata,
            revisions,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmExcelWorkbook {
        ::default()
    }

    // --- Property accessors; getters return clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn sheets(&self) -> Vec {
        self.sheets.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_sheets(&mut self, value: Vec) {
        self.sheets = value;
    }

    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }

    /// Returns the revisions as a newly built `js_sys::Array`, or `None` when unset.
    ///
    /// Each stored item is cloned and converted with `JsValue::from` before being pushed.
    #[wasm_bindgen(getter)]
    pub fn revisions(&self) -> Option {
        self.revisions.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    #[wasm_bindgen(setter)]
    pub fn set_revisions(&mut self, value: Option>) {
        self.revisions = value;
    }
}

/// Single Excel worksheet.
///
/// Represents one sheet from an Excel workbook with its content
/// converted to Markdown format and dimensional statistics.
// NOTE(review): `Option` is written without type arguments in this block and `default()`
// calls a bare `::default()` path; as written this cannot compile. The code is left
// untouched here - confirm the intended types against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelSheet {
    name: String,
    markdown: String,
    row_count: usize,
    col_count: usize,
    cell_count: usize,
    table_cells: Option,
}

#[wasm_bindgen]
impl WasmExcelSheet {
    /// Creates a sheet from its name, Markdown text, row/column/cell counts and
    /// optional table cells.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        name: String,
        markdown: String,
        rowCount: usize,
        colCount: usize,
        cellCount: usize,
        tableCells: Option,
    ) -> WasmExcelSheet {
        WasmExcelSheet {
            name,
            markdown,
            row_count: rowCount,
            col_count: colCount,
            cell_count: cellCount,
            table_cells: tableCells,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmExcelSheet {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        self.name.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: String) {
        self.name = value;
    }

    #[wasm_bindgen(getter)]
    pub fn markdown(&self) -> String {
        self.markdown.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_markdown(&mut self, value: String) {
        self.markdown = value;
    }

    #[wasm_bindgen(getter, js_name = "rowCount")]
    pub fn row_count(&self) -> usize {
        self.row_count
    }

    #[wasm_bindgen(setter, js_name = "rowCount")]
    pub fn set_row_count(&mut self, value: usize) {
        self.row_count = value;
    }

    #[wasm_bindgen(getter, js_name = "colCount")]
    pub fn col_count(&self) -> usize {
        self.col_count
    }

    #[wasm_bindgen(setter, js_name = "colCount")]
    pub fn set_col_count(&mut self, value: usize) {
        self.col_count = value;
    }

    #[wasm_bindgen(getter, js_name = "cellCount")]
    pub fn cell_count(&self) -> usize {
        self.cell_count
    }

    #[wasm_bindgen(setter, js_name = "cellCount")]
    pub fn set_cell_count(&mut self, value: usize) {
        self.cell_count = value;
    }

    #[wasm_bindgen(getter, js_name = "tableCells")]
    pub fn table_cells(&self) -> Option {
        self.table_cells.clone()
    }

    #[wasm_bindgen(setter, js_name = "tableCells")]
    pub fn set_table_cells(&mut self, value: Option) {
        self.table_cells = value;
    }
}

/// XML extraction result.
///
/// Contains extracted text content from XML files along with
/// structural statistics about the XML document.
// NOTE(review): `Vec` is written without type arguments in this block and `default()`
// calls a bare `::default()` path; as written this cannot compile. The code is left
// untouched here - confirm the intended types against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXmlExtractionResult {
    content: String,
    element_count: usize,
    unique_elements: Vec,
}

#[wasm_bindgen]
impl WasmXmlExtractionResult {
    /// Creates a result from the extracted content, the element count and the
    /// collection of unique elements.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(content: String, elementCount: usize, uniqueElements: Vec) -> WasmXmlExtractionResult {
        WasmXmlExtractionResult {
            content,
            element_count: elementCount,
            unique_elements: uniqueElements,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmXmlExtractionResult {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter, js_name = "elementCount")]
    pub fn element_count(&self) -> usize {
        self.element_count
    }

    #[wasm_bindgen(setter, js_name = "elementCount")]
    pub fn set_element_count(&mut self, value: usize) {
        self.element_count = value;
    }

    #[wasm_bindgen(getter, js_name = "uniqueElements")]
    pub fn unique_elements(&self) -> Vec {
        self.unique_elements.clone()
    }

    #[wasm_bindgen(setter, js_name = "uniqueElements")]
    pub fn set_unique_elements(&mut self, value: Vec) {
        self.unique_elements = value;
    }
}

/// Plain text and Markdown extraction result.
///
/// Contains the extracted text along with statistics and,
/// for Markdown files, structural elements like headers and links.
// NOTE(review): throughout this block `Option`/`Vec` are written without type arguments and
// every `default()` calls a bare `::default()` path; as written this cannot compile. The code
// is left untouched here - confirm the intended types against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextExtractionResult {
    content: String,
    line_count: usize,
    word_count: usize,
    character_count: usize,
    headers: Option>,
    links: Option,
    code_blocks: Option,
}

#[wasm_bindgen]
impl WasmTextExtractionResult {
    /// Creates a text extraction result from the content, its line/word/character
    /// counts and the optional headers, links and code blocks.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        lineCount: usize,
        wordCount: usize,
        characterCount: usize,
        headers: Option>,
        links: Option,
        codeBlocks: Option,
    ) -> WasmTextExtractionResult {
        WasmTextExtractionResult {
            content,
            line_count: lineCount,
            word_count: wordCount,
            character_count: characterCount,
            headers,
            links,
            code_blocks: codeBlocks,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmTextExtractionResult {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter, js_name = "lineCount")]
    pub fn line_count(&self) -> usize {
        self.line_count
    }

    #[wasm_bindgen(setter, js_name = "lineCount")]
    pub fn set_line_count(&mut self, value: usize) {
        self.line_count = value;
    }

    #[wasm_bindgen(getter, js_name = "wordCount")]
    pub fn word_count(&self) -> usize {
        self.word_count
    }

    #[wasm_bindgen(setter, js_name = "wordCount")]
    pub fn set_word_count(&mut self, value: usize) {
        self.word_count = value;
    }

    #[wasm_bindgen(getter, js_name = "characterCount")]
    pub fn character_count(&self) -> usize {
        self.character_count
    }

    #[wasm_bindgen(setter, js_name = "characterCount")]
    pub fn set_character_count(&mut self, value: usize) {
        self.character_count = value;
    }

    #[wasm_bindgen(getter)]
    pub fn headers(&self) -> Option> {
        self.headers.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_headers(&mut self, value: Option>) {
        self.headers = value;
    }

    #[wasm_bindgen(getter)]
    pub fn links(&self) -> Option {
        self.links.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_links(&mut self, value: Option) {
        self.links = value;
    }

    #[wasm_bindgen(getter, js_name = "codeBlocks")]
    pub fn code_blocks(&self) -> Option {
        self.code_blocks.clone()
    }

    #[wasm_bindgen(setter, js_name = "codeBlocks")]
    pub fn set_code_blocks(&mut self, value: Option) {
        self.code_blocks = value;
    }
}

/// PowerPoint (PPTX) extraction result.
///
/// Contains extracted slide content, metadata, and embedded images/tables.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxExtractionResult {
    content: String,
    metadata: WasmPptxMetadata,
    slide_count: usize,
    image_count: usize,
    table_count: usize,
    images: Vec,
    page_structure: Option,
    page_contents: Option>,
    document: Option,
    hyperlinks: Vec,
    office_metadata: JsValue,
    revisions: Option>,
}

#[wasm_bindgen]
impl WasmPptxExtractionResult {
    /// Creates a PPTX extraction result.
    ///
    /// Non-optional arguments come first; the optional ones (`pageStructure`,
    /// `pageContents`, `document`, `revisions`) follow, so the parameter order differs
    /// from the field order of the struct.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        metadata: WasmPptxMetadata,
        slideCount: usize,
        imageCount: usize,
        tableCount: usize,
        images: Vec,
        hyperlinks: Vec,
        officeMetadata: JsValue,
        pageStructure: Option,
        pageContents: Option>,
        document: Option,
        revisions: Option>,
    ) -> WasmPptxExtractionResult {
        WasmPptxExtractionResult {
            content,
            metadata,
            slide_count: slideCount,
            image_count: imageCount,
            table_count: tableCount,
            images,
            page_structure: pageStructure,
            page_contents: pageContents,
            document,
            hyperlinks,
            office_metadata: officeMetadata,
            revisions,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmPptxExtractionResult {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmPptxMetadata {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmPptxMetadata) {
        self.metadata = value;
    }

    #[wasm_bindgen(getter, js_name = "slideCount")]
    pub fn slide_count(&self) -> usize {
        self.slide_count
    }

    #[wasm_bindgen(setter, js_name = "slideCount")]
    pub fn set_slide_count(&mut self, value: usize) {
        self.slide_count = value;
    }

    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> usize {
        self.image_count
    }

    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: usize) {
        self.image_count = value;
    }

    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> usize {
        self.table_count
    }

    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: usize) {
        self.table_count = value;
    }

    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Vec {
        self.images.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Vec) {
        self.images = value;
    }

    #[wasm_bindgen(getter, js_name = "pageStructure")]
    pub fn page_structure(&self) -> Option {
        self.page_structure.clone()
    }

    #[wasm_bindgen(setter, js_name = "pageStructure")]
    pub fn set_page_structure(&mut self, value: Option) {
        self.page_structure = value;
    }

    /// Returns the page contents as a newly built `js_sys::Array`, or `None` when unset.
    ///
    /// Each stored item is cloned and converted with `JsValue::from` before being pushed.
    #[wasm_bindgen(getter, js_name = "pageContents")]
    pub fn page_contents(&self) -> Option {
        self.page_contents.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    #[wasm_bindgen(setter, js_name = "pageContents")]
    pub fn set_page_contents(&mut self, value: Option>) {
        self.page_contents = value;
    }

    #[wasm_bindgen(getter)]
    pub fn document(&self) -> Option {
        self.document.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_document(&mut self, value: Option) {
        self.document = value;
    }

    #[wasm_bindgen(getter)]
    pub fn hyperlinks(&self) -> Vec {
        self.hyperlinks.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_hyperlinks(&mut self, value: Vec) {
        self.hyperlinks = value;
    }

    #[wasm_bindgen(getter, js_name = "officeMetadata")]
    pub fn office_metadata(&self) -> JsValue {
        self.office_metadata.clone()
    }

    #[wasm_bindgen(setter, js_name = "officeMetadata")]
    pub fn set_office_metadata(&mut self, value: JsValue) {
        self.office_metadata = value;
    }

    /// Returns the revisions as a newly built `js_sys::Array`, or `None` when unset.
    ///
    /// Each stored item is cloned and converted with `JsValue::from` before being pushed.
    #[wasm_bindgen(getter)]
    pub fn revisions(&self) -> Option {
        self.revisions.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    #[wasm_bindgen(setter)]
    pub fn set_revisions(&mut self, value: Option>) {
        self.revisions = value;
    }
}

/// Email extraction result.
///
/// Complete representation of an extracted email message (.eml or .msg)
/// including headers, body content, and attachments.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailExtractionResult {
    subject: Option,
    from_email: Option,
    to_emails: Vec,
    cc_emails: Vec,
    bcc_emails: Vec,
    date: Option,
    message_id: Option,
    plain_text: Option,
    html_content: Option,
    content: String,
    attachments: Vec,
    metadata: JsValue,
}

#[wasm_bindgen]
impl WasmEmailExtractionResult {
    /// Creates an email extraction result.
    ///
    /// Non-optional arguments (recipient lists, content, attachments, metadata) come
    /// first; the optional header/body values follow, so the parameter order differs
    /// from the field order of the struct.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        toEmails: Vec,
        ccEmails: Vec,
        bccEmails: Vec,
        content: String,
        attachments: Vec,
        metadata: JsValue,
        subject: Option,
        fromEmail: Option,
        date: Option,
        messageId: Option,
        plainText: Option,
        htmlContent: Option,
    ) -> WasmEmailExtractionResult {
        WasmEmailExtractionResult {
            subject,
            from_email: fromEmail,
            to_emails: toEmails,
            cc_emails: ccEmails,
            bcc_emails: bccEmails,
            date,
            message_id: messageId,
            plain_text: plainText,
            html_content: htmlContent,
            content,
            attachments,
            metadata,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmEmailExtractionResult {
        ::default()
    }

    // --- Property accessors; getters return clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn subject(&self) -> Option {
        self.subject.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_subject(&mut self, value: Option) {
        self.subject = value;
    }

    #[wasm_bindgen(getter, js_name = "fromEmail")]
    pub fn from_email(&self) -> Option {
        self.from_email.clone()
    }

    #[wasm_bindgen(setter, js_name = "fromEmail")]
    pub fn set_from_email(&mut self, value: Option) {
        self.from_email = value;
    }

    #[wasm_bindgen(getter, js_name = "toEmails")]
    pub fn to_emails(&self) -> Vec {
        self.to_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "toEmails")]
    pub fn set_to_emails(&mut self, value: Vec) {
        self.to_emails = value;
    }

    #[wasm_bindgen(getter, js_name = "ccEmails")]
    pub fn cc_emails(&self) -> Vec {
        self.cc_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "ccEmails")]
    pub fn set_cc_emails(&mut self, value: Vec) {
        self.cc_emails = value;
    }

    #[wasm_bindgen(getter, js_name = "bccEmails")]
    pub fn bcc_emails(&self) -> Vec {
        self.bcc_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "bccEmails")]
    pub fn set_bcc_emails(&mut self, value: Vec) {
        self.bcc_emails = value;
    }

    #[wasm_bindgen(getter)]
    pub fn date(&self) -> Option {
        self.date.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_date(&mut self, value: Option) {
        self.date = value;
    }

    #[wasm_bindgen(getter, js_name = "messageId")]
    pub fn message_id(&self) -> Option {
        self.message_id.clone()
    }

    #[wasm_bindgen(setter, js_name = "messageId")]
    pub fn set_message_id(&mut self, value: Option) {
        self.message_id = value;
    }

    #[wasm_bindgen(getter, js_name = "plainText")]
    pub fn plain_text(&self) -> Option {
        self.plain_text.clone()
    }

    #[wasm_bindgen(setter, js_name = "plainText")]
    pub fn set_plain_text(&mut self, value: Option) {
        self.plain_text = value;
    }

    #[wasm_bindgen(getter, js_name = "htmlContent")]
    pub fn html_content(&self) -> Option {
        self.html_content.clone()
    }

    #[wasm_bindgen(setter, js_name = "htmlContent")]
    pub fn set_html_content(&mut self, value: Option) {
        self.html_content = value;
    }

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter)]
    pub fn attachments(&self) -> Vec {
        self.attachments.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attachments(&mut self, value: Vec) {
        self.attachments = value;
    }

    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }
}

/// Email attachment representation.
///
/// Contains metadata and optionally the content of an email attachment.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailAttachment {
    name: Option,
    filename: Option,
    mime_type: Option,
    size: Option,
    is_image: bool,
    data: Option>,
}

#[wasm_bindgen]
impl WasmEmailAttachment {
    /// Creates an attachment; the non-optional `isImage` flag comes first, followed
    /// by the optional name, filename, MIME type, size and data.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        isImage: bool,
        name: Option,
        filename: Option,
        mimeType: Option,
        size: Option,
        data: Option>,
    ) -> WasmEmailAttachment {
        WasmEmailAttachment {
            name,
            filename,
            mime_type: mimeType,
            size,
            is_image: isImage,
            data,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmEmailAttachment {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn name(&self) -> Option {
        self.name.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: Option) {
        self.name = value;
    }

    #[wasm_bindgen(getter)]
    pub fn filename(&self) -> Option {
        self.filename.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_filename(&mut self, value: Option) {
        self.filename = value;
    }

    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> Option {
        self.mime_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: Option) {
        self.mime_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn size(&self) -> Option {
        self.size
    }

    #[wasm_bindgen(setter)]
    pub fn set_size(&mut self, value: Option) {
        self.size = value;
    }

    #[wasm_bindgen(getter, js_name = "isImage")]
    pub fn is_image(&self) -> bool {
        self.is_image
    }

    #[wasm_bindgen(setter, js_name = "isImage")]
    pub fn set_is_image(&mut self, value: bool) {
        self.is_image = value;
    }

    #[wasm_bindgen(getter)]
    pub fn data(&self) -> Option> {
        self.data.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_data(&mut self, value: Option>) {
        self.data = value;
    }
}

/// OCR extraction result.
///
/// Result of performing OCR on an image or scanned document,
/// including recognized text and detected tables.
// NOTE(review): throughout this block `Option`/`Vec` are written without type arguments and
// every `default()` calls a bare `::default()` path; as written this cannot compile. The code
// is left untouched here - confirm the intended types against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrExtractionResult {
    content: String,
    mime_type: String,
    metadata: JsValue,
    tables: Vec,
    ocr_elements: Option>,
    internal_document: Option,
}

#[wasm_bindgen]
impl WasmOcrExtractionResult {
    /// Creates an OCR extraction result from the content, MIME type, metadata value,
    /// tables and the optional OCR elements and internal document.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        mimeType: String,
        metadata: JsValue,
        tables: Vec,
        ocrElements: Option>,
        internalDocument: Option,
    ) -> WasmOcrExtractionResult {
        WasmOcrExtractionResult {
            content,
            mime_type: mimeType,
            metadata,
            tables,
            ocr_elements: ocrElements,
            internal_document: internalDocument,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmOcrExtractionResult {
        ::default()
    }

    // --- Property accessors; getters return clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        self.mime_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }

    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec {
        self.tables.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec) {
        self.tables = value;
    }

    /// Returns the OCR elements as a newly built `js_sys::Array`, or `None` when unset.
    ///
    /// Each stored item is cloned and converted with `JsValue::from` before being pushed.
    #[wasm_bindgen(getter, js_name = "ocrElements")]
    pub fn ocr_elements(&self) -> Option {
        self.ocr_elements.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    #[wasm_bindgen(setter, js_name = "ocrElements")]
    pub fn set_ocr_elements(&mut self, value: Option>) {
        self.ocr_elements = value;
    }

    #[wasm_bindgen(getter, js_name = "internalDocument")]
    pub fn internal_document(&self) -> Option {
        self.internal_document.clone()
    }

    #[wasm_bindgen(setter, js_name = "internalDocument")]
    pub fn set_internal_document(&mut self, value: Option) {
        self.internal_document = value;
    }
}

/// Table detected via OCR.
///
/// Represents a table structure recognized during OCR processing.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrTable {
    cells: JsValue,
    markdown: String,
    page_number: u32,
    bounding_box: Option,
}

#[wasm_bindgen]
impl WasmOcrTable {
    /// Creates an OCR table from its cells value, Markdown text, page number and
    /// optional bounding box.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        cells: JsValue,
        markdown: String,
        pageNumber: u32,
        boundingBox: Option,
    ) -> WasmOcrTable {
        WasmOcrTable {
            cells,
            markdown,
            page_number: pageNumber,
            bounding_box: boundingBox,
        }
    }

    /// Associated function (no receiver) returning a default-valued instance.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmOcrTable {
        ::default()
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> JsValue {
        self.cells.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: JsValue) {
        self.cells = value;
    }

    #[wasm_bindgen(getter)]
    pub fn markdown(&self) -> String {
        self.markdown.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_markdown(&mut self, value: String) {
        self.markdown = value;
    }

    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option {
        self.bounding_box.clone()
    }

    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option) {
        self.bounding_box = value;
    }
}

/// Bounding box for an OCR-detected table in pixel coordinates.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrTableBoundingBox {
    left: u32,
    top: u32,
    right: u32,
    bottom: u32,
}

#[wasm_bindgen]
impl WasmOcrTableBoundingBox {
    /// Creates a bounding box from its four `u32` edges.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(left: u32, top: u32, right: u32, bottom: u32) -> WasmOcrTableBoundingBox {
        WasmOcrTableBoundingBox {
            left,
            top,
            right,
            bottom,
        }
    }

    /// Returns the bounding box produced by the derived `Default` impl (all edges `0`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    pub fn default() -> WasmOcrTableBoundingBox {
        // Call through the trait path: `WasmOcrTableBoundingBox::default()` would resolve
        // to this inherent function and recurse forever.
        Default::default()
    }

    #[wasm_bindgen(getter)]
    pub fn left(&self) -> u32 {
        self.left
    }

    #[wasm_bindgen(setter)]
    pub fn set_left(&mut self, value: u32) {
        self.left = value;
    }

    #[wasm_bindgen(getter)]
    pub fn top(&self) -> u32 {
        self.top
    }

    #[wasm_bindgen(setter)]
    pub fn set_top(&mut self, value: u32) {
        self.top = value;
    }

    #[wasm_bindgen(getter)]
    pub fn right(&self) -> u32 {
        self.right
    }

    #[wasm_bindgen(setter)]
    pub fn set_right(&mut self, value: u32) {
        self.right = value;
    }

    #[wasm_bindgen(getter)]
    pub fn bottom(&self) -> u32 {
        self.bottom
    }

    #[wasm_bindgen(setter)]
    pub fn set_bottom(&mut self, value: u32) {
        self.bottom = value;
    }
}

/// Image preprocessing configuration for OCR.
///
/// These settings control how images are preprocessed before OCR to improve
/// text recognition quality. Different preprocessing strategies work better
/// for different document types.
// NOTE(review): the `Option` parameters and the `preprocessing` field in this block are
// written without type arguments; as written this cannot compile. The code is left untouched
// here - confirm the intended types against the generator output.
//
// NOTE(review): the derived `Default` yields 0 / false / "" for these fields, which differs
// from the fallbacks applied by `new` (300, true, true, false, false, "otsu", false). The
// inherent `default()` below does not use the derive at all - confirm which one callers expect.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImagePreprocessingConfig {
    target_dpi: i32,
    auto_rotate: bool,
    deskew: bool,
    denoise: bool,
    contrast_enhance: bool,
    binarization_method: String,
    invert_colors: bool,
}

#[wasm_bindgen]
impl WasmImagePreprocessingConfig {
    /// Creates a preprocessing configuration; every argument is optional.
    ///
    /// Fallbacks for `None`: `targetDpi` 300, `autoRotate` true, `deskew` true,
    /// `denoise` false, `contrastEnhance` false, `binarizationMethod` "otsu",
    /// `invertColors` false.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        targetDpi: Option,
        autoRotate: Option,
        deskew: Option,
        denoise: Option,
        contrastEnhance: Option,
        binarizationMethod: Option,
        invertColors: Option,
    ) -> WasmImagePreprocessingConfig {
        WasmImagePreprocessingConfig {
            target_dpi: targetDpi.unwrap_or(300),
            auto_rotate: autoRotate.unwrap_or(true),
            deskew: deskew.unwrap_or(true),
            denoise: denoise.unwrap_or(false),
            contrast_enhance: contrastEnhance.unwrap_or(false),
            binarization_method: binarizationMethod.unwrap_or_else(|| "otsu".to_string()),
            invert_colors: invertColors.unwrap_or(false),
        }
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    #[wasm_bindgen(getter, js_name = "autoRotate")]
    pub fn auto_rotate(&self) -> bool {
        self.auto_rotate
    }

    #[wasm_bindgen(setter, js_name = "autoRotate")]
    pub fn set_auto_rotate(&mut self, value: bool) {
        self.auto_rotate = value;
    }

    #[wasm_bindgen(getter)]
    pub fn deskew(&self) -> bool {
        self.deskew
    }

    #[wasm_bindgen(setter)]
    pub fn set_deskew(&mut self, value: bool) {
        self.deskew = value;
    }

    #[wasm_bindgen(getter)]
    pub fn denoise(&self) -> bool {
        self.denoise
    }

    #[wasm_bindgen(setter)]
    pub fn set_denoise(&mut self, value: bool) {
        self.denoise = value;
    }

    #[wasm_bindgen(getter, js_name = "contrastEnhance")]
    pub fn contrast_enhance(&self) -> bool {
        self.contrast_enhance
    }

    #[wasm_bindgen(setter, js_name = "contrastEnhance")]
    pub fn set_contrast_enhance(&mut self, value: bool) {
        self.contrast_enhance = value;
    }

    #[wasm_bindgen(getter, js_name = "binarizationMethod")]
    pub fn binarization_method(&self) -> String {
        self.binarization_method.clone()
    }

    #[wasm_bindgen(setter, js_name = "binarizationMethod")]
    pub fn set_binarization_method(&mut self, value: String) {
        self.binarization_method = value;
    }

    #[wasm_bindgen(getter, js_name = "invertColors")]
    pub fn invert_colors(&self) -> bool {
        self.invert_colors
    }

    #[wasm_bindgen(setter, js_name = "invertColors")]
    pub fn set_invert_colors(&mut self, value: bool) {
        self.invert_colors = value;
    }

    /// Returns `kreuzberg::ImagePreprocessingConfig::default()` converted with `.into()`.
    ///
    /// The conversion relies on a `From`/`Into` impl that is not part of this block
    /// (presumably defined elsewhere in this file - verify).
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    #[wasm_bindgen]
    pub fn default() -> WasmImagePreprocessingConfig {
        kreuzberg::ImagePreprocessingConfig::default().into()
    }
}

/// Tesseract OCR configuration.
///
/// Provides fine-grained control over Tesseract OCR engine parameters.
/// Most users can use the defaults, but these settings allow optimization
/// for specific document types (invoices, handwriting, etc.).
// NOTE(review): as with the preprocessing config, the derived `Default` (empty strings, 0,
// false) differs from the fallbacks applied by `new`, and the inherent `default()` bypasses
// the derive - confirm which one callers expect.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTesseractConfig {
    language: String,
    psm: i32,
    output_format: String,
    oem: i32,
    min_confidence: f64,
    preprocessing: Option,
    enable_table_detection: bool,
    table_min_confidence: f64,
    table_column_threshold: i32,
    table_row_threshold_ratio: f64,
    use_cache: bool,
    classify_use_pre_adapted_templates: bool,
    language_model_ngram_on: bool,
    tessedit_dont_blkrej_good_wds: bool,
    tessedit_dont_rowrej_good_wds: bool,
    tessedit_enable_dict_correction: bool,
    tessedit_char_whitelist: String,
    tessedit_char_blacklist: String,
    tessedit_use_primary_params_model: bool,
    textord_space_size_is_variable: bool,
    thresholding_method: bool,
}

#[wasm_bindgen]
impl WasmTesseractConfig {
    /// Creates a Tesseract configuration; every argument is optional.
    ///
    /// Each `None` argument is replaced by the literal fallback visible in the body
    /// (for example `language` "eng", `psm` 3, `outputFormat` "markdown", `oem` 3).
    /// `preprocessing` is stored as given, so `None` stays `None`. It is the last
    /// parameter even though the field sits earlier in the struct.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        language: Option,
        psm: Option,
        outputFormat: Option,
        oem: Option,
        minConfidence: Option,
        enableTableDetection: Option,
        tableMinConfidence: Option,
        tableColumnThreshold: Option,
        tableRowThresholdRatio: Option,
        useCache: Option,
        classifyUsePreAdaptedTemplates: Option,
        languageModelNgramOn: Option,
        tesseditDontBlkrejGoodWds: Option,
        tesseditDontRowrejGoodWds: Option,
        tesseditEnableDictCorrection: Option,
        tesseditCharWhitelist: Option,
        tesseditCharBlacklist: Option,
        tesseditUsePrimaryParamsModel: Option,
        textordSpaceSizeIsVariable: Option,
        thresholdingMethod: Option,
        preprocessing: Option,
    ) -> WasmTesseractConfig {
        WasmTesseractConfig {
            language: language.unwrap_or_else(|| "eng".to_string()),
            psm: psm.unwrap_or(3),
            output_format: outputFormat.unwrap_or_else(|| "markdown".to_string()),
            oem: oem.unwrap_or(3),
            min_confidence: minConfidence.unwrap_or(0.0),
            preprocessing,
            enable_table_detection: enableTableDetection.unwrap_or(true),
            table_min_confidence: tableMinConfidence.unwrap_or(0.0),
            table_column_threshold: tableColumnThreshold.unwrap_or(50),
            table_row_threshold_ratio: tableRowThresholdRatio.unwrap_or(0.5),
            use_cache: useCache.unwrap_or(true),
            classify_use_pre_adapted_templates: classifyUsePreAdaptedTemplates.unwrap_or(true),
            language_model_ngram_on: languageModelNgramOn.unwrap_or(false),
            tessedit_dont_blkrej_good_wds: tesseditDontBlkrejGoodWds.unwrap_or(true),
            tessedit_dont_rowrej_good_wds: tesseditDontRowrejGoodWds.unwrap_or(true),
            tessedit_enable_dict_correction: tesseditEnableDictCorrection.unwrap_or(true),
            tessedit_char_whitelist: tesseditCharWhitelist.unwrap_or_else(|| "".to_string()),
            tessedit_char_blacklist: tesseditCharBlacklist.unwrap_or_else(|| "".to_string()),
            tessedit_use_primary_params_model: tesseditUsePrimaryParamsModel.unwrap_or(true),
            textord_space_size_is_variable: textordSpaceSizeIsVariable.unwrap_or(true),
            thresholding_method: thresholdingMethod.unwrap_or(false),
        }
    }

    // --- Property accessors; getters return copies or clones of the stored field ---

    #[wasm_bindgen(getter)]
    pub fn language(&self) -> String {
        self.language.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: String) {
        self.language = value;
    }

    #[wasm_bindgen(getter)]
    pub fn psm(&self) -> i32 {
        self.psm
    }

    #[wasm_bindgen(setter)]
    pub fn set_psm(&mut self, value: i32) {
        self.psm = value;
    }

    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        self.output_format.clone()
    }

    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: String) {
        self.output_format = value;
    }

    #[wasm_bindgen(getter)]
    pub fn oem(&self) -> i32 {
        self.oem
    }

    #[wasm_bindgen(setter)]
    pub fn set_oem(&mut self, value: i32) {
        self.oem = value;
    }

    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }

    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }

    #[wasm_bindgen(getter)]
    pub fn preprocessing(&self) -> Option {
        self.preprocessing.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_preprocessing(&mut self, value: Option) {
        self.preprocessing = value;
    }

    #[wasm_bindgen(getter, js_name = "enableTableDetection")]
    pub fn enable_table_detection(&self) -> bool {
        self.enable_table_detection
    }

    #[wasm_bindgen(setter, js_name = "enableTableDetection")]
    pub fn set_enable_table_detection(&mut self, value: bool) {
        self.enable_table_detection = value;
    }

    #[wasm_bindgen(getter, js_name = "tableMinConfidence")]
    pub fn table_min_confidence(&self) -> f64 {
        self.table_min_confidence
    }

    #[wasm_bindgen(setter, js_name = "tableMinConfidence")]
    pub fn set_table_min_confidence(&mut self, value: f64) {
        self.table_min_confidence = value;
    }

    #[wasm_bindgen(getter, js_name = "tableColumnThreshold")]
    pub fn table_column_threshold(&self) -> i32 {
        self.table_column_threshold
    }

    #[wasm_bindgen(setter, js_name = "tableColumnThreshold")]
    pub fn set_table_column_threshold(&mut self, value: i32) {
        self.table_column_threshold = value;
    }

    #[wasm_bindgen(getter, js_name = "tableRowThresholdRatio")]
    pub fn table_row_threshold_ratio(&self) -> f64 {
        self.table_row_threshold_ratio
    }

    #[wasm_bindgen(setter, js_name = "tableRowThresholdRatio")]
    pub fn set_table_row_threshold_ratio(&mut self, value: f64) {
        self.table_row_threshold_ratio = value;
    }

    #[wasm_bindgen(getter, js_name = "useCache")]
    pub fn use_cache(&self) -> bool {
        self.use_cache
    }

    #[wasm_bindgen(setter, js_name = "useCache")]
    pub fn set_use_cache(&mut self, value: bool) {
        self.use_cache = value;
    }

    #[wasm_bindgen(getter, js_name = "classifyUsePreAdaptedTemplates")]
    pub fn classify_use_pre_adapted_templates(&self) -> bool {
        self.classify_use_pre_adapted_templates
    }

    #[wasm_bindgen(setter, js_name = "classifyUsePreAdaptedTemplates")]
    pub fn set_classify_use_pre_adapted_templates(&mut self, value: bool) {
        self.classify_use_pre_adapted_templates = value;
    }

    #[wasm_bindgen(getter, js_name = "languageModelNgramOn")]
    pub fn language_model_ngram_on(&self) -> bool {
        self.language_model_ngram_on
    }

    #[wasm_bindgen(setter, js_name = "languageModelNgramOn")]
    pub fn set_language_model_ngram_on(&mut self, value: bool) {
        self.language_model_ngram_on = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditDontBlkrejGoodWds")]
    pub fn tessedit_dont_blkrej_good_wds(&self) -> bool {
        self.tessedit_dont_blkrej_good_wds
    }

    #[wasm_bindgen(setter, js_name = "tesseditDontBlkrejGoodWds")]
    pub fn set_tessedit_dont_blkrej_good_wds(&mut self, value: bool) {
        self.tessedit_dont_blkrej_good_wds = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditDontRowrejGoodWds")]
    pub fn tessedit_dont_rowrej_good_wds(&self) -> bool {
        self.tessedit_dont_rowrej_good_wds
    }

    #[wasm_bindgen(setter, js_name = "tesseditDontRowrejGoodWds")]
    pub fn set_tessedit_dont_rowrej_good_wds(&mut self, value: bool) {
        self.tessedit_dont_rowrej_good_wds = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditEnableDictCorrection")]
    pub fn tessedit_enable_dict_correction(&self) -> bool {
        self.tessedit_enable_dict_correction
    }

    #[wasm_bindgen(setter, js_name = "tesseditEnableDictCorrection")]
    pub fn set_tessedit_enable_dict_correction(&mut self, value: bool) {
        self.tessedit_enable_dict_correction = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditCharWhitelist")]
    pub fn tessedit_char_whitelist(&self) -> String {
        self.tessedit_char_whitelist.clone()
    }

    #[wasm_bindgen(setter, js_name = "tesseditCharWhitelist")]
    pub fn set_tessedit_char_whitelist(&mut self, value: String) {
        self.tessedit_char_whitelist = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditCharBlacklist")]
    pub fn tessedit_char_blacklist(&self) -> String {
        self.tessedit_char_blacklist.clone()
    }

    #[wasm_bindgen(setter, js_name = "tesseditCharBlacklist")]
    pub fn set_tessedit_char_blacklist(&mut self, value: String) {
        self.tessedit_char_blacklist = value;
    }

    #[wasm_bindgen(getter, js_name = "tesseditUsePrimaryParamsModel")]
    pub fn tessedit_use_primary_params_model(&self) -> bool {
        self.tessedit_use_primary_params_model
    }

    #[wasm_bindgen(setter, js_name = "tesseditUsePrimaryParamsModel")]
    pub fn set_tessedit_use_primary_params_model(&mut self, value: bool) {
        self.tessedit_use_primary_params_model = value;
    }

    #[wasm_bindgen(getter, js_name = "textordSpaceSizeIsVariable")]
    pub fn textord_space_size_is_variable(&self) -> bool {
        self.textord_space_size_is_variable
    }

    #[wasm_bindgen(setter, js_name = "textordSpaceSizeIsVariable")]
    pub fn set_textord_space_size_is_variable(&mut self, value: bool) {
        self.textord_space_size_is_variable = value;
    }

    #[wasm_bindgen(getter, js_name = "thresholdingMethod")]
    pub fn thresholding_method(&self) -> bool {
        self.thresholding_method
    }

    #[wasm_bindgen(setter, js_name = "thresholdingMethod")]
    pub fn set_thresholding_method(&mut self, value: bool) {
        self.thresholding_method = value;
    }

    /// Returns `kreuzberg::TesseractConfig::default()` converted with `.into()`.
    ///
    /// The conversion relies on a `From`/`Into` impl that is not part of this block
    /// (presumably defined elsewhere in this file - verify).
    #[allow(clippy::should_implement_trait)] // inherent `default` intentionally mirrors the trait name
    #[wasm_bindgen]
    pub fn default() -> WasmTesseractConfig {
        kreuzberg::TesseractConfig::default().into()
    }
}

/// Image preprocessing metadata.
///
/// Tracks the transformations applied to an image during OCR preprocessing,
/// including DPI normalization, resizing, and resampling.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImagePreprocessingMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Vec`, `Option`, `Option>`, `::default()`); tokens are kept as found —
    // confirm against the generator output.
    original_dimensions: Vec,
    original_dpi: Vec,
    target_dpi: i32,
    scale_factor: f64,
    auto_adjusted: bool,
    final_dpi: i32,
    new_dimensions: Option>,
    resample_method: String,
    dimension_clamped: bool,
    calculated_dpi: Option,
    skipped_resize: bool,
    resize_error: Option,
}

#[wasm_bindgen]
impl WasmImagePreprocessingMetadata {
    /// Builds the metadata from explicit values.
    ///
    /// Each camelCase argument is stored unchanged in the snake_case field of
    /// the same name. The three optional arguments (`newDimensions`,
    /// `calculatedDpi`, `resizeError`) come last in the parameter list, which
    /// differs from the field declaration order.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        originalDimensions: Vec,
        originalDpi: Vec,
        targetDpi: i32,
        scaleFactor: f64,
        autoAdjusted: bool,
        finalDpi: i32,
        resampleMethod: String,
        dimensionClamped: bool,
        skippedResize: bool,
        newDimensions: Option>,
        calculatedDpi: Option,
        resizeError: Option,
    ) -> WasmImagePreprocessingMetadata {
        WasmImagePreprocessingMetadata {
            original_dimensions: originalDimensions,
            original_dpi: originalDpi,
            target_dpi: targetDpi,
            scale_factor: scaleFactor,
            auto_adjusted: autoAdjusted,
            final_dpi: finalDpi,
            new_dimensions: newDimensions,
            resample_method: resampleMethod,
            dimension_clamped: dimensionClamped,
            calculated_dpi: calculatedDpi,
            skipped_resize: skippedResize,
            resize_error: resizeError,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmImagePreprocessingMetadata {
        ::default()
    }

    // Property accessors. Each getter hands back the field (cloning where the
    // code calls `.clone()`); each setter overwrites the field with `value`.
    // The JS-side property name is the `js_name` given in the attribute.

    #[wasm_bindgen(getter, js_name = "originalDimensions")]
    pub fn original_dimensions(&self) -> Vec {
        self.original_dimensions.clone()
    }

    #[wasm_bindgen(setter, js_name = "originalDimensions")]
    pub fn set_original_dimensions(&mut self, value: Vec) {
        self.original_dimensions = value;
    }

    #[wasm_bindgen(getter, js_name = "originalDpi")]
    pub fn original_dpi(&self) -> Vec {
        self.original_dpi.clone()
    }

    #[wasm_bindgen(setter, js_name = "originalDpi")]
    pub fn set_original_dpi(&mut self, value: Vec) {
        self.original_dpi = value;
    }

    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    #[wasm_bindgen(getter, js_name = "scaleFactor")]
    pub fn scale_factor(&self) -> f64 {
        self.scale_factor
    }

    #[wasm_bindgen(setter, js_name = "scaleFactor")]
    pub fn set_scale_factor(&mut self, value: f64) {
        self.scale_factor = value;
    }

    #[wasm_bindgen(getter, js_name = "autoAdjusted")]
    pub fn auto_adjusted(&self) -> bool {
        self.auto_adjusted
    }

    #[wasm_bindgen(setter, js_name = "autoAdjusted")]
    pub fn set_auto_adjusted(&mut self, value: bool) {
        self.auto_adjusted = value;
    }

    #[wasm_bindgen(getter, js_name = "finalDpi")]
    pub fn final_dpi(&self) -> i32 {
        self.final_dpi
    }

    #[wasm_bindgen(setter, js_name = "finalDpi")]
    pub fn set_final_dpi(&mut self, value: i32) {
        self.final_dpi = value;
    }

    #[wasm_bindgen(getter, js_name = "newDimensions")]
    pub fn new_dimensions(&self) -> Option> {
        self.new_dimensions.clone()
    }

    #[wasm_bindgen(setter, js_name = "newDimensions")]
    pub fn set_new_dimensions(&mut self, value: Option>) {
        self.new_dimensions = value;
    }

    #[wasm_bindgen(getter, js_name = "resampleMethod")]
    pub fn resample_method(&self) -> String {
        self.resample_method.clone()
    }

    #[wasm_bindgen(setter, js_name = "resampleMethod")]
    pub fn set_resample_method(&mut self, value: String) {
        self.resample_method = value;
    }

    #[wasm_bindgen(getter, js_name = "dimensionClamped")]
    pub fn dimension_clamped(&self) -> bool {
        self.dimension_clamped
    }

    #[wasm_bindgen(setter, js_name = "dimensionClamped")]
    pub fn set_dimension_clamped(&mut self, value: bool) {
        self.dimension_clamped = value;
    }

    #[wasm_bindgen(getter, js_name = "calculatedDpi")]
    pub fn calculated_dpi(&self) -> Option {
        self.calculated_dpi
    }

    #[wasm_bindgen(setter, js_name = "calculatedDpi")]
    pub fn set_calculated_dpi(&mut self, value: Option) {
        self.calculated_dpi = value;
    }

    #[wasm_bindgen(getter, js_name = "skippedResize")]
    pub fn skipped_resize(&self) -> bool {
        self.skipped_resize
    }

    #[wasm_bindgen(setter, js_name = "skippedResize")]
    pub fn set_skipped_resize(&mut self, value: bool) {
        self.skipped_resize = value;
    }

    #[wasm_bindgen(getter, js_name = "resizeError")]
    pub fn resize_error(&self) -> Option {
        self.resize_error.clone()
    }

    #[wasm_bindgen(setter, js_name = "resizeError")]
    pub fn set_resize_error(&mut self, value: Option) {
        self.resize_error = value;
    }
}

/// Extraction result metadata.
///
/// Contains common fields applicable to all formats, format-specific metadata
/// via a discriminated union, and additional custom fields from postprocessors.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmMetadata {
    title: Option,
    subject: Option,
    authors: Option>,
    keywords: Option>,
    language: Option,
    created_at: Option,
    modified_at: Option,
    created_by: Option,
    modified_by: Option,
    pages: Option,
    format: Option,
    image_preprocessing: Option,
    json_schema: Option,
    error: Option,
    extraction_duration_ms: Option,
    category: Option,
    tags: Option>,
    document_version: Option,
    abstract_text: Option,
    output_format: Option,
    ocr_used: bool,
    additional: JsValue,
}

#[wasm_bindgen]
impl WasmMetadata {
    /// Builds the metadata from optional values.
    ///
    /// `ocrUsed` and `additional` are listed first and fall back to their
    /// `Default` value when `None`; every other argument is stored as given in
    /// the snake_case field of the same name.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        ocrUsed: Option,
        additional: Option,
        title: Option,
        subject: Option,
        authors: Option>,
        keywords: Option>,
        language: Option,
        createdAt: Option,
        modifiedAt: Option,
        createdBy: Option,
        modifiedBy: Option,
        pages: Option,
        format: Option,
        imagePreprocessing: Option,
        jsonSchema: Option,
        error: Option,
        extractionDurationMs: Option,
        category: Option,
        tags: Option>,
        documentVersion: Option,
        abstractText: Option,
        outputFormat: Option,
    ) -> WasmMetadata {
        WasmMetadata {
            title,
            subject,
            authors,
            keywords,
            language,
            created_at: createdAt,
            modified_at: modifiedAt,
            created_by: createdBy,
            modified_by: modifiedBy,
            pages,
            format,
            image_preprocessing: imagePreprocessing,
            json_schema: jsonSchema,
            error,
            extraction_duration_ms: extractionDurationMs,
            category,
            tags,
            document_version: documentVersion,
            abstract_text: abstractText,
            output_format: outputFormat,
            ocr_used: ocrUsed.unwrap_or_default(),
            additional: additional.unwrap_or_default(),
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmMetadata {
        ::default()
    }

    // Property accessors. Each getter hands back the field (cloning where the
    // code calls `.clone()`); each setter overwrites the field with `value`.

    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    #[wasm_bindgen(getter)]
    pub fn subject(&self) -> Option {
        self.subject.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_subject(&mut self, value: Option) {
        self.subject = value;
    }

    #[wasm_bindgen(getter)]
    pub fn authors(&self) -> Option> {
        self.authors.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_authors(&mut self, value: Option>) {
        self.authors = value;
    }

    #[wasm_bindgen(getter)]
    pub fn keywords(&self) -> Option> {
        self.keywords.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_keywords(&mut self, value: Option>) {
        self.keywords = value;
    }

    #[wasm_bindgen(getter)]
    pub fn language(&self) -> Option {
        self.language.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: Option) {
        self.language = value;
    }

    #[wasm_bindgen(getter, js_name = "createdAt")]
    pub fn created_at(&self) -> Option {
        self.created_at.clone()
    }

    #[wasm_bindgen(setter, js_name = "createdAt")]
    pub fn set_created_at(&mut self, value: Option) {
        self.created_at = value;
    }

    #[wasm_bindgen(getter, js_name = "modifiedAt")]
    pub fn modified_at(&self) -> Option {
        self.modified_at.clone()
    }

    #[wasm_bindgen(setter, js_name = "modifiedAt")]
    pub fn set_modified_at(&mut self, value: Option) {
        self.modified_at = value;
    }

    #[wasm_bindgen(getter, js_name = "createdBy")]
    pub fn created_by(&self) -> Option {
        self.created_by.clone()
    }

    #[wasm_bindgen(setter, js_name = "createdBy")]
    pub fn set_created_by(&mut self, value: Option) {
        self.created_by = value;
    }

    #[wasm_bindgen(getter, js_name = "modifiedBy")]
    pub fn modified_by(&self) -> Option {
        self.modified_by.clone()
    }

    #[wasm_bindgen(setter, js_name = "modifiedBy")]
    pub fn set_modified_by(&mut self, value: Option) {
        self.modified_by = value;
    }

    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option {
        self.pages.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option) {
        self.pages = value;
    }

    #[wasm_bindgen(getter)]
    pub fn format(&self) -> Option {
        self.format.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: Option) {
        self.format = value;
    }

    #[wasm_bindgen(getter, js_name = "imagePreprocessing")]
    pub fn image_preprocessing(&self) -> Option {
        self.image_preprocessing.clone()
    }

    #[wasm_bindgen(setter, js_name = "imagePreprocessing")]
    pub fn set_image_preprocessing(&mut self, value: Option) {
        self.image_preprocessing = value;
    }

    #[wasm_bindgen(getter, js_name = "jsonSchema")]
    pub fn json_schema(&self) -> Option {
        self.json_schema.clone()
    }

    #[wasm_bindgen(setter, js_name = "jsonSchema")]
    pub fn set_json_schema(&mut self, value: Option) {
        self.json_schema = value;
    }

    #[wasm_bindgen(getter)]
    pub fn error(&self) -> Option {
        self.error.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_error(&mut self, value: Option) {
        self.error = value;
    }

    #[wasm_bindgen(getter, js_name = "extractionDurationMs")]
    pub fn extraction_duration_ms(&self) -> Option {
        self.extraction_duration_ms
    }

    #[wasm_bindgen(setter, js_name = "extractionDurationMs")]
    pub fn set_extraction_duration_ms(&mut self, value: Option) {
        self.extraction_duration_ms = value;
    }

    #[wasm_bindgen(getter)]
    pub fn category(&self) -> Option {
        self.category.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_category(&mut self, value: Option) {
        self.category = value;
    }

    #[wasm_bindgen(getter)]
    pub fn tags(&self) -> Option> {
        self.tags.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_tags(&mut self, value: Option>) {
        self.tags = value;
    }

    #[wasm_bindgen(getter, js_name = "documentVersion")]
    pub fn document_version(&self) -> Option {
        self.document_version.clone()
    }

    #[wasm_bindgen(setter, js_name = "documentVersion")]
    pub fn set_document_version(&mut self, value: Option) {
        self.document_version = value;
    }

    #[wasm_bindgen(getter, js_name = "abstractText")]
    pub fn abstract_text(&self) -> Option {
        self.abstract_text.clone()
    }

    #[wasm_bindgen(setter, js_name = "abstractText")]
    pub fn set_abstract_text(&mut self, value: Option) {
        self.abstract_text = value;
    }

    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> Option {
        self.output_format.clone()
    }

    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: Option) {
        self.output_format = value;
    }

    #[wasm_bindgen(getter, js_name = "ocrUsed")]
    pub fn ocr_used(&self) -> bool {
        self.ocr_used
    }

    #[wasm_bindgen(setter, js_name = "ocrUsed")]
    pub fn set_ocr_used(&mut self, value: bool) {
        self.ocr_used = value;
    }

    #[wasm_bindgen(getter)]
    pub fn additional(&self) -> JsValue {
        self.additional.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_additional(&mut self, value: JsValue) {
        self.additional = value;
    }

    /// Returns `true` when no metadata fields, format-specific metadata, or
    /// additional postprocessor fields are populated.
    ///
    /// Implemented by converting a clone of `self` into `kreuzberg::Metadata`
    /// and delegating to its `is_empty()`.
    #[wasm_bindgen(js_name = "isEmpty")]
    pub fn is_empty(&self) -> bool {
        kreuzberg::Metadata::from(self.clone()).is_empty()
    }
}

/// Excel/spreadsheet format metadata.
///
/// Identifies the document as a spreadsheet source via the `FormatMetadata.Excel`
/// discriminant. Sheet count and sheet names are stored inside this struct.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Option>`, `::default()`); tokens are kept as found — confirm
    // against the generator output.
    sheet_count: Option,
    sheet_names: Option>,
}

#[wasm_bindgen]
impl WasmExcelMetadata {
    /// Builds the metadata; both arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(sheetCount: Option, sheetNames: Option>) -> WasmExcelMetadata {
        WasmExcelMetadata {
            sheet_count: sheetCount,
            sheet_names: sheetNames,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExcelMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "sheetCount")]
    pub fn sheet_count(&self) -> Option {
        self.sheet_count
    }

    #[wasm_bindgen(setter, js_name = "sheetCount")]
    pub fn set_sheet_count(&mut self, value: Option) {
        self.sheet_count = value;
    }

    #[wasm_bindgen(getter, js_name = "sheetNames")]
    pub fn sheet_names(&self) -> Option> {
        self.sheet_names.clone()
    }

    #[wasm_bindgen(setter, js_name = "sheetNames")]
    pub fn set_sheet_names(&mut self, value: Option>) {
        self.sheet_names = value;
    }
}

/// Email metadata extracted from .eml and .msg files.
///
/// Includes sender/recipient information, message ID, and attachment list.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Vec`, `Option>`, `::default()`); tokens are kept as found —
    // confirm against the generator output.
    from_email: Option,
    from_name: Option,
    to_emails: Vec,
    cc_emails: Vec,
    bcc_emails: Vec,
    message_id: Option,
    attachments: Vec,
}

#[wasm_bindgen]
impl WasmEmailMetadata {
    /// Builds the metadata from optional values.
    ///
    /// The four list arguments (`toEmails`, `ccEmails`, `bccEmails`,
    /// `attachments`) come first and fall back to their `Default` value when
    /// `None`; `fromEmail`, `fromName` and `messageId` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        toEmails: Option>,
        ccEmails: Option>,
        bccEmails: Option>,
        attachments: Option>,
        fromEmail: Option,
        fromName: Option,
        messageId: Option,
    ) -> WasmEmailMetadata {
        WasmEmailMetadata {
            from_email: fromEmail,
            from_name: fromName,
            to_emails: toEmails.unwrap_or_default(),
            cc_emails: ccEmails.unwrap_or_default(),
            bcc_emails: bccEmails.unwrap_or_default(),
            message_id: messageId,
            attachments: attachments.unwrap_or_default(),
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmailMetadata {
        ::default()
    }

    // Property accessors: getters return a clone of the field; setters
    // overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "fromEmail")]
    pub fn from_email(&self) -> Option {
        self.from_email.clone()
    }

    #[wasm_bindgen(setter, js_name = "fromEmail")]
    pub fn set_from_email(&mut self, value: Option) {
        self.from_email = value;
    }

    #[wasm_bindgen(getter, js_name = "fromName")]
    pub fn from_name(&self) -> Option {
        self.from_name.clone()
    }

    #[wasm_bindgen(setter, js_name = "fromName")]
    pub fn set_from_name(&mut self, value: Option) {
        self.from_name = value;
    }

    #[wasm_bindgen(getter, js_name = "toEmails")]
    pub fn to_emails(&self) -> Vec {
        self.to_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "toEmails")]
    pub fn set_to_emails(&mut self, value: Vec) {
        self.to_emails = value;
    }

    #[wasm_bindgen(getter, js_name = "ccEmails")]
    pub fn cc_emails(&self) -> Vec {
        self.cc_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "ccEmails")]
    pub fn set_cc_emails(&mut self, value: Vec) {
        self.cc_emails = value;
    }

    #[wasm_bindgen(getter, js_name = "bccEmails")]
    pub fn bcc_emails(&self) -> Vec {
        self.bcc_emails.clone()
    }

    #[wasm_bindgen(setter, js_name = "bccEmails")]
    pub fn set_bcc_emails(&mut self, value: Vec) {
        self.bcc_emails = value;
    }

    #[wasm_bindgen(getter, js_name = "messageId")]
    pub fn message_id(&self) -> Option {
        self.message_id.clone()
    }

    #[wasm_bindgen(setter, js_name = "messageId")]
    pub fn set_message_id(&mut self, value: Option) {
        self.message_id = value;
    }

    #[wasm_bindgen(getter)]
    pub fn attachments(&self) -> Vec {
        self.attachments.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attachments(&mut self, value: Vec) {
        self.attachments = value;
    }
}

/// Archive (ZIP/TAR/7Z) metadata.
///
/// Extracted from compressed archive files containing file lists and size information.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmArchiveMetadata {
    format: String,
    file_count: u32,
    file_list: Vec,
    total_size: u64,
    compressed_size: Option,
}

#[wasm_bindgen]
impl WasmArchiveMetadata {
    /// Builds the metadata from optional values.
    ///
    /// `format`, `fileCount`, `fileList` and `totalSize` fall back to their
    /// `Default` value when `None`; `compressedSize` is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        format: Option,
        fileCount: Option,
        fileList: Option>,
        totalSize: Option,
        compressedSize: Option,
    ) -> WasmArchiveMetadata {
        WasmArchiveMetadata {
            format: format.unwrap_or_default(),
            file_count: fileCount.unwrap_or_default(),
            file_list: fileList.unwrap_or_default(),
            total_size: totalSize.unwrap_or_default(),
            compressed_size: compressedSize,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmArchiveMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn format(&self) -> String {
        self.format.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: String) {
        self.format = value;
    }

    #[wasm_bindgen(getter, js_name = "fileCount")]
    pub fn file_count(&self) -> u32 {
        self.file_count
    }

    #[wasm_bindgen(setter, js_name = "fileCount")]
    pub fn set_file_count(&mut self, value: u32) {
        self.file_count = value;
    }

    #[wasm_bindgen(getter, js_name = "fileList")]
    pub fn file_list(&self) -> Vec {
        self.file_list.clone()
    }

    #[wasm_bindgen(setter, js_name = "fileList")]
    pub fn set_file_list(&mut self, value: Vec) {
        self.file_list = value;
    }

    #[wasm_bindgen(getter, js_name = "totalSize")]
    pub fn total_size(&self) -> u64 {
        self.total_size
    }

    #[wasm_bindgen(setter, js_name = "totalSize")]
    pub fn set_total_size(&mut self, value: u64) {
        self.total_size = value;
    }

    #[wasm_bindgen(getter, js_name = "compressedSize")]
    pub fn compressed_size(&self) -> Option {
        self.compressed_size
    }

    #[wasm_bindgen(setter, js_name = "compressedSize")]
    pub fn set_compressed_size(&mut self, value: Option) {
        self.compressed_size = value;
    }
}

/// Image metadata extracted from image files.
///
/// Includes dimensions, format, and EXIF data.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImageMetadata {
    width: u32,
    height: u32,
    format: String,
    exif: JsValue,
}

#[wasm_bindgen]
impl WasmImageMetadata {
    /// Builds the metadata from optional values; every argument falls back to
    /// its `Default` value when `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        width: Option,
        height: Option,
        format: Option,
        exif: Option,
    ) -> WasmImageMetadata {
        WasmImageMetadata {
            width: width.unwrap_or_default(),
            height: height.unwrap_or_default(),
            format: format.unwrap_or_default(),
            exif: exif.unwrap_or_default(),
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmImageMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn width(&self) -> u32 {
        self.width
    }

    #[wasm_bindgen(setter)]
    pub fn set_width(&mut self, value: u32) {
        self.width = value;
    }

    #[wasm_bindgen(getter)]
    pub fn height(&self) -> u32 {
        self.height
    }

    #[wasm_bindgen(setter)]
    pub fn set_height(&mut self, value: u32) {
        self.height = value;
    }

    #[wasm_bindgen(getter)]
    pub fn format(&self) -> String {
        self.format.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: String) {
        self.format = value;
    }

    #[wasm_bindgen(getter)]
    pub fn exif(&self) -> JsValue {
        self.exif.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_exif(&mut self, value: JsValue) {
        self.exif = value;
    }
}

/// XML metadata extracted during XML parsing.
///
/// Provides statistics about XML document structure.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXmlMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Vec`, `Option`, `Option>`, `::default()`); tokens are kept as found —
    // confirm against the generator output.
    element_count: u32,
    unique_elements: Vec,
}

#[wasm_bindgen]
impl WasmXmlMetadata {
    /// Builds the metadata; each argument falls back to its `Default` value
    /// when `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(elementCount: Option, uniqueElements: Option>) -> WasmXmlMetadata {
        WasmXmlMetadata {
            element_count: elementCount.unwrap_or_default(),
            unique_elements: uniqueElements.unwrap_or_default(),
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmXmlMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "elementCount")]
    pub fn element_count(&self) -> u32 {
        self.element_count
    }

    #[wasm_bindgen(setter, js_name = "elementCount")]
    pub fn set_element_count(&mut self, value: u32) {
        self.element_count = value;
    }

    #[wasm_bindgen(getter, js_name = "uniqueElements")]
    pub fn unique_elements(&self) -> Vec {
        self.unique_elements.clone()
    }

    #[wasm_bindgen(setter, js_name = "uniqueElements")]
    pub fn set_unique_elements(&mut self, value: Vec) {
        self.unique_elements = value;
    }
}

/// Text/Markdown metadata.
///
/// Extracted from plain text and Markdown files. Includes word counts and,
/// for Markdown, structural elements like headers and links.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Option>`, `::default()`); tokens are kept as found — confirm
    // against the generator output.
    line_count: u32,
    word_count: u32,
    character_count: u32,
    headers: Option>,
    links: Option,
    code_blocks: Option,
}

#[wasm_bindgen]
impl WasmTextMetadata {
    /// Builds the metadata from optional values.
    ///
    /// The three counts fall back to their `Default` value when `None`;
    /// `headers`, `links` and `codeBlocks` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        lineCount: Option,
        wordCount: Option,
        characterCount: Option,
        headers: Option>,
        links: Option,
        codeBlocks: Option,
    ) -> WasmTextMetadata {
        WasmTextMetadata {
            line_count: lineCount.unwrap_or_default(),
            word_count: wordCount.unwrap_or_default(),
            character_count: characterCount.unwrap_or_default(),
            headers,
            links,
            code_blocks: codeBlocks,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTextMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "lineCount")]
    pub fn line_count(&self) -> u32 {
        self.line_count
    }

    #[wasm_bindgen(setter, js_name = "lineCount")]
    pub fn set_line_count(&mut self, value: u32) {
        self.line_count = value;
    }

    #[wasm_bindgen(getter, js_name = "wordCount")]
    pub fn word_count(&self) -> u32 {
        self.word_count
    }

    #[wasm_bindgen(setter, js_name = "wordCount")]
    pub fn set_word_count(&mut self, value: u32) {
        self.word_count = value;
    }

    #[wasm_bindgen(getter, js_name = "characterCount")]
    pub fn character_count(&self) -> u32 {
        self.character_count
    }

    #[wasm_bindgen(setter, js_name = "characterCount")]
    pub fn set_character_count(&mut self, value: u32) {
        self.character_count = value;
    }

    #[wasm_bindgen(getter)]
    pub fn headers(&self) -> Option> {
        self.headers.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_headers(&mut self, value: Option>) {
        self.headers = value;
    }

    #[wasm_bindgen(getter)]
    pub fn links(&self) -> Option {
        self.links.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_links(&mut self, value: Option) {
        self.links = value;
    }

    #[wasm_bindgen(getter, js_name = "codeBlocks")]
    pub fn code_blocks(&self) -> Option {
        self.code_blocks.clone()
    }

    #[wasm_bindgen(setter, js_name = "codeBlocks")]
    pub fn set_code_blocks(&mut self, value: Option) {
        self.code_blocks = value;
    }
}

/// Header/heading element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHeaderMetadata {
    level: u8,
    text: String,
    id: Option,
    depth: u32,
    html_offset: u32,
}

#[wasm_bindgen]
impl WasmHeaderMetadata {
    /// Builds the metadata; every argument is stored as given. The optional
    /// `id` is the last parameter.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(level: u8, text: String, depth: u32, htmlOffset: u32, id: Option) -> WasmHeaderMetadata {
        WasmHeaderMetadata {
            level,
            text,
            id,
            depth,
            html_offset: htmlOffset,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmHeaderMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn level(&self) -> u8 {
        self.level
    }

    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: u8) {
        self.level = value;
    }

    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    #[wasm_bindgen(getter)]
    pub fn id(&self) -> Option {
        self.id.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_id(&mut self, value: Option) {
        self.id = value;
    }

    #[wasm_bindgen(getter)]
    pub fn depth(&self) -> u32 {
        self.depth
    }

    #[wasm_bindgen(setter)]
    pub fn set_depth(&mut self, value: u32) {
        self.depth = value;
    }

    #[wasm_bindgen(getter, js_name = "htmlOffset")]
    pub fn html_offset(&self) -> u32 {
        self.html_offset
    }

    #[wasm_bindgen(setter, js_name = "htmlOffset")]
    pub fn set_html_offset(&mut self, value: u32) {
        self.html_offset = value;
    }
}

/// Link element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLinkMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Vec`, `::default()`); tokens are kept as found — confirm
    // against the generator output.
    href: String,
    text: String,
    title: Option,
    link_type: WasmLinkType,
    rel: Vec,
    attributes: JsValue,
}

#[wasm_bindgen]
impl WasmLinkMetadata {
    /// Builds the metadata; every argument is stored as given. The optional
    /// `title` is the last parameter.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        href: String,
        text: String,
        linkType: WasmLinkType,
        rel: Vec,
        attributes: JsValue,
        title: Option,
    ) -> WasmLinkMetadata {
        WasmLinkMetadata {
            href,
            text,
            title,
            link_type: linkType,
            rel,
            attributes,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmLinkMetadata {
        ::default()
    }

    // Property accessors: getters return a clone of the field; setters
    // overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn href(&self) -> String {
        self.href.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_href(&mut self, value: String) {
        self.href = value;
    }

    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    /// Returns the link type as an owned copy of its `to_api_str()` string.
    ///
    /// Note the asymmetry: the getter yields a `String`, while the setter
    /// below takes a `WasmLinkType`.
    #[wasm_bindgen(getter, js_name = "linkType")]
    pub fn link_type(&self) -> String {
        self.link_type.to_api_str().to_owned()
    }

    #[wasm_bindgen(setter, js_name = "linkType")]
    pub fn set_link_type(&mut self, value: WasmLinkType) {
        self.link_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn rel(&self) -> Vec {
        self.rel.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_rel(&mut self, value: Vec) {
        self.rel = value;
    }

    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> JsValue {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: JsValue) {
        self.attributes = value;
    }
}

/// Image element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImageMetadataType {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Option>`, `::default()`); tokens are kept as found — confirm
    // against the generator output.
    src: String,
    alt: Option,
    title: Option,
    dimensions: Option>,
    image_type: WasmImageType,
    attributes: JsValue,
}

#[wasm_bindgen]
impl WasmImageMetadataType {
    /// Builds the metadata; every argument is stored as given. The optional
    /// `alt`, `title` and `dimensions` come last in the parameter list.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        src: String,
        imageType: WasmImageType,
        attributes: JsValue,
        alt: Option,
        title: Option,
        dimensions: Option>,
    ) -> WasmImageMetadataType {
        WasmImageMetadataType {
            src,
            alt,
            title,
            dimensions,
            image_type: imageType,
            attributes,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmImageMetadataType {
        ::default()
    }

    // Property accessors: getters return a clone of the field; setters
    // overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn src(&self) -> String {
        self.src.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_src(&mut self, value: String) {
        self.src = value;
    }

    #[wasm_bindgen(getter)]
    pub fn alt(&self) -> Option {
        self.alt.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_alt(&mut self, value: Option) {
        self.alt = value;
    }

    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    #[wasm_bindgen(getter)]
    pub fn dimensions(&self) -> Option> {
        self.dimensions.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_dimensions(&mut self, value: Option>) {
        self.dimensions = value;
    }

    /// Returns the image type as an owned copy of its `to_api_str()` string.
    ///
    /// Note the asymmetry: the getter yields a `String`, while the setter
    /// below takes a `WasmImageType`.
    #[wasm_bindgen(getter, js_name = "imageType")]
    pub fn image_type(&self) -> String {
        self.image_type.to_api_str().to_owned()
    }

    #[wasm_bindgen(setter, js_name = "imageType")]
    pub fn set_image_type(&mut self, value: WasmImageType) {
        self.image_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> JsValue {
        self.attributes.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: JsValue) {
        self.attributes = value;
    }
}

/// Structured data (Schema.org, microdata, RDFa) block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmStructuredData {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `::default()`); tokens are kept as found — confirm against the
    // generator output.
    data_type: WasmStructuredDataType,
    raw_json: String,
    schema_type: Option,
}

#[wasm_bindgen]
impl WasmStructuredData {
    /// Builds the block; every argument is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(dataType: WasmStructuredDataType, rawJson: String, schemaType: Option) -> WasmStructuredData {
        WasmStructuredData {
            data_type: dataType,
            raw_json: rawJson,
            schema_type: schemaType,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmStructuredData {
        ::default()
    }

    /// Returns the data type as an owned copy of its `to_api_str()` string.
    ///
    /// Note the asymmetry: the getter yields a `String`, while the setter
    /// below takes a `WasmStructuredDataType`.
    #[wasm_bindgen(getter, js_name = "dataType")]
    pub fn data_type(&self) -> String {
        self.data_type.to_api_str().to_owned()
    }

    #[wasm_bindgen(setter, js_name = "dataType")]
    pub fn set_data_type(&mut self, value: WasmStructuredDataType) {
        self.data_type = value;
    }

    // Remaining accessors: getters return a clone of the field; setters
    // overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "rawJson")]
    pub fn raw_json(&self) -> String {
        self.raw_json.clone()
    }

    #[wasm_bindgen(setter, js_name = "rawJson")]
    pub fn set_raw_json(&mut self, value: String) {
        self.raw_json = value;
    }

    #[wasm_bindgen(getter, js_name = "schemaType")]
    pub fn schema_type(&self) -> Option {
        self.schema_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "schemaType")]
    pub fn set_schema_type(&mut self, value: Option) {
        self.schema_type = value;
    }
}

/// HTML metadata extracted from HTML documents.
///
/// Includes document-level metadata, Open Graph data, Twitter Card metadata,
/// and extracted structural elements (headers, links, images, structured data).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHtmlMetadata {
    // NOTE(review): generic type arguments look stripped in this listing (bare
    // `Option`, `Vec`, `Option>`, `::default()`); tokens are kept as found —
    // confirm against the generator output.
    title: Option,
    description: Option,
    keywords: Vec,
    author: Option,
    canonical_url: Option,
    base_href: Option,
    language: Option,
    text_direction: Option,
    open_graph: JsValue,
    twitter_card: JsValue,
    meta_tags: JsValue,
    headers: Vec,
    links: Vec,
    images: Vec,
    structured_data: Vec,
}

#[wasm_bindgen]
impl WasmHtmlMetadata {
    /// Builds the metadata from optional values.
    ///
    /// The first eight arguments (`keywords` through `structuredData`) fall
    /// back to their `Default` value when `None`; the remaining ones (`title`
    /// through `textDirection`) are stored as given.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        keywords: Option>,
        openGraph: Option,
        twitterCard: Option,
        metaTags: Option,
        headers: Option>,
        links: Option>,
        images: Option>,
        structuredData: Option>,
        title: Option,
        description: Option,
        author: Option,
        canonicalUrl: Option,
        baseHref: Option,
        language: Option,
        textDirection: Option,
    ) -> WasmHtmlMetadata {
        WasmHtmlMetadata {
            title,
            description,
            keywords: keywords.unwrap_or_default(),
            author,
            canonical_url: canonicalUrl,
            base_href: baseHref,
            language,
            text_direction: textDirection,
            open_graph: openGraph.unwrap_or_default(),
            twitter_card: twitterCard.unwrap_or_default(),
            meta_tags: metaTags.unwrap_or_default(),
            headers: headers.unwrap_or_default(),
            links: links.unwrap_or_default(),
            images: images.unwrap_or_default(),
            structured_data: structuredData.unwrap_or_default(),
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmHtmlMetadata {
        ::default()
    }

    // Property accessors: unless documented otherwise, getters return a clone
    // of the field and setters overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    #[wasm_bindgen(getter)]
    pub fn description(&self) -> Option {
        self.description.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_description(&mut self, value: Option) {
        self.description = value;
    }

    #[wasm_bindgen(getter)]
    pub fn keywords(&self) -> Vec {
        self.keywords.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_keywords(&mut self, value: Vec) {
        self.keywords = value;
    }

    #[wasm_bindgen(getter)]
    pub fn author(&self) -> Option {
        self.author.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_author(&mut self, value: Option) {
        self.author = value;
    }

    #[wasm_bindgen(getter, js_name = "canonicalUrl")]
    pub fn canonical_url(&self) -> Option {
        self.canonical_url.clone()
    }

    #[wasm_bindgen(setter, js_name = "canonicalUrl")]
    pub fn set_canonical_url(&mut self, value: Option) {
        self.canonical_url = value;
    }

    #[wasm_bindgen(getter, js_name = "baseHref")]
    pub fn base_href(&self) -> Option {
        self.base_href.clone()
    }

    #[wasm_bindgen(setter, js_name = "baseHref")]
    pub fn set_base_href(&mut self, value: Option) {
        self.base_href = value;
    }

    #[wasm_bindgen(getter)]
    pub fn language(&self) -> Option {
        self.language.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: Option) {
        self.language = value;
    }

    /// Returns the stored text direction mapped through `to_api_str()` into an
    /// owned string, or `None` when no direction is set.
    #[wasm_bindgen(getter, js_name = "textDirection")]
    pub fn text_direction(&self) -> Option {
        self.text_direction.map(|v| v.to_api_str().to_owned())
    }

    #[wasm_bindgen(setter, js_name = "textDirection")]
    pub fn set_text_direction(&mut self, value: Option) {
        self.text_direction = value;
    }

    #[wasm_bindgen(getter, js_name = "openGraph")]
    pub fn open_graph(&self) -> JsValue {
        self.open_graph.clone()
    }

    #[wasm_bindgen(setter, js_name = "openGraph")]
    pub fn set_open_graph(&mut self, value: JsValue) {
        self.open_graph = value;
    }

    #[wasm_bindgen(getter, js_name = "twitterCard")]
    pub fn twitter_card(&self) -> JsValue {
        self.twitter_card.clone()
    }

    #[wasm_bindgen(setter, js_name = "twitterCard")]
    pub fn set_twitter_card(&mut self, value: JsValue) {
        self.twitter_card = value;
    }

    #[wasm_bindgen(getter, js_name = "metaTags")]
    pub fn meta_tags(&self) -> JsValue {
        self.meta_tags.clone()
    }

    #[wasm_bindgen(setter, js_name = "metaTags")]
    pub fn set_meta_tags(&mut self, value: JsValue) {
        self.meta_tags = value;
    }

    #[wasm_bindgen(getter)]
    pub fn headers(&self) -> Vec {
        self.headers.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_headers(&mut self, value: Vec) {
        self.headers = value;
    }

    #[wasm_bindgen(getter)]
    pub fn links(&self) -> Vec {
        self.links.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_links(&mut self, value: Vec) {
        self.links = value;
    }

    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Vec {
        self.images.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Vec) {
        self.images = value;
    }

    #[wasm_bindgen(getter, js_name = "structuredData")]
    pub fn structured_data(&self) -> Vec {
        self.structured_data.clone()
    }

    #[wasm_bindgen(setter, js_name = "structuredData")]
    pub fn set_structured_data(&mut self, value: Vec) {
        self.structured_data = value;
    }
}

/// OCR processing metadata.
///
/// Captures information about OCR processing configuration and results.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrMetadata {
    language: String,
    psm: i32,
    output_format: String,
    table_count: u32,
    table_rows: Option,
    table_cols: Option,
}

#[wasm_bindgen]
impl WasmOcrMetadata {
    /// Builds the metadata from optional values.
    ///
    /// `language`, `psm`, `outputFormat` and `tableCount` fall back to their
    /// `Default` value when `None`; `tableRows` and `tableCols` are stored as
    /// given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        language: Option,
        psm: Option,
        outputFormat: Option,
        tableCount: Option,
        tableRows: Option,
        tableCols: Option,
    ) -> WasmOcrMetadata {
        WasmOcrMetadata {
            language: language.unwrap_or_default(),
            psm: psm.unwrap_or_default(),
            output_format: outputFormat.unwrap_or_default(),
            table_count: tableCount.unwrap_or_default(),
            table_rows: tableRows,
            table_cols: tableCols,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrMetadata {
        ::default()
    }

    // Property accessors: getters return the field (cloned where `.clone()` is
    // called); setters overwrite it with `value`.

    #[wasm_bindgen(getter)]
    pub fn language(&self) -> String {
        self.language.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: String) {
        self.language = value;
    }

    #[wasm_bindgen(getter)]
    pub fn psm(&self) -> i32 {
        self.psm
    }

    #[wasm_bindgen(setter)]
    pub fn set_psm(&mut self, value: i32) {
        self.psm = value;
    }

    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        self.output_format.clone()
    }

    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: String) {
        self.output_format = value;
    }

    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> u32 {
        self.table_count
    }

    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: u32) {
        self.table_count = value;
    }

    #[wasm_bindgen(getter, js_name = "tableRows")]
    pub fn table_rows(&self) -> Option {
        self.table_rows
    }

    #[wasm_bindgen(setter, js_name = "tableRows")]
    pub fn set_table_rows(&mut self, value: Option) {
        self.table_rows = value;
    }

    #[wasm_bindgen(getter, js_name = "tableCols")]
    pub fn table_cols(&self) -> Option {
        self.table_cols
    }

    #[wasm_bindgen(setter, js_name = "tableCols")]
    pub fn set_table_cols(&mut self, value: Option) {
        self.table_cols = value;
    }
}

/// Error metadata (for batch operations).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmErrorMetadata {
    error_type: String,
    message: String,
}

#[wasm_bindgen]
impl WasmErrorMetadata {
    /// Builds the metadata; both arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(errorType: String, message: String) -> WasmErrorMetadata {
        WasmErrorMetadata {
            error_type: errorType,
            message,
        }
    }

    /// Returns the `Default` value of this type (the struct derives `Default`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmErrorMetadata {
        ::default()
    }

    // Property accessors: getters return a clone of the field; setters
    // overwrite it with `value`.

    #[wasm_bindgen(getter, js_name = "errorType")]
    pub fn error_type(&self) -> String {
        self.error_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "errorType")]
    pub fn set_error_type(&mut self, value: String) {
        self.error_type = value;
    }

    #[wasm_bindgen(getter)]
    pub fn message(&self) -> String {
        self.message.clone()
    }

    #[wasm_bindgen(setter)]
    pub fn set_message(&mut self, value: String) {
        self.message = value;
    }
}

/// PowerPoint presentation metadata.
///
/// Extracted from PPTX files containing slide counts and presentation details.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxMetadata {
    slide_count: u32,
    slide_names: Vec,
    image_count: Option,
    table_count: Option,
}

#[wasm_bindgen]
impl WasmPptxMetadata {
    /// Builds a `WasmPptxMetadata` from its individual fields.
    ///
    /// `slideCount` and `slideNames` fall back to their default value when
    /// `None`; `imageCount` and `tableCount` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        slideCount: Option,
        slideNames: Option>,
        imageCount: Option,
        tableCount: Option,
    ) -> WasmPptxMetadata {
        WasmPptxMetadata {
            slide_count: slideCount.unwrap_or_default(),
            slide_names: slideNames.unwrap_or_default(),
            image_count: imageCount,
            table_count: tableCount,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPptxMetadata {
        ::default()
    }

    /// Returns the `slide_count` field (bound as `slideCount`).
    #[wasm_bindgen(getter, js_name = "slideCount")]
    pub fn slide_count(&self) -> u32 {
        self.slide_count
    }

    /// Overwrites `slide_count` (bound as `slideCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "slideCount")]
    pub fn set_slide_count(&mut self, value: u32) {
        self.slide_count = value;
    }

    /// Returns a clone of `slide_names` (bound as `slideNames`).
    #[wasm_bindgen(getter, js_name = "slideNames")]
    pub fn slide_names(&self) -> Vec {
        self.slide_names.clone()
    }

    /// Overwrites `slide_names` (bound as `slideNames`) with `value`.
    #[wasm_bindgen(setter, js_name = "slideNames")]
    pub fn set_slide_names(&mut self, value: Vec) {
        self.slide_names = value;
    }

    /// Returns the `image_count` field (bound as `imageCount`).
    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> Option {
        self.image_count
    }

    /// Overwrites `image_count` (bound as `imageCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: Option) {
        self.image_count = value;
    }

    /// Returns the `table_count` field (bound as `tableCount`).
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> Option {
        self.table_count
    }

    /// Overwrites `table_count` (bound as `tableCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: Option) {
        self.table_count = value;
    }
}

/// CSV/TSV file metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCsvMetadata {
    row_count: u32,
    column_count: u32,
    delimiter: Option,
    has_header: bool,
    column_types: Option>,
}

#[wasm_bindgen]
impl WasmCsvMetadata {
    /// Builds a `WasmCsvMetadata` from its individual fields.
    ///
    /// `rowCount`, `columnCount` and `hasHeader` fall back to their default
    /// value when `None`; `delimiter` and `columnTypes` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        rowCount: Option,
        columnCount: Option,
        hasHeader: Option,
        delimiter: Option,
        columnTypes: Option>,
    ) -> WasmCsvMetadata {
        WasmCsvMetadata {
            row_count: rowCount.unwrap_or_default(),
            column_count: columnCount.unwrap_or_default(),
            delimiter,
            has_header: hasHeader.unwrap_or_default(),
            column_types: columnTypes,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCsvMetadata {
        ::default()
    }

    /// Returns the `row_count` field (bound as `rowCount`).
    #[wasm_bindgen(getter, js_name = "rowCount")]
    pub fn row_count(&self) -> u32 {
        self.row_count
    }

    /// Overwrites `row_count` (bound as `rowCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "rowCount")]
    pub fn set_row_count(&mut self, value: u32) {
        self.row_count = value;
    }

    /// Returns the `column_count` field (bound as `columnCount`).
    #[wasm_bindgen(getter, js_name = "columnCount")]
    pub fn column_count(&self) -> u32 {
        self.column_count
    }

    /// Overwrites `column_count` (bound as `columnCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "columnCount")]
    pub fn set_column_count(&mut self, value: u32) {
        self.column_count = value;
    }

    /// Returns a clone of `delimiter`.
    #[wasm_bindgen(getter)]
    pub fn delimiter(&self) -> Option {
        self.delimiter.clone()
    }

    /// Overwrites `delimiter` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_delimiter(&mut self, value: Option) {
        self.delimiter = value;
    }

    /// Returns the `has_header` flag (bound as `hasHeader`).
    #[wasm_bindgen(getter, js_name = "hasHeader")]
    pub fn has_header(&self) -> bool {
        self.has_header
    }

    /// Overwrites `has_header` (bound as `hasHeader`) with `value`.
    #[wasm_bindgen(setter, js_name = "hasHeader")]
    pub fn set_has_header(&mut self, value: bool) {
        self.has_header = value;
    }

    /// Returns a clone of `column_types` (bound as `columnTypes`).
    #[wasm_bindgen(getter, js_name = "columnTypes")]
    pub fn column_types(&self) -> Option> {
        self.column_types.clone()
    }

    /// Overwrites `column_types` (bound as `columnTypes`) with `value`.
    #[wasm_bindgen(setter, js_name = "columnTypes")]
    pub fn set_column_types(&mut self, value: Option>) {
        self.column_types = value;
    }
}

/// BibTeX bibliography metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBibtexMetadata {
    entry_count: usize,
    citation_keys: Vec,
    authors: Vec,
    year_range: Option,
    entry_types: Option,
}

#[wasm_bindgen]
impl WasmBibtexMetadata {
    /// Builds a `WasmBibtexMetadata` from its individual fields.
    ///
    /// `entryCount`, `citationKeys` and `authors` fall back to their default
    /// value when `None`; `yearRange` and `entryTypes` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        entryCount: Option,
        citationKeys: Option>,
        authors: Option>,
        yearRange: Option,
        entryTypes: Option,
    ) -> WasmBibtexMetadata {
        WasmBibtexMetadata {
            entry_count: entryCount.unwrap_or_default(),
            citation_keys: citationKeys.unwrap_or_default(),
            authors: authors.unwrap_or_default(),
            year_range: yearRange,
            entry_types: entryTypes,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmBibtexMetadata {
        ::default()
    }

    /// Returns the `entry_count` field (bound as `entryCount`).
    #[wasm_bindgen(getter, js_name = "entryCount")]
    pub fn entry_count(&self) -> usize {
        self.entry_count
    }

    /// Overwrites `entry_count` (bound as `entryCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "entryCount")]
    pub fn set_entry_count(&mut self, value: usize) {
        self.entry_count = value;
    }

    /// Returns a clone of `citation_keys` (bound as `citationKeys`).
    #[wasm_bindgen(getter, js_name = "citationKeys")]
    pub fn citation_keys(&self) -> Vec {
        self.citation_keys.clone()
    }

    /// Overwrites `citation_keys` (bound as `citationKeys`) with `value`.
    #[wasm_bindgen(setter, js_name = "citationKeys")]
    pub fn set_citation_keys(&mut self, value: Vec) {
        self.citation_keys = value;
    }

    /// Returns a clone of `authors`.
    #[wasm_bindgen(getter)]
    pub fn authors(&self) -> Vec {
        self.authors.clone()
    }

    /// Overwrites `authors` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_authors(&mut self, value: Vec) {
        self.authors = value;
    }

    /// Returns a clone of `year_range` (bound as `yearRange`).
    #[wasm_bindgen(getter, js_name = "yearRange")]
    pub fn year_range(&self) -> Option {
        self.year_range.clone()
    }

    /// Overwrites `year_range` (bound as `yearRange`) with `value`.
    #[wasm_bindgen(setter, js_name = "yearRange")]
    pub fn set_year_range(&mut self, value: Option) {
        self.year_range = value;
    }

    /// Returns a clone of `entry_types` (bound as `entryTypes`).
    #[wasm_bindgen(getter, js_name = "entryTypes")]
    pub fn entry_types(&self) -> Option {
        self.entry_types.clone()
    }

    /// Overwrites `entry_types` (bound as `entryTypes`) with `value`.
    #[wasm_bindgen(setter, js_name = "entryTypes")]
    pub fn set_entry_types(&mut self, value: Option) {
        self.entry_types = value;
    }
}

/// Citation file metadata (RIS, PubMed, EndNote).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCitationMetadata {
    citation_count: usize,
    format: Option,
    authors: Vec,
    year_range: Option,
    dois: Vec,
    keywords: Vec,
}

#[wasm_bindgen]
impl WasmCitationMetadata {
    /// Builds a `WasmCitationMetadata` from its individual fields.
    ///
    /// `citationCount`, `authors`, `dois` and `keywords` fall back to their
    /// default value when `None`; `format` and `yearRange` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        citationCount: Option,
        authors: Option>,
        dois: Option>,
        keywords: Option>,
        format: Option,
        yearRange: Option,
    ) -> WasmCitationMetadata {
        WasmCitationMetadata {
            citation_count: citationCount.unwrap_or_default(),
            format,
            authors: authors.unwrap_or_default(),
            year_range: yearRange,
            dois: dois.unwrap_or_default(),
            keywords: keywords.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCitationMetadata {
        ::default()
    }

    /// Returns the `citation_count` field (bound as `citationCount`).
    #[wasm_bindgen(getter, js_name = "citationCount")]
    pub fn citation_count(&self) -> usize {
        self.citation_count
    }

    /// Overwrites `citation_count` (bound as `citationCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "citationCount")]
    pub fn set_citation_count(&mut self, value: usize) {
        self.citation_count = value;
    }

    /// Returns a clone of `format`.
    #[wasm_bindgen(getter)]
    pub fn format(&self) -> Option {
        self.format.clone()
    }

    /// Overwrites `format` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: Option) {
        self.format = value;
    }

    /// Returns a clone of `authors`.
    #[wasm_bindgen(getter)]
    pub fn authors(&self) -> Vec {
        self.authors.clone()
    }

    /// Overwrites `authors` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_authors(&mut self, value: Vec) {
        self.authors = value;
    }

    /// Returns a clone of `year_range` (bound as `yearRange`).
    #[wasm_bindgen(getter, js_name = "yearRange")]
    pub fn year_range(&self) -> Option {
        self.year_range.clone()
    }

    /// Overwrites `year_range` (bound as `yearRange`) with `value`.
    #[wasm_bindgen(setter, js_name = "yearRange")]
    pub fn set_year_range(&mut self, value: Option) {
        self.year_range = value;
    }

    /// Returns a clone of `dois`.
    #[wasm_bindgen(getter)]
    pub fn dois(&self) -> Vec {
        self.dois.clone()
    }

    /// Overwrites `dois` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_dois(&mut self, value: Vec) {
        self.dois = value;
    }

    /// Returns a clone of `keywords`.
    #[wasm_bindgen(getter)]
    pub fn keywords(&self) -> Vec {
        self.keywords.clone()
    }

    /// Overwrites `keywords` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_keywords(&mut self, value: Vec) {
        self.keywords = value;
    }
}

/// Year range for bibliographic metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmYearRange {
    min: Option,
    max: Option,
    years: Vec,
}

#[wasm_bindgen]
impl WasmYearRange {
    /// Builds a `WasmYearRange`; every argument is stored as given.
    ///
    /// Note that `years` is the first argument although it is the last field.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(years: Vec, min: Option, max: Option) -> WasmYearRange {
        WasmYearRange { min, max, years }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmYearRange {
        ::default()
    }

    /// Returns the `min` field.
    #[wasm_bindgen(getter)]
    pub fn min(&self) -> Option {
        self.min
    }

    /// Overwrites `min` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_min(&mut self, value: Option) {
        self.min = value;
    }

    /// Returns the `max` field.
    #[wasm_bindgen(getter)]
    pub fn max(&self) -> Option {
        self.max
    }

    /// Overwrites `max` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_max(&mut self, value: Option) {
        self.max = value;
    }

    /// Returns a clone of `years`.
    #[wasm_bindgen(getter)]
    pub fn years(&self) -> Vec {
        self.years.clone()
    }

    /// Overwrites `years` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_years(&mut self, value: Vec) {
        self.years = value;
    }
}

/// FictionBook (FB2) metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFictionBookMetadata {
    genres: Vec,
    sequences: Vec,
    annotation: Option,
}

#[wasm_bindgen]
impl WasmFictionBookMetadata {
    /// Builds a `WasmFictionBookMetadata` from its individual fields.
    ///
    /// `genres` and `sequences` fall back to their default value when `None`;
    /// `annotation` is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        genres: Option>,
        sequences: Option>,
        annotation: Option,
    ) -> WasmFictionBookMetadata {
        WasmFictionBookMetadata {
            genres: genres.unwrap_or_default(),
            sequences: sequences.unwrap_or_default(),
            annotation,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFictionBookMetadata {
        ::default()
    }

    /// Returns a clone of `genres`.
    #[wasm_bindgen(getter)]
    pub fn genres(&self) -> Vec {
        self.genres.clone()
    }

    /// Overwrites `genres` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_genres(&mut self, value: Vec) {
        self.genres = value;
    }

    /// Returns a clone of `sequences`.
    #[wasm_bindgen(getter)]
    pub fn sequences(&self) -> Vec {
        self.sequences.clone()
    }

    /// Overwrites `sequences` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_sequences(&mut self, value: Vec) {
        self.sequences = value;
    }

    /// Returns a clone of `annotation`.
    #[wasm_bindgen(getter)]
    pub fn annotation(&self) -> Option {
        self.annotation.clone()
    }

    /// Overwrites `annotation` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_annotation(&mut self, value: Option) {
        self.annotation = value;
    }
}

/// dBASE (DBF) file metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDbfMetadata {
    record_count: usize,
    field_count: usize,
    fields: Vec,
}

#[wasm_bindgen]
impl WasmDbfMetadata {
    /// Builds a `WasmDbfMetadata`; every argument falls back to its default
    /// value when `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        recordCount: Option,
        fieldCount: Option,
        fields: Option>,
    ) -> WasmDbfMetadata {
        WasmDbfMetadata {
            record_count: recordCount.unwrap_or_default(),
            field_count: fieldCount.unwrap_or_default(),
            fields: fields.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDbfMetadata {
        ::default()
    }

    /// Returns the `record_count` field (bound as `recordCount`).
    #[wasm_bindgen(getter, js_name = "recordCount")]
    pub fn record_count(&self) -> usize {
        self.record_count
    }

    /// Overwrites `record_count` (bound as `recordCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "recordCount")]
    pub fn set_record_count(&mut self, value: usize) {
        self.record_count = value;
    }

    /// Returns the `field_count` field (bound as `fieldCount`).
    #[wasm_bindgen(getter, js_name = "fieldCount")]
    pub fn field_count(&self) -> usize {
        self.field_count
    }

    /// Overwrites `field_count` (bound as `fieldCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "fieldCount")]
    pub fn set_field_count(&mut self, value: usize) {
        self.field_count = value;
    }

    /// Returns a clone of `fields`.
    #[wasm_bindgen(getter)]
    pub fn fields(&self) -> Vec {
        self.fields.clone()
    }

    /// Overwrites `fields` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_fields(&mut self, value: Vec) {
        self.fields = value;
    }
}

/// dBASE field information.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDbfFieldInfo {
    name: String,
    field_type: String,
}

#[wasm_bindgen]
impl WasmDbfFieldInfo {
    /// Builds a `WasmDbfFieldInfo`; both arguments are required and stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(name: String, fieldType: String) -> WasmDbfFieldInfo {
        WasmDbfFieldInfo {
            name,
            field_type: fieldType,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDbfFieldInfo {
        ::default()
    }

    /// Returns a clone of `name`.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        self.name.clone()
    }

    /// Overwrites `name` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: String) {
        self.name = value;
    }

    /// Returns a clone of `field_type` (bound as `fieldType`).
    #[wasm_bindgen(getter, js_name = "fieldType")]
    pub fn field_type(&self) -> String {
        self.field_type.clone()
    }

    /// Overwrites `field_type` (bound as `fieldType`) with `value`.
    #[wasm_bindgen(setter, js_name = "fieldType")]
    pub fn set_field_type(&mut self, value: String) {
        self.field_type = value;
    }
}

/// JATS (Journal Article Tag Suite) metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmJatsMetadata {
    copyright: Option,
    license: Option,
    history_dates: JsValue,
    contributor_roles: Vec,
}

#[wasm_bindgen]
impl WasmJatsMetadata {
    /// Builds a `WasmJatsMetadata` from its individual fields.
    ///
    /// `historyDates` and `contributorRoles` fall back to their default value
    /// when `None`; `copyright` and `license` are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        historyDates: Option,
        contributorRoles: Option>,
        copyright: Option,
        license: Option,
    ) -> WasmJatsMetadata {
        WasmJatsMetadata {
            copyright,
            license,
            history_dates: historyDates.unwrap_or_default(),
            contributor_roles: contributorRoles.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmJatsMetadata {
        ::default()
    }

    /// Returns a clone of `copyright`.
    #[wasm_bindgen(getter)]
    pub fn copyright(&self) -> Option {
        self.copyright.clone()
    }

    /// Overwrites `copyright` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_copyright(&mut self, value: Option) {
        self.copyright = value;
    }

    /// Returns a clone of `license`.
    #[wasm_bindgen(getter)]
    pub fn license(&self) -> Option {
        self.license.clone()
    }

    /// Overwrites `license` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_license(&mut self, value: Option) {
        self.license = value;
    }

    /// Returns a clone of the `history_dates` `JsValue` (bound as `historyDates`).
    #[wasm_bindgen(getter, js_name = "historyDates")]
    pub fn history_dates(&self) -> JsValue {
        self.history_dates.clone()
    }

    /// Overwrites `history_dates` (bound as `historyDates`) with `value`.
    #[wasm_bindgen(setter, js_name = "historyDates")]
    pub fn set_history_dates(&mut self, value: JsValue) {
        self.history_dates = value;
    }

    /// Returns a clone of `contributor_roles` (bound as `contributorRoles`).
    #[wasm_bindgen(getter, js_name = "contributorRoles")]
    pub fn contributor_roles(&self) -> Vec {
        self.contributor_roles.clone()
    }

    /// Overwrites `contributor_roles` (bound as `contributorRoles`) with `value`.
    #[wasm_bindgen(setter, js_name = "contributorRoles")]
    pub fn set_contributor_roles(&mut self, value: Vec) {
        self.contributor_roles = value;
    }
}

/// JATS contributor with role.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmContributorRole {
    name: String,
    role: Option,
}

#[wasm_bindgen]
impl WasmContributorRole {
    /// Builds a `WasmContributorRole`; both arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(name: String, role: Option) -> WasmContributorRole {
        WasmContributorRole { name, role }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmContributorRole {
        ::default()
    }

    /// Returns a clone of `name`.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        self.name.clone()
    }

    /// Overwrites `name` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: String) {
        self.name = value;
    }

    /// Returns a clone of `role`.
    #[wasm_bindgen(getter)]
    pub fn role(&self) -> Option {
        self.role.clone()
    }

    /// Overwrites `role` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_role(&mut self, value: Option) {
        self.role = value;
    }
}

/// EPUB metadata (Dublin Core extensions).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEpubMetadata {
    coverage: Option,
    dc_format: Option,
    relation: Option,
    source: Option,
    dc_type: Option,
    cover_image: Option,
}

#[wasm_bindgen]
impl WasmEpubMetadata {
    /// Builds a `WasmEpubMetadata`; every argument is stored as given, in the
    /// same order as the fields are declared.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        coverage: Option,
        dcFormat: Option,
        relation: Option,
        source: Option,
        dcType: Option,
        coverImage: Option,
    ) -> WasmEpubMetadata {
        WasmEpubMetadata {
            coverage,
            dc_format: dcFormat,
            relation,
            source,
            dc_type: dcType,
            cover_image: coverImage,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEpubMetadata {
        ::default()
    }

    /// Returns a clone of `coverage`.
    #[wasm_bindgen(getter)]
    pub fn coverage(&self) -> Option {
        self.coverage.clone()
    }

    /// Overwrites `coverage` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_coverage(&mut self, value: Option) {
        self.coverage = value;
    }

    /// Returns a clone of `dc_format` (bound as `dcFormat`).
    #[wasm_bindgen(getter, js_name = "dcFormat")]
    pub fn dc_format(&self) -> Option {
        self.dc_format.clone()
    }

    /// Overwrites `dc_format` (bound as `dcFormat`) with `value`.
    #[wasm_bindgen(setter, js_name = "dcFormat")]
    pub fn set_dc_format(&mut self, value: Option) {
        self.dc_format = value;
    }

    /// Returns a clone of `relation`.
    #[wasm_bindgen(getter)]
    pub fn relation(&self) -> Option {
        self.relation.clone()
    }

    /// Overwrites `relation` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_relation(&mut self, value: Option) {
        self.relation = value;
    }

    /// Returns a clone of `source`.
    #[wasm_bindgen(getter)]
    pub fn source(&self) -> Option {
        self.source.clone()
    }

    /// Overwrites `source` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: Option) {
        self.source = value;
    }

    /// Returns a clone of `dc_type` (bound as `dcType`).
    #[wasm_bindgen(getter, js_name = "dcType")]
    pub fn dc_type(&self) -> Option {
        self.dc_type.clone()
    }

    /// Overwrites `dc_type` (bound as `dcType`) with `value`.
    #[wasm_bindgen(setter, js_name = "dcType")]
    pub fn set_dc_type(&mut self, value: Option) {
        self.dc_type = value;
    }

    /// Returns a clone of `cover_image` (bound as `coverImage`).
    #[wasm_bindgen(getter, js_name = "coverImage")]
    pub fn cover_image(&self) -> Option {
        self.cover_image.clone()
    }

    /// Overwrites `cover_image` (bound as `coverImage`) with `value`.
    #[wasm_bindgen(setter, js_name = "coverImage")]
    pub fn set_cover_image(&mut self, value: Option) {
        self.cover_image = value;
    }
}

/// Outlook PST archive metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPstMetadata {
    message_count: usize,
}

#[wasm_bindgen]
impl WasmPstMetadata {
    /// Builds a `WasmPstMetadata`; `messageCount` falls back to its default
    /// value when `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(messageCount: Option) -> WasmPstMetadata {
        WasmPstMetadata {
            message_count: messageCount.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPstMetadata {
        ::default()
    }

    /// Returns the `message_count` field (bound as `messageCount`).
    #[wasm_bindgen(getter, js_name = "messageCount")]
    pub fn message_count(&self) -> usize {
        self.message_count
    }

    /// Overwrites `message_count` (bound as `messageCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "messageCount")]
    pub fn set_message_count(&mut self, value: usize) {
        self.message_count = value;
    }
}

/// Confidence scores for an OCR element.
///
/// Separates detection confidence (how confident that text exists at this location)
/// from recognition confidence (how confident about the actual text content).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrConfidence {
    detection: Option,
    recognition: f64,
}

#[wasm_bindgen]
impl WasmOcrConfidence {
    /// Builds a `WasmOcrConfidence`.
    ///
    /// `recognition` comes first and falls back to its default value when
    /// `None`; `detection` is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(recognition: Option, detection: Option) -> WasmOcrConfidence {
        WasmOcrConfidence {
            detection,
            recognition: recognition.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrConfidence {
        ::default()
    }

    /// Returns the `detection` field.
    #[wasm_bindgen(getter)]
    pub fn detection(&self) -> Option {
        self.detection
    }

    /// Overwrites `detection` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_detection(&mut self, value: Option) {
        self.detection = value;
    }

    /// Returns the `recognition` field.
    #[wasm_bindgen(getter)]
    pub fn recognition(&self) -> f64 {
        self.recognition
    }

    /// Overwrites `recognition` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_recognition(&mut self, value: f64) {
        self.recognition = value;
    }
}

/// Rotation information for an OCR element.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrRotation {
    angle_degrees: f64,
    confidence: Option,
}

#[wasm_bindgen]
impl WasmOcrRotation {
    /// Builds a `WasmOcrRotation`; both arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(angleDegrees: f64, confidence: Option) -> WasmOcrRotation {
        WasmOcrRotation {
            angle_degrees: angleDegrees,
            confidence,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrRotation {
        ::default()
    }

    /// Returns the `angle_degrees` field (bound as `angleDegrees`).
    #[wasm_bindgen(getter, js_name = "angleDegrees")]
    pub fn angle_degrees(&self) -> f64 {
        self.angle_degrees
    }

    /// Overwrites `angle_degrees` (bound as `angleDegrees`) with `value`.
    #[wasm_bindgen(setter, js_name = "angleDegrees")]
    pub fn set_angle_degrees(&mut self, value: f64) {
        self.angle_degrees = value;
    }

    /// Returns the `confidence` field.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> Option {
        self.confidence
    }

    /// Overwrites `confidence` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: Option) {
        self.confidence = value;
    }
}

/// A unified OCR element representing detected text with full metadata.
///
/// This is the primary type for structured OCR output, preserving all information
/// from both Tesseract and PaddleOCR backends.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrElement {
    text: String,
    geometry: JsValue,
    confidence: WasmOcrConfidence,
    level: WasmOcrElementLevel,
    rotation: Option,
    page_number: u32,
    parent_id: Option,
    backend_metadata: JsValue,
}

#[wasm_bindgen]
impl WasmOcrElement {
    /// Builds a `WasmOcrElement` from its individual fields.
    ///
    /// `text`, `geometry`, `confidence`, `level`, `pageNumber` and
    /// `backendMetadata` fall back to their default value when `None`;
    /// the trailing `rotation` and `parentId` are stored as given.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        text: Option,
        geometry: Option,
        confidence: Option,
        level: Option,
        pageNumber: Option,
        backendMetadata: Option,
        rotation: Option,
        parentId: Option,
    ) -> WasmOcrElement {
        WasmOcrElement {
            text: text.unwrap_or_default(),
            geometry: geometry.unwrap_or_default(),
            confidence: confidence.unwrap_or_default(),
            level: level.unwrap_or_default(),
            rotation,
            page_number: pageNumber.unwrap_or_default(),
            parent_id: parentId,
            backend_metadata: backendMetadata.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrElement {
        ::default()
    }

    /// Returns a clone of `text`.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    /// Overwrites `text` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    /// Returns a clone of the `geometry` `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn geometry(&self) -> JsValue {
        self.geometry.clone()
    }

    /// Overwrites `geometry` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_geometry(&mut self, value: JsValue) {
        self.geometry = value;
    }

    /// Returns a clone of `confidence`.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> WasmOcrConfidence {
        self.confidence.clone()
    }

    /// Overwrites `confidence` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: WasmOcrConfidence) {
        self.confidence = value;
    }

    /// Returns `level` rendered through `to_api_str()` as an owned `String`.
    ///
    /// The getter yields a `String` while the setter below takes a
    /// `WasmOcrElementLevel`, so the two are not type-symmetric.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> String {
        self.level.to_api_str().to_owned()
    }

    /// Overwrites `level` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: WasmOcrElementLevel) {
        self.level = value;
    }

    /// Returns a clone of `rotation`.
    #[wasm_bindgen(getter)]
    pub fn rotation(&self) -> Option {
        self.rotation.clone()
    }

    /// Overwrites `rotation` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_rotation(&mut self, value: Option) {
        self.rotation = value;
    }

    /// Returns the `page_number` field (bound as `pageNumber`).
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Overwrites `page_number` (bound as `pageNumber`) with `value`.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of `parent_id` (bound as `parentId`).
    #[wasm_bindgen(getter, js_name = "parentId")]
    pub fn parent_id(&self) -> Option {
        self.parent_id.clone()
    }

    /// Overwrites `parent_id` (bound as `parentId`) with `value`.
    #[wasm_bindgen(setter, js_name = "parentId")]
    pub fn set_parent_id(&mut self, value: Option) {
        self.parent_id = value;
    }

    /// Returns a clone of the `backend_metadata` `JsValue` (bound as `backendMetadata`).
    #[wasm_bindgen(getter, js_name = "backendMetadata")]
    pub fn backend_metadata(&self) -> JsValue {
        self.backend_metadata.clone()
    }

    /// Overwrites `backend_metadata` (bound as `backendMetadata`) with `value`.
    #[wasm_bindgen(setter, js_name = "backendMetadata")]
    pub fn set_backend_metadata(&mut self, value: JsValue) {
        self.backend_metadata = value;
    }
}

/// Configuration for OCR element extraction.
///
/// Controls how OCR elements are extracted and filtered.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrElementConfig {
    include_elements: bool,
    min_level: WasmOcrElementLevel,
    min_confidence: f64,
    build_hierarchy: bool,
}

#[wasm_bindgen]
impl WasmOcrElementConfig {
    /// Builds a `WasmOcrElementConfig`; every argument falls back to its
    /// default value when `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        includeElements: Option,
        minLevel: Option,
        minConfidence: Option,
        buildHierarchy: Option,
    ) -> WasmOcrElementConfig {
        WasmOcrElementConfig {
            include_elements: includeElements.unwrap_or_default(),
            min_level: minLevel.unwrap_or_default(),
            min_confidence: minConfidence.unwrap_or_default(),
            build_hierarchy: buildHierarchy.unwrap_or_default(),
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrElementConfig {
        ::default()
    }

    /// Returns the `include_elements` flag (bound as `includeElements`).
    #[wasm_bindgen(getter, js_name = "includeElements")]
    pub fn include_elements(&self) -> bool {
        self.include_elements
    }

    /// Overwrites `include_elements` (bound as `includeElements`) with `value`.
    #[wasm_bindgen(setter, js_name = "includeElements")]
    pub fn set_include_elements(&mut self, value: bool) {
        self.include_elements = value;
    }

    /// Returns `min_level` (bound as `minLevel`) rendered through
    /// `to_api_str()` as an owned `String`.
    ///
    /// The setter below takes a `WasmOcrElementLevel`, not a `String`.
    #[wasm_bindgen(getter, js_name = "minLevel")]
    pub fn min_level(&self) -> String {
        self.min_level.to_api_str().to_owned()
    }

    /// Overwrites `min_level` (bound as `minLevel`) with `value`.
    #[wasm_bindgen(setter, js_name = "minLevel")]
    pub fn set_min_level(&mut self, value: WasmOcrElementLevel) {
        self.min_level = value;
    }

    /// Returns the `min_confidence` field (bound as `minConfidence`).
    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }

    /// Overwrites `min_confidence` (bound as `minConfidence`) with `value`.
    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }

    /// Returns the `build_hierarchy` flag (bound as `buildHierarchy`).
    #[wasm_bindgen(getter, js_name = "buildHierarchy")]
    pub fn build_hierarchy(&self) -> bool {
        self.build_hierarchy
    }

    /// Overwrites `build_hierarchy` (bound as `buildHierarchy`) with `value`.
    #[wasm_bindgen(setter, js_name = "buildHierarchy")]
    pub fn set_build_hierarchy(&mut self, value: bool) {
        self.build_hierarchy = value;
    }
}

/// Unified page structure for documents.
///
/// Supports different page types (PDF pages, PPTX slides, Excel sheets)
/// with character offset boundaries for chunk-to-page mapping.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageStructure {
    total_count: u32,
    unit_type: WasmPageUnitType,
    boundaries: Option>,
    pages: Option>,
}

#[wasm_bindgen]
impl WasmPageStructure {
    /// Builds a `WasmPageStructure`; every argument is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        totalCount: u32,
        unitType: WasmPageUnitType,
        boundaries: Option>,
        pages: Option>,
    ) -> WasmPageStructure {
        WasmPageStructure {
            total_count: totalCount,
            unit_type: unitType,
            boundaries,
            pages,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageStructure {
        ::default()
    }

    /// Returns the `total_count` field (bound as `totalCount`).
    #[wasm_bindgen(getter, js_name = "totalCount")]
    pub fn total_count(&self) -> u32 {
        self.total_count
    }

    /// Overwrites `total_count` (bound as `totalCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "totalCount")]
    pub fn set_total_count(&mut self, value: u32) {
        self.total_count = value;
    }

    /// Returns `unit_type` (bound as `unitType`) rendered through
    /// `to_api_str()` as an owned `String`.
    ///
    /// The setter below takes a `WasmPageUnitType`, not a `String`.
    #[wasm_bindgen(getter, js_name = "unitType")]
    pub fn unit_type(&self) -> String {
        self.unit_type.to_api_str().to_owned()
    }

    /// Overwrites `unit_type` (bound as `unitType`) with `value`.
    #[wasm_bindgen(setter, js_name = "unitType")]
    pub fn set_unit_type(&mut self, value: WasmPageUnitType) {
        self.unit_type = value;
    }

    /// Returns `boundaries` as a newly built `js_sys::Array`, or `None` when
    /// the field is `None`.
    ///
    /// Each item is cloned and converted with `JsValue::from` before being
    /// pushed, so the returned array is independent of the stored items.
    #[wasm_bindgen(getter)]
    pub fn boundaries(&self) -> Option {
        self.boundaries.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    /// Overwrites `boundaries` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_boundaries(&mut self, value: Option>) {
        self.boundaries = value;
    }

    /// Returns `pages` as a newly built `js_sys::Array`, or `None` when the
    /// field is `None`.
    ///
    /// Each item is cloned and converted with `JsValue::from` before being
    /// pushed, so the returned array is independent of the stored items.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option {
        self.pages.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    /// Overwrites `pages` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option>) {
        self.pages = value;
    }
}

/// Byte offset boundary for a page.
///
/// Tracks where a specific page's content starts and ends in the main content string,
/// enabling mapping from byte positions to page numbers. Offsets are guaranteed to be
/// at valid UTF-8 character boundaries when using standard String methods (push_str, push, etc.).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageBoundary {
    byte_start: usize,
    byte_end: usize,
    page_number: u32,
}

#[wasm_bindgen]
impl WasmPageBoundary {
    /// Builds a `WasmPageBoundary`; all three arguments are required and
    /// stored as given. No ordering check between `byteStart` and `byteEnd`
    /// is performed here.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(byteStart: usize, byteEnd: usize, pageNumber: u32) -> WasmPageBoundary {
        WasmPageBoundary {
            byte_start: byteStart,
            byte_end: byteEnd,
            page_number: pageNumber,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageBoundary {
        ::default()
    }

    /// Returns the `byte_start` field (bound as `byteStart`).
    #[wasm_bindgen(getter, js_name = "byteStart")]
    pub fn byte_start(&self) -> usize {
        self.byte_start
    }

    /// Overwrites `byte_start` (bound as `byteStart`) with `value`.
    #[wasm_bindgen(setter, js_name = "byteStart")]
    pub fn set_byte_start(&mut self, value: usize) {
        self.byte_start = value;
    }

    /// Returns the `byte_end` field (bound as `byteEnd`).
    #[wasm_bindgen(getter, js_name = "byteEnd")]
    pub fn byte_end(&self) -> usize {
        self.byte_end
    }

    /// Overwrites `byte_end` (bound as `byteEnd`) with `value`.
    #[wasm_bindgen(setter, js_name = "byteEnd")]
    pub fn set_byte_end(&mut self, value: usize) {
        self.byte_end = value;
    }

    /// Returns the `page_number` field (bound as `pageNumber`).
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Overwrites `page_number` (bound as `pageNumber`) with `value`.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }
}

/// Metadata for individual page/slide/sheet.
///
/// Captures per-page information including dimensions, content counts,
/// and visibility state (for presentations).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageInfo {
    number: u32,
    title: Option,
    dimensions: Option>,
    image_count: Option,
    table_count: Option,
    hidden: Option,
    is_blank: Option,
    has_vector_graphics: bool,
}

#[wasm_bindgen]
impl WasmPageInfo {
    /// Builds a `WasmPageInfo`; every argument is stored as given.
    ///
    /// The two non-`Option` arguments (`number`, `hasVectorGraphics`) come
    /// first, so the argument order differs from the field declaration order.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        number: u32,
        hasVectorGraphics: bool,
        title: Option,
        dimensions: Option>,
        imageCount: Option,
        tableCount: Option,
        hidden: Option,
        isBlank: Option,
    ) -> WasmPageInfo {
        WasmPageInfo {
            number,
            title,
            dimensions,
            image_count: imageCount,
            table_count: tableCount,
            hidden,
            is_blank: isBlank,
            has_vector_graphics: hasVectorGraphics,
        }
    }

    /// Returns the default value of this type.
    ///
    /// The `should_implement_trait` lint is allowed because this inherent
    /// method shares its name with `Default::default`.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageInfo {
        ::default()
    }

    /// Returns the `number` field.
    #[wasm_bindgen(getter)]
    pub fn number(&self) -> u32 {
        self.number
    }

    /// Overwrites `number` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_number(&mut self, value: u32) {
        self.number = value;
    }

    /// Returns a clone of `title`.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    /// Overwrites `title` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    /// Returns a clone of `dimensions`.
    #[wasm_bindgen(getter)]
    pub fn dimensions(&self) -> Option> {
        self.dimensions.clone()
    }

    /// Overwrites `dimensions` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_dimensions(&mut self, value: Option>) {
        self.dimensions = value;
    }

    /// Returns the `image_count` field (bound as `imageCount`).
    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> Option {
        self.image_count
    }

    /// Overwrites `image_count` (bound as `imageCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: Option) {
        self.image_count = value;
    }

    /// Returns the `table_count` field (bound as `tableCount`).
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> Option {
        self.table_count
    }

    /// Overwrites `table_count` (bound as `tableCount`) with `value`.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: Option) {
        self.table_count = value;
    }

    /// Returns the `hidden` field.
    #[wasm_bindgen(getter)]
    pub fn hidden(&self) -> Option {
        self.hidden
    }

    /// Overwrites `hidden` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_hidden(&mut self, value: Option) {
        self.hidden = value;
    }

    /// Returns the `is_blank` field (bound as `isBlank`).
    #[wasm_bindgen(getter, js_name = "isBlank")]
    pub fn is_blank(&self) -> Option {
        self.is_blank
    }

    /// Overwrites `is_blank` (bound as `isBlank`) with `value`.
    #[wasm_bindgen(setter, js_name = "isBlank")]
    pub fn set_is_blank(&mut self, value: Option) {
        self.is_blank = value;
    }

    /// Returns the `has_vector_graphics` flag (bound as `hasVectorGraphics`).
    #[wasm_bindgen(getter, js_name = "hasVectorGraphics")]
    pub fn has_vector_graphics(&self) -> bool {
        self.has_vector_graphics
    }

    /// Overwrites `has_vector_graphics` (bound as `hasVectorGraphics`) with `value`.
    #[wasm_bindgen(setter, js_name = "hasVectorGraphics")]
    pub fn set_has_vector_graphics(&mut self, value: bool) {
        self.has_vector_graphics = value;
    }
}

/// Content for a single page/slide.
///
/// When page extraction is enabled, documents are split into per-page content
/// with associated tables and images mapped to each page.
///
/// # Performance
///
/// Uses Arc-wrapped tables and images for memory efficiency:
/// - `Vec>` enables zero-copy sharing of table data
/// - `Vec>` enables zero-copy sharing of image data
/// - Maintains exact JSON compatibility via custom Serialize/Deserialize
///
/// This reduces memory overhead for documents with shared tables/images
/// by avoiding redundant copies during serialization.
// NOTE(review): several types below read `Vec`, `Option` or `Option>` with no
// type arguments, and `default()` calls a bare `::default()`; presumably
// angle-bracketed spans were lost before this text reached review. Confirm
// against the generator output before relying on this block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageContent {
    page_number: u32,
    content: String,
    tables: Vec,
    image_indices: Vec,
    hierarchy: Option,
    is_blank: Option,
    layout_regions: Option>,
    speaker_notes: Option,
    section_name: Option,
    sheet_name: Option,
}

#[wasm_bindgen]
impl WasmPageContent {
    /// Builds a page-content record; every argument is stored as given.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        pageNumber: u32,
        content: String,
        tables: Vec,
        imageIndices: Vec,
        hierarchy: Option,
        isBlank: Option,
        layoutRegions: Option>,
        speakerNotes: Option,
        sectionName: Option,
        sheetName: Option,
    ) -> WasmPageContent {
        WasmPageContent {
            page_number: pageNumber,
            content,
            tables,
            image_indices: imageIndices,
            hierarchy,
            is_blank: isBlank,
            layout_regions: layoutRegions,
            speaker_notes: speakerNotes,
            section_name: sectionName,
            sheet_name: sheetName,
        }
    }

    /// Returns a record built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageContent {
        ::default()
    }

    /// Page number, exposed to JS as `pageNumber`.
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Overwrites the page number.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of the page text.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    /// Overwrites the page text.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a clone of the tables collection.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec {
        self.tables.clone()
    }

    /// Overwrites the tables collection.
    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec) {
        self.tables = value;
    }

    /// Returns a clone of the image indices, exposed to JS as `imageIndices`.
    #[wasm_bindgen(getter, js_name = "imageIndices")]
    pub fn image_indices(&self) -> Vec {
        self.image_indices.clone()
    }

    /// Overwrites the image indices.
    #[wasm_bindgen(setter, js_name = "imageIndices")]
    pub fn set_image_indices(&mut self, value: Vec) {
        self.image_indices = value;
    }

    /// Returns a clone of the optional hierarchy.
    #[wasm_bindgen(getter)]
    pub fn hierarchy(&self) -> Option {
        self.hierarchy.clone()
    }

    /// Overwrites the optional hierarchy.
    #[wasm_bindgen(setter)]
    pub fn set_hierarchy(&mut self, value: Option) {
        self.hierarchy = value;
    }

    /// Optional blank-page flag, exposed to JS as `isBlank`.
    #[wasm_bindgen(getter, js_name = "isBlank")]
    pub fn is_blank(&self) -> Option {
        self.is_blank
    }

    /// Overwrites the optional blank-page flag.
    #[wasm_bindgen(setter, js_name = "isBlank")]
    pub fn set_is_blank(&mut self, value: Option) {
        self.is_blank = value;
    }

    /// Copies the layout regions, when present, into a new `js_sys::Array`.
    ///
    /// Each item is cloned and converted with `JsValue::from` before being pushed.
    #[wasm_bindgen(getter, js_name = "layoutRegions")]
    pub fn layout_regions(&self) -> Option {
        self.layout_regions.as_ref().map(|items| {
            let arr = js_sys::Array::new();
            for item in items {
                arr.push(&JsValue::from(item.clone()));
            }
            arr
        })
    }

    /// Overwrites the optional layout regions.
    #[wasm_bindgen(setter, js_name = "layoutRegions")]
    pub fn set_layout_regions(&mut self, value: Option>) {
        self.layout_regions = value;
    }

    /// Returns a clone of the optional speaker notes, exposed to JS as `speakerNotes`.
    #[wasm_bindgen(getter, js_name = "speakerNotes")]
    pub fn speaker_notes(&self) -> Option {
        self.speaker_notes.clone()
    }

    /// Overwrites the optional speaker notes.
    #[wasm_bindgen(setter, js_name = "speakerNotes")]
    pub fn set_speaker_notes(&mut self, value: Option) {
        self.speaker_notes = value;
    }

    /// Returns a clone of the optional section name, exposed to JS as `sectionName`.
    #[wasm_bindgen(getter, js_name = "sectionName")]
    pub fn section_name(&self) -> Option {
        self.section_name.clone()
    }

    /// Overwrites the optional section name.
    #[wasm_bindgen(setter, js_name = "sectionName")]
    pub fn set_section_name(&mut self, value: Option) {
        self.section_name = value;
    }

    /// Returns a clone of the optional sheet name, exposed to JS as `sheetName`.
    #[wasm_bindgen(getter, js_name = "sheetName")]
    pub fn sheet_name(&self) -> Option {
        self.sheet_name.clone()
    }

    /// Overwrites the optional sheet name.
    #[wasm_bindgen(setter, js_name = "sheetName")]
    pub fn set_sheet_name(&mut self, value: Option) {
        self.sheet_name = value;
    }
}

/// A detected layout region on a page.
///
/// When layout detection is enabled, each page may have layout regions
/// identifying different content types (text, pictures, tables, etc.)
/// with confidence scores and spatial positions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLayoutRegion {
    class_name: String,
    confidence: f64,
    bounding_box: WasmBoundingBox,
    area_fraction: f64,
}

#[wasm_bindgen]
impl WasmLayoutRegion {
    /// Builds a layout region; any argument left out falls back to the
    /// `Default` value of the field it populates.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        className: Option<String>,
        confidence: Option<f64>,
        boundingBox: Option<WasmBoundingBox>,
        areaFraction: Option<f64>,
    ) -> WasmLayoutRegion {
        WasmLayoutRegion {
            class_name: className.unwrap_or_default(),
            confidence: confidence.unwrap_or_default(),
            bounding_box: boundingBox.unwrap_or_default(),
            area_fraction: areaFraction.unwrap_or_default(),
        }
    }

    /// Returns a region with every field at its `Default` value.
    ///
    /// The call is fully qualified so it reaches the derived `Default` impl
    /// rather than this inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmLayoutRegion {
        <WasmLayoutRegion as Default>::default()
    }

    /// Returns a clone of the class name, exposed to JS as `className`.
    #[wasm_bindgen(getter, js_name = "className")]
    pub fn class_name(&self) -> String {
        self.class_name.clone()
    }

    /// Overwrites the class name.
    #[wasm_bindgen(setter, js_name = "className")]
    pub fn set_class_name(&mut self, value: String) {
        self.class_name = value;
    }

    /// Confidence score.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Overwrites the confidence score.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: f64) {
        self.confidence = value;
    }

    /// Returns a clone of the bounding box, exposed to JS as `boundingBox`.
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> WasmBoundingBox {
        self.bounding_box.clone()
    }

    /// Overwrites the bounding box.
    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: WasmBoundingBox) {
        self.bounding_box = value;
    }

    /// Area fraction, exposed to JS as `areaFraction`.
    #[wasm_bindgen(getter, js_name = "areaFraction")]
    pub fn area_fraction(&self) -> f64 {
        self.area_fraction
    }

    /// Overwrites the area fraction.
    #[wasm_bindgen(setter, js_name = "areaFraction")]
    pub fn set_area_fraction(&mut self, value: f64) {
        self.area_fraction = value;
    }
}

/// Page hierarchy structure containing heading levels and block information.
///
/// Used when PDF text hierarchy extraction is enabled. Contains hierarchical
/// blocks with heading levels (H1-H6) for semantic document structure.
// NOTE(review): `blocks` is typed as a bare `Vec` and `default()` calls a bare
// `::default()`; presumably angle-bracketed spans were lost before this text
// reached review. Confirm against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageHierarchy {
    block_count: u32,
    blocks: Vec,
}

#[wasm_bindgen]
impl WasmPageHierarchy {
    /// Builds a hierarchy from a block count and a block collection.
    ///
    /// The count is stored as given; it is not derived from `blocks`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(blockCount: u32, blocks: Vec) -> WasmPageHierarchy {
        WasmPageHierarchy {
            block_count: blockCount,
            blocks,
        }
    }

    /// Returns a hierarchy built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageHierarchy {
        ::default()
    }

    /// Stored block count, exposed to JS as `blockCount`.
    #[wasm_bindgen(getter, js_name = "blockCount")]
    pub fn block_count(&self) -> u32 {
        self.block_count
    }

    /// Overwrites the stored block count.
    #[wasm_bindgen(setter, js_name = "blockCount")]
    pub fn set_block_count(&mut self, value: u32) {
        self.block_count = value;
    }

    /// Returns a clone of the block collection.
    #[wasm_bindgen(getter)]
    pub fn blocks(&self) -> Vec {
        self.blocks.clone()
    }

    /// Overwrites the block collection; `block_count` is left untouched.
    #[wasm_bindgen(setter)]
    pub fn set_blocks(&mut self, value: Vec) {
        self.blocks = value;
    }
}

/// A text block with hierarchy level assignment.
///
/// Represents a block of text with semantic heading information extracted from
/// font size clustering and hierarchical analysis.
// NOTE(review): `bbox` is typed as `Option>` and `default()` calls a bare
// `::default()`; presumably angle-bracketed spans were lost before this text
// reached review. Confirm against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHierarchicalBlock {
    text: String,
    font_size: f32,
    level: String,
    bbox: Option>,
}

#[wasm_bindgen]
impl WasmHierarchicalBlock {
    /// Builds a block from its text, font size, level string and optional bbox.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(text: String, fontSize: f32, level: String, bbox: Option>) -> WasmHierarchicalBlock {
        WasmHierarchicalBlock {
            text,
            font_size: fontSize,
            level,
            bbox,
        }
    }

    /// Returns a block built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmHierarchicalBlock {
        ::default()
    }

    /// Returns a clone of the block text.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    /// Overwrites the block text.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    /// Font size, exposed to JS as `fontSize`.
    #[wasm_bindgen(getter, js_name = "fontSize")]
    pub fn font_size(&self) -> f32 {
        self.font_size
    }

    /// Overwrites the font size.
    #[wasm_bindgen(setter, js_name = "fontSize")]
    pub fn set_font_size(&mut self, value: f32) {
        self.font_size = value;
    }

    /// Returns a clone of the level string.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> String {
        self.level.clone()
    }

    /// Overwrites the level string.
    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: String) {
        self.level = value;
    }

    /// Returns a clone of the optional bbox.
    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option> {
        self.bbox.clone()
    }

    /// Overwrites the optional bbox.
    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option>) {
        self.bbox = value;
    }
}

/// A single changed cell within a table.
///
/// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can
/// reference it unconditionally, without requiring the `diff` Cargo feature.
/// `crate.diff` re-exports this type verbatim.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCellChange {
    row: usize,
    col: usize,
    from: String,
    to: String,
}

#[wasm_bindgen]
impl WasmCellChange {
    /// Builds a cell change from its row, column, and the `from` / `to` strings.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(row: usize, col: usize, from: String, to: String) -> WasmCellChange {
        WasmCellChange { row, col, from, to }
    }

    /// Returns a cell change with every field at its `Default` value.
    ///
    /// The call is fully qualified so it reaches the derived `Default` impl
    /// rather than this inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCellChange {
        <WasmCellChange as Default>::default()
    }

    /// Row index.
    #[wasm_bindgen(getter)]
    pub fn row(&self) -> usize {
        self.row
    }

    /// Overwrites the row index.
    #[wasm_bindgen(setter)]
    pub fn set_row(&mut self, value: usize) {
        self.row = value;
    }

    /// Column index.
    #[wasm_bindgen(getter)]
    pub fn col(&self) -> usize {
        self.col
    }

    /// Overwrites the column index.
    #[wasm_bindgen(setter)]
    pub fn set_col(&mut self, value: usize) {
        self.col = value;
    }

    /// Returns a clone of the `from` string.
    #[wasm_bindgen(getter)]
    pub fn from(&self) -> String {
        self.from.clone()
    }

    /// Overwrites the `from` string.
    #[wasm_bindgen(setter)]
    pub fn set_from(&mut self, value: String) {
        self.from = value;
    }

    /// Returns a clone of the `to` string.
    #[wasm_bindgen(getter)]
    pub fn to(&self) -> String {
        self.to.clone()
    }

    /// Overwrites the `to` string.
    #[wasm_bindgen(setter)]
    pub fn set_to(&mut self, value: String) {
        self.to = value;
    }
}

/// A single tracked change embedded in a document.
///
/// Populated by per-format extractors that understand change-tracking metadata
/// (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Every
/// extractor defaults to `ExtractionResult.revisions = None` until a
/// format-specific implementation is added.
// NOTE(review): `author`, `timestamp` and `anchor` are typed as a bare `Option`
// and `default()` calls a bare `::default()`; presumably angle-bracketed spans
// were lost before this text reached review. Confirm against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentRevision {
    revision_id: String,
    author: Option,
    timestamp: Option,
    kind: WasmRevisionKind,
    anchor: Option,
    delta: WasmRevisionDelta,
}

#[wasm_bindgen]
impl WasmDocumentRevision {
    /// Builds a revision record.
    ///
    /// The three non-optional values (`revisionId`, `kind`, `delta`) come
    /// first; the optional values follow and are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        revisionId: String,
        kind: WasmRevisionKind,
        delta: WasmRevisionDelta,
        author: Option,
        timestamp: Option,
        anchor: Option,
    ) -> WasmDocumentRevision {
        WasmDocumentRevision {
            revision_id: revisionId,
            author,
            timestamp,
            kind,
            anchor,
            delta,
        }
    }

    /// Returns a revision built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDocumentRevision {
        ::default()
    }

    /// Returns a clone of the revision id, exposed to JS as `revisionId`.
    #[wasm_bindgen(getter, js_name = "revisionId")]
    pub fn revision_id(&self) -> String {
        self.revision_id.clone()
    }

    /// Overwrites the revision id.
    #[wasm_bindgen(setter, js_name = "revisionId")]
    pub fn set_revision_id(&mut self, value: String) {
        self.revision_id = value;
    }

    /// Returns a clone of the optional author.
    #[wasm_bindgen(getter)]
    pub fn author(&self) -> Option {
        self.author.clone()
    }

    /// Overwrites the optional author.
    #[wasm_bindgen(setter)]
    pub fn set_author(&mut self, value: Option) {
        self.author = value;
    }

    /// Returns a clone of the optional timestamp.
    #[wasm_bindgen(getter)]
    pub fn timestamp(&self) -> Option {
        self.timestamp.clone()
    }

    /// Overwrites the optional timestamp.
    #[wasm_bindgen(setter)]
    pub fn set_timestamp(&mut self, value: Option) {
        self.timestamp = value;
    }

    /// Returns the revision kind as an owned copy of its `to_api_str()` string.
    ///
    /// Note the asymmetry with the setter, which takes the enum value itself.
    #[wasm_bindgen(getter)]
    pub fn kind(&self) -> String {
        self.kind.to_api_str().to_owned()
    }

    /// Overwrites the revision kind.
    #[wasm_bindgen(setter)]
    pub fn set_kind(&mut self, value: WasmRevisionKind) {
        self.kind = value;
    }

    /// Returns a clone of the optional anchor.
    #[wasm_bindgen(getter)]
    pub fn anchor(&self) -> Option {
        self.anchor.clone()
    }

    /// Overwrites the optional anchor.
    #[wasm_bindgen(setter)]
    pub fn set_anchor(&mut self, value: Option) {
        self.anchor = value;
    }

    /// Returns a clone of the delta.
    #[wasm_bindgen(getter)]
    pub fn delta(&self) -> WasmRevisionDelta {
        self.delta.clone()
    }

    /// Overwrites the delta.
    #[wasm_bindgen(setter)]
    pub fn set_delta(&mut self, value: WasmRevisionDelta) {
        self.delta = value;
    }
}

/// The content changes that make up a single revision.
///
/// For insertions and deletions the `content` field carries the added/removed
/// lines as `DiffLine.Added` / `DiffLine.Removed` entries. For format
/// changes, `content` is empty — the property diff is left as a TODO for a
/// later enrichment pass.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmRevisionDelta {
    content: JsValue,
    // NOTE(review): the element type was missing in the reviewed text; it is
    // restored as `WasmCellChange` because that type's documentation says it
    // exists so `RevisionDelta` can reference it. Confirm against the
    // generator output.
    table_changes: Vec<WasmCellChange>,
}

#[wasm_bindgen]
impl WasmRevisionDelta {
    /// Builds a delta; any argument left out falls back to the `Default`
    /// value of the field it populates.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(content: Option<JsValue>, tableChanges: Option<Vec<WasmCellChange>>) -> WasmRevisionDelta {
        WasmRevisionDelta {
            content: content.unwrap_or_default(),
            table_changes: tableChanges.unwrap_or_default(),
        }
    }

    /// Returns a delta with every field at its `Default` value.
    ///
    /// The call is fully qualified so it reaches the derived `Default` impl
    /// rather than this inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmRevisionDelta {
        <WasmRevisionDelta as Default>::default()
    }

    /// Returns a clone of the content value.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> JsValue {
        self.content.clone()
    }

    /// Overwrites the content value.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: JsValue) {
        self.content = value;
    }

    /// Returns a clone of the table changes, exposed to JS as `tableChanges`.
    #[wasm_bindgen(getter, js_name = "tableChanges")]
    pub fn table_changes(&self) -> Vec<WasmCellChange> {
        self.table_changes.clone()
    }

    /// Overwrites the table changes.
    #[wasm_bindgen(setter, js_name = "tableChanges")]
    pub fn set_table_changes(&mut self, value: Vec<WasmCellChange>) {
        self.table_changes = value;
    }
}

/// Extracted table structure.
///
/// Represents a table detected and extracted from a document (PDF, image, etc.).
/// Tables are converted to both structured cell data and Markdown format.
// NOTE(review): `bounding_box` and every constructor parameter are typed as a
// bare `Option`, and `default()` calls a bare `::default()`; presumably
// angle-bracketed spans were lost before this text reached review. Confirm
// against the generator output.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTable {
    cells: JsValue,
    markdown: String,
    page_number: u32,
    bounding_box: Option,
}

#[wasm_bindgen]
impl WasmTable {
    /// Builds a table.
    ///
    /// `cells`, `markdown` and `pageNumber` fall back to default values when
    /// left out; `boundingBox` is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        cells: Option,
        markdown: Option,
        pageNumber: Option,
        boundingBox: Option,
    ) -> WasmTable {
        WasmTable {
            cells: cells.unwrap_or_default(),
            markdown: markdown.unwrap_or_default(),
            page_number: pageNumber.unwrap_or_default(),
            bounding_box: boundingBox,
        }
    }

    /// Returns a table built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTable {
        ::default()
    }

    /// Returns a clone of the cells value.
    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> JsValue {
        self.cells.clone()
    }

    /// Overwrites the cells value.
    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: JsValue) {
        self.cells = value;
    }

    /// Returns a clone of the Markdown string.
    #[wasm_bindgen(getter)]
    pub fn markdown(&self) -> String {
        self.markdown.clone()
    }

    /// Overwrites the Markdown string.
    #[wasm_bindgen(setter)]
    pub fn set_markdown(&mut self, value: String) {
        self.markdown = value;
    }

    /// Page number, exposed to JS as `pageNumber`.
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Overwrites the page number.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of the optional bounding box, exposed to JS as `boundingBox`.
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option {
        self.bounding_box.clone()
    }

    /// Overwrites the optional bounding box.
    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option) {
        self.bounding_box = value;
    }
}

/// Individual table cell with content and optional styling.
///
/// Future extension point for rich table support with cell-level metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTableCell {
    content: String,
    row_span: u32,
    col_span: u32,
    is_header: bool,
}

#[wasm_bindgen]
impl WasmTableCell {
    /// Builds a table cell; any argument left out falls back to the `Default`
    /// value of the field it populates (empty string, `0`, `0`, `false`).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: Option<String>,
        rowSpan: Option<u32>,
        colSpan: Option<u32>,
        isHeader: Option<bool>,
    ) -> WasmTableCell {
        WasmTableCell {
            content: content.unwrap_or_default(),
            row_span: rowSpan.unwrap_or_default(),
            col_span: colSpan.unwrap_or_default(),
            is_header: isHeader.unwrap_or_default(),
        }
    }

    /// Returns a cell with every field at its `Default` value.
    ///
    /// The call is fully qualified so it reaches the derived `Default` impl
    /// rather than this inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTableCell {
        <WasmTableCell as Default>::default()
    }

    /// Returns a clone of the cell content.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    /// Overwrites the cell content.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Row span, exposed to JS as `rowSpan`.
    #[wasm_bindgen(getter, js_name = "rowSpan")]
    pub fn row_span(&self) -> u32 {
        self.row_span
    }

    /// Overwrites the row span.
    #[wasm_bindgen(setter, js_name = "rowSpan")]
    pub fn set_row_span(&mut self, value: u32) {
        self.row_span = value;
    }

    /// Column span, exposed to JS as `colSpan`.
    #[wasm_bindgen(getter, js_name = "colSpan")]
    pub fn col_span(&self) -> u32 {
        self.col_span
    }

    /// Overwrites the column span.
    #[wasm_bindgen(setter, js_name = "colSpan")]
    pub fn set_col_span(&mut self, value: u32) {
        self.col_span = value;
    }

    /// Header flag, exposed to JS as `isHeader`.
    #[wasm_bindgen(getter, js_name = "isHeader")]
    pub fn is_header(&self) -> bool {
        self.is_header
    }

    /// Overwrites the header flag.
    #[wasm_bindgen(setter, js_name = "isHeader")]
    pub fn set_is_header(&mut self, value: bool) {
        self.is_header = value;
    }
}

/// A URI extracted from a document.
///
/// Represents any link, reference, or resource pointer found during extraction.
/// The `kind` field classifies the URI semantically, while `label` carries
/// optional human-readable display text.
#[derive(Clone, Default)] #[wasm_bindgen] pub struct WasmExtractedUri { url: String, label: Option, page: Option, kind: WasmUriKind, } #[wasm_bindgen] impl WasmExtractedUri { #[allow(non_snake_case)] #[wasm_bindgen(constructor)] pub fn new(url: String, kind: WasmUriKind, label: Option, page: Option) -> WasmExtractedUri { WasmExtractedUri { url, label, page, kind } } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmExtractedUri { ::default() } #[wasm_bindgen(getter)] pub fn url(&self) -> String { self.url.clone() } #[wasm_bindgen(setter)] pub fn set_url(&mut self, value: String) { self.url = value; } #[wasm_bindgen(getter)] pub fn label(&self) -> Option { self.label.clone() } #[wasm_bindgen(setter)] pub fn set_label(&mut self, value: Option) { self.label = value; } #[wasm_bindgen(getter)] pub fn page(&self) -> Option { self.page } #[wasm_bindgen(setter)] pub fn set_page(&mut self, value: Option) { self.page = value; } #[wasm_bindgen(getter)] pub fn kind(&self) -> String { self.kind.to_api_str().to_owned() } #[wasm_bindgen(setter)] pub fn set_kind(&mut self, value: WasmUriKind) { self.kind = value; } } /// ONNX Runtime execution provider type. /// /// Determines which hardware backend is used for model inference. /// `Auto` (default) selects the best available provider per platform. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmExecutionProviderType { Auto = 0, Cpu = 1, CoreMl = 2, Cuda = 3, TensorRt = 4, } #[allow(clippy::derivable_impls)] impl Default for WasmExecutionProviderType { fn default() -> Self { Self::Auto } } impl WasmExecutionProviderType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Auto => "auto", Self::Cpu => "cpu", Self::CoreMl => "coreml", Self::Cuda => "cuda", Self::TensorRt => "tensorrt", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "auto" => Some(Self::Auto), "cpu" => Some(Self::Cpu), "coreml" => Some(Self::CoreMl), "cuda" => Some(Self::Cuda), "tensorrt" => Some(Self::TensorRt), _ => None, } } } /// Output format for extraction results. /// /// Controls the format of the `content` field in `ExtractionResult`. /// When set to `Markdown`, `Djot`, or `Html`, the output uses that format. /// `Plain` returns the raw extracted text. /// `Structured` returns JSON with full OCR element data including bounding /// boxes and confidence scores. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmOutputFormat { Plain = 0, Markdown = 1, Djot = 2, Html = 3, Json = 4, Structured = 5, Custom = 6, } #[allow(clippy::derivable_impls)] impl Default for WasmOutputFormat { fn default() -> Self { Self::Plain } } impl WasmOutputFormat { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Plain => "plain", Self::Markdown => "markdown", Self::Djot => "djot", Self::Html => "html", Self::Json => "json", Self::Structured => "structured", Self::Custom => "custom", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "plain" => Some(Self::Plain), "markdown" => Some(Self::Markdown), "djot" => Some(Self::Djot), "html" => Some(Self::Html), "json" => Some(Self::Json), "structured" => Some(Self::Structured), "custom" => Some(Self::Custom), _ => None, } } } /// Type of text chunker to use. /// /// # Variants /// /// * `Text` - Generic text splitter, splits on whitespace and punctuation /// * `Markdown` - Markdown-aware splitter, preserves formatting and structure /// * `Yaml` - YAML-aware splitter, creates one chunk per top-level key /// * `Semantic` - Topic-aware chunker. 
With an `EmbeddingConfig`, splits at /// embedding-based topic shifts tuned by `topic_threshold` (default 0.75, /// lower = more splits). Without an embedding, falls back to a /// structural-boundary heuristic (ALL-CAPS headers, numbered sections, /// blank-line paragraphs) and merges groups into chunks capped at /// `max_characters` (default 1000). `topic_threshold` has no effect in the /// fallback path. For best results, pair with an embedding model. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmChunkerType { Text = 0, Markdown = 1, Yaml = 2, Semantic = 3, } #[allow(clippy::derivable_impls)] impl Default for WasmChunkerType { fn default() -> Self { Self::Text } } impl WasmChunkerType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Text => "text", Self::Markdown => "markdown", Self::Yaml => "yaml", Self::Semantic => "semantic", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "text" => Some(Self::Text), "markdown" => Some(Self::Markdown), "yaml" => Some(Self::Yaml), "semantic" => Some(Self::Semantic), _ => None, } } } /// How chunk size is measured. /// /// Defaults to `Characters` (Unicode character count). When using token-based sizing, /// chunks are sized by token count according to the specified tokenizer. /// /// Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer /// available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers /// (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`). 
// NOTE(review): `model` and `cache_dir` are typed as a bare `Option` and
// `default()` calls a bare `::default()`; presumably angle-bracketed spans
// were lost before this text reached review. Confirm against the generator output.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmChunkSizing {
    pub(crate) r#type: String,
    pub(crate) model: Option,
    pub(crate) cache_dir: Option,
}

#[wasm_bindgen]
impl WasmChunkSizing {
    /// Builds a sizing value by delegating to the inherent `default()` below.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmChunkSizing {
        Self::default()
    }

    /// Returns a sizing value built from default values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmChunkSizing {
        ::default()
    }

    /// Returns a clone of the `type` string.
    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    /// Overwrites the `type` string; the value is stored without validation.
    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    /// Returns a clone of the optional model.
    #[wasm_bindgen(getter, js_name = "model")]
    pub fn model(&self) -> Option {
        self.model.clone()
    }

    /// Overwrites the optional model.
    #[wasm_bindgen(setter, js_name = "model")]
    pub fn set_model(&mut self, value: Option) {
        self.model = value;
    }

    /// Returns a clone of the optional cache directory, exposed to JS as `cacheDir`.
    #[wasm_bindgen(getter, js_name = "cacheDir")]
    pub fn cache_dir(&self) -> Option {
        self.cache_dir.clone()
    }

    /// Overwrites the optional cache directory.
    #[wasm_bindgen(setter, js_name = "cacheDir")]
    pub fn set_cache_dir(&mut self, value: Option) {
        self.cache_dir = value;
    }
}

/// Embedding model types supported by Kreuzberg.
// NOTE(review): `name`, `model_id`, `dimensions` and `llm` are typed as a bare
// `Option` in the reviewed text; presumably their type arguments were lost.
// They are left exactly as found because the visible code does not determine
// them. Confirm against the generator output.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmEmbeddingModelType {
    pub(crate) r#type: String,
    pub(crate) name: Option,
    pub(crate) model_id: Option,
    pub(crate) dimensions: Option,
    pub(crate) llm: Option,
}

#[wasm_bindgen]
impl WasmEmbeddingModelType {
    /// Builds a model-type value by delegating to the inherent `default()` below.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmEmbeddingModelType {
        Self::default()
    }

    /// Returns a model-type value with every field at its `Default` value.
    ///
    /// The call is fully qualified so it reaches the derived `Default` impl
    /// rather than this inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmbeddingModelType {
        <WasmEmbeddingModelType as Default>::default()
    }

    /// Returns a clone of the `type` string.
    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    /// Overwrites the `type` string; the value is stored without validation.
    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    /// Returns a clone of the optional name.
    #[wasm_bindgen(getter, js_name = "name")]
    pub fn name(&self) -> Option {
        self.name.clone()
    }

    /// Overwrites the optional name.
    #[wasm_bindgen(setter, js_name = "name")]
    pub fn set_name(&mut self, value: Option) {
        self.name = value;
    }

    /// Returns a clone of the optional model id, exposed to JS as `modelId`.
    #[wasm_bindgen(getter, js_name = "modelId")]
    pub fn model_id(&self) -> Option {
        self.model_id.clone()
    }

    /// Overwrites the optional model id.
    #[wasm_bindgen(setter, js_name = "modelId")]
    pub fn set_model_id(&mut self, value: Option) {
        self.model_id = value;
    }

    /// Returns a clone of the optional dimensions.
    #[wasm_bindgen(getter, js_name = "dimensions")]
    pub fn dimensions(&self) -> Option {
        self.dimensions.clone()
    }

    /// Overwrites the optional dimensions.
    #[wasm_bindgen(setter, js_name = "dimensions")]
    pub fn set_dimensions(&mut self, value: Option) {
        self.dimensions = value;
    }

    /// Returns a clone of the optional `llm` value.
    #[wasm_bindgen(getter, js_name = "llm")]
    pub fn llm(&self) -> Option {
        self.llm.clone()
    }

    /// Overwrites the optional `llm` value.
    #[wasm_bindgen(setter, js_name = "llm")]
    pub fn set_llm(&mut self, value: Option) {
        self.llm = value;
    }
}

/// Type of list detection.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmListType {
    Bullet = 0,
    Numbered = 1,
    Lettered = 2,
    Indented = 3,
}

#[allow(clippy::derivable_impls)]
impl Default for WasmListType {
    fn default() -> Self {
        Self::Bullet
    }
}

impl WasmListType {
    /// Returns the API wire string for this variant (e.g. `"Bullet"`, `"Numbered"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Bullet => "Bullet",
            Self::Numbered => "Numbered",
            Self::Lettered => "Lettered",
            Self::Indented => "Indented",
        }
    }

    /// Parses an API wire string and returns the corresponding variant, or
    /// `None` if it is unrecognized. Matching is exact and case-sensitive.
    pub fn from_api_str(s: &str) -> Option<Self> {
        match s {
            "Bullet" => Some(Self::Bullet),
            "Numbered" => Some(Self::Numbered),
            "Lettered" => Some(Self::Lettered),
            "Indented" => Some(Self::Indented),
            _ => None,
        }
    }
}

/// OCR backend types.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmOcrBackendType {
    Tesseract = 0,
    EasyOCR = 1,
    PaddleOCR = 2,
    Custom = 3,
}

#[allow(clippy::derivable_impls)]
impl Default for WasmOcrBackendType {
    fn default() -> Self {
        Self::Tesseract
    }
}

impl WasmOcrBackendType {
    /// Returns the API wire string for this variant (e.g. `"Tesseract"`, `"EasyOCR"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Tesseract => "Tesseract",
            Self::EasyOCR => "EasyOCR",
            Self::PaddleOCR => "PaddleOCR",
            Self::Custom => "Custom",
        }
    }

    /// Parses an API wire string and returns the corresponding variant, or
    /// `None` if it is unrecognized. Matching is exact and case-sensitive.
    pub fn from_api_str(s: &str) -> Option<Self> {
        match s {
            "Tesseract" => Some(Self::Tesseract),
            "EasyOCR" => Some(Self::EasyOCR),
            "PaddleOCR" => Some(Self::PaddleOCR),
            "Custom" => Some(Self::Custom),
            _ => None,
        }
    }
}

/// Processing stages for post-processors.
///
/// Post-processors are executed in stage order (Early → Middle → Late).
/// Use stages to control the order of post-processing operations.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmProcessingStage {
    Early = 0,
    Middle = 1,
    Late = 2,
}

#[allow(clippy::derivable_impls)]
impl Default for WasmProcessingStage {
    fn default() -> Self {
        Self::Early
    }
}

impl WasmProcessingStage {
    /// Returns the API wire string for this variant (e.g. `"Early"`, `"Late"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Early => "Early",
            Self::Middle => "Middle",
            Self::Late => "Late",
        }
    }

    /// Parses an API wire string and returns the corresponding variant, or
    /// `None` if it is unrecognized. Matching is exact and case-sensitive.
    pub fn from_api_str(s: &str) -> Option<Self> {
        match s {
            "Early" => Some(Self::Early),
            "Middle" => Some(Self::Middle),
            "Late" => Some(Self::Late),
            _ => None,
        }
    }
}

/// Type of PDF annotation.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmPdfAnnotationType {
    Text = 0,
    Highlight = 1,
    Link = 2,
    Stamp = 3,
    Underline = 4,
    StrikeOut = 5,
    Other = 6,
}

#[allow(clippy::derivable_impls)]
impl Default for WasmPdfAnnotationType {
    fn default() -> Self {
        Self::Text
    }
}

impl WasmPdfAnnotationType {
    /// Returns the API wire string for this variant (e.g. `"text"`, `"strike_out"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::Highlight => "highlight",
            Self::Link => "link",
            Self::Stamp => "stamp",
            Self::Underline => "underline",
            Self::StrikeOut => "strike_out",
            Self::Other => "other",
        }
    }

    /// Parses an API wire string and returns the corresponding variant, or
    /// `None` if it is unrecognized. Matching is exact and case-sensitive.
    pub fn from_api_str(s: &str) -> Option<Self> {
        match s {
            "text" => Some(Self::Text),
            "highlight" => Some(Self::Highlight),
            "link" => Some(Self::Link),
            "stamp" => Some(Self::Stamp),
            "underline" => Some(Self::Underline),
            "strike_out" => Some(Self::StrikeOut),
            "other" => Some(Self::Other),
            _ => None,
        }
    }
}

/// Types of block-level elements in Djot.
#[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmBlockType { Paragraph = 0, Heading = 1, Blockquote = 2, CodeBlock = 3, ListItem = 4, OrderedList = 5, BulletList = 6, TaskList = 7, DefinitionList = 8, DefinitionTerm = 9, DefinitionDescription = 10, Div = 11, Section = 12, ThematicBreak = 13, RawBlock = 14, MathDisplay = 15, } #[allow(clippy::derivable_impls)] impl Default for WasmBlockType { fn default() -> Self { Self::Paragraph } } impl WasmBlockType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Paragraph => "paragraph", Self::Heading => "heading", Self::Blockquote => "blockquote", Self::CodeBlock => "code_block", Self::ListItem => "list_item", Self::OrderedList => "ordered_list", Self::BulletList => "bullet_list", Self::TaskList => "task_list", Self::DefinitionList => "definition_list", Self::DefinitionTerm => "definition_term", Self::DefinitionDescription => "definition_description", Self::Div => "div", Self::Section => "section", Self::ThematicBreak => "thematic_break", Self::RawBlock => "raw_block", Self::MathDisplay => "math_display", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "paragraph" => Some(Self::Paragraph), "heading" => Some(Self::Heading), "blockquote" => Some(Self::Blockquote), "code_block" => Some(Self::CodeBlock), "list_item" => Some(Self::ListItem), "ordered_list" => Some(Self::OrderedList), "bullet_list" => Some(Self::BulletList), "task_list" => Some(Self::TaskList), "definition_list" => Some(Self::DefinitionList), "definition_term" => Some(Self::DefinitionTerm), "definition_description" => Some(Self::DefinitionDescription), "div" => Some(Self::Div), "section" => Some(Self::Section), "thematic_break" => Some(Self::ThematicBreak), "raw_block" => Some(Self::RawBlock), "math_display" => Some(Self::MathDisplay), _ => None, } } } /// Types of inline elements in Djot. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmInlineType { Text = 0, Strong = 1, Emphasis = 2, Highlight = 3, Subscript = 4, Superscript = 5, Insert = 6, Delete = 7, Code = 8, Link = 9, Image = 10, Span = 11, Math = 12, RawInline = 13, FootnoteRef = 14, Symbol = 15, } #[allow(clippy::derivable_impls)] impl Default for WasmInlineType { fn default() -> Self { Self::Text } } impl WasmInlineType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Text => "text", Self::Strong => "strong", Self::Emphasis => "emphasis", Self::Highlight => "highlight", Self::Subscript => "subscript", Self::Superscript => "superscript", Self::Insert => "insert", Self::Delete => "delete", Self::Code => "code", Self::Link => "link", Self::Image => "image", Self::Span => "span", Self::Math => "math", Self::RawInline => "raw_inline", Self::FootnoteRef => "footnote_ref", Self::Symbol => "symbol", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "text" => Some(Self::Text), "strong" => Some(Self::Strong), "emphasis" => Some(Self::Emphasis), "highlight" => Some(Self::Highlight), "subscript" => Some(Self::Subscript), "superscript" => Some(Self::Superscript), "insert" => Some(Self::Insert), "delete" => Some(Self::Delete), "code" => Some(Self::Code), "link" => Some(Self::Link), "image" => Some(Self::Image), "span" => Some(Self::Span), "math" => Some(Self::Math), "raw_inline" => Some(Self::RawInline), "footnote_ref" => Some(Self::FootnoteRef), "symbol" => Some(Self::Symbol), _ => None, } } } /// Semantic kind of a relationship between document elements. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmRelationshipKind { FootnoteReference = 0, CitationReference = 1, InternalLink = 2, Caption = 3, Label = 4, TocEntry = 5, CrossReference = 6, } #[allow(clippy::derivable_impls)] impl Default for WasmRelationshipKind { fn default() -> Self { Self::FootnoteReference } } impl WasmRelationshipKind { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::FootnoteReference => "footnote_reference", Self::CitationReference => "citation_reference", Self::InternalLink => "internal_link", Self::Caption => "caption", Self::Label => "label", Self::TocEntry => "toc_entry", Self::CrossReference => "cross_reference", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "footnote_reference" => Some(Self::FootnoteReference), "citation_reference" => Some(Self::CitationReference), "internal_link" => Some(Self::InternalLink), "caption" => Some(Self::Caption), "label" => Some(Self::Label), "toc_entry" => Some(Self::TocEntry), "cross_reference" => Some(Self::CrossReference), _ => None, } } } /// Content layer classification for document nodes. 
///
/// Replaces separate body/furniture arrays with per-node granularity.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmContentLayer {
    Body = 0,
    Header = 1,
    Footer = 2,
    Footnote = 3,
}

#[allow(clippy::derivable_impls)]
impl Default for WasmContentLayer {
    /// Defaults to [`WasmContentLayer::Body`].
    fn default() -> Self {
        Self::Body
    }
}

// Rust-side helpers: this impl block carries no `#[wasm_bindgen]` attribute.
impl WasmContentLayer {
    /// Returns the serde wire string for this variant (e.g. `"body"`, `"footnote"`).
    ///
    /// Every string returned here is accepted by [`Self::from_api_str`].
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Body => "body",
            Self::Header => "header",
            Self::Footer => "footer",
            Self::Footnote => "footnote",
        }
    }

    /// Parses a serde wire string and returns the corresponding variant, or `None` if
    /// unrecognized. Matching is exact and case-sensitive.
    pub fn from_api_str(s: &str) -> Option<Self> {
        match s {
            "body" => Some(Self::Body),
            "header" => Some(Self::Header),
            "footer" => Some(Self::Footer),
            "footnote" => Some(Self::Footnote),
            _ => None,
        }
    }
}

/// Tagged enum for node content. Each variant carries only type-specific data.
///
/// Uses `#[serde(tag = "node_type")]` to avoid "type" keyword collision in
/// Go/Java/TypeScript bindings.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmNodeContent {
    // NOTE(review): every `Option` in this struct and its accessors appears without a
    // type argument in this chunk; confirm the concrete types against the generator output.
    pub(crate) node_type: String,
    pub(crate) text: Option,
    pub(crate) level: Option,
    pub(crate) ordered: Option,
    pub(crate) grid: Option,
    pub(crate) description: Option,
    pub(crate) image_index: Option,
    pub(crate) src: Option,
    pub(crate) language: Option,
    pub(crate) label: Option,
    pub(crate) heading_level: Option,
    pub(crate) heading_text: Option,
    pub(crate) number: Option,
    pub(crate) title: Option,
    pub(crate) term: Option,
    pub(crate) definition: Option,
    pub(crate) key: Option,
    pub(crate) kind: Option,
    pub(crate) format: Option,
    pub(crate) content: Option,
    pub(crate) entries: Option,
}

#[wasm_bindgen]
impl WasmNodeContent {
    /// JS constructor; produces the same value as [`WasmNodeContent::default`].
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmNodeContent {
        Self::default()
    }

    /// Returns the derived `Default` value (empty `node_type`, every optional field `None`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmNodeContent {
        // Fully qualified on purpose: `Self::default()` would resolve to this inherent
        // function and recurse instead of reaching the derived trait impl.
        <WasmNodeContent as Default>::default()
    }

    // Accessors: each getter hands back a clone of the field, each setter overwrites it.

    #[wasm_bindgen(getter, js_name = "nodeType")]
    pub fn node_type(&self) -> String {
        self.node_type.clone()
    }

    #[wasm_bindgen(setter, js_name = "nodeType")]
    pub fn set_node_type(&mut self, value: String) {
        self.node_type = value;
    }

    #[wasm_bindgen(getter, js_name = "text")]
    pub fn text(&self) -> Option {
        self.text.clone()
    }

    #[wasm_bindgen(setter, js_name = "text")]
    pub fn set_text(&mut self, value: Option) {
        self.text = value;
    }

    #[wasm_bindgen(getter, js_name = "level")]
    pub fn level(&self) -> Option {
        self.level.clone()
    }

    #[wasm_bindgen(setter, js_name = "level")]
    pub fn set_level(&mut self, value: Option) {
        self.level = value;
    }

    #[wasm_bindgen(getter, js_name = "ordered")]
    pub fn ordered(&self) -> Option {
        self.ordered.clone()
    }

    #[wasm_bindgen(setter, js_name = "ordered")]
    pub fn set_ordered(&mut self, value: Option) {
        self.ordered = value;
    }

    #[wasm_bindgen(getter, js_name = "grid")]
    pub fn grid(&self) -> Option {
        self.grid.clone()
    }

    #[wasm_bindgen(setter, js_name = "grid")]
    pub fn set_grid(&mut self, value: Option) {
        self.grid = value;
    }

    #[wasm_bindgen(getter, js_name = "description")]
    pub fn description(&self) -> Option {
        self.description.clone()
    }

    #[wasm_bindgen(setter, js_name = "description")]
    pub fn set_description(&mut self, value: Option) {
        self.description = value;
    }

    #[wasm_bindgen(getter, js_name = "imageIndex")]
    pub fn image_index(&self) -> Option {
        self.image_index.clone()
    }

    #[wasm_bindgen(setter, js_name = "imageIndex")]
    pub fn set_image_index(&mut self, value: Option) {
        self.image_index = value;
    }

    #[wasm_bindgen(getter, js_name = "src")]
    pub fn src(&self) -> Option {
        self.src.clone()
    }

    #[wasm_bindgen(setter, js_name = "src")]
    pub fn set_src(&mut self, value: Option) {
        self.src = value;
    }

    #[wasm_bindgen(getter, js_name = "language")]
    pub fn language(&self) -> Option {
        self.language.clone()
    }

    #[wasm_bindgen(setter, js_name = "language")]
    pub fn set_language(&mut self, value: Option) {
        self.language = value;
    }

    #[wasm_bindgen(getter, js_name = "label")]
    pub fn label(&self) -> Option {
        self.label.clone()
    }

    #[wasm_bindgen(setter, js_name = "label")]
    pub fn set_label(&mut self, value: Option) {
        self.label = value;
    }

    #[wasm_bindgen(getter, js_name = "headingLevel")]
    pub fn heading_level(&self) -> Option {
        self.heading_level.clone()
    }

    #[wasm_bindgen(setter, js_name = "headingLevel")]
    pub fn set_heading_level(&mut self, value: Option) {
        self.heading_level = value;
    }

    #[wasm_bindgen(getter, js_name = "headingText")]
    pub fn heading_text(&self) -> Option {
        self.heading_text.clone()
    }

    #[wasm_bindgen(setter, js_name = "headingText")]
    pub fn set_heading_text(&mut self, value: Option) {
        self.heading_text = value;
    }

    #[wasm_bindgen(getter, js_name = "number")]
    pub fn number(&self) -> Option {
        self.number.clone()
    }

    #[wasm_bindgen(setter, js_name = "number")]
    pub fn set_number(&mut self, value: Option) {
        self.number = value;
    }

    #[wasm_bindgen(getter, js_name = "title")]
    pub fn title(&self) -> Option {
        self.title.clone()
    }

    #[wasm_bindgen(setter, js_name = "title")]
    pub fn set_title(&mut self, value: Option) {
        self.title = value;
    }

    #[wasm_bindgen(getter, js_name = "term")]
    pub fn term(&self) -> Option {
        self.term.clone()
    }

    #[wasm_bindgen(setter, js_name = "term")]
    pub fn set_term(&mut self, value: Option) {
        self.term = value;
    }

    #[wasm_bindgen(getter, js_name = "definition")]
    pub fn definition(&self) -> Option {
        self.definition.clone()
    }

    #[wasm_bindgen(setter, js_name = "definition")]
    pub fn set_definition(&mut self, value: Option) {
        self.definition = value;
    }

    #[wasm_bindgen(getter, js_name = "key")]
    pub fn key(&self) -> Option {
        self.key.clone()
    }

    #[wasm_bindgen(setter, js_name = "key")]
    pub fn set_key(&mut self, value: Option) {
        self.key = value;
    }

    #[wasm_bindgen(getter, js_name = "kind")]
    pub fn kind(&self) -> Option {
        self.kind.clone()
    }

    #[wasm_bindgen(setter, js_name = "kind")]
    pub fn set_kind(&mut self, value: Option) {
        self.kind = value;
    }

    #[wasm_bindgen(getter, js_name = "format")]
    pub fn format(&self) -> Option {
        self.format.clone()
    }

    #[wasm_bindgen(setter, js_name = "format")]
    pub fn set_format(&mut self, value: Option) {
        self.format = value;
    }

    #[wasm_bindgen(getter, js_name = "content")]
    pub fn content(&self) -> Option {
        self.content.clone()
    }

    #[wasm_bindgen(setter, js_name = "content")]
    pub fn set_content(&mut self, value: Option) {
        self.content = value;
    }

    #[wasm_bindgen(getter, js_name = "entries")]
    pub fn entries(&self) -> Option {
        self.entries.clone()
    }

    #[wasm_bindgen(setter, js_name = "entries")]
    pub fn set_entries(&mut self, value: Option) {
        self.entries = value;
    }
}

/// Types of inline text annotations.
#[wasm_bindgen] #[derive(Clone, Default)] pub struct WasmAnnotationKind { pub(crate) annotation_type: String, pub(crate) url: Option, pub(crate) title: Option, pub(crate) value: Option, pub(crate) name: Option, } #[wasm_bindgen] impl WasmAnnotationKind { #[wasm_bindgen(constructor)] pub fn new() -> WasmAnnotationKind { Self::default() } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmAnnotationKind { ::default() } #[wasm_bindgen(getter, js_name = "annotationType")] pub fn annotation_type(&self) -> String { self.annotation_type.clone() } #[wasm_bindgen(setter, js_name = "annotationType")] pub fn set_annotation_type(&mut self, value: String) { self.annotation_type = value; } #[wasm_bindgen(getter, js_name = "url")] pub fn url(&self) -> Option { self.url.clone() } #[wasm_bindgen(setter, js_name = "url")] pub fn set_url(&mut self, value: Option) { self.url = value; } #[wasm_bindgen(getter, js_name = "title")] pub fn title(&self) -> Option { self.title.clone() } #[wasm_bindgen(setter, js_name = "title")] pub fn set_title(&mut self, value: Option) { self.title = value; } #[wasm_bindgen(getter, js_name = "value")] pub fn value(&self) -> Option { self.value.clone() } #[wasm_bindgen(setter, js_name = "value")] pub fn set_value(&mut self, value: Option) { self.value = value; } #[wasm_bindgen(getter, js_name = "name")] pub fn name(&self) -> Option { self.name.clone() } #[wasm_bindgen(setter, js_name = "name")] pub fn set_name(&mut self, value: Option) { self.name = value; } } /// How the extracted text was produced. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmExtractionMethod { Native = 0, Ocr = 1, Mixed = 2, } #[allow(clippy::derivable_impls)] impl Default for WasmExtractionMethod { fn default() -> Self { Self::Native } } impl WasmExtractionMethod { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). 
pub fn to_api_str(self) -> &'static str { match self { Self::Native => "native", Self::Ocr => "ocr", Self::Mixed => "mixed", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "native" => Some(Self::Native), "ocr" => Some(Self::Ocr), "mixed" => Some(Self::Mixed), _ => None, } } } /// Semantic structural classification of a text chunk. /// /// Assigned by the heuristic classifier in `chunking.classifier`. /// Defaults to `Unknown` when no rule matches. /// Designed to be extended in future versions without breaking changes. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmChunkType { Heading = 0, PartyList = 1, Definitions = 2, OperativeClause = 3, SignatureBlock = 4, Schedule = 5, TableLike = 6, Formula = 7, CodeBlock = 8, Image = 9, OrgChart = 10, Diagram = 11, Unknown = 12, } #[allow(clippy::derivable_impls)] impl Default for WasmChunkType { fn default() -> Self { Self::Unknown } } impl WasmChunkType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Heading => "heading", Self::PartyList => "party_list", Self::Definitions => "definitions", Self::OperativeClause => "operative_clause", Self::SignatureBlock => "signature_block", Self::Schedule => "schedule", Self::TableLike => "table_like", Self::Formula => "formula", Self::CodeBlock => "code_block", Self::Image => "image", Self::OrgChart => "org_chart", Self::Diagram => "diagram", Self::Unknown => "unknown", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "heading" => Some(Self::Heading), "party_list" => Some(Self::PartyList), "definitions" => Some(Self::Definitions), "operative_clause" => Some(Self::OperativeClause), "signature_block" => Some(Self::SignatureBlock), "schedule" => Some(Self::Schedule), "table_like" => Some(Self::TableLike), "formula" => Some(Self::Formula), "code_block" => Some(Self::CodeBlock), "image" => Some(Self::Image), "org_chart" => Some(Self::OrgChart), "diagram" => Some(Self::Diagram), "unknown" => Some(Self::Unknown), _ => None, } } } /// Heuristic classification of what an image likely depicts. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmImageKind { Photograph = 0, Diagram = 1, Chart = 2, Drawing = 3, TextBlock = 4, Decoration = 5, Logo = 6, Icon = 7, TileFragment = 8, Mask = 9, PageRaster = 10, Unknown = 11, } #[allow(clippy::derivable_impls)] impl Default for WasmImageKind { fn default() -> Self { Self::Photograph } } impl WasmImageKind { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Photograph => "photograph", Self::Diagram => "diagram", Self::Chart => "chart", Self::Drawing => "drawing", Self::TextBlock => "text_block", Self::Decoration => "decoration", Self::Logo => "logo", Self::Icon => "icon", Self::TileFragment => "tile_fragment", Self::Mask => "mask", Self::PageRaster => "page_raster", Self::Unknown => "unknown", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "photograph" => Some(Self::Photograph), "diagram" => Some(Self::Diagram), "chart" => Some(Self::Chart), "drawing" => Some(Self::Drawing), "text_block" => Some(Self::TextBlock), "decoration" => Some(Self::Decoration), "logo" => Some(Self::Logo), "icon" => Some(Self::Icon), "tile_fragment" => Some(Self::TileFragment), "mask" => Some(Self::Mask), "page_raster" => Some(Self::PageRaster), "unknown" => Some(Self::Unknown), _ => None, } } } /// Result-shape selection for extraction results. /// /// Distinct from `OutputFormat` (which controls rendering — Plain, Markdown, /// HTML, etc.). `ResultFormat` controls the *shape* of the result: a unified content /// blob vs. an element-based decomposition. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmResultFormat { Unified = 0, ElementBased = 1, } #[allow(clippy::derivable_impls)] impl Default for WasmResultFormat { fn default() -> Self { Self::Unified } } impl WasmResultFormat { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Unified => "unified", Self::ElementBased => "element_based", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "unified" => Some(Self::Unified), "element_based" => Some(Self::ElementBased), _ => None, } } } /// Semantic element type classification. /// /// Categorizes text content into semantic units for downstream processing. /// Supports the element types commonly found in Unstructured documents. 
#[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmElementType { Title = 0, NarrativeText = 1, Heading = 2, ListItem = 3, Table = 4, Image = 5, PageBreak = 6, CodeBlock = 7, BlockQuote = 8, Footer = 9, Header = 10, } #[allow(clippy::derivable_impls)] impl Default for WasmElementType { fn default() -> Self { Self::Title } } impl WasmElementType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Title => "title", Self::NarrativeText => "narrative_text", Self::Heading => "heading", Self::ListItem => "list_item", Self::Table => "table", Self::Image => "image", Self::PageBreak => "page_break", Self::CodeBlock => "code_block", Self::BlockQuote => "block_quote", Self::Footer => "footer", Self::Header => "header", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "title" => Some(Self::Title), "narrative_text" => Some(Self::NarrativeText), "heading" => Some(Self::Heading), "list_item" => Some(Self::ListItem), "table" => Some(Self::Table), "image" => Some(Self::Image), "page_break" => Some(Self::PageBreak), "code_block" => Some(Self::CodeBlock), "block_quote" => Some(Self::BlockQuote), "footer" => Some(Self::Footer), "header" => Some(Self::Header), _ => None, } } } /// Format-specific metadata (discriminated union). /// /// Only one format type can exist per extraction result. This provides /// type-safe, clean metadata without nested optionals. 
#[wasm_bindgen] #[derive(Clone, Default)] pub struct WasmFormatMetadata { pub(crate) format_type: String, pub(crate) _0: Option, } #[wasm_bindgen] impl WasmFormatMetadata { #[wasm_bindgen(constructor)] pub fn new() -> WasmFormatMetadata { Self::default() } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmFormatMetadata { ::default() } #[wasm_bindgen(getter, js_name = "formatType")] pub fn format_type(&self) -> String { self.format_type.clone() } #[wasm_bindgen(setter, js_name = "formatType")] pub fn set_format_type(&mut self, value: String) { self.format_type = value; } #[wasm_bindgen(getter, js_name = "0")] pub fn field_0(&self) -> Option { self._0.clone() } #[wasm_bindgen(setter, js_name = "0")] pub fn set_field_0(&mut self, value: Option) { self._0 = value; } } /// Text direction enumeration for HTML documents. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmTextDirection { LeftToRight = 0, RightToLeft = 1, Auto = 2, } #[allow(clippy::derivable_impls)] impl Default for WasmTextDirection { fn default() -> Self { Self::LeftToRight } } impl WasmTextDirection { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::LeftToRight => "ltr", Self::RightToLeft => "rtl", Self::Auto => "auto", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "ltr" => Some(Self::LeftToRight), "rtl" => Some(Self::RightToLeft), "auto" => Some(Self::Auto), _ => None, } } } /// Link type classification. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmLinkType { Anchor = 0, Internal = 1, External = 2, Email = 3, Phone = 4, Other = 5, } #[allow(clippy::derivable_impls)] impl Default for WasmLinkType { fn default() -> Self { Self::Anchor } } impl WasmLinkType { /// Returns the serde wire string for this variant (e.g. 
`"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Anchor => "anchor", Self::Internal => "internal", Self::External => "external", Self::Email => "email", Self::Phone => "phone", Self::Other => "other", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "anchor" => Some(Self::Anchor), "internal" => Some(Self::Internal), "external" => Some(Self::External), "email" => Some(Self::Email), "phone" => Some(Self::Phone), "other" => Some(Self::Other), _ => None, } } } /// Image type classification. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmImageType { DataUri = 0, InlineSvg = 1, External = 2, Relative = 3, } #[allow(clippy::derivable_impls)] impl Default for WasmImageType { fn default() -> Self { Self::DataUri } } impl WasmImageType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::DataUri => "data-uri", Self::InlineSvg => "inline-svg", Self::External => "external", Self::Relative => "relative", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "data-uri" => Some(Self::DataUri), "inline-svg" => Some(Self::InlineSvg), "external" => Some(Self::External), "relative" => Some(Self::Relative), _ => None, } } } /// Structured data type classification. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmStructuredDataType { JsonLd = 0, Microdata = 1, RDFa = 2, } #[allow(clippy::derivable_impls)] impl Default for WasmStructuredDataType { fn default() -> Self { Self::JsonLd } } impl WasmStructuredDataType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). 
pub fn to_api_str(self) -> &'static str { match self { Self::JsonLd => "json-ld", Self::Microdata => "microdata", Self::RDFa => "rdfa", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "json-ld" => Some(Self::JsonLd), "microdata" => Some(Self::Microdata), "rdfa" => Some(Self::RDFa), _ => None, } } } /// Bounding geometry for an OCR element. /// /// Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals /// (from PaddleOCR and rotated text detection). #[wasm_bindgen] #[derive(Clone, Default)] pub struct WasmOcrBoundingGeometry { pub(crate) r#type: String, pub(crate) left: Option, pub(crate) top: Option, pub(crate) width: Option, pub(crate) height: Option, pub(crate) points: Option, } #[wasm_bindgen] impl WasmOcrBoundingGeometry { #[wasm_bindgen(constructor)] pub fn new() -> WasmOcrBoundingGeometry { Self::default() } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmOcrBoundingGeometry { ::default() } #[wasm_bindgen(getter, js_name = "type")] pub fn r#type(&self) -> String { self.r#type.clone() } #[wasm_bindgen(setter, js_name = "type")] pub fn set_type(&mut self, value: String) { self.r#type = value; } #[wasm_bindgen(getter, js_name = "left")] pub fn left(&self) -> Option { self.left.clone() } #[wasm_bindgen(setter, js_name = "left")] pub fn set_left(&mut self, value: Option) { self.left = value; } #[wasm_bindgen(getter, js_name = "top")] pub fn top(&self) -> Option { self.top.clone() } #[wasm_bindgen(setter, js_name = "top")] pub fn set_top(&mut self, value: Option) { self.top = value; } #[wasm_bindgen(getter, js_name = "width")] pub fn width(&self) -> Option { self.width.clone() } #[wasm_bindgen(setter, js_name = "width")] pub fn set_width(&mut self, value: Option) { self.width = value; } #[wasm_bindgen(getter, js_name = "height")] pub fn height(&self) -> Option { self.height.clone() } 
#[wasm_bindgen(setter, js_name = "height")] pub fn set_height(&mut self, value: Option) { self.height = value; } #[wasm_bindgen(getter, js_name = "points")] pub fn points(&self) -> Option { self.points.clone() } #[wasm_bindgen(setter, js_name = "points")] pub fn set_points(&mut self, value: Option) { self.points = value; } } /// Hierarchical level of an OCR element. /// /// Maps to Tesseract's page segmentation hierarchy and provides /// equivalent semantics for PaddleOCR. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmOcrElementLevel { Word = 0, Line = 1, Block = 2, Page = 3, } #[allow(clippy::derivable_impls)] impl Default for WasmOcrElementLevel { fn default() -> Self { Self::Line } } impl WasmOcrElementLevel { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Word => "word", Self::Line => "line", Self::Block => "block", Self::Page => "page", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "word" => Some(Self::Word), "line" => Some(Self::Line), "block" => Some(Self::Block), "page" => Some(Self::Page), _ => None, } } } /// Type of paginated unit in a document. /// /// Distinguishes between different types of "pages" (PDF pages, presentation slides, spreadsheet sheets). #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmPageUnitType { Page = 0, Slide = 1, Sheet = 2, } #[allow(clippy::derivable_impls)] impl Default for WasmPageUnitType { fn default() -> Self { Self::Page } } impl WasmPageUnitType { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Page => "page", Self::Slide => "slide", Self::Sheet => "sheet", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "page" => Some(Self::Page), "slide" => Some(Self::Slide), "sheet" => Some(Self::Sheet), _ => None, } } } /// A single line in a unified-diff hunk. /// /// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can /// reference it unconditionally, without requiring the `diff` Cargo feature. /// `crate.diff` re-exports this type verbatim. #[wasm_bindgen] #[derive(Clone, Default)] pub struct WasmDiffLine { pub(crate) kind: String, pub(crate) _0: Option, } #[wasm_bindgen] impl WasmDiffLine { #[wasm_bindgen(constructor)] pub fn new() -> WasmDiffLine { Self::default() } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmDiffLine { ::default() } #[wasm_bindgen(getter, js_name = "kind")] pub fn kind(&self) -> String { self.kind.clone() } #[wasm_bindgen(setter, js_name = "kind")] pub fn set_kind(&mut self, value: String) { self.kind = value; } #[wasm_bindgen(getter, js_name = "0")] pub fn field_0(&self) -> Option { self._0.clone() } #[wasm_bindgen(setter, js_name = "0")] pub fn set_field_0(&mut self, value: Option) { self._0 = value; } } /// Semantic classification of a tracked change. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmRevisionKind { Insertion = 0, Deletion = 1, FormatChange = 2, Comment = 3, } #[allow(clippy::derivable_impls)] impl Default for WasmRevisionKind { fn default() -> Self { Self::Insertion } } impl WasmRevisionKind { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Insertion => "insertion", Self::Deletion => "deletion", Self::FormatChange => "format_change", Self::Comment => "comment", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. 
pub fn from_api_str(s: &str) -> Option { match s { "insertion" => Some(Self::Insertion), "deletion" => Some(Self::Deletion), "format_change" => Some(Self::FormatChange), "comment" => Some(Self::Comment), _ => None, } } } /// Best-effort document location for a revision. #[wasm_bindgen] #[derive(Clone, Default)] pub struct WasmRevisionAnchor { pub(crate) r#type: String, pub(crate) index: Option, pub(crate) row: Option, pub(crate) col: Option, pub(crate) table_index: Option, pub(crate) name: Option, } #[wasm_bindgen] impl WasmRevisionAnchor { #[wasm_bindgen(constructor)] pub fn new() -> WasmRevisionAnchor { Self::default() } #[wasm_bindgen] #[allow(clippy::should_implement_trait)] pub fn default() -> WasmRevisionAnchor { ::default() } #[wasm_bindgen(getter, js_name = "type")] pub fn r#type(&self) -> String { self.r#type.clone() } #[wasm_bindgen(setter, js_name = "type")] pub fn set_type(&mut self, value: String) { self.r#type = value; } #[wasm_bindgen(getter, js_name = "index")] pub fn index(&self) -> Option { self.index.clone() } #[wasm_bindgen(setter, js_name = "index")] pub fn set_index(&mut self, value: Option) { self.index = value; } #[wasm_bindgen(getter, js_name = "row")] pub fn row(&self) -> Option { self.row.clone() } #[wasm_bindgen(setter, js_name = "row")] pub fn set_row(&mut self, value: Option) { self.row = value; } #[wasm_bindgen(getter, js_name = "col")] pub fn col(&self) -> Option { self.col.clone() } #[wasm_bindgen(setter, js_name = "col")] pub fn set_col(&mut self, value: Option) { self.col = value; } #[wasm_bindgen(getter, js_name = "tableIndex")] pub fn table_index(&self) -> Option { self.table_index.clone() } #[wasm_bindgen(setter, js_name = "tableIndex")] pub fn set_table_index(&mut self, value: Option) { self.table_index = value; } #[wasm_bindgen(getter, js_name = "name")] pub fn name(&self) -> Option { self.name.clone() } #[wasm_bindgen(setter, js_name = "name")] pub fn set_name(&mut self, value: Option) { self.name = value; } } /// Semantic 
classification of an extracted URI. #[wasm_bindgen] #[derive(Clone, Copy, PartialEq, Eq)] pub enum WasmUriKind { Hyperlink = 0, Image = 1, Anchor = 2, Citation = 3, Reference = 4, Email = 5, } #[allow(clippy::derivable_impls)] impl Default for WasmUriKind { fn default() -> Self { Self::Hyperlink } } impl WasmUriKind { /// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`). pub fn to_api_str(self) -> &'static str { match self { Self::Hyperlink => "hyperlink", Self::Image => "image", Self::Anchor => "anchor", Self::Citation => "citation", Self::Reference => "reference", Self::Email => "email", } } /// Parses a serde wire string and returns the corresponding variant, or None if unrecognized. pub fn from_api_str(s: &str) -> Option { match s { "hyperlink" => Some(Self::Hyperlink), "image" => Some(Self::Image), "anchor" => Some(Self::Anchor), "citation" => Some(Self::Citation), "reference" => Some(Self::Reference), "email" => Some(Self::Email), _ => None, } } } #[derive(serde::Deserialize, Default)] #[serde(default)] pub struct ExtractionConfigInput { #[serde(rename = "useCache")] pub use_cache: Option, #[serde(rename = "enableQualityProcessing")] pub enable_quality_processing: Option, #[serde(rename = "ocr")] pub ocr: Option, #[serde(rename = "forceOcr")] pub force_ocr: Option, #[serde(rename = "forceOcrPages")] pub force_ocr_pages: Option>, #[serde(rename = "disableOcr")] pub disable_ocr: Option, #[serde(rename = "chunking")] pub chunking: Option, #[serde(rename = "contentFilter")] pub content_filter: Option, #[serde(rename = "images")] pub images: Option, #[cfg(feature = "pdf")] #[serde(skip)] pub pdf_options: Option, #[serde(rename = "tokenReduction")] pub token_reduction: Option, #[serde(rename = "languageDetection")] pub language_detection: Option, #[serde(rename = "pages")] pub pages: Option, #[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))] #[serde(skip)] pub keywords: Option, #[serde(rename = "postprocessor")] pub 
postprocessor: Option, #[cfg(feature = "html")] #[serde(skip)] pub html_options: Option, #[cfg(feature = "html")] #[serde(skip)] pub html_output: Option, #[serde(rename = "extractionTimeoutSecs")] pub extraction_timeout_secs: Option, #[serde(rename = "maxConcurrentExtractions")] pub max_concurrent_extractions: Option, #[serde(rename = "resultFormat")] pub result_format: Option, #[serde(rename = "securityLimits")] pub security_limits: Option, #[serde(rename = "maxEmbeddedFileBytes")] pub max_embedded_file_bytes: Option, #[serde(rename = "outputFormat")] pub output_format: Option, #[cfg(feature = "layout-types")] #[serde(skip)] pub layout: Option, #[serde(rename = "useLayoutForMarkdown")] pub use_layout_for_markdown: Option, #[serde(rename = "includeDocumentStructure")] pub include_document_structure: Option, #[serde(rename = "acceleration")] pub acceleration: Option, #[serde(rename = "cacheNamespace")] pub cache_namespace: Option, #[serde(rename = "cacheTtlSecs")] pub cache_ttl_secs: Option, #[serde(rename = "email")] pub email: Option, #[serde(rename = "concurrency")] pub concurrency: Option, #[serde(rename = "maxArchiveDepth")] pub max_archive_depth: Option, #[cfg(feature = "tree-sitter")] #[serde(skip)] pub tree_sitter: Option, #[serde(rename = "structuredExtraction")] pub structured_extraction: Option, #[serde(rename = "cancelToken")] pub cancel_token: Option, } impl From for kreuzberg::ExtractionConfig { fn from(val: ExtractionConfigInput) -> Self { let mut out = Self::default(); if let Some(v) = val.use_cache { out.use_cache = v.into(); } if let Some(v) = val.enable_quality_processing { out.enable_quality_processing = v.into(); } if let Some(v) = val.ocr { out.ocr = v.into(); } if let Some(v) = val.force_ocr { out.force_ocr = v.into(); } if let Some(v) = val.force_ocr_pages { out.force_ocr_pages = v.into(); } if let Some(v) = val.disable_ocr { out.disable_ocr = v.into(); } if let Some(v) = val.chunking { out.chunking = v.into(); } if let Some(v) = 
val.content_filter { out.content_filter = v.into(); } if let Some(v) = val.images { out.images = v.into(); } #[cfg(feature = "pdf")] if let Some(v) = val.pdf_options { out.pdf_options = v.into(); } if let Some(v) = val.token_reduction { out.token_reduction = v.into(); } if let Some(v) = val.language_detection { out.language_detection = v.into(); } if let Some(v) = val.pages { out.pages = v.into(); } #[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))] if let Some(v) = val.keywords { out.keywords = v.into(); } if let Some(v) = val.postprocessor { out.postprocessor = v.into(); } #[cfg(feature = "html")] if let Some(v) = val.html_options { out.html_options = serde_json::from_str(&v).unwrap_or_default(); } #[cfg(feature = "html")] if let Some(v) = val.html_output { out.html_output = v.into(); } if let Some(v) = val.extraction_timeout_secs { out.extraction_timeout_secs = v.into(); } if let Some(v) = val.max_concurrent_extractions { out.max_concurrent_extractions = v.into(); } if let Some(v) = val.result_format { out.result_format = v.into(); } if let Some(v) = val.security_limits { out.security_limits = v.into(); } if let Some(v) = val.max_embedded_file_bytes { out.max_embedded_file_bytes = v.into(); } if let Some(v) = val.output_format { out.output_format = v.into(); } #[cfg(feature = "layout-types")] if let Some(v) = val.layout { out.layout = v.into(); } if let Some(v) = val.use_layout_for_markdown { out.use_layout_for_markdown = v.into(); } if let Some(v) = val.include_document_structure { out.include_document_structure = v.into(); } if let Some(v) = val.acceleration { out.acceleration = v.into(); } if let Some(v) = val.cache_namespace { out.cache_namespace = v.into(); } if let Some(v) = val.cache_ttl_secs { out.cache_ttl_secs = v.into(); } if let Some(v) = val.email { out.email = v.into(); } if let Some(v) = val.concurrency { out.concurrency = serde_json::from_str(&v).unwrap_or_default(); } if let Some(v) = val.max_archive_depth { out.max_archive_depth 
= v.into(); } #[cfg(feature = "tree-sitter")] if let Some(v) = val.tree_sitter { out.tree_sitter = v.into(); } if let Some(v) = val.structured_extraction { out.structured_extraction = v.into(); } if let Some(v) = val.cancel_token { out.cancel_token = serde_json::from_str(&v).unwrap_or_default(); } out } } /// Extract content from a byte array. /// /// This is the main entry point for in-memory extraction. It performs the following steps: /// 1. Validate MIME type /// 2. Handle legacy format conversion if needed /// 3. Select appropriate extractor from registry /// 4. Extract content /// 5. Run post-processing pipeline /// /// # Arguments /// /// * `content` - The byte array to extract /// * `mime_type` - MIME type of the content /// * `config` - Extraction configuration /// /// # Returns /// /// An `ExtractionResult` containing the extracted content and metadata. /// /// # Errors /// /// Returns `KreuzbergError.Validation` if MIME type is invalid. /// Returns `KreuzbergError.UnsupportedFormat` if MIME type is not supported. /// /// # Example #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "extractBytes")] pub async fn extract_bytes( content: Vec, mime_type: String, config: JsValue, ) -> Result { let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() { kreuzberg::ExtractionConfig::default() } else { serde_wasm_bindgen::from_value::(config) .map_err(|e| JsValue::from_str(&e.to_string()))? }; let result = kreuzberg::extract_bytes(&content, &mime_type, &config_core) .await .map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(WasmExtractionResult::from(result)) } /// Extract content from a file. /// /// This is the main entry point for file-based extraction. It performs the following steps: /// 1. Check cache for existing result (if caching enabled) /// 2. Detect or validate MIME type /// 3. Select appropriate extractor from registry /// 4. Extract content /// 5. Run post-processing pipeline /// 6. 
Store result in cache (if caching enabled) /// /// # Arguments /// /// * `path` - Path to the file to extract /// * `mime_type` - Optional MIME type override. If undefined, will be auto-detected /// * `config` - Extraction configuration /// /// # Returns /// /// An `ExtractionResult` containing the extracted content and metadata. /// /// # Errors /// /// Returns `KreuzbergError.Io` if the file doesn't exist (NotFound) or for other file I/O errors. /// Returns `KreuzbergError.UnsupportedFormat` if MIME type is not supported. /// /// # Example #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "extractFile")] pub async fn extract_file( path: String, mime_type: Option, config: JsValue, ) -> Result { let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() { kreuzberg::ExtractionConfig::default() } else { serde_wasm_bindgen::from_value::(config) .map_err(|e| JsValue::from_str(&e.to_string()))? }; let result = kreuzberg::extract_file(std::path::PathBuf::from(path), mime_type.as_deref(), &config_core) .await .map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(WasmExtractionResult::from(result)) } /// Detect MIME type from raw file bytes. /// /// Uses magic byte signatures to detect file type from content. /// Falls back to `infer` crate for comprehensive detection. /// /// For ZIP-based files, inspects contents to distinguish Office Open XML /// formats (DOCX, XLSX, PPTX) from plain ZIP archives. /// /// # Arguments /// /// * `content` - Raw file bytes /// /// # Returns /// /// The detected MIME type string. /// /// # Errors /// /// Returns `KreuzbergError.UnsupportedFormat` if MIME type cannot be determined. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "detectMimeTypeFromBytes")] pub fn detect_mime_type_from_bytes(content: Vec) -> Result { let result = kreuzberg::detect_mime_type_from_bytes(&content).map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// Get file extensions for a given MIME type. 
/// /// Returns all known file extensions that map to the specified MIME type. /// /// # Arguments /// /// * `mime_type` - The MIME type to look up /// /// # Returns /// /// A vector of file extensions (without leading dot) for the MIME type. /// /// # Example #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "getExtensionsForMime")] pub fn get_extensions_for_mime(mime_type: String) -> Result, JsValue> { let result = kreuzberg::get_extensions_for_mime(&mime_type).map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List the names of all registered embedding backends. /// /// Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language /// bindings. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listEmbeddingBackends")] pub fn list_embedding_backends() -> Result, JsValue> { let result = kreuzberg::list_embedding_backends().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List names of all registered document extractors. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listDocumentExtractors")] pub fn list_document_extractors() -> Result, JsValue> { let result = kreuzberg::list_document_extractors().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List all registered OCR backends. /// /// Returns the names of all OCR backends currently registered in the global registry. /// /// # Returns /// /// A vector of OCR backend names. /// /// # Example #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listOcrBackends")] pub fn list_ocr_backends() -> Result, JsValue> { let result = kreuzberg::list_ocr_backends().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List all registered post-processor names. /// /// Returns a vector of all post-processor names currently registered in the /// global registry. 
/// /// # Returns /// /// - `Ok(Vec)` - Vector of post-processor names /// - `Err(...)` if the registry lock is poisoned /// /// # Example #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listPostProcessors")] pub fn list_post_processors() -> Result, JsValue> { let result = kreuzberg::list_post_processors().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List names of all registered renderers. /// /// # Errors /// /// Returns an error if the registry lock is poisoned. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listRenderers")] pub fn list_renderers() -> Result, JsValue> { let result = kreuzberg::list_renderers().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// List names of all registered validators. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "listValidators")] pub fn list_validators() -> Result, JsValue> { let result = kreuzberg::list_validators().map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } /// Detect the MIME type of a file at the given path. /// /// Uses the file extension and optionally the file content to determine the MIME type. /// Set `check_exists` to `true` to verify the file exists before detection. #[allow(clippy::missing_errors_doc)] #[wasm_bindgen(js_name = "detectMimeType")] pub fn detect_mime_type(path: String, check_exists: bool) -> Result { let result = kreuzberg::detect_mime_type(path, check_exists).map_err(|e| JsValue::from_str(&e.to_string()))?; Ok(result) } #[cfg(target_arch = "wasm32")] mod __alef_wasm_bridge_ocrbackend { use super::*; /// Wrapper that bridges a foreign Wasm object to the `OcrBackend` trait. pub struct WasmOcrBackendBridge { inner: wasm_bindgen::JsValue, cached_name: String, } impl std::fmt::Debug for WasmOcrBackendBridge { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "WasmOcrBackendBridge") } } impl WasmOcrBackendBridge { /// Create a new bridge wrapping a JS object. 
/// /// Validates that the JS object provides all required methods. pub fn new(js_obj: wasm_bindgen::JsValue) -> Result { if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("processImage")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "process_image")); } if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("supportsLanguage")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "supports_language")); } if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("backendType")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "backend_type")); } let cached_name = { let key = wasm_bindgen::JsValue::from_str("name"); js_sys::Reflect::get(&js_obj, &key) .ok() .and_then(|v| v.dyn_into::().ok()) .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok()) .and_then(|v| v.as_string()) .unwrap_or_else(|| "wasm_bridge".to_string()) }; Ok(Self { inner: js_obj, cached_name, }) } } impl kreuzberg::plugins::Plugin for WasmOcrBackendBridge { fn name(&self) -> &str { &self.cached_name } fn version(&self) -> String { let key = wasm_bindgen::JsValue::from_str("version"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result result.as_string().unwrap_or_default() } fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("initialize"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if 
!has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "initialize" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?; let func: js_sys::Function = func_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize")) })?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?; // Convert result Ok(()) } fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("shutdown"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "shutdown" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?; // Convert result Ok(()) } } #[async_trait::async_trait(?Send)] impl kreuzberg::OcrBackend for WasmOcrBackendBridge { async fn process_image( &self, image_bytes: &[u8], config: &kreuzberg::OcrConfig, ) -> std::result::Result { let key = wasm_bindgen::JsValue::from_str("processImage"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on 
JS object", "process_image" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "process_image")))?; let func: js_sys::Function = func_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "process_image")) })?; let args = js_sys::Array::new(); args.push(&js_sys::Uint8Array::from(image_bytes).into()); args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL)); let promise_val = func.apply(&self.inner, &args).map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "process_image")) })?; let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "process_image")) })?; let result = wasm_bindgen_futures::JsFuture::from(promise) .await .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?; // Convert result result .as_string() .ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string())) .and_then(|s| { serde_json::from_str::(&s) .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))) }) } fn supports_language(&self, lang: &str) -> bool { let key = wasm_bindgen::JsValue::from_str("supportsLanguage"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); args.push(&wasm_bindgen::JsValue::from_str(lang)); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result // 
Convert JS boolean to Rust bool result.as_bool().unwrap_or_default() } fn backend_type(&self) -> kreuzberg::OcrBackendType { let key = wasm_bindgen::JsValue::from_str("backendType"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result // Convert bare enum string (non-JSON) to kreuzberg::OcrBackendType result .as_string() .and_then(|s| { serde_json::from_str::(&format!("\"{}\"", s)) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))) }) .unwrap_or_default() } } #[wasm_bindgen(js_name = "registerOcrBackend")] pub fn register_ocr_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> { let required_methods = vec!["processImage", "supportsLanguage", "backendType"]; for method_name in required_methods { if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) { return Err(wasm_bindgen::JsValue::from_str(&format!( "Backend missing required method: {}", method_name ))); } } let wrapper = WasmOcrBackendBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?; let arc: std::sync::Arc = std::sync::Arc::new(wrapper); let registry = kreuzberg::plugins::registry::get_ocr_backend_registry(); let mut registry = registry.write(); registry .register(arc) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "unregisterOcrBackend")] pub fn unregister_ocr_backend(name: String) -> Result<(), wasm_bindgen::JsValue> { 
kreuzberg::plugins::ocr_backend::unregister_ocr_backend(&name) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "clearOcrBackends")] pub fn clear_ocr_backends() -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::ocr_backend::clear_ocr_backends() .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } } #[cfg(target_arch = "wasm32")] pub use __alef_wasm_bridge_ocrbackend::*; #[cfg(target_arch = "wasm32")] mod __alef_wasm_bridge_postprocessor { use super::*; /// Wrapper that bridges a foreign Wasm object to the `PostProcessor` trait. pub struct WasmPostProcessorBridge { inner: wasm_bindgen::JsValue, cached_name: String, } impl std::fmt::Debug for WasmPostProcessorBridge { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "WasmPostProcessorBridge") } } impl WasmPostProcessorBridge { /// Create a new bridge wrapping a JS object. /// /// Validates that the JS object provides all required methods. pub fn new(js_obj: wasm_bindgen::JsValue) -> Result { if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("process")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "process")); } if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("processingStage")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "processing_stage")); } let cached_name = { let key = wasm_bindgen::JsValue::from_str("name"); js_sys::Reflect::get(&js_obj, &key) .ok() .and_then(|v| v.dyn_into::().ok()) .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok()) .and_then(|v| v.as_string()) .unwrap_or_else(|| "wasm_bridge".to_string()) }; Ok(Self { inner: js_obj, cached_name, }) } } impl kreuzberg::plugins::Plugin for WasmPostProcessorBridge { fn name(&self) -> &str { &self.cached_name } fn version(&self) -> String { let key = wasm_bindgen::JsValue::from_str("version"); let has_method = js_sys::Reflect::has(&self.inner, 
&key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result result.as_string().unwrap_or_default() } fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("initialize"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "initialize" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?; let func: js_sys::Function = func_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize")) })?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?; // Convert result Ok(()) } fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("shutdown"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "shutdown" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| 
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?; // Convert result Ok(()) } } #[async_trait::async_trait(?Send)] impl kreuzberg::PostProcessor for WasmPostProcessorBridge { async fn process( &self, result: &mut kreuzberg::ExtractionResult, config: &kreuzberg::ExtractionConfig, ) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("process"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "process" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "process")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "process")))?; let args = js_sys::Array::new(); args.push(&serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL)); args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL)); let promise_val = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "process")))?; let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "process")) })?; let result = wasm_bindgen_futures::JsFuture::from(promise) .await .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?; // Convert result Ok(()) } fn processing_stage(&self) -> kreuzberg::ProcessingStage { let key = wasm_bindgen::JsValue::from_str("processingStage"); let has_method = 
js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result // Convert bare enum string (non-JSON) to kreuzberg::ProcessingStage result .as_string() .and_then(|s| { serde_json::from_str::(&format!("\"{}\"", s)) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))) }) .unwrap_or_default() } } #[wasm_bindgen(js_name = "registerPostProcessor")] pub fn register_post_processor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> { let required_methods = vec!["process", "processingStage"]; for method_name in required_methods { if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) { return Err(wasm_bindgen::JsValue::from_str(&format!( "Backend missing required method: {}", method_name ))); } } let wrapper = WasmPostProcessorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?; let arc: std::sync::Arc = std::sync::Arc::new(wrapper); let registry = kreuzberg::plugins::registry::get_post_processor_registry(); let mut registry = registry.write(); registry .register(arc) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "unregisterPostProcessor")] pub fn unregister_post_processor(name: String) -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::post_processor::unregister_post_processor(&name) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "clearPostProcessors")] pub fn clear_post_processors() -> Result<(), 
wasm_bindgen::JsValue> { kreuzberg::plugins::post_processor::clear_post_processors() .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } } #[cfg(target_arch = "wasm32")] pub use __alef_wasm_bridge_postprocessor::*; #[cfg(target_arch = "wasm32")] mod __alef_wasm_bridge_validator { use super::*; /// Wrapper that bridges a foreign Wasm object to the `Validator` trait. pub struct WasmValidatorBridge { inner: wasm_bindgen::JsValue, cached_name: String, } impl std::fmt::Debug for WasmValidatorBridge { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "WasmValidatorBridge") } } impl WasmValidatorBridge { /// Create a new bridge wrapping a JS object. /// /// Validates that the JS object provides all required methods. pub fn new(js_obj: wasm_bindgen::JsValue) -> Result { if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("validate")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "validate")); } let cached_name = { let key = wasm_bindgen::JsValue::from_str("name"); js_sys::Reflect::get(&js_obj, &key) .ok() .and_then(|v| v.dyn_into::().ok()) .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok()) .and_then(|v| v.as_string()) .unwrap_or_else(|| "wasm_bridge".to_string()) }; Ok(Self { inner: js_obj, cached_name, }) } } impl kreuzberg::plugins::Plugin for WasmValidatorBridge { fn name(&self) -> &str { &self.cached_name } fn version(&self) -> String { let key = wasm_bindgen::JsValue::from_str("version"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => 
return Default::default(), }; // Convert result result.as_string().unwrap_or_default() } fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("initialize"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "initialize" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?; let func: js_sys::Function = func_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize")) })?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?; // Convert result Ok(()) } fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("shutdown"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "shutdown" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?; // Convert result Ok(()) } } #[async_trait::async_trait(?Send)] impl kreuzberg::Validator for WasmValidatorBridge { async fn validate( &self, result: 
&kreuzberg::ExtractionResult, config: &kreuzberg::ExtractionConfig, ) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("validate"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "validate" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "validate")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "validate")))?; let args = js_sys::Array::new(); args.push(&serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL)); args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL)); let promise_val = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "validate")))?; let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "validate")) })?; let result = wasm_bindgen_futures::JsFuture::from(promise) .await .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?; // Convert result Ok(()) } } #[wasm_bindgen(js_name = "registerValidator")] pub fn register_validator(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> { let required_methods = vec!["validate"]; for method_name in required_methods { if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) { return Err(wasm_bindgen::JsValue::from_str(&format!( "Backend missing required method: {}", method_name ))); } } let wrapper = WasmValidatorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?; let arc: std::sync::Arc = std::sync::Arc::new(wrapper); let 
registry = kreuzberg::plugins::registry::get_validator_registry(); let mut registry = registry.write(); registry .register(arc) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "unregisterValidator")] pub fn unregister_validator(name: String) -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::validator::unregister_validator(&name) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "clearValidators")] pub fn clear_validators() -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::validator::clear_validators().map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } } #[cfg(target_arch = "wasm32")] pub use __alef_wasm_bridge_validator::*; #[cfg(target_arch = "wasm32")] mod __alef_wasm_bridge_embeddingbackend { use super::*; /// Wrapper that bridges a foreign Wasm object to the `EmbeddingBackend` trait. pub struct WasmEmbeddingBackendBridge { inner: wasm_bindgen::JsValue, cached_name: String, } impl std::fmt::Debug for WasmEmbeddingBackendBridge { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "WasmEmbeddingBackendBridge") } } impl WasmEmbeddingBackendBridge { /// Create a new bridge wrapping a JS object. /// /// Validates that the JS object provides all required methods. 
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
            // Only `dimensions` and `embed` are mandatory; every other hook is optional.
            for required in &["dimensions", "embed"] {
                let key = wasm_bindgen::JsValue::from_str(required);
                if !js_sys::Reflect::has(&js_obj, &key).unwrap_or(false) {
                    return Err(format!("JS object missing required method: {}", required));
                }
            }

            // `Plugin::name` returns `&str`, so the JS-provided name is read once and stored.
            // Any failure along the way falls back to the placeholder "wasm_bridge".
            let cached_name = {
                let key = wasm_bindgen::JsValue::from_str("name");
                js_sys::Reflect::get(&js_obj, &key)
                    .ok()
                    .and_then(|v| v.dyn_into::<js_sys::Function>().ok())
                    .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
                    .and_then(|v| v.as_string())
                    .unwrap_or_else(|| "wasm_bridge".to_string())
            };

            Ok(Self {
                inner: js_obj,
                cached_name,
            })
        }

        /// Looks up `method` on the wrapped JS object and calls it with `args`,
        /// passing the JS object itself as `this`.
        ///
        /// # Errors
        ///
        /// Returns `KreuzbergError::Other` when the property is absent, cannot be read,
        /// is not a function, or the call throws.
        fn invoke_js(
            &self,
            method: &str,
            args: &js_sys::Array,
        ) -> std::result::Result<wasm_bindgen::JsValue, kreuzberg::KreuzbergError> {
            let key = wasm_bindgen::JsValue::from_str(method);
            if !js_sys::Reflect::has(&self.inner, &key).unwrap_or(false) {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Method '{}' not found on JS object",
                    method
                )));
            }
            let func = js_sys::Reflect::get(&self.inner, &key)
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", method)))?
                .dyn_into::<js_sys::Function>()
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", method)))?;
            func.apply(&self.inner, args)
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", method)))
        }

        /// Calls a zero-argument lifecycle hook if the JS object defines it.
        ///
        /// `new` only requires `dimensions` and `embed`, so an absent hook is a no-op
        /// rather than an error. A hook that exists but is not callable, or that throws,
        /// is still reported.
        fn invoke_optional_hook(&self, method: &str) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            let key = wasm_bindgen::JsValue::from_str(method);
            if !js_sys::Reflect::has(&self.inner, &key).unwrap_or(false) {
                return Ok(());
            }
            // The hook's return value is intentionally discarded.
            self.invoke_js(method, &js_sys::Array::new()).map(|_| ())
        }
    }

    impl kreuzberg::plugins::Plugin for WasmEmbeddingBackendBridge {
        /// Returns the name captured at construction time.
        fn name(&self) -> &str {
            &self.cached_name
        }

        /// Returns the string produced by the JS `version()` method, or an empty
        /// string when the method is missing, fails, or returns a non-string.
        fn version(&self) -> String {
            self.invoke_js("version", &js_sys::Array::new())
                .ok()
                .and_then(|value| value.as_string())
                .unwrap_or_default()
        }

        /// Runs the JS `initialize()` hook when present.
        fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            self.invoke_optional_hook("initialize")
        }

        /// Runs the JS `shutdown()` hook when present.
        fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            self.invoke_optional_hook("shutdown")
        }
    }

    #[async_trait::async_trait(?Send)]
    impl kreuzberg::EmbeddingBackend for WasmEmbeddingBackendBridge {
        /// Returns the value reported by the JS `dimensions()` method.
        ///
        /// Accepts either a JS number or a string containing a JSON integer.
        /// Any failure (call error, NaN, negative or fractional number, unparsable
        /// string) yields `0`.
        fn dimensions(&self) -> usize {
            let value = match self.invoke_js("dimensions", &js_sys::Array::new()) {
                Ok(value) => value,
                Err(_) => return 0,
            };

            // `JsValue::as_string` is `None` for JS numbers, so the numeric case
            // has to be handled explicitly.
            if let Some(number) = value.as_f64() {
                let is_whole = number.is_finite() && number >= 0.0 && number.fract() == 0.0;
                return if is_whole { number as usize } else { 0 };
            }

            value
                .as_string()
                .and_then(|json| serde_json::from_str::<usize>(&json).ok())
                .unwrap_or_default()
        }

        /// Calls the JS `embed(texts)` method and awaits the returned Promise.
        ///
        /// The resolved value may be a JSON string or a JS array of number arrays.
        ///
        /// # Errors
        ///
        /// Returns `KreuzbergError::Other` when the method is missing or not callable,
        /// does not return a Promise, the Promise rejects, or the resolved value
        /// cannot be converted.
        async fn embed(
            &self,
            texts: Vec<String>,
        ) -> std::result::Result<Vec<Vec<f32>>, kreuzberg::KreuzbergError> {
            let args = js_sys::Array::new();
            args.push(&serde_wasm_bindgen::to_value(&texts).unwrap_or(wasm_bindgen::JsValue::NULL));

            let promise: js_sys::Promise = self.invoke_js("embed", &args)?.dyn_into().map_err(|_| {
                kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "embed"))
            })?;
            let result = wasm_bindgen_futures::JsFuture::from(promise)
                .await
                .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;

            match result.as_string() {
                Some(json) => serde_json::from_str::<Vec<Vec<f32>>>(&json)
                    .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))),
                // Not a string: try to read the JS value structurally.
                None => serde_wasm_bindgen::from_value::<Vec<Vec<f32>>>(result)
                    .map_err(|_| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string())),
            }
        }
    }

    /// Wraps `backend` in a `WasmEmbeddingBackendBridge` and registers it with the
    /// embedding backend registry.
    ///
    /// # Errors
    ///
    /// Returns a string `JsValue` when `dimensions` or `embed` is missing, or when
    /// the registry rejects the backend.
    #[wasm_bindgen(js_name = "registerEmbeddingBackend")]
    pub fn register_embedding_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
        for method_name in &["dimensions", "embed"] {
            if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
                return Err(wasm_bindgen::JsValue::from_str(&format!(
                    "Backend missing required method: {}",
                    method_name
                )));
            }
        }

        let wrapper = WasmEmbeddingBackendBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
        let arc: std::sync::Arc<dyn kreuzberg::EmbeddingBackend> = std::sync::Arc::new(wrapper);
        let registry = kreuzberg::plugins::registry::get_embedding_backend_registry();
        let mut registry = registry.write();
        registry
            .register(arc)
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }

    /// Forwards to `kreuzberg::plugins::embedding_backend::unregister_embedding_backend` for `name`.
    #[wasm_bindgen(js_name = "unregisterEmbeddingBackend")]
    pub fn unregister_embedding_backend(name: String) -> Result<(), wasm_bindgen::JsValue> {
        kreuzberg::plugins::embedding_backend::unregister_embedding_backend(&name)
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }

    /// Forwards to `kreuzberg::plugins::embedding_backend::clear_embedding_backends`.
    #[wasm_bindgen(js_name = "clearEmbeddingBackends")]
    pub fn clear_embedding_backends() -> Result<(), wasm_bindgen::JsValue> {
        kreuzberg::plugins::embedding_backend::clear_embedding_backends()
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }
}

#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_embeddingbackend::*;

#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_documentextractor {
    use super::*;

    /// Wrapper that bridges a foreign Wasm object to the `DocumentExtractor` trait.
    pub struct WasmDocumentExtractorBridge {
        inner: wasm_bindgen::JsValue,
        cached_name: String,
    }

    impl std::fmt::Debug for WasmDocumentExtractorBridge {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "WasmDocumentExtractorBridge")
        }
    }

    impl WasmDocumentExtractorBridge {
        /// Create a new bridge wrapping a JS object.
        ///
        /// Validates that the JS object provides all required methods.
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
            // Report the JS-facing property name, since that is what the caller must provide.
            for required in &["extractBytes", "supportedMimeTypes"] {
                let key = wasm_bindgen::JsValue::from_str(required);
                if !js_sys::Reflect::has(&js_obj, &key).unwrap_or(false) {
                    return Err(format!("JS object missing required method: {}", required));
                }
            }

            // `Plugin::name` returns `&str`, so the JS-provided name is read once and stored.
            // Any failure along the way falls back to the placeholder "wasm_bridge".
            let cached_name = {
                let key = wasm_bindgen::JsValue::from_str("name");
                js_sys::Reflect::get(&js_obj, &key)
                    .ok()
                    .and_then(|v| v.dyn_into::<js_sys::Function>().ok())
                    .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
                    .and_then(|v| v.as_string())
                    .unwrap_or_else(|| "wasm_bridge".to_string())
            };

            Ok(Self {
                inner: js_obj,
                cached_name,
            })
        }

        /// Looks up `method` (the JS property name) on the wrapped object and calls it
        /// with `args`, passing the JS object itself as `this`.
        ///
        /// # Errors
        ///
        /// Returns `KreuzbergError::Other` when the property is absent, cannot be read,
        /// is not a function, or the call throws.
        fn invoke_js(
            &self,
            method: &str,
            args: &js_sys::Array,
        ) -> std::result::Result<wasm_bindgen::JsValue, kreuzberg::KreuzbergError> {
            let key = wasm_bindgen::JsValue::from_str(method);
            if !js_sys::Reflect::has(&self.inner, &key).unwrap_or(false) {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Method '{}' not found on JS object",
                    method
                )));
            }
            let func = js_sys::Reflect::get(&self.inner, &key)
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", method)))?
                .dyn_into::<js_sys::Function>()
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", method)))?;
            func.apply(&self.inner, args)
                .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", method)))
        }

        /// Calls a zero-argument lifecycle hook if the JS object defines it.
        ///
        /// `new` only requires `extractBytes` and `supportedMimeTypes`, so an absent
        /// hook is a no-op rather than an error. A hook that exists but is not
        /// callable, or that throws, is still reported.
        fn invoke_optional_hook(&self, method: &str) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            let key = wasm_bindgen::JsValue::from_str(method);
            if !js_sys::Reflect::has(&self.inner, &key).unwrap_or(false) {
                return Ok(());
            }
            // The hook's return value is intentionally discarded.
            self.invoke_js(method, &js_sys::Array::new()).map(|_| ())
        }
    }

    impl kreuzberg::plugins::Plugin for WasmDocumentExtractorBridge {
        /// Returns the name captured at construction time.
        fn name(&self) -> &str {
            &self.cached_name
        }

        /// Returns the string produced by the JS `version()` method, or an empty
        /// string when the method is missing, fails, or returns a non-string.
        fn version(&self) -> String {
            self.invoke_js("version", &js_sys::Array::new())
                .ok()
                .and_then(|value| value.as_string())
                .unwrap_or_default()
        }

        /// Runs the JS `initialize()` hook when present.
        fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            self.invoke_optional_hook("initialize")
        }

        /// Runs the JS `shutdown()` hook when present.
        fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
            self.invoke_optional_hook("shutdown")
        }
    }

    #[async_trait::async_trait(?Send)]
    impl kreuzberg::DocumentExtractor for WasmDocumentExtractorBridge {
        /// Calls the JS `extractBytes(content, mimeType, config)` method and awaits
        /// the returned Promise.
        ///
        /// `content` is passed as a `Uint8Array`, `mime_type` as a string and `config`
        /// via `serde_wasm_bindgen` (or `null` if it cannot be serialized). The
        /// resolved value may be a JSON string or a structured JS value.
        ///
        /// # Errors
        ///
        /// Returns `KreuzbergError::Other` when the method is missing or not callable,
        /// does not return a Promise, the Promise rejects, or the resolved value
        /// cannot be converted.
        // NOTE(review): the `Ok` type is not visible in the reviewed source;
        // `kreuzberg::InternalDocument` is assumed here — confirm against the
        // `DocumentExtractor` trait definition.
        async fn extract_bytes(
            &self,
            content: &[u8],
            mime_type: &str,
            config: &kreuzberg::ExtractionConfig,
        ) -> std::result::Result<kreuzberg::InternalDocument, kreuzberg::KreuzbergError> {
            let args = js_sys::Array::new();
            args.push(&js_sys::Uint8Array::from(content).into());
            args.push(&wasm_bindgen::JsValue::from_str(mime_type));
            args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));

            let promise: js_sys::Promise = self.invoke_js("extractBytes", &args)?.dyn_into().map_err(|_| {
                kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "extractBytes"))
            })?;
            let result = wasm_bindgen_futures::JsFuture::from(promise)
                .await
                .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;

            match result.as_string() {
                Some(json) => serde_json::from_str(&json)
                    .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))),
                // Not a string: try to read the JS value structurally.
                None => serde_wasm_bindgen::from_value(result)
                    .map_err(|_| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string())),
            }
        }

        /// Returns the MIME types reported by the JS `supportedMimeTypes()` method.
        ///
        /// The JS method may return a JSON string or a JS array of strings. Any
        /// failure yields an empty slice.
        fn supported_mime_types(&self) -> &[&str] {
            thread_local! {
                // The trait hands out borrowed `&str`s, so the strings must be leaked.
                // Remembering each distinct list means a list is leaked once, not on
                // every call.
                static LEAKED_MIME_LISTS: std::cell::RefCell<
                    std::collections::HashMap<Vec<String>, &'static [&'static str]>,
                > = std::cell::RefCell::new(std::collections::HashMap::new());
            }

            let reported: Vec<String> = self
                .invoke_js("supportedMimeTypes", &js_sys::Array::new())
                .ok()
                .and_then(|value| match value.as_string() {
                    Some(json) => serde_json::from_str::<Vec<String>>(&json).ok(),
                    None => serde_wasm_bindgen::from_value::<Vec<String>>(value).ok(),
                })
                .unwrap_or_default();
            if reported.is_empty() {
                return &[];
            }

            LEAKED_MIME_LISTS.with(|lists| {
                let mut lists = lists.borrow_mut();
                if let Some(known) = lists.get(&reported) {
                    return *known;
                }
                let leaked: &'static [&'static str] = Box::leak(
                    reported
                        .iter()
                        .map(|mime| -> &'static str { Box::leak(mime.clone().into_boxed_str()) })
                        .collect::<Vec<&'static str>>()
                        .into_boxed_slice(),
                );
                lists.insert(reported, leaked);
                leaked
            })
        }
    }

    /// Wraps `backend` in a `WasmDocumentExtractorBridge` and registers it with the
    /// document extractor registry.
    ///
    /// # Errors
    ///
    /// Returns a string `JsValue` when `extractBytes` or `supportedMimeTypes` is
    /// missing, or when the registry rejects the extractor.
    #[wasm_bindgen(js_name = "registerDocumentExtractor")]
    pub fn register_document_extractor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
        for method_name in &["extractBytes", "supportedMimeTypes"] {
            if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
                return Err(wasm_bindgen::JsValue::from_str(&format!(
                    "Backend missing required method: {}",
                    method_name
                )));
            }
        }

        let wrapper = WasmDocumentExtractorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
        let arc: std::sync::Arc<dyn kreuzberg::DocumentExtractor> = std::sync::Arc::new(wrapper);
        let registry = kreuzberg::plugins::registry::get_document_extractor_registry();
        let mut registry = registry.write();
        registry
            .register(arc)
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }

    /// Forwards to `kreuzberg::plugins::document_extractor::unregister_document_extractor` for `name`.
    #[wasm_bindgen(js_name = "unregisterDocumentExtractor")]
    pub fn unregister_document_extractor(name: String) -> Result<(), wasm_bindgen::JsValue> {
        kreuzberg::plugins::document_extractor::unregister_document_extractor(&name)
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }

    /// Forwards to `kreuzberg::plugins::document_extractor::clear_document_extractors`.
    #[wasm_bindgen(js_name = "clearDocumentExtractors")]
    pub fn clear_document_extractors() -> Result<(), wasm_bindgen::JsValue> {
        kreuzberg::plugins::document_extractor::clear_document_extractors()
            .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
    }
}

#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_documentextractor::*;

#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_renderer {
    use super::*;

    /// Wrapper that bridges a foreign Wasm object to the `Renderer` trait.
pub struct WasmRendererBridge { inner: wasm_bindgen::JsValue, cached_name: String, } impl std::fmt::Debug for WasmRendererBridge { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "WasmRendererBridge") } } impl WasmRendererBridge { /// Create a new bridge wrapping a JS object. /// /// Validates that the JS object provides all required methods. pub fn new(js_obj: wasm_bindgen::JsValue) -> Result { if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("render")).unwrap_or(false) { return Err(format!("JS object missing required method: {}", "render")); } let cached_name = { let key = wasm_bindgen::JsValue::from_str("name"); js_sys::Reflect::get(&js_obj, &key) .ok() .and_then(|v| v.dyn_into::().ok()) .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok()) .and_then(|v| v.as_string()) .unwrap_or_else(|| "wasm_bridge".to_string()) }; Ok(Self { inner: js_obj, cached_name, }) } } impl kreuzberg::plugins::Plugin for WasmRendererBridge { fn name(&self) -> &str { &self.cached_name } fn version(&self) -> String { let key = wasm_bindgen::JsValue::from_str("version"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Default::default(); } let func_val = match js_sys::Reflect::get(&self.inner, &key) { Ok(f) => f, Err(_) => return Default::default(), }; let func: js_sys::Function = match func_val.dyn_into() { Ok(f) => f, Err(_) => return Default::default(), }; // Build args array let args = js_sys::Array::new(); // Call the function let result = match func.apply(&self.inner, &args) { Ok(r) => r, Err(_) => return Default::default(), }; // Convert result result.as_string().unwrap_or_default() } fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("initialize"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not 
found on JS object", "initialize" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?; let func: js_sys::Function = func_val.dyn_into().map_err(|_| { kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize")) })?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?; // Convert result Ok(()) } fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> { let key = wasm_bindgen::JsValue::from_str("shutdown"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "shutdown" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?; // Build args array let args = js_sys::Array::new(); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?; // Convert result Ok(()) } } impl kreuzberg::Renderer for WasmRendererBridge { fn render(&self, doc: &kreuzberg::InternalDocument) -> std::result::Result { let key = wasm_bindgen::JsValue::from_str("render"); let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false); if !has_method { return Err(kreuzberg::KreuzbergError::Other(format!( "Method '{}' not found on JS object", "render" ))); } let func_val = js_sys::Reflect::get(&self.inner, &key) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method 
'{}'", "render")))?; let func: js_sys::Function = func_val .dyn_into() .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "render")))?; // Build args array let args = js_sys::Array::new(); args.push(&serde_wasm_bindgen::to_value(doc).unwrap_or(wasm_bindgen::JsValue::NULL)); // Call the function let result = func .apply(&self.inner, &args) .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "render")))?; // Convert result result .as_string() .ok_or_else(|| kreuzberg::KreuzbergError::Other("Expected string return".to_string())) } } #[wasm_bindgen(js_name = "registerRenderer")] pub fn register_renderer(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> { let required_methods = vec!["render"]; for method_name in required_methods { if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) { return Err(wasm_bindgen::JsValue::from_str(&format!( "Backend missing required method: {}", method_name ))); } } let wrapper = WasmRendererBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?; let arc: std::sync::Arc = std::sync::Arc::new(wrapper); let registry = kreuzberg::plugins::registry::get_renderer_registry(); let mut registry = registry.write(); registry .register(arc) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "unregisterRenderer")] pub fn unregister_renderer(name: String) -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::renderer::unregister_renderer(&name) .map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } #[wasm_bindgen(js_name = "clearRenderers")] pub fn clear_renderers() -> Result<(), wasm_bindgen::JsValue> { kreuzberg::plugins::renderer::clear_renderers().map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string())) } } #[cfg(target_arch = "wasm32")] pub use __alef_wasm_bridge_renderer::*; #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl 
From for WasmCacheStats { fn from(val: kreuzberg::CacheStats) -> Self { Self { total_files: val.total_files, total_size_mb: val.total_size_mb, available_space_mb: val.available_space_mb, oldest_file_age_days: val.oldest_file_age_days, newest_file_age_days: val.newest_file_age_days, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::AccelerationConfig { fn from(val: WasmAccelerationConfig) -> Self { Self { provider: val.provider.into(), device_id: val.device_id, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmAccelerationConfig { fn from(val: kreuzberg::AccelerationConfig) -> Self { Self { provider: val.provider.into(), device_id: val.device_id, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ContentFilterConfig { fn from(val: WasmContentFilterConfig) -> Self { Self { include_headers: val.include_headers, include_footers: val.include_footers, strip_repeating_text: val.strip_repeating_text, include_watermarks: val.include_watermarks, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmContentFilterConfig { fn from(val: kreuzberg::ContentFilterConfig) -> Self { Self { include_headers: val.include_headers, include_footers: val.include_footers, strip_repeating_text: val.strip_repeating_text, include_watermarks: val.include_watermarks, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::EmailConfig { fn from(val: WasmEmailConfig) -> Self { Self { msg_fallback_codepage: val.msg_fallback_codepage, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmEmailConfig { fn from(val: kreuzberg::EmailConfig) -> Self { Self { msg_fallback_codepage: val.msg_fallback_codepage, } } } #[allow(clippy::needless_update)] #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ExtractionConfig { fn from(val: 
WasmExtractionConfig) -> Self { Self { use_cache: val.use_cache, enable_quality_processing: val.enable_quality_processing, ocr: val.ocr.map(Into::into), force_ocr: val.force_ocr, force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()), disable_ocr: val.disable_ocr, chunking: val.chunking.map(Into::into), content_filter: val.content_filter.map(Into::into), images: val.images.map(Into::into), token_reduction: val.token_reduction.map(Into::into), language_detection: val.language_detection.map(Into::into), pages: val.pages.map(Into::into), postprocessor: val.postprocessor.map(Into::into), html_options: Default::default(), extraction_timeout_secs: val.extraction_timeout_secs, max_concurrent_extractions: val.max_concurrent_extractions, result_format: val.result_format.into(), security_limits: val.security_limits.map(Into::into), max_embedded_file_bytes: val.max_embedded_file_bytes, output_format: val.output_format.into(), use_layout_for_markdown: val.use_layout_for_markdown, include_document_structure: val.include_document_structure, acceleration: val.acceleration.map(Into::into), cache_namespace: val.cache_namespace, cache_ttl_secs: val.cache_ttl_secs, email: val.email.map(Into::into), concurrency: Default::default(), max_archive_depth: val.max_archive_depth, structured_extraction: val.structured_extraction.map(Into::into), cancel_token: Default::default(), ..Default::default() } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExtractionConfig { fn from(val: kreuzberg::ExtractionConfig) -> Self { Self { use_cache: val.use_cache, enable_quality_processing: val.enable_quality_processing, ocr: val.ocr.map(Into::into), force_ocr: val.force_ocr, force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()), disable_ocr: val.disable_ocr, chunking: val.chunking.map(Into::into), content_filter: val.content_filter.map(Into::into), images: val.images.map(Into::into), token_reduction: val.token_reduction.map(Into::into), 
language_detection: val.language_detection.map(Into::into), pages: val.pages.map(Into::into), postprocessor: val.postprocessor.map(Into::into), html_options: val.html_options.as_ref().map(|v| format!("{v:?}")), extraction_timeout_secs: val.extraction_timeout_secs, max_concurrent_extractions: val.max_concurrent_extractions, result_format: val.result_format.into(), security_limits: val.security_limits.map(Into::into), max_embedded_file_bytes: val.max_embedded_file_bytes, output_format: val.output_format.into(), use_layout_for_markdown: val.use_layout_for_markdown, include_document_structure: val.include_document_structure, acceleration: val.acceleration.map(Into::into), cache_namespace: val.cache_namespace, cache_ttl_secs: val.cache_ttl_secs, email: val.email.map(Into::into), concurrency: val.concurrency.as_ref().map(|v| format!("{v:?}")), max_archive_depth: val.max_archive_depth, structured_extraction: val.structured_extraction.map(Into::into), cancel_token: val.cancel_token.as_ref().map(|v| format!("{v:?}")), } } } #[allow(clippy::needless_update)] #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::FileExtractionConfig { fn from(val: WasmFileExtractionConfig) -> Self { Self { enable_quality_processing: val.enable_quality_processing, ocr: val.ocr.map(Into::into), force_ocr: val.force_ocr, force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()), disable_ocr: val.disable_ocr, chunking: val.chunking.map(Into::into), content_filter: val.content_filter.map(Into::into), images: val.images.map(Into::into), token_reduction: val.token_reduction.map(Into::into), language_detection: val.language_detection.map(Into::into), pages: val.pages.map(Into::into), postprocessor: val.postprocessor.map(Into::into), html_options: Default::default(), result_format: val.result_format.map(Into::into), output_format: val.output_format.map(Into::into), include_document_structure: val.include_document_structure, timeout_secs: val.timeout_secs, 
structured_extraction: val.structured_extraction.map(Into::into), ..Default::default() } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmFileExtractionConfig { fn from(val: kreuzberg::FileExtractionConfig) -> Self { Self { enable_quality_processing: val.enable_quality_processing, ocr: val.ocr.map(Into::into), force_ocr: val.force_ocr, force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()), disable_ocr: val.disable_ocr, chunking: val.chunking.map(Into::into), content_filter: val.content_filter.map(Into::into), images: val.images.map(Into::into), token_reduction: val.token_reduction.map(Into::into), language_detection: val.language_detection.map(Into::into), pages: val.pages.map(Into::into), postprocessor: val.postprocessor.map(Into::into), html_options: val.html_options.as_ref().map(|v| format!("{v:?}")), result_format: val.result_format.map(Into::into), output_format: val.output_format.map(Into::into), include_document_structure: val.include_document_structure, timeout_secs: val.timeout_secs, structured_extraction: val.structured_extraction.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::BatchBytesItem { fn from(val: WasmBatchBytesItem) -> Self { Self { content: val.content.to_vec().into(), mime_type: val.mime_type, config: val.config.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmBatchBytesItem { fn from(val: kreuzberg::BatchBytesItem) -> Self { Self { content: val.content.to_vec().into(), mime_type: val.mime_type, config: val.config.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::BatchFileItem { fn from(val: WasmBatchFileItem) -> Self { Self { path: val.path.into(), config: val.config.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmBatchFileItem { fn from(val: 
kreuzberg::BatchFileItem) -> Self { Self { path: val.path.to_string_lossy().to_string(), config: val.config.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ImageExtractionConfig { fn from(val: WasmImageExtractionConfig) -> Self { Self { extract_images: val.extract_images, target_dpi: val.target_dpi, max_image_dimension: val.max_image_dimension, inject_placeholders: val.inject_placeholders, auto_adjust_dpi: val.auto_adjust_dpi, min_dpi: val.min_dpi, max_dpi: val.max_dpi, max_images_per_page: val.max_images_per_page, classify: val.classify, include_page_rasters: val.include_page_rasters, run_ocr_on_images: val.run_ocr_on_images, ocr_text_only: val.ocr_text_only, append_ocr_text: val.append_ocr_text, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmImageExtractionConfig { fn from(val: kreuzberg::ImageExtractionConfig) -> Self { Self { extract_images: val.extract_images, target_dpi: val.target_dpi, max_image_dimension: val.max_image_dimension, inject_placeholders: val.inject_placeholders, auto_adjust_dpi: val.auto_adjust_dpi, min_dpi: val.min_dpi, max_dpi: val.max_dpi, max_images_per_page: val.max_images_per_page, classify: val.classify, include_page_rasters: val.include_page_rasters, run_ocr_on_images: val.run_ocr_on_images, ocr_text_only: val.ocr_text_only, append_ocr_text: val.append_ocr_text, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::TokenReductionOptions { fn from(val: WasmTokenReductionOptions) -> Self { Self { mode: val.mode, preserve_important_words: val.preserve_important_words, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTokenReductionOptions { fn from(val: kreuzberg::TokenReductionOptions) -> Self { Self { mode: val.mode, preserve_important_words: val.preserve_important_words, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for 
kreuzberg::LanguageDetectionConfig { fn from(val: WasmLanguageDetectionConfig) -> Self { Self { enabled: val.enabled, min_confidence: val.min_confidence, detect_multiple: val.detect_multiple, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmLanguageDetectionConfig { fn from(val: kreuzberg::LanguageDetectionConfig) -> Self { Self { enabled: val.enabled, min_confidence: val.min_confidence, detect_multiple: val.detect_multiple, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::LlmConfig { fn from(val: WasmLlmConfig) -> Self { Self { model: val.model, api_key: val.api_key, base_url: val.base_url, timeout_secs: val.timeout_secs, max_retries: val.max_retries, temperature: val.temperature, max_tokens: val.max_tokens, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmLlmConfig { fn from(val: kreuzberg::LlmConfig) -> Self { Self { model: val.model, api_key: val.api_key, base_url: val.base_url, timeout_secs: val.timeout_secs, max_retries: val.max_retries, temperature: val.temperature, max_tokens: val.max_tokens, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::StructuredExtractionConfig { fn from(val: WasmStructuredExtractionConfig) -> Self { Self { schema: serde_wasm_bindgen::from_value(val.schema.clone()).unwrap_or_default(), schema_name: val.schema_name, schema_description: val.schema_description, strict: val.strict, prompt: val.prompt, llm: val.llm.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmStructuredExtractionConfig { fn from(val: kreuzberg::StructuredExtractionConfig) -> Self { Self { schema: serde_wasm_bindgen::to_value(&val.schema).unwrap_or(JsValue::NULL), schema_name: val.schema_name, schema_description: val.schema_description, strict: val.strict, prompt: val.prompt, llm: val.llm.into(), } } } #[allow(clippy::redundant_closure, 
clippy::useless_conversion)] impl From for kreuzberg::OcrQualityThresholds { fn from(val: WasmOcrQualityThresholds) -> Self { Self { min_total_non_whitespace: val.min_total_non_whitespace, min_non_whitespace_per_page: val.min_non_whitespace_per_page, min_meaningful_word_len: val.min_meaningful_word_len, min_meaningful_words: val.min_meaningful_words, min_alnum_ratio: val.min_alnum_ratio, min_garbage_chars: val.min_garbage_chars, max_fragmented_word_ratio: val.max_fragmented_word_ratio, critical_fragmented_word_ratio: val.critical_fragmented_word_ratio, min_avg_word_length: val.min_avg_word_length, min_words_for_avg_length_check: val.min_words_for_avg_length_check, min_consecutive_repeat_ratio: val.min_consecutive_repeat_ratio, min_words_for_repeat_check: val.min_words_for_repeat_check, substantive_min_chars: val.substantive_min_chars, non_text_min_chars: val.non_text_min_chars, alnum_ws_ratio_threshold: val.alnum_ws_ratio_threshold, pipeline_min_quality: val.pipeline_min_quality, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrQualityThresholds { fn from(val: kreuzberg::OcrQualityThresholds) -> Self { Self { min_total_non_whitespace: val.min_total_non_whitespace, min_non_whitespace_per_page: val.min_non_whitespace_per_page, min_meaningful_word_len: val.min_meaningful_word_len, min_meaningful_words: val.min_meaningful_words, min_alnum_ratio: val.min_alnum_ratio, min_garbage_chars: val.min_garbage_chars, max_fragmented_word_ratio: val.max_fragmented_word_ratio, critical_fragmented_word_ratio: val.critical_fragmented_word_ratio, min_avg_word_length: val.min_avg_word_length, min_words_for_avg_length_check: val.min_words_for_avg_length_check, min_consecutive_repeat_ratio: val.min_consecutive_repeat_ratio, min_words_for_repeat_check: val.min_words_for_repeat_check, substantive_min_chars: val.substantive_min_chars, non_text_min_chars: val.non_text_min_chars, alnum_ws_ratio_threshold: val.alnum_ws_ratio_threshold, 
pipeline_min_quality: val.pipeline_min_quality, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrPipelineStage { fn from(val: WasmOcrPipelineStage) -> Self { Self { backend: val.backend, priority: val.priority, language: val.language, tesseract_config: val.tesseract_config.map(Into::into), paddle_ocr_config: val .paddle_ocr_config .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), vlm_config: val.vlm_config.map(Into::into), backend_options: val .backend_options .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrPipelineStage { fn from(val: kreuzberg::OcrPipelineStage) -> Self { Self { backend: val.backend, priority: val.priority, language: val.language, tesseract_config: val.tesseract_config.map(Into::into), paddle_ocr_config: val .paddle_ocr_config .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), vlm_config: val.vlm_config.map(Into::into), backend_options: val .backend_options .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrPipelineConfig { fn from(val: WasmOcrPipelineConfig) -> Self { Self { stages: val.stages.into_iter().map(Into::into).collect(), quality_thresholds: val.quality_thresholds.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrPipelineConfig { fn from(val: kreuzberg::OcrPipelineConfig) -> Self { Self { stages: val.stages.into_iter().map(Into::into).collect(), quality_thresholds: val.quality_thresholds.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrConfig { fn from(val: WasmOcrConfig) -> Self { Self { enabled: val.enabled, backend: val.backend, language: val.language, tesseract_config: val.tesseract_config.map(Into::into), output_format: 
val.output_format.map(Into::into), paddle_ocr_config: val .paddle_ocr_config .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), backend_options: val .backend_options .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), element_config: val.element_config.map(Into::into), quality_thresholds: val.quality_thresholds.map(Into::into), pipeline: val.pipeline.map(Into::into), auto_rotate: val.auto_rotate, vlm_config: val.vlm_config.map(Into::into), vlm_prompt: val.vlm_prompt, acceleration: val.acceleration.map(Into::into), tessdata_bytes: val .tessdata_bytes .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrConfig { fn from(val: kreuzberg::OcrConfig) -> Self { Self { enabled: val.enabled, backend: val.backend, language: val.language, tesseract_config: val.tesseract_config.map(Into::into), output_format: val.output_format.map(Into::into), paddle_ocr_config: val .paddle_ocr_config .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), backend_options: val .backend_options .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), element_config: val.element_config.map(Into::into), quality_thresholds: val.quality_thresholds.map(Into::into), pipeline: val.pipeline.map(Into::into), auto_rotate: val.auto_rotate, vlm_config: val.vlm_config.map(Into::into), vlm_prompt: val.vlm_prompt, acceleration: val.acceleration.map(Into::into), tessdata_bytes: val .tessdata_bytes .as_ref() .and_then(|v| serde_json::to_string(v).ok()) .and_then(|s| js_sys::JSON::parse(&s).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::PageConfig { fn from(val: WasmPageConfig) -> Self { Self { extract_pages: val.extract_pages, insert_page_markers: val.insert_page_markers, marker_format: val.marker_format, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for 
WasmPageConfig { fn from(val: kreuzberg::PageConfig) -> Self { Self { extract_pages: val.extract_pages, insert_page_markers: val.insert_page_markers, marker_format: val.marker_format, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::PostProcessorConfig { fn from(val: WasmPostProcessorConfig) -> Self { Self { enabled: val.enabled, enabled_processors: val.enabled_processors.map(|v| v.into_iter().collect()), disabled_processors: val.disabled_processors.map(|v| v.into_iter().collect()), enabled_set: val.enabled_set.map(|v| v.into_iter().collect()), disabled_set: val.disabled_set.map(|v| v.into_iter().collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmPostProcessorConfig { fn from(val: kreuzberg::PostProcessorConfig) -> Self { Self { enabled: val.enabled, enabled_processors: val.enabled_processors.map(|v| v.into_iter().collect()), disabled_processors: val.disabled_processors.map(|v| v.into_iter().collect()), enabled_set: val.enabled_set.map(|v| v.into_iter().collect()), disabled_set: val.disabled_set.map(|v| v.into_iter().collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ChunkingConfig { fn from(val: WasmChunkingConfig) -> Self { Self { max_characters: val.max_characters, overlap: val.overlap, trim: val.trim, chunker_type: val.chunker_type.into(), embedding: val.embedding.map(Into::into), preset: val.preset, sizing: serde_wasm_bindgen::from_value(val.sizing.clone()).unwrap_or_default(), prepend_heading_context: val.prepend_heading_context, topic_threshold: val.topic_threshold, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmChunkingConfig { fn from(val: kreuzberg::ChunkingConfig) -> Self { Self { max_characters: val.max_characters, overlap: val.overlap, trim: val.trim, chunker_type: val.chunker_type.into(), embedding: val.embedding.map(Into::into), preset: val.preset, sizing: 
serde_wasm_bindgen::to_value(&val.sizing).unwrap_or(JsValue::NULL), prepend_heading_context: val.prepend_heading_context, topic_threshold: val.topic_threshold, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::EmbeddingConfig { fn from(val: WasmEmbeddingConfig) -> Self { Self { model: serde_wasm_bindgen::from_value(val.model.clone()).unwrap_or_default(), normalize: val.normalize, batch_size: val.batch_size, show_download_progress: val.show_download_progress, cache_dir: val.cache_dir.map(Into::into), acceleration: val.acceleration.map(Into::into), max_embed_duration_secs: val.max_embed_duration_secs, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmEmbeddingConfig { fn from(val: kreuzberg::EmbeddingConfig) -> Self { Self { model: serde_wasm_bindgen::to_value(&val.model).unwrap_or(JsValue::NULL), normalize: val.normalize, batch_size: val.batch_size, show_download_progress: val.show_download_progress, cache_dir: val.cache_dir.map(|p| p.to_string_lossy().to_string()), acceleration: val.acceleration.map(Into::into), max_embed_duration_secs: val.max_embed_duration_secs, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmSupportedFormat { fn from(val: kreuzberg::SupportedFormat) -> Self { Self { extension: val.extension, mime_type: val.mime_type, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmStructuredDataResult { fn from(val: kreuzberg::extraction::structured::StructuredDataResult) -> Self { Self { content: val.content, format: val.format.to_string(), metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default()) .unwrap_or(JsValue::NULL), text_fields: val.text_fields.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmXlsxAppProperties { fn from(val: kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties) -> 
Self { Self { application: val.application, app_version: val.app_version, doc_security: val.doc_security, scale_crop: val.scale_crop, links_up_to_date: val.links_up_to_date, shared_doc: val.shared_doc, hyperlinks_changed: val.hyperlinks_changed, company: val.company, worksheet_names: val.worksheet_names.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmPptxAppProperties { fn from(val: kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties) -> Self { Self { application: val.application, app_version: val.app_version, total_time: val.total_time, company: val.company, doc_security: val.doc_security, scale_crop: val.scale_crop, links_up_to_date: val.links_up_to_date, shared_doc: val.shared_doc, hyperlinks_changed: val.hyperlinks_changed, slides: val.slides, notes: val.notes, hidden_slides: val.hidden_slides, multimedia_clips: val.multimedia_clips, presentation_format: val.presentation_format, slide_titles: val.slide_titles.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::SecurityLimits { fn from(val: WasmSecurityLimits) -> Self { Self { max_archive_size: val.max_archive_size, max_compression_ratio: val.max_compression_ratio, max_files_in_archive: val.max_files_in_archive, max_nesting_depth: val.max_nesting_depth, max_entity_length: val.max_entity_length, max_content_size: val.max_content_size, max_iterations: val.max_iterations, max_xml_depth: val.max_xml_depth, max_table_cells: val.max_table_cells, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmSecurityLimits { fn from(val: kreuzberg::SecurityLimits) -> Self { Self { max_archive_size: val.max_archive_size, max_compression_ratio: val.max_compression_ratio, max_files_in_archive: val.max_files_in_archive, max_nesting_depth: val.max_nesting_depth, max_entity_length: val.max_entity_length, max_content_size: val.max_content_size, 
max_iterations: val.max_iterations, max_xml_depth: val.max_xml_depth, max_table_cells: val.max_table_cells, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::PdfAnnotation { fn from(val: WasmPdfAnnotation) -> Self { Self { annotation_type: val.annotation_type.into(), content: val.content, page_number: val.page_number, bounding_box: val.bounding_box.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmPdfAnnotation { fn from(val: kreuzberg::PdfAnnotation) -> Self { Self { annotation_type: val.annotation_type.into(), content: val.content, page_number: val.page_number, bounding_box: val.bounding_box.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DjotContent { fn from(val: WasmDjotContent) -> Self { Self { plain_text: val.plain_text, blocks: val.blocks.into_iter().map(Into::into).collect(), metadata: val.metadata.into(), tables: val.tables.into_iter().map(Into::into).collect(), images: val.images.into_iter().map(Into::into).collect(), links: val.links.into_iter().map(Into::into).collect(), footnotes: val.footnotes.into_iter().map(Into::into).collect(), attributes: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDjotContent { fn from(val: kreuzberg::DjotContent) -> Self { Self { plain_text: val.plain_text, blocks: val.blocks.into_iter().map(Into::into).collect(), metadata: val.metadata.into(), tables: val.tables.into_iter().map(Into::into).collect(), images: val.images.into_iter().map(Into::into).collect(), links: val.links.into_iter().map(Into::into).collect(), footnotes: val.footnotes.into_iter().map(Into::into).collect(), attributes: val.attributes.iter().map(|i| format!("{:?}", i)).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::FormattedBlock { fn from(val: WasmFormattedBlock) -> Self { 
Self { block_type: val.block_type.into(), level: val.level, inline_content: val.inline_content.into_iter().map(Into::into).collect(), attributes: Default::default(), language: val.language, code: val.code, children: val.children.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmFormattedBlock { fn from(val: kreuzberg::FormattedBlock) -> Self { Self { block_type: val.block_type.into(), level: val.level, inline_content: val.inline_content.into_iter().map(Into::into).collect(), attributes: val.attributes.as_ref().map(|v| format!("{v:?}")), language: val.language, code: val.code, children: val.children.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::InlineElement { fn from(val: WasmInlineElement) -> Self { Self { element_type: val.element_type.into(), content: val.content, attributes: Default::default(), metadata: val .metadata .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmInlineElement { fn from(val: kreuzberg::InlineElement) -> Self { Self { element_type: val.element_type.into(), content: val.content, attributes: val.attributes.as_ref().map(|v| format!("{v:?}")), metadata: val .metadata .as_ref() .and_then(|v| serde_json::to_string(v).ok()) .and_then(|s| js_sys::JSON::parse(&s).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DjotImage { fn from(val: WasmDjotImage) -> Self { Self { src: val.src, alt: val.alt, title: val.title, attributes: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDjotImage { fn from(val: kreuzberg::DjotImage) -> Self { Self { src: val.src, alt: val.alt, title: val.title, attributes: val.attributes.as_ref().map(|v| format!("{v:?}")), } } } 
#[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DjotLink { fn from(val: WasmDjotLink) -> Self { Self { url: val.url, text: val.text, title: val.title, attributes: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDjotLink { fn from(val: kreuzberg::DjotLink) -> Self { Self { url: val.url, text: val.text, title: val.title, attributes: val.attributes.as_ref().map(|v| format!("{v:?}")), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::Footnote { fn from(val: WasmFootnote) -> Self { Self { label: val.label, content: val.content.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmFootnote { fn from(val: kreuzberg::Footnote) -> Self { Self { label: val.label, content: val.content.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DocumentStructure { fn from(val: WasmDocumentStructure) -> Self { Self { nodes: val.nodes.into_iter().map(Into::into).collect(), source_format: val.source_format, relationships: val.relationships.into_iter().map(Into::into).collect(), node_types: val.node_types.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDocumentStructure { fn from(val: kreuzberg::DocumentStructure) -> Self { Self { nodes: val.nodes.into_iter().map(Into::into).collect(), source_format: val.source_format, relationships: val.relationships.into_iter().map(Into::into).collect(), node_types: val.node_types.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DocumentRelationship { fn from(val: WasmDocumentRelationship) -> Self { Self { source: kreuzberg::NodeIndex(val.source), target: kreuzberg::NodeIndex(val.target), kind: val.kind.into(), } } } 
#[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDocumentRelationship { fn from(val: kreuzberg::DocumentRelationship) -> Self { Self { source: val.source.0, target: val.target.0, kind: val.kind.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::DocumentNode { fn from(val: WasmDocumentNode) -> Self { Self { id: Default::default(), content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(), parent: (val.parent).map(kreuzberg::NodeIndex), children: (val.children.into_iter().collect::>()) .into_iter() .map(kreuzberg::NodeIndex) .collect(), content_layer: val.content_layer.into(), page: val.page, page_end: val.page_end, bbox: val.bbox.map(Into::into), annotations: val.annotations.into_iter().map(Into::into).collect(), attributes: val .attributes .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmDocumentNode { fn from(val: kreuzberg::DocumentNode) -> Self { Self { id: format!("{:?}", val.id), content: serde_wasm_bindgen::to_value(&val.content).unwrap_or(JsValue::NULL), parent: val.parent.map(|v| v.0), children: val .children .iter() .map(|v| v.0) .collect::>() .into_iter() .collect(), content_layer: val.content_layer.into(), page: val.page, page_end: val.page_end, bbox: val.bbox.map(Into::into), annotations: val.annotations.into_iter().map(Into::into).collect(), attributes: val .attributes .as_ref() .and_then(|v| serde_json::to_string(v).ok()) .and_then(|s| js_sys::JSON::parse(&s).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::TableGrid { fn from(val: WasmTableGrid) -> Self { Self { rows: val.rows, cols: val.cols, cells: val.cells.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTableGrid { fn from(val: kreuzberg::TableGrid) 
-> Self { Self { rows: val.rows, cols: val.cols, cells: val.cells.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::GridCell { fn from(val: WasmGridCell) -> Self { Self { content: val.content, row: val.row, col: val.col, row_span: val.row_span, col_span: val.col_span, is_header: val.is_header, bbox: val.bbox.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmGridCell { fn from(val: kreuzberg::GridCell) -> Self { Self { content: val.content, row: val.row, col: val.col, row_span: val.row_span, col_span: val.col_span, is_header: val.is_header, bbox: val.bbox.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::TextAnnotation { fn from(val: WasmTextAnnotation) -> Self { Self { start: val.start, end: val.end, kind: serde_wasm_bindgen::from_value(val.kind.clone()).unwrap_or_default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTextAnnotation { fn from(val: kreuzberg::TextAnnotation) -> Self { Self { start: val.start, end: val.end, kind: serde_wasm_bindgen::to_value(&val.kind).unwrap_or(JsValue::NULL), } } } #[allow(clippy::needless_update)] #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ExtractionResult { fn from(val: WasmExtractionResult) -> Self { Self { content: val.content, mime_type: val.mime_type.into(), metadata: val.metadata.into(), extraction_method: val.extraction_method.map(Into::into), tables: val.tables.into_iter().map(Into::into).collect(), detected_languages: val.detected_languages.map(|v| v.into_iter().collect()), chunks: val.chunks.map(|v| v.into_iter().map(Into::into).collect()), images: val.images.map(|v| v.into_iter().map(Into::into).collect()), pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()), elements: val.elements.map(|v| 
v.into_iter().map(Into::into).collect()), djot_content: val.djot_content.map(Into::into), ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()), document: val.document.map(Into::into), quality_score: val.quality_score, processing_warnings: val.processing_warnings.into_iter().map(Into::into).collect(), annotations: val.annotations.map(|v| v.into_iter().map(Into::into).collect()), children: val.children.map(|v| v.into_iter().map(Into::into).collect()), uris: val.uris.map(|v| v.into_iter().map(Into::into).collect()), revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()), structured_output: val .structured_output .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), code_intelligence: val .code_intelligence .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), llm_usage: val.llm_usage.map(|v| v.into_iter().map(Into::into).collect()), formatted_content: val.formatted_content, ocr_internal_document: Default::default(), ..Default::default() } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExtractionResult { fn from(val: kreuzberg::ExtractionResult) -> Self { Self { content: val.content, mime_type: val.mime_type.to_string(), metadata: val.metadata.into(), extraction_method: val.extraction_method.map(Into::into), tables: val.tables.into_iter().map(Into::into).collect(), detected_languages: val.detected_languages.map(|v| v.into_iter().collect()), chunks: val.chunks.map(|v| v.into_iter().map(Into::into).collect()), images: val.images.map(|v| v.into_iter().map(Into::into).collect()), pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()), elements: val.elements.map(|v| v.into_iter().map(Into::into).collect()), djot_content: val.djot_content.map(Into::into), ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()), document: val.document.map(Into::into), quality_score: val.quality_score, processing_warnings: 
val.processing_warnings.into_iter().map(Into::into).collect(), annotations: val.annotations.map(|v| v.into_iter().map(Into::into).collect()), children: val.children.map(|v| v.into_iter().map(Into::into).collect()), uris: val.uris.map(|v| v.into_iter().map(Into::into).collect()), revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()), structured_output: val .structured_output .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), code_intelligence: val .code_intelligence .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), llm_usage: val.llm_usage.map(|v| v.into_iter().map(Into::into).collect()), formatted_content: val.formatted_content, ocr_internal_document: val.ocr_internal_document.as_ref().map(|v| format!("{v:?}")), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ArchiveEntry { fn from(val: WasmArchiveEntry) -> Self { Self { path: val.path, mime_type: val.mime_type, result: Box::new(val.result.into()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmArchiveEntry { fn from(val: kreuzberg::ArchiveEntry) -> Self { Self { path: val.path, mime_type: val.mime_type, result: (*val.result).into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ProcessingWarning { fn from(val: WasmProcessingWarning) -> Self { Self { source: val.source.into(), message: val.message.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmProcessingWarning { fn from(val: kreuzberg::ProcessingWarning) -> Self { Self { source: val.source.to_string(), message: val.message.to_string(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::LlmUsage { fn from(val: WasmLlmUsage) -> Self { Self { model: val.model, source: val.source, input_tokens: val.input_tokens, output_tokens: val.output_tokens, total_tokens: val.total_tokens, estimated_cost: 
val.estimated_cost, finish_reason: val.finish_reason, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmLlmUsage { fn from(val: kreuzberg::LlmUsage) -> Self { Self { model: val.model, source: val.source, input_tokens: val.input_tokens, output_tokens: val.output_tokens, total_tokens: val.total_tokens, estimated_cost: val.estimated_cost, finish_reason: val.finish_reason, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::Chunk { fn from(val: WasmChunk) -> Self { Self { content: val.content, chunk_type: val.chunk_type.into(), embedding: val.embedding.map(|v| v.into_iter().collect()), metadata: val.metadata.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmChunk { fn from(val: kreuzberg::Chunk) -> Self { Self { content: val.content, chunk_type: val.chunk_type.into(), embedding: val.embedding.map(|v| v.into_iter().collect()), metadata: val.metadata.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::HeadingContext { fn from(val: WasmHeadingContext) -> Self { Self { headings: val.headings.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmHeadingContext { fn from(val: kreuzberg::HeadingContext) -> Self { Self { headings: val.headings.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::HeadingLevel { fn from(val: WasmHeadingLevel) -> Self { Self { level: val.level, text: val.text, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmHeadingLevel { fn from(val: kreuzberg::HeadingLevel) -> Self { Self { level: val.level, text: val.text, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ChunkMetadata { fn from(val: WasmChunkMetadata) -> Self { Self { byte_start: 
val.byte_start, byte_end: val.byte_end, token_count: val.token_count, chunk_index: val.chunk_index, total_chunks: val.total_chunks, first_page: val.first_page, last_page: val.last_page, heading_context: val.heading_context.map(Into::into), image_indices: val.image_indices.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmChunkMetadata { fn from(val: kreuzberg::ChunkMetadata) -> Self { Self { byte_start: val.byte_start, byte_end: val.byte_end, token_count: val.token_count, chunk_index: val.chunk_index, total_chunks: val.total_chunks, first_page: val.first_page, last_page: val.last_page, heading_context: val.heading_context.map(Into::into), image_indices: val.image_indices.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ExtractedImage { fn from(val: WasmExtractedImage) -> Self { Self { data: val.data.to_vec().into(), format: val.format.into(), image_index: val.image_index, page_number: val.page_number, width: val.width, height: val.height, colorspace: val.colorspace, bits_per_component: val.bits_per_component, is_mask: val.is_mask, description: val.description, ocr_result: val.ocr_result.map(Into::into).map(Box::new), bounding_box: val.bounding_box.map(Into::into), source_path: val.source_path, image_kind: val.image_kind.map(Into::into), kind_confidence: val.kind_confidence, cluster_id: val.cluster_id, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExtractedImage { fn from(val: kreuzberg::ExtractedImage) -> Self { Self { data: val.data.to_vec().into(), format: val.format.to_string(), image_index: val.image_index, page_number: val.page_number, width: val.width, height: val.height, colorspace: val.colorspace, bits_per_component: val.bits_per_component, is_mask: val.is_mask, description: val.description, ocr_result: val.ocr_result.map(|v| (*v).into()), bounding_box: val.bounding_box.map(Into::into), 
source_path: val.source_path, image_kind: val.image_kind.map(Into::into), kind_confidence: val.kind_confidence, cluster_id: val.cluster_id, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::BoundingBox { fn from(val: WasmBoundingBox) -> Self { Self { x0: val.x0, y0: val.y0, x1: val.x1, y1: val.y1, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmBoundingBox { fn from(val: kreuzberg::BoundingBox) -> Self { Self { x0: val.x0, y0: val.y0, x1: val.x1, y1: val.y1, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ElementMetadata { fn from(val: WasmElementMetadata) -> Self { Self { page_number: val.page_number, filename: val.filename, coordinates: val.coordinates.map(Into::into), element_index: val.element_index, additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmElementMetadata { fn from(val: kreuzberg::ElementMetadata) -> Self { Self { page_number: val.page_number, filename: val.filename, coordinates: val.coordinates.map(Into::into), element_index: val.element_index, additional: js_sys::JSON::parse(&serde_json::to_string(&val.additional).unwrap_or_default()) .unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::Element { fn from(val: WasmElement) -> Self { Self { element_id: Default::default(), element_type: val.element_type.into(), text: val.text, metadata: val.metadata.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmElement { fn from(val: kreuzberg::Element) -> Self { Self { element_id: format!("{:?}", val.element_id), element_type: val.element_type.into(), text: val.text, metadata: val.metadata.into(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExcelWorkbook 
{ fn from(val: kreuzberg::ExcelWorkbook) -> Self { Self { sheets: val.sheets.into_iter().map(Into::into).collect(), metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default()) .unwrap_or(JsValue::NULL), revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExcelSheet { fn from(val: kreuzberg::ExcelSheet) -> Self { Self { name: val.name, markdown: val.markdown, row_count: val.row_count, col_count: val.col_count, cell_count: val.cell_count, table_cells: val .table_cells .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmXmlExtractionResult { fn from(val: kreuzberg::XmlExtractionResult) -> Self { Self { content: val.content, element_count: val.element_count, unique_elements: val.unique_elements.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTextExtractionResult { fn from(val: kreuzberg::TextExtractionResult) -> Self { Self { content: val.content, line_count: val.line_count, word_count: val.word_count, character_count: val.character_count, headers: val.headers.map(|v| v.into_iter().collect()), links: val.links.as_ref().and_then(|v| { serde_wasm_bindgen::to_value( &v.iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) .collect::>>(), ) .ok() }), code_blocks: val.code_blocks.as_ref().and_then(|v| { serde_wasm_bindgen::to_value( &v.iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) .collect::>>(), ) .ok() }), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmPptxExtractionResult { fn from(val: kreuzberg::PptxExtractionResult) -> Self { Self { content: val.content, metadata: val.metadata.into(), slide_count: val.slide_count, image_count: val.image_count, table_count: val.table_count, images: 
val.images.into_iter().map(Into::into).collect(), page_structure: val.page_structure.map(Into::into), page_contents: val.page_contents.map(|v| v.into_iter().map(Into::into).collect()), document: val.document.map(Into::into), hyperlinks: val.hyperlinks.iter().map(|i| format!("{:?}", i)).collect(), office_metadata: js_sys::JSON::parse(&serde_json::to_string(&val.office_metadata).unwrap_or_default()) .unwrap_or(JsValue::NULL), revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmEmailExtractionResult { fn from(val: kreuzberg::EmailExtractionResult) -> Self { Self { subject: val.subject, from_email: val.from_email, to_emails: val.to_emails.into_iter().collect(), cc_emails: val.cc_emails.into_iter().collect(), bcc_emails: val.bcc_emails.into_iter().collect(), date: val.date, message_id: val.message_id, plain_text: val.plain_text, html_content: val.html_content, content: val.content, attachments: val.attachments.into_iter().map(Into::into).collect(), metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default()) .unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmEmailAttachment { fn from(val: kreuzberg::EmailAttachment) -> Self { Self { name: val.name, filename: val.filename, mime_type: val.mime_type, size: val.size, is_image: val.is_image, data: val.data.map(|v| v.to_vec().into()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrExtractionResult { fn from(val: WasmOcrExtractionResult) -> Self { Self { content: val.content, mime_type: val.mime_type, metadata: serde_wasm_bindgen::from_value(val.metadata.clone()).unwrap_or_default(), tables: val.tables.into_iter().map(Into::into).collect(), ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()), internal_document: Default::default(), } } } 
#[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrExtractionResult { fn from(val: kreuzberg::OcrExtractionResult) -> Self { Self { content: val.content, mime_type: val.mime_type, metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default()) .unwrap_or(JsValue::NULL), tables: val.tables.into_iter().map(Into::into).collect(), ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()), internal_document: val.internal_document.as_ref().map(|v| format!("{v:?}")), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrTable { fn from(val: WasmOcrTable) -> Self { Self { cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(), markdown: val.markdown, page_number: val.page_number, bounding_box: val.bounding_box.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrTable { fn from(val: kreuzberg::OcrTable) -> Self { Self { cells: serde_wasm_bindgen::to_value(&val.cells).unwrap_or(JsValue::NULL), markdown: val.markdown, page_number: val.page_number, bounding_box: val.bounding_box.map(Into::into), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrTableBoundingBox { fn from(val: WasmOcrTableBoundingBox) -> Self { Self { left: val.left, top: val.top, right: val.right, bottom: val.bottom, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrTableBoundingBox { fn from(val: kreuzberg::OcrTableBoundingBox) -> Self { Self { left: val.left, top: val.top, right: val.right, bottom: val.bottom, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ImagePreprocessingConfig { fn from(val: WasmImagePreprocessingConfig) -> Self { Self { target_dpi: val.target_dpi, auto_rotate: val.auto_rotate, deskew: val.deskew, denoise: val.denoise, contrast_enhance: 
val.contrast_enhance, binarization_method: val.binarization_method, invert_colors: val.invert_colors, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmImagePreprocessingConfig { fn from(val: kreuzberg::ImagePreprocessingConfig) -> Self { Self { target_dpi: val.target_dpi, auto_rotate: val.auto_rotate, deskew: val.deskew, denoise: val.denoise, contrast_enhance: val.contrast_enhance, binarization_method: val.binarization_method, invert_colors: val.invert_colors, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::TesseractConfig { fn from(val: WasmTesseractConfig) -> Self { Self { language: val.language, psm: val.psm, output_format: val.output_format, oem: val.oem, min_confidence: val.min_confidence, preprocessing: val.preprocessing.map(Into::into), enable_table_detection: val.enable_table_detection, table_min_confidence: val.table_min_confidence, table_column_threshold: val.table_column_threshold, table_row_threshold_ratio: val.table_row_threshold_ratio, use_cache: val.use_cache, classify_use_pre_adapted_templates: val.classify_use_pre_adapted_templates, language_model_ngram_on: val.language_model_ngram_on, tessedit_dont_blkrej_good_wds: val.tessedit_dont_blkrej_good_wds, tessedit_dont_rowrej_good_wds: val.tessedit_dont_rowrej_good_wds, tessedit_enable_dict_correction: val.tessedit_enable_dict_correction, tessedit_char_whitelist: val.tessedit_char_whitelist, tessedit_char_blacklist: val.tessedit_char_blacklist, tessedit_use_primary_params_model: val.tessedit_use_primary_params_model, textord_space_size_is_variable: val.textord_space_size_is_variable, thresholding_method: val.thresholding_method, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTesseractConfig { fn from(val: kreuzberg::TesseractConfig) -> Self { Self { language: val.language, psm: val.psm, output_format: val.output_format, oem: val.oem, min_confidence: val.min_confidence, 
preprocessing: val.preprocessing.map(Into::into), enable_table_detection: val.enable_table_detection, table_min_confidence: val.table_min_confidence, table_column_threshold: val.table_column_threshold, table_row_threshold_ratio: val.table_row_threshold_ratio, use_cache: val.use_cache, classify_use_pre_adapted_templates: val.classify_use_pre_adapted_templates, language_model_ngram_on: val.language_model_ngram_on, tessedit_dont_blkrej_good_wds: val.tessedit_dont_blkrej_good_wds, tessedit_dont_rowrej_good_wds: val.tessedit_dont_rowrej_good_wds, tessedit_enable_dict_correction: val.tessedit_enable_dict_correction, tessedit_char_whitelist: val.tessedit_char_whitelist, tessedit_char_blacklist: val.tessedit_char_blacklist, tessedit_use_primary_params_model: val.tessedit_use_primary_params_model, textord_space_size_is_variable: val.textord_space_size_is_variable, thresholding_method: val.thresholding_method, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ImagePreprocessingMetadata { fn from(val: WasmImagePreprocessingMetadata) -> Self { Self { original_dimensions: Default::default(), original_dpi: Default::default(), target_dpi: val.target_dpi, scale_factor: val.scale_factor, auto_adjusted: val.auto_adjusted, final_dpi: val.final_dpi, new_dimensions: Default::default(), resample_method: val.resample_method, dimension_clamped: val.dimension_clamped, calculated_dpi: val.calculated_dpi, skipped_resize: val.skipped_resize, resize_error: val.resize_error, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmImagePreprocessingMetadata { fn from(val: kreuzberg::ImagePreprocessingMetadata) -> Self { Self { original_dimensions: vec![val.original_dimensions.0 as _, val.original_dimensions.1 as _], original_dpi: vec![val.original_dpi.0 as _, val.original_dpi.1 as _], target_dpi: val.target_dpi, scale_factor: val.scale_factor, auto_adjusted: val.auto_adjusted, final_dpi: val.final_dpi, new_dimensions: 
val.new_dimensions.map(|t| { let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect(); arr }), resample_method: val.resample_method, dimension_clamped: val.dimension_clamped, calculated_dpi: val.calculated_dpi, skipped_resize: val.skipped_resize, resize_error: val.resize_error, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::Metadata { fn from(val: WasmMetadata) -> Self { Self { title: val.title, subject: val.subject, authors: val.authors.map(|v| v.into_iter().collect()), keywords: val.keywords.map(|v| v.into_iter().collect()), language: val.language, created_at: val.created_at, modified_at: val.modified_at, created_by: val.created_by, modified_by: val.modified_by, pages: val.pages.map(Into::into), format: val .format .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), image_preprocessing: val.image_preprocessing.map(Into::into), json_schema: val .json_schema .as_ref() .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()), error: val.error.map(Into::into), extraction_duration_ms: val.extraction_duration_ms, category: val.category, tags: val.tags.map(|v| v.into_iter().collect()), document_version: val.document_version, abstract_text: val.abstract_text, output_format: val.output_format, ocr_used: val.ocr_used, additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmMetadata { fn from(val: kreuzberg::Metadata) -> Self { Self { title: val.title, subject: val.subject, authors: val.authors.map(|v| v.into_iter().collect()), keywords: val.keywords.map(|v| v.into_iter().collect()), language: val.language, created_at: val.created_at, modified_at: val.modified_at, created_by: val.created_by, modified_by: val.modified_by, pages: val.pages.map(Into::into), format: val.format.as_ref().and_then(|v| serde_wasm_bindgen::to_value(v).ok()), image_preprocessing: 
val.image_preprocessing.map(Into::into), json_schema: val .json_schema .as_ref() .and_then(|v| serde_wasm_bindgen::to_value(v).ok()), error: val.error.map(Into::into), extraction_duration_ms: val.extraction_duration_ms, category: val.category, tags: val.tags.map(|v| v.into_iter().collect()), document_version: val.document_version, abstract_text: val.abstract_text, output_format: val.output_format, ocr_used: val.ocr_used, additional: js_sys::JSON::parse(&serde_json::to_string(&val.additional).unwrap_or_default()) .unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ExcelMetadata { fn from(val: WasmExcelMetadata) -> Self { Self { sheet_count: val.sheet_count, sheet_names: val.sheet_names.map(|v| v.into_iter().collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmExcelMetadata { fn from(val: kreuzberg::ExcelMetadata) -> Self { Self { sheet_count: val.sheet_count, sheet_names: val.sheet_names.map(|v| v.into_iter().collect()), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::EmailMetadata { fn from(val: WasmEmailMetadata) -> Self { Self { from_email: val.from_email, from_name: val.from_name, to_emails: val.to_emails.into_iter().collect(), cc_emails: val.cc_emails.into_iter().collect(), bcc_emails: val.bcc_emails.into_iter().collect(), message_id: val.message_id, attachments: val.attachments.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmEmailMetadata { fn from(val: kreuzberg::EmailMetadata) -> Self { Self { from_email: val.from_email, from_name: val.from_name, to_emails: val.to_emails.into_iter().collect(), cc_emails: val.cc_emails.into_iter().collect(), bcc_emails: val.bcc_emails.into_iter().collect(), message_id: val.message_id, attachments: val.attachments.into_iter().collect(), } } } #[allow(clippy::redundant_closure, 
clippy::useless_conversion)] impl From for kreuzberg::ArchiveMetadata { fn from(val: WasmArchiveMetadata) -> Self { Self { format: val.format.into(), file_count: val.file_count, file_list: val.file_list.into_iter().collect(), total_size: val.total_size, compressed_size: val.compressed_size, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmArchiveMetadata { fn from(val: kreuzberg::ArchiveMetadata) -> Self { Self { format: val.format.to_string(), file_count: val.file_count, file_list: val.file_list.into_iter().collect(), total_size: val.total_size, compressed_size: val.compressed_size, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ImageMetadata { fn from(val: WasmImageMetadata) -> Self { Self { width: val.width, height: val.height, format: val.format, exif: serde_wasm_bindgen::from_value(val.exif.clone()).unwrap_or_default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmImageMetadata { fn from(val: kreuzberg::ImageMetadata) -> Self { Self { width: val.width, height: val.height, format: val.format, exif: js_sys::JSON::parse(&serde_json::to_string(&val.exif).unwrap_or_default()).unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::XmlMetadata { fn from(val: WasmXmlMetadata) -> Self { Self { element_count: val.element_count, unique_elements: val.unique_elements.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmXmlMetadata { fn from(val: kreuzberg::XmlMetadata) -> Self { Self { element_count: val.element_count, unique_elements: val.unique_elements.into_iter().collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::TextMetadata { fn from(val: WasmTextMetadata) -> Self { Self { line_count: val.line_count, word_count: val.word_count, character_count: 
val.character_count, headers: val.headers.map(|v| v.into_iter().collect()), links: Default::default(), code_blocks: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmTextMetadata { fn from(val: kreuzberg::TextMetadata) -> Self { Self { line_count: val.line_count, word_count: val.word_count, character_count: val.character_count, headers: val.headers.map(|v| v.into_iter().collect()), links: val.links.as_ref().and_then(|v| { serde_wasm_bindgen::to_value( &v.iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) .collect::>>(), ) .ok() }), code_blocks: val.code_blocks.as_ref().and_then(|v| { serde_wasm_bindgen::to_value( &v.iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) .collect::>>(), ) .ok() }), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::HeaderMetadata { fn from(val: WasmHeaderMetadata) -> Self { Self { level: val.level, text: val.text, id: val.id, depth: val.depth, html_offset: val.html_offset, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmHeaderMetadata { fn from(val: kreuzberg::HeaderMetadata) -> Self { Self { level: val.level, text: val.text, id: val.id, depth: val.depth, html_offset: val.html_offset, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::LinkMetadata { fn from(val: WasmLinkMetadata) -> Self { Self { href: val.href, text: val.text, title: val.title, link_type: val.link_type.into(), rel: val.rel.into_iter().collect(), attributes: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmLinkMetadata { fn from(val: kreuzberg::LinkMetadata) -> Self { Self { href: val.href, text: val.text, title: val.title, link_type: val.link_type.into(), rel: val.rel.into_iter().collect(), attributes: serde_wasm_bindgen::to_value( &val.attributes .iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) 
.collect::>>(), ) .unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::ImageMetadataType { fn from(val: WasmImageMetadataType) -> Self { Self { src: val.src, alt: val.alt, title: val.title, dimensions: Default::default(), image_type: val.image_type.into(), attributes: Default::default(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmImageMetadataType { fn from(val: kreuzberg::ImageMetadataType) -> Self { Self { src: val.src, alt: val.alt, title: val.title, dimensions: val.dimensions.map(|t| { let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect(); arr }), image_type: val.image_type.into(), attributes: serde_wasm_bindgen::to_value( &val.attributes .iter() .map(|(a, b)| vec![a.to_string(), b.to_string()]) .collect::>>(), ) .unwrap_or(JsValue::NULL), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::StructuredData { fn from(val: WasmStructuredData) -> Self { Self { data_type: val.data_type.into(), raw_json: val.raw_json, schema_type: val.schema_type, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmStructuredData { fn from(val: kreuzberg::StructuredData) -> Self { Self { data_type: val.data_type.into(), raw_json: val.raw_json, schema_type: val.schema_type, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::HtmlMetadata { fn from(val: WasmHtmlMetadata) -> Self { Self { title: val.title, description: val.description, keywords: val.keywords.into_iter().collect(), author: val.author, canonical_url: val.canonical_url, base_href: val.base_href, language: val.language, text_direction: val.text_direction.map(Into::into), open_graph: serde_wasm_bindgen::from_value(val.open_graph.clone()).unwrap_or_default(), twitter_card: serde_wasm_bindgen::from_value(val.twitter_card.clone()).unwrap_or_default(), meta_tags: 
serde_wasm_bindgen::from_value(val.meta_tags.clone()).unwrap_or_default(), headers: val.headers.into_iter().map(Into::into).collect(), links: val.links.into_iter().map(Into::into).collect(), images: val.images.into_iter().map(Into::into).collect(), structured_data: val.structured_data.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmHtmlMetadata { fn from(val: kreuzberg::HtmlMetadata) -> Self { Self { title: val.title, description: val.description, keywords: val.keywords.into_iter().collect(), author: val.author, canonical_url: val.canonical_url, base_href: val.base_href, language: val.language, text_direction: val.text_direction.map(Into::into), open_graph: js_sys::JSON::parse(&serde_json::to_string(&val.open_graph).unwrap_or_default()) .unwrap_or(JsValue::NULL), twitter_card: js_sys::JSON::parse(&serde_json::to_string(&val.twitter_card).unwrap_or_default()) .unwrap_or(JsValue::NULL), meta_tags: js_sys::JSON::parse(&serde_json::to_string(&val.meta_tags).unwrap_or_default()) .unwrap_or(JsValue::NULL), headers: val.headers.into_iter().map(Into::into).collect(), links: val.links.into_iter().map(Into::into).collect(), images: val.images.into_iter().map(Into::into).collect(), structured_data: val.structured_data.into_iter().map(Into::into).collect(), } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for kreuzberg::OcrMetadata { fn from(val: WasmOcrMetadata) -> Self { Self { language: val.language, psm: val.psm, output_format: val.output_format, table_count: val.table_count, table_rows: val.table_rows, table_cols: val.table_cols, } } } #[allow(clippy::redundant_closure, clippy::useless_conversion)] impl From for WasmOcrMetadata { fn from(val: kreuzberg::OcrMetadata) -> Self { Self { language: val.language, psm: val.psm, output_format: val.output_format, table_count: val.table_count, table_rows: val.table_rows, table_cols: val.table_cols, } } } 
/// Converts a `WasmErrorMetadata` into a `kreuzberg::ErrorMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmErrorMetadata> for kreuzberg::ErrorMetadata {
    fn from(val: WasmErrorMetadata) -> Self {
        Self {
            error_type: val.error_type,
            message: val.message,
        }
    }
}

/// Converts a `kreuzberg::ErrorMetadata` into a `WasmErrorMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ErrorMetadata> for WasmErrorMetadata {
    fn from(val: kreuzberg::ErrorMetadata) -> Self {
        Self {
            error_type: val.error_type,
            message: val.message,
        }
    }
}

/// Converts a `WasmPptxMetadata` into a `kreuzberg::PptxMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPptxMetadata> for kreuzberg::PptxMetadata {
    fn from(val: WasmPptxMetadata) -> Self {
        Self {
            slide_count: val.slide_count,
            slide_names: val.slide_names.into_iter().collect(),
            image_count: val.image_count,
            table_count: val.table_count,
        }
    }
}

/// Converts a `kreuzberg::PptxMetadata` into a `WasmPptxMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PptxMetadata> for WasmPptxMetadata {
    fn from(val: kreuzberg::PptxMetadata) -> Self {
        Self {
            slide_count: val.slide_count,
            slide_names: val.slide_names.into_iter().collect(),
            image_count: val.image_count,
            table_count: val.table_count,
        }
    }
}

/// Converts a `WasmCsvMetadata` into a `kreuzberg::CsvMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCsvMetadata> for kreuzberg::CsvMetadata {
    fn from(val: WasmCsvMetadata) -> Self {
        Self {
            row_count: val.row_count,
            column_count: val.column_count,
            delimiter: val.delimiter,
            has_header: val.has_header,
            column_types: val.column_types.map(|v| v.into_iter().collect()),
        }
    }
}

/// Converts a `kreuzberg::CsvMetadata` into a `WasmCsvMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CsvMetadata> for WasmCsvMetadata {
    fn from(val: kreuzberg::CsvMetadata) -> Self {
        Self {
            row_count: val.row_count,
            column_count: val.column_count,
            delimiter: val.delimiter,
            has_header: val.has_header,
            column_types: val.column_types.map(|v| v.into_iter().collect()),
        }
    }
}

/// Converts a `WasmBibtexMetadata` into a `kreuzberg::BibtexMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmBibtexMetadata> for kreuzberg::BibtexMetadata {
    fn from(val: WasmBibtexMetadata) -> Self {
        Self {
            entry_count: val.entry_count,
            citation_keys: val.citation_keys.into_iter().collect(),
            authors: val.authors.into_iter().collect(),
            year_range: val.year_range.map(Into::into),
            // A value that fails to deserialize is folded into `None`.
            entry_types: val
                .entry_types
                .as_ref()
                .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
        }
    }
}

/// Converts a `kreuzberg::BibtexMetadata` into a `WasmBibtexMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::BibtexMetadata> for WasmBibtexMetadata {
    fn from(val: kreuzberg::BibtexMetadata) -> Self {
        Self {
            entry_count: val.entry_count,
            citation_keys: val.citation_keys.into_iter().collect(),
            authors: val.authors.into_iter().collect(),
            year_range: val.year_range.map(Into::into),
            // JSON round-trip; `None` when serialization or parsing fails.
            entry_types: val
                .entry_types
                .as_ref()
                .and_then(|v| serde_json::to_string(v).ok())
                .and_then(|s| js_sys::JSON::parse(&s).ok()),
        }
    }
}

/// Converts a `WasmCitationMetadata` into a `kreuzberg::CitationMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCitationMetadata> for kreuzberg::CitationMetadata {
    fn from(val: WasmCitationMetadata) -> Self {
        Self {
            citation_count: val.citation_count,
            format: val.format,
            authors: val.authors.into_iter().collect(),
            year_range: val.year_range.map(Into::into),
            dois: val.dois.into_iter().collect(),
            keywords: val.keywords.into_iter().collect(),
        }
    }
}

/// Converts a `kreuzberg::CitationMetadata` into a `WasmCitationMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CitationMetadata> for WasmCitationMetadata {
    fn from(val: kreuzberg::CitationMetadata) -> Self {
        Self {
            citation_count: val.citation_count,
            format: val.format,
            authors: val.authors.into_iter().collect(),
            year_range: val.year_range.map(Into::into),
            dois: val.dois.into_iter().collect(),
            keywords: val.keywords.into_iter().collect(),
        }
    }
}

/// Converts a `WasmYearRange` into a `kreuzberg::YearRange`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmYearRange> for kreuzberg::YearRange {
    fn from(val: WasmYearRange) -> Self {
        Self {
            min: val.min,
            max: val.max,
            years: val.years.into_iter().collect(),
        }
    }
}

/// Converts a `kreuzberg::YearRange` into a `WasmYearRange`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::YearRange> for WasmYearRange {
    fn from(val: kreuzberg::YearRange) -> Self {
        Self {
            min: val.min,
            max: val.max,
            years: val.years.into_iter().collect(),
        }
    }
}

/// Converts a `WasmFictionBookMetadata` into a `kreuzberg::FictionBookMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmFictionBookMetadata> for kreuzberg::FictionBookMetadata {
    fn from(val: WasmFictionBookMetadata) -> Self {
        Self {
            genres: val.genres.into_iter().collect(),
            sequences: val.sequences.into_iter().collect(),
            annotation: val.annotation,
        }
    }
}

/// Converts a `kreuzberg::FictionBookMetadata` into a `WasmFictionBookMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::FictionBookMetadata> for WasmFictionBookMetadata {
    fn from(val: kreuzberg::FictionBookMetadata) -> Self {
        Self {
            genres: val.genres.into_iter().collect(),
            sequences: val.sequences.into_iter().collect(),
            annotation: val.annotation,
        }
    }
}

/// Converts a `WasmDbfMetadata` into a `kreuzberg::DbfMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDbfMetadata> for kreuzberg::DbfMetadata {
    fn from(val: WasmDbfMetadata) -> Self {
        Self {
            record_count: val.record_count,
            field_count: val.field_count,
            fields: val.fields.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `kreuzberg::DbfMetadata` into a `WasmDbfMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DbfMetadata> for WasmDbfMetadata {
    fn from(val: kreuzberg::DbfMetadata) -> Self {
        Self {
            record_count: val.record_count,
            field_count: val.field_count,
            fields: val.fields.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `WasmDbfFieldInfo` into a `kreuzberg::DbfFieldInfo`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDbfFieldInfo> for kreuzberg::DbfFieldInfo {
    fn from(val: WasmDbfFieldInfo) -> Self {
        Self {
            name: val.name,
            field_type: val.field_type,
        }
    }
}

/// Converts a `kreuzberg::DbfFieldInfo` into a `WasmDbfFieldInfo`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DbfFieldInfo> for WasmDbfFieldInfo {
    fn from(val: kreuzberg::DbfFieldInfo) -> Self {
        Self {
            name: val.name,
            field_type: val.field_type,
        }
    }
}

/// Converts a `WasmJatsMetadata` into a `kreuzberg::JatsMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmJatsMetadata> for kreuzberg::JatsMetadata {
    fn from(val: WasmJatsMetadata) -> Self {
        Self {
            copyright: val.copyright,
            license: val.license,
            // A value that fails to deserialize silently becomes the default.
            history_dates: serde_wasm_bindgen::from_value(val.history_dates.clone()).unwrap_or_default(),
            contributor_roles: val.contributor_roles.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `kreuzberg::JatsMetadata` into a `WasmJatsMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::JatsMetadata> for WasmJatsMetadata {
    fn from(val: kreuzberg::JatsMetadata) -> Self {
        Self {
            copyright: val.copyright,
            license: val.license,
            // JSON round-trip; `JsValue::NULL` when serialization or parsing fails.
            history_dates: js_sys::JSON::parse(&serde_json::to_string(&val.history_dates).unwrap_or_default())
                .unwrap_or(JsValue::NULL),
            contributor_roles: val.contributor_roles.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `WasmContributorRole` into a `kreuzberg::ContributorRole`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmContributorRole> for kreuzberg::ContributorRole {
    fn from(val: WasmContributorRole) -> Self {
        Self {
            name: val.name,
            role: val.role,
        }
    }
}

/// Converts a `kreuzberg::ContributorRole` into a `WasmContributorRole`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ContributorRole> for WasmContributorRole {
    fn from(val: kreuzberg::ContributorRole) -> Self {
        Self {
            name: val.name,
            role: val.role,
        }
    }
}

/// Converts a `WasmEpubMetadata` into a `kreuzberg::EpubMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmEpubMetadata> for kreuzberg::EpubMetadata {
    fn from(val: WasmEpubMetadata) -> Self {
        Self {
            coverage: val.coverage,
            dc_format: val.dc_format,
            relation: val.relation,
            source: val.source,
            dc_type: val.dc_type,
            cover_image: val.cover_image,
        }
    }
}

/// Converts a `kreuzberg::EpubMetadata` into a `WasmEpubMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EpubMetadata> for WasmEpubMetadata {
    fn from(val: kreuzberg::EpubMetadata) -> Self {
        Self {
            coverage: val.coverage,
            dc_format: val.dc_format,
            relation: val.relation,
            source: val.source,
            dc_type: val.dc_type,
            cover_image: val.cover_image,
        }
    }
}

/// Converts a `WasmPstMetadata` into a `kreuzberg::PstMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPstMetadata> for kreuzberg::PstMetadata {
    fn from(val: WasmPstMetadata) -> Self {
        Self {
            message_count: val.message_count,
        }
    }
}

/// Converts a `kreuzberg::PstMetadata` into a `WasmPstMetadata`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PstMetadata> for WasmPstMetadata {
    fn from(val: kreuzberg::PstMetadata) -> Self {
        Self {
            message_count: val.message_count,
        }
    }
}

/// Converts a `WasmOcrConfidence` into a `kreuzberg::OcrConfidence`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrConfidence> for kreuzberg::OcrConfidence {
    fn from(val: WasmOcrConfidence) -> Self {
        Self {
            detection: val.detection,
            recognition: val.recognition,
        }
    }
}

/// Converts a `kreuzberg::OcrConfidence` into a `WasmOcrConfidence`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrConfidence> for WasmOcrConfidence {
    fn from(val: kreuzberg::OcrConfidence) -> Self {
        Self {
            detection: val.detection,
            recognition: val.recognition,
        }
    }
}

/// Converts a `WasmOcrRotation` into a `kreuzberg::OcrRotation`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrRotation> for kreuzberg::OcrRotation {
    fn from(val: WasmOcrRotation) -> Self {
        Self {
            angle_degrees: val.angle_degrees,
            confidence: val.confidence,
        }
    }
}

/// Converts a `kreuzberg::OcrRotation` into a `WasmOcrRotation`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrRotation> for WasmOcrRotation {
    fn from(val: kreuzberg::OcrRotation) -> Self {
        Self {
            angle_degrees: val.angle_degrees,
            confidence: val.confidence,
        }
    }
}

/// Converts a `WasmOcrElement` into a `kreuzberg::OcrElement`.
///
/// `geometry` and `backend_metadata` silently become their default values when
/// deserialization fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrElement> for kreuzberg::OcrElement {
    fn from(val: WasmOcrElement) -> Self {
        Self {
            text: val.text,
            geometry: serde_wasm_bindgen::from_value(val.geometry.clone()).unwrap_or_default(),
            confidence: val.confidence.into(),
            level: val.level.into(),
            rotation: val.rotation.map(Into::into),
            page_number: val.page_number,
            parent_id: val.parent_id,
            backend_metadata: serde_wasm_bindgen::from_value(val.backend_metadata.clone()).unwrap_or_default(),
        }
    }
}

/// Converts a `kreuzberg::OcrElement` into a `WasmOcrElement`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrElement> for WasmOcrElement {
    fn from(val: kreuzberg::OcrElement) -> Self {
        Self {
            text: val.text,
            geometry: serde_wasm_bindgen::to_value(&val.geometry).unwrap_or(JsValue::NULL),
            confidence: val.confidence.into(),
            level: val.level.into(),
            rotation: val.rotation.map(Into::into),
            page_number: val.page_number,
            parent_id: val.parent_id,
            // JSON round-trip; `JsValue::NULL` when serialization or parsing fails.
            backend_metadata: js_sys::JSON::parse(&serde_json::to_string(&val.backend_metadata).unwrap_or_default())
                .unwrap_or(JsValue::NULL),
        }
    }
}

/// Converts a `WasmOcrElementConfig` into a `kreuzberg::OcrElementConfig`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrElementConfig> for kreuzberg::OcrElementConfig {
    fn from(val: WasmOcrElementConfig) -> Self {
        Self {
            include_elements: val.include_elements,
            min_level: val.min_level.into(),
            min_confidence: val.min_confidence,
            build_hierarchy: val.build_hierarchy,
        }
    }
}

/// Converts a `kreuzberg::OcrElementConfig` into a `WasmOcrElementConfig`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrElementConfig> for WasmOcrElementConfig {
    fn from(val: kreuzberg::OcrElementConfig) -> Self {
        Self {
            include_elements: val.include_elements,
            min_level: val.min_level.into(),
            min_confidence: val.min_confidence,
            build_hierarchy: val.build_hierarchy,
        }
    }
}

/// Converts a `WasmPageStructure` into a `kreuzberg::PageStructure`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageStructure> for kreuzberg::PageStructure {
    fn from(val: WasmPageStructure) -> Self {
        Self {
            total_count: val.total_count,
            unit_type: val.unit_type.into(),
            boundaries: val.boundaries.map(|v| v.into_iter().map(Into::into).collect()),
            pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
        }
    }
}

/// Converts a `kreuzberg::PageStructure` into a `WasmPageStructure`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageStructure> for WasmPageStructure {
    fn from(val: kreuzberg::PageStructure) -> Self {
        Self {
            total_count: val.total_count,
            unit_type: val.unit_type.into(),
            boundaries: val.boundaries.map(|v| v.into_iter().map(Into::into).collect()),
            pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
        }
    }
}

/// Converts a `WasmPageBoundary` into a `kreuzberg::PageBoundary`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageBoundary> for kreuzberg::PageBoundary {
    fn from(val: WasmPageBoundary) -> Self {
        Self {
            byte_start: val.byte_start,
            byte_end: val.byte_end,
            page_number: val.page_number,
        }
    }
}

/// Converts a `kreuzberg::PageBoundary` into a `WasmPageBoundary`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageBoundary> for WasmPageBoundary {
    fn from(val: kreuzberg::PageBoundary) -> Self {
        Self {
            byte_start: val.byte_start,
            byte_end: val.byte_end,
            page_number: val.page_number,
        }
    }
}

/// Converts a `WasmPageInfo` into a `kreuzberg::PageInfo`.
///
/// `dimensions` is not carried over; it is reset to its default value.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageInfo> for kreuzberg::PageInfo {
    fn from(val: WasmPageInfo) -> Self {
        Self {
            number: val.number,
            title: val.title,
            dimensions: Default::default(),
            image_count: val.image_count,
            table_count: val.table_count,
            hidden: val.hidden,
            is_blank: val.is_blank,
            has_vector_graphics: val.has_vector_graphics,
        }
    }
}

/// Converts a `kreuzberg::PageInfo` into a `WasmPageInfo`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageInfo> for WasmPageInfo {
    fn from(val: kreuzberg::PageInfo) -> Self {
        Self {
            number: val.number,
            title: val.title,
            // The tuple is flattened into a two-element vector.
            dimensions: val.dimensions.map(|t| {
                let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
                arr
            }),
            image_count: val.image_count,
            table_count: val.table_count,
            hidden: val.hidden,
            is_blank: val.is_blank,
            has_vector_graphics: val.has_vector_graphics,
        }
    }
}

/// Converts a `WasmPageContent` into a `kreuzberg::PageContent`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageContent> for kreuzberg::PageContent {
    fn from(val: WasmPageContent) -> Self {
        Self {
            page_number: val.page_number,
            content: val.content,
            // Each converted table is wrapped in a fresh `Arc`.
            tables: val.tables.into_iter().map(|v| std::sync::Arc::new(v.into())).collect(),
            image_indices: val.image_indices.into_iter().collect(),
            hierarchy: val.hierarchy.map(Into::into),
            is_blank: val.is_blank,
            layout_regions: val.layout_regions.map(|v| v.into_iter().map(Into::into).collect()),
            speaker_notes: val.speaker_notes,
            section_name: val.section_name,
            sheet_name: val.sheet_name,
        }
    }
}

/// Converts a `kreuzberg::PageContent` into a `WasmPageContent`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageContent> for WasmPageContent {
    fn from(val: kreuzberg::PageContent) -> Self {
        Self {
            page_number: val.page_number,
            content: val.content,
            // Each table is dereferenced and cloned before being converted.
            tables: val.tables.into_iter().map(|v| (*v).clone().into()).collect(),
            image_indices: val.image_indices.into_iter().collect(),
            hierarchy: val.hierarchy.map(Into::into),
            is_blank: val.is_blank,
            layout_regions: val.layout_regions.map(|v| v.into_iter().map(Into::into).collect()),
            speaker_notes: val.speaker_notes,
            section_name: val.section_name,
            sheet_name: val.sheet_name,
        }
    }
}

/// Converts a `WasmLayoutRegion` into a `kreuzberg::LayoutRegion`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLayoutRegion> for kreuzberg::LayoutRegion {
    fn from(val: WasmLayoutRegion) -> Self {
        Self {
            class_name: val.class_name,
            confidence: val.confidence,
            bounding_box: val.bounding_box.into(),
            area_fraction: val.area_fraction,
        }
    }
}

/// Converts a `kreuzberg::LayoutRegion` into a `WasmLayoutRegion`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LayoutRegion> for WasmLayoutRegion {
    fn from(val: kreuzberg::LayoutRegion) -> Self {
        Self {
            class_name: val.class_name,
            confidence: val.confidence,
            bounding_box: val.bounding_box.into(),
            area_fraction: val.area_fraction,
        }
    }
}

/// Converts a `WasmPageHierarchy` into a `kreuzberg::PageHierarchy`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageHierarchy> for kreuzberg::PageHierarchy {
    fn from(val: WasmPageHierarchy) -> Self {
        Self {
            block_count: val.block_count,
            blocks: val.blocks.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `kreuzberg::PageHierarchy` into a `WasmPageHierarchy`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageHierarchy> for WasmPageHierarchy {
    fn from(val: kreuzberg::PageHierarchy) -> Self {
        Self {
            block_count: val.block_count,
            blocks: val.blocks.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `WasmHierarchicalBlock` into a `kreuzberg::HierarchicalBlock`.
///
/// `bbox` is not carried over; it is reset to its default value.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHierarchicalBlock> for kreuzberg::HierarchicalBlock {
    fn from(val: WasmHierarchicalBlock) -> Self {
        Self {
            text: val.text,
            font_size: val.font_size,
            level: val.level,
            bbox: Default::default(),
        }
    }
}

/// Converts a `kreuzberg::HierarchicalBlock` into a `WasmHierarchicalBlock`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HierarchicalBlock> for WasmHierarchicalBlock {
    fn from(val: kreuzberg::HierarchicalBlock) -> Self {
        Self {
            text: val.text,
            font_size: val.font_size,
            level: val.level,
            // NOTE(review): only the first two tuple components (`t.0`, `t.1`) are
            // exported. Confirm that `bbox` is a 2-tuple; if it has more
            // components, the remaining ones are dropped here.
            bbox: val.bbox.map(|t| {
                let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
                arr
            }),
        }
    }
}

/// Converts a `WasmCellChange` into a `kreuzberg::CellChange`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCellChange> for kreuzberg::CellChange {
    fn from(val: WasmCellChange) -> Self {
        Self {
            row: val.row,
            col: val.col,
            from: val.from,
            to: val.to,
        }
    }
}

/// Converts a `kreuzberg::CellChange` into a `WasmCellChange`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CellChange> for WasmCellChange {
    fn from(val: kreuzberg::CellChange) -> Self {
        Self {
            row: val.row,
            col: val.col,
            from: val.from,
            to: val.to,
        }
    }
}

/// Converts a `WasmDocumentRevision` into a `kreuzberg::DocumentRevision`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDocumentRevision> for kreuzberg::DocumentRevision {
    fn from(val: WasmDocumentRevision) -> Self {
        Self {
            revision_id: val.revision_id,
            author: val.author,
            timestamp: val.timestamp,
            kind: val.kind.into(),
            // An anchor that fails to deserialize is folded into `None`.
            anchor: val
                .anchor
                .as_ref()
                .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
            delta: val.delta.into(),
        }
    }
}

/// Converts a `kreuzberg::DocumentRevision` into a `WasmDocumentRevision`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DocumentRevision> for WasmDocumentRevision {
    fn from(val: kreuzberg::DocumentRevision) -> Self {
        Self {
            revision_id: val.revision_id,
            author: val.author,
            timestamp: val.timestamp,
            kind: val.kind.into(),
            anchor: val.anchor.as_ref().and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
            delta: val.delta.into(),
        }
    }
}

/// Converts a `WasmRevisionDelta` into a `kreuzberg::RevisionDelta`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmRevisionDelta> for kreuzberg::RevisionDelta {
    fn from(val: WasmRevisionDelta) -> Self {
        Self {
            // Content that fails to deserialize silently becomes the default.
            content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(),
            table_changes: val.table_changes.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `kreuzberg::RevisionDelta` into a `WasmRevisionDelta`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::RevisionDelta> for WasmRevisionDelta {
    fn from(val: kreuzberg::RevisionDelta) -> Self {
        Self {
            content: serde_wasm_bindgen::to_value(&val.content).unwrap_or(JsValue::NULL),
            table_changes: val.table_changes.into_iter().map(Into::into).collect(),
        }
    }
}

/// Converts a `WasmTable` into a `kreuzberg::Table`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTable> for kreuzberg::Table {
    fn from(val: WasmTable) -> Self {
        Self {
            // Cells that fail to deserialize silently become the default value.
            cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(),
            markdown: val.markdown,
            page_number: val.page_number,
            bounding_box: val.bounding_box.map(Into::into),
        }
    }
}

/// Converts a `kreuzberg::Table` into a `WasmTable`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Table> for WasmTable {
    fn from(val: kreuzberg::Table) -> Self {
        Self {
            cells: serde_wasm_bindgen::to_value(&val.cells).unwrap_or(JsValue::NULL),
            markdown: val.markdown,
            page_number: val.page_number,
            bounding_box: val.bounding_box.map(Into::into),
        }
    }
}

/// Converts a `kreuzberg::TableCell` into a `WasmTableCell`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TableCell> for WasmTableCell {
    fn from(val: kreuzberg::TableCell) -> Self {
        Self {
            content: val.content,
            row_span: val.row_span,
            col_span: val.col_span,
            is_header: val.is_header,
        }
    }
}

/// Converts a `WasmExtractedUri` into a `kreuzberg::ExtractedUri`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExtractedUri> for kreuzberg::ExtractedUri {
    fn from(val: WasmExtractedUri) -> Self {
        Self {
            url: val.url,
            label: val.label,
            page: val.page,
            kind: val.kind.into(),
        }
    }
}

#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From 
for WasmExtractedUri {
    fn from(val: kreuzberg::ExtractedUri) -> Self {
        Self {
            url: val.url,
            label: val.label,
            page: val.page,
            kind: val.kind.into(),
        }
    }
}

/// Maps each `WasmExecutionProviderType` variant to the core variant of the same name.
impl From<WasmExecutionProviderType> for kreuzberg::ExecutionProviderType {
    fn from(val: WasmExecutionProviderType) -> Self {
        match val {
            WasmExecutionProviderType::Auto => Self::Auto,
            WasmExecutionProviderType::Cpu => Self::Cpu,
            WasmExecutionProviderType::CoreMl => Self::CoreMl,
            WasmExecutionProviderType::Cuda => Self::Cuda,
            WasmExecutionProviderType::TensorRt => Self::TensorRt,
        }
    }
}

/// Maps each core `kreuzberg::ExecutionProviderType` variant to the WASM variant of the same name.
impl From<kreuzberg::ExecutionProviderType> for WasmExecutionProviderType {
    fn from(val: kreuzberg::ExecutionProviderType) -> Self {
        match val {
            kreuzberg::ExecutionProviderType::Auto => Self::Auto,
            kreuzberg::ExecutionProviderType::Cpu => Self::Cpu,
            kreuzberg::ExecutionProviderType::CoreMl => Self::CoreMl,
            kreuzberg::ExecutionProviderType::Cuda => Self::Cuda,
            kreuzberg::ExecutionProviderType::TensorRt => Self::TensorRt,
        }
    }
}

/// Maps a `WasmOutputFormat` to the core `kreuzberg::OutputFormat`.
///
/// The WASM `Custom` variant carries no payload, so the core `Custom` variant
/// is built with a default payload.
impl From<WasmOutputFormat> for kreuzberg::OutputFormat {
    fn from(val: WasmOutputFormat) -> Self {
        match val {
            WasmOutputFormat::Plain => Self::Plain,
            WasmOutputFormat::Markdown => Self::Markdown,
            WasmOutputFormat::Djot => Self::Djot,
            WasmOutputFormat::Html => Self::Html,
            WasmOutputFormat::Json => Self::Json,
            WasmOutputFormat::Structured => Self::Structured,
            WasmOutputFormat::Custom => Self::Custom(Default::default()),
        }
    }
}

/// Maps a core `kreuzberg::OutputFormat` to a `WasmOutputFormat`.
///
/// The payload of the core `Custom` variant is discarded.
impl From<kreuzberg::OutputFormat> for WasmOutputFormat {
    fn from(val: kreuzberg::OutputFormat) -> Self {
        match val {
            kreuzberg::OutputFormat::Plain => Self::Plain,
            kreuzberg::OutputFormat::Markdown => Self::Markdown,
            kreuzberg::OutputFormat::Djot => Self::Djot,
            kreuzberg::OutputFormat::Html => Self::Html,
            kreuzberg::OutputFormat::Json => Self::Json,
            kreuzberg::OutputFormat::Structured => Self::Structured,
            kreuzberg::OutputFormat::Custom(..)
=> Self::Custom, } } } impl From for kreuzberg::ChunkerType { fn from(val: WasmChunkerType) -> Self { match val { WasmChunkerType::Text => Self::Text, WasmChunkerType::Markdown => Self::Markdown, WasmChunkerType::Yaml => Self::Yaml, WasmChunkerType::Semantic => Self::Semantic, } } } impl From for WasmChunkerType { fn from(val: kreuzberg::ChunkerType) -> Self { match val { kreuzberg::ChunkerType::Text => Self::Text, kreuzberg::ChunkerType::Markdown => Self::Markdown, kreuzberg::ChunkerType::Yaml => Self::Yaml, kreuzberg::ChunkerType::Semantic => Self::Semantic, } } } impl From for kreuzberg::ChunkSizing { fn from(val: WasmChunkSizing) -> Self { match val.r#type.as_str() { "characters" => Self::Characters, "tokenizer" => Self::Tokenizer { model: val.model.clone().unwrap_or_default(), cache_dir: val.cache_dir.clone().map(Into::into), }, _ => Self::Characters, } } } impl From for WasmChunkSizing { fn from(val: kreuzberg::ChunkSizing) -> Self { match val { kreuzberg::ChunkSizing::Characters => Self { r#type: "characters".to_string(), cache_dir: None, model: None, }, kreuzberg::ChunkSizing::Tokenizer { model, cache_dir } => Self { r#type: "tokenizer".to_string(), cache_dir: cache_dir.map(|p| p.to_string_lossy().to_string()), model: Some(model), }, } } } impl From for kreuzberg::EmbeddingModelType { fn from(val: WasmEmbeddingModelType) -> Self { match val.r#type.as_str() { "preset" => Self::Preset { name: val.name.clone().unwrap_or_default(), }, "custom" => Self::Custom { model_id: val.model_id.clone().unwrap_or_default(), dimensions: val.dimensions.clone().unwrap_or_default(), }, "llm" => Self::Llm { llm: val.llm.clone().map(Into::into).unwrap_or_default(), }, "plugin" => Self::Plugin { name: val.name.clone().unwrap_or_default(), }, _ => Self::Preset { name: Default::default(), }, } } } impl From for WasmEmbeddingModelType { fn from(val: kreuzberg::EmbeddingModelType) -> Self { match val { kreuzberg::EmbeddingModelType::Preset { name } => Self { r#type: 
"preset".to_string(), dimensions: None, llm: None, model_id: None, name: Some(name), }, kreuzberg::EmbeddingModelType::Custom { model_id, dimensions } => Self { r#type: "custom".to_string(), dimensions: Some(dimensions), llm: None, model_id: Some(model_id), name: None, }, kreuzberg::EmbeddingModelType::Llm { llm } => Self { r#type: "llm".to_string(), dimensions: None, llm: Some(llm.into()), model_id: None, name: None, }, kreuzberg::EmbeddingModelType::Plugin { name } => Self { r#type: "plugin".to_string(), dimensions: None, llm: None, model_id: None, name: Some(name), }, } } } impl From for WasmListType { fn from(val: kreuzberg::extraction::transform::ListType) -> Self { match val { kreuzberg::extraction::transform::ListType::Bullet => Self::Bullet, kreuzberg::extraction::transform::ListType::Numbered => Self::Numbered, kreuzberg::extraction::transform::ListType::Lettered => Self::Lettered, kreuzberg::extraction::transform::ListType::Indented => Self::Indented, } } } impl From for WasmOcrBackendType { fn from(val: kreuzberg::OcrBackendType) -> Self { match val { kreuzberg::OcrBackendType::Tesseract => Self::Tesseract, kreuzberg::OcrBackendType::EasyOCR => Self::EasyOCR, kreuzberg::OcrBackendType::PaddleOCR => Self::PaddleOCR, kreuzberg::OcrBackendType::Custom => Self::Custom, } } } impl From for WasmProcessingStage { fn from(val: kreuzberg::ProcessingStage) -> Self { match val { kreuzberg::ProcessingStage::Early => Self::Early, kreuzberg::ProcessingStage::Middle => Self::Middle, kreuzberg::ProcessingStage::Late => Self::Late, } } } impl From for kreuzberg::PdfAnnotationType { fn from(val: WasmPdfAnnotationType) -> Self { match val { WasmPdfAnnotationType::Text => Self::Text, WasmPdfAnnotationType::Highlight => Self::Highlight, WasmPdfAnnotationType::Link => Self::Link, WasmPdfAnnotationType::Stamp => Self::Stamp, WasmPdfAnnotationType::Underline => Self::Underline, WasmPdfAnnotationType::StrikeOut => Self::StrikeOut, WasmPdfAnnotationType::Other => Self::Other, } } 
}

/// Maps each core `kreuzberg::PdfAnnotationType` variant to the WASM variant of the same name.
impl From<kreuzberg::PdfAnnotationType> for WasmPdfAnnotationType {
    fn from(val: kreuzberg::PdfAnnotationType) -> Self {
        match val {
            kreuzberg::PdfAnnotationType::Text => Self::Text,
            kreuzberg::PdfAnnotationType::Highlight => Self::Highlight,
            kreuzberg::PdfAnnotationType::Link => Self::Link,
            kreuzberg::PdfAnnotationType::Stamp => Self::Stamp,
            kreuzberg::PdfAnnotationType::Underline => Self::Underline,
            kreuzberg::PdfAnnotationType::StrikeOut => Self::StrikeOut,
            kreuzberg::PdfAnnotationType::Other => Self::Other,
        }
    }
}

/// Maps each `WasmBlockType` variant to the core variant of the same name.
impl From<WasmBlockType> for kreuzberg::BlockType {
    fn from(val: WasmBlockType) -> Self {
        match val {
            WasmBlockType::Paragraph => Self::Paragraph,
            WasmBlockType::Heading => Self::Heading,
            WasmBlockType::Blockquote => Self::Blockquote,
            WasmBlockType::CodeBlock => Self::CodeBlock,
            WasmBlockType::ListItem => Self::ListItem,
            WasmBlockType::OrderedList => Self::OrderedList,
            WasmBlockType::BulletList => Self::BulletList,
            WasmBlockType::TaskList => Self::TaskList,
            WasmBlockType::DefinitionList => Self::DefinitionList,
            WasmBlockType::DefinitionTerm => Self::DefinitionTerm,
            WasmBlockType::DefinitionDescription => Self::DefinitionDescription,
            WasmBlockType::Div => Self::Div,
            WasmBlockType::Section => Self::Section,
            WasmBlockType::ThematicBreak => Self::ThematicBreak,
            WasmBlockType::RawBlock => Self::RawBlock,
            WasmBlockType::MathDisplay => Self::MathDisplay,
        }
    }
}

/// Maps each core `kreuzberg::BlockType` variant to the WASM variant of the same name.
impl From<kreuzberg::BlockType> for WasmBlockType {
    fn from(val: kreuzberg::BlockType) -> Self {
        match val {
            kreuzberg::BlockType::Paragraph => Self::Paragraph,
            kreuzberg::BlockType::Heading => Self::Heading,
            kreuzberg::BlockType::Blockquote => Self::Blockquote,
            kreuzberg::BlockType::CodeBlock => Self::CodeBlock,
            kreuzberg::BlockType::ListItem => Self::ListItem,
            kreuzberg::BlockType::OrderedList => Self::OrderedList,
            kreuzberg::BlockType::BulletList => Self::BulletList,
            kreuzberg::BlockType::TaskList => Self::TaskList,
            kreuzberg::BlockType::DefinitionList => Self::DefinitionList,
            kreuzberg::BlockType::DefinitionTerm => Self::DefinitionTerm,
kreuzberg::BlockType::DefinitionDescription => Self::DefinitionDescription, kreuzberg::BlockType::Div => Self::Div, kreuzberg::BlockType::Section => Self::Section, kreuzberg::BlockType::ThematicBreak => Self::ThematicBreak, kreuzberg::BlockType::RawBlock => Self::RawBlock, kreuzberg::BlockType::MathDisplay => Self::MathDisplay, } } } impl From for kreuzberg::InlineType { fn from(val: WasmInlineType) -> Self { match val { WasmInlineType::Text => Self::Text, WasmInlineType::Strong => Self::Strong, WasmInlineType::Emphasis => Self::Emphasis, WasmInlineType::Highlight => Self::Highlight, WasmInlineType::Subscript => Self::Subscript, WasmInlineType::Superscript => Self::Superscript, WasmInlineType::Insert => Self::Insert, WasmInlineType::Delete => Self::Delete, WasmInlineType::Code => Self::Code, WasmInlineType::Link => Self::Link, WasmInlineType::Image => Self::Image, WasmInlineType::Span => Self::Span, WasmInlineType::Math => Self::Math, WasmInlineType::RawInline => Self::RawInline, WasmInlineType::FootnoteRef => Self::FootnoteRef, WasmInlineType::Symbol => Self::Symbol, } } } impl From for WasmInlineType { fn from(val: kreuzberg::InlineType) -> Self { match val { kreuzberg::InlineType::Text => Self::Text, kreuzberg::InlineType::Strong => Self::Strong, kreuzberg::InlineType::Emphasis => Self::Emphasis, kreuzberg::InlineType::Highlight => Self::Highlight, kreuzberg::InlineType::Subscript => Self::Subscript, kreuzberg::InlineType::Superscript => Self::Superscript, kreuzberg::InlineType::Insert => Self::Insert, kreuzberg::InlineType::Delete => Self::Delete, kreuzberg::InlineType::Code => Self::Code, kreuzberg::InlineType::Link => Self::Link, kreuzberg::InlineType::Image => Self::Image, kreuzberg::InlineType::Span => Self::Span, kreuzberg::InlineType::Math => Self::Math, kreuzberg::InlineType::RawInline => Self::RawInline, kreuzberg::InlineType::FootnoteRef => Self::FootnoteRef, kreuzberg::InlineType::Symbol => Self::Symbol, } } } impl From for kreuzberg::RelationshipKind 
{
    fn from(val: WasmRelationshipKind) -> Self {
        match val {
            WasmRelationshipKind::FootnoteReference => Self::FootnoteReference,
            WasmRelationshipKind::CitationReference => Self::CitationReference,
            WasmRelationshipKind::InternalLink => Self::InternalLink,
            WasmRelationshipKind::Caption => Self::Caption,
            WasmRelationshipKind::Label => Self::Label,
            WasmRelationshipKind::TocEntry => Self::TocEntry,
            WasmRelationshipKind::CrossReference => Self::CrossReference,
        }
    }
}

/// Maps each core `kreuzberg::RelationshipKind` variant to the WASM variant of the same name.
impl From<kreuzberg::RelationshipKind> for WasmRelationshipKind {
    fn from(val: kreuzberg::RelationshipKind) -> Self {
        match val {
            kreuzberg::RelationshipKind::FootnoteReference => Self::FootnoteReference,
            kreuzberg::RelationshipKind::CitationReference => Self::CitationReference,
            kreuzberg::RelationshipKind::InternalLink => Self::InternalLink,
            kreuzberg::RelationshipKind::Caption => Self::Caption,
            kreuzberg::RelationshipKind::Label => Self::Label,
            kreuzberg::RelationshipKind::TocEntry => Self::TocEntry,
            kreuzberg::RelationshipKind::CrossReference => Self::CrossReference,
        }
    }
}

/// Maps each `WasmContentLayer` variant to the core variant of the same name.
impl From<WasmContentLayer> for kreuzberg::ContentLayer {
    fn from(val: WasmContentLayer) -> Self {
        match val {
            WasmContentLayer::Body => Self::Body,
            WasmContentLayer::Header => Self::Header,
            WasmContentLayer::Footer => Self::Footer,
            WasmContentLayer::Footnote => Self::Footnote,
        }
    }
}

/// Maps each core `kreuzberg::ContentLayer` variant to the WASM variant of the same name.
impl From<kreuzberg::ContentLayer> for WasmContentLayer {
    fn from(val: kreuzberg::ContentLayer) -> Self {
        match val {
            kreuzberg::ContentLayer::Body => Self::Body,
            kreuzberg::ContentLayer::Header => Self::Header,
            kreuzberg::ContentLayer::Footer => Self::Footer,
            kreuzberg::ContentLayer::Footnote => Self::Footnote,
        }
    }
}

/// Rebuilds a core `kreuzberg::NodeContent` from the flattened `WasmNodeContent`.
///
/// The variant is selected by the `node_type` string. Required fields that
/// are missing become their default values, and any unrecognized `node_type`
/// falls back to `Title` with default text.
impl From<WasmNodeContent> for kreuzberg::NodeContent {
    fn from(val: WasmNodeContent) -> Self {
        match val.node_type.as_str() {
            "title" => Self::Title {
                text: val.text.clone().unwrap_or_default(),
            },
            "heading" => Self::Heading {
                level: val.level.clone().unwrap_or_default(),
                text: val.text.clone().unwrap_or_default(),
            },
            "paragraph" => Self::Paragraph {
                text: val.text.clone().unwrap_or_default(),
            },
            "list" => Self::List {
                ordered:
val.ordered.clone().unwrap_or_default(), }, "list_item" => Self::ListItem { text: val.text.clone().unwrap_or_default(), }, "table" => Self::Table { grid: val.grid.clone().map(Into::into).unwrap_or_default(), }, "image" => Self::Image { description: val.description.clone(), image_index: val.image_index.clone(), src: val.src.clone(), }, "code" => Self::Code { text: val.text.clone().unwrap_or_default(), language: val.language.clone(), }, "quote" => Self::Quote, "formula" => Self::Formula { text: val.text.clone().unwrap_or_default(), }, "footnote" => Self::Footnote { text: val.text.clone().unwrap_or_default(), }, "group" => Self::Group { label: val.label.clone(), heading_level: val.heading_level.clone(), heading_text: val.heading_text.clone(), }, "page_break" => Self::PageBreak, "slide" => Self::Slide { number: val.number.clone().unwrap_or_default(), title: val.title.clone(), }, "definition_list" => Self::DefinitionList, "definition_item" => Self::DefinitionItem { term: val.term.clone().unwrap_or_default(), definition: val.definition.clone().unwrap_or_default(), }, "citation" => Self::Citation { key: val.key.clone().unwrap_or_default(), text: val.text.clone().unwrap_or_default(), }, "admonition" => Self::Admonition { kind: val.kind.clone().unwrap_or_default(), title: val.title.clone(), }, "raw_block" => Self::RawBlock { format: val.format.clone().unwrap_or_default(), content: val.content.clone().unwrap_or_default(), }, "metadata_block" => Self::MetadataBlock { entries: val .entries .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::>(v.clone()).ok()) .unwrap_or_default(), }, _ => Self::Title { text: Default::default(), }, } } } impl From for WasmNodeContent { fn from(val: kreuzberg::NodeContent) -> Self { match val { kreuzberg::NodeContent::Title { text } => Self { node_type: "title".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: 
None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Heading { level, text } => Self { node_type: "heading".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: Some(level), number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Paragraph { text } => Self { node_type: "paragraph".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::List { ordered } => Self { node_type: "list".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: Some(ordered), src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::ListItem { text } => Self { node_type: "list_item".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Table { grid } => Self { node_type: "table".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: Some(grid.into()), heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, 
language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::Image { description, image_index, src, } => Self { node_type: "image".to_string(), content: None, definition: None, description, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src, term: None, text: None, title: None, }, kreuzberg::NodeContent::Code { text, language } => Self { node_type: "code".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Quote => Self { node_type: "quote".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::Formula { text } => Self { node_type: "formula".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Footnote { text } => Self { node_type: "footnote".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, 
text: Some(text), title: None, }, kreuzberg::NodeContent::Group { label, heading_level, heading_text, } => Self { node_type: "group".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level, heading_text, image_index: None, key: None, kind: None, label, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::PageBreak => Self { node_type: "page_break".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::Slide { number, title } => Self { node_type: "slide".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: Some(number), ordered: None, src: None, term: None, text: None, title, }, kreuzberg::NodeContent::DefinitionList => Self { node_type: "definition_list".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::DefinitionItem { term, definition } => Self { node_type: "definition_item".to_string(), content: None, definition: Some(definition), description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: Some(term), text: None, 
title: None, }, kreuzberg::NodeContent::Citation { key, text } => Self { node_type: "citation".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: Some(key), kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: Some(text), title: None, }, kreuzberg::NodeContent::Admonition { kind, title } => Self { node_type: "admonition".to_string(), content: None, definition: None, description: None, entries: None, format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: Some(kind), label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title, }, kreuzberg::NodeContent::RawBlock { format, content } => Self { node_type: "raw_block".to_string(), content: Some(content), definition: None, description: None, entries: None, format: Some(format), grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, kreuzberg::NodeContent::MetadataBlock { entries } => Self { node_type: "metadata_block".to_string(), content: None, definition: None, description: None, entries: serde_wasm_bindgen::to_value(&entries).ok(), format: None, grid: None, heading_level: None, heading_text: None, image_index: None, key: None, kind: None, label: None, language: None, level: None, number: None, ordered: None, src: None, term: None, text: None, title: None, }, } } } impl From for kreuzberg::AnnotationKind { fn from(val: WasmAnnotationKind) -> Self { match val.annotation_type.as_str() { "bold" => Self::Bold, "italic" => Self::Italic, "underline" => Self::Underline, "strikethrough" => Self::Strikethrough, "code" => Self::Code, "subscript" => Self::Subscript, "superscript" => 
Self::Superscript, "link" => Self::Link { url: val.url.clone().unwrap_or_default(), title: val.title.clone(), }, "highlight" => Self::Highlight, "color" => Self::Color { value: val.value.clone().unwrap_or_default(), }, "font_size" => Self::FontSize { value: val.value.clone().unwrap_or_default(), }, "custom" => Self::Custom { name: val.name.clone().unwrap_or_default(), value: val.value.clone(), }, _ => Self::Bold, } } } impl From for WasmAnnotationKind { fn from(val: kreuzberg::AnnotationKind) -> Self { match val { kreuzberg::AnnotationKind::Bold => Self { annotation_type: "bold".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Italic => Self { annotation_type: "italic".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Underline => Self { annotation_type: "underline".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Strikethrough => Self { annotation_type: "strikethrough".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Code => Self { annotation_type: "code".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Subscript => Self { annotation_type: "subscript".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Superscript => Self { annotation_type: "superscript".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Link { url, title } => Self { annotation_type: "link".to_string(), name: None, title, url: Some(url), value: None, }, kreuzberg::AnnotationKind::Highlight => Self { annotation_type: "highlight".to_string(), name: None, title: None, url: None, value: None, }, kreuzberg::AnnotationKind::Color { value } => Self { annotation_type: "color".to_string(), name: None, title: None, url: None, value: Some(value), }, kreuzberg::AnnotationKind::FontSize { value } => Self { 
annotation_type: "font_size".to_string(), name: None, title: None, url: None, value: Some(value), }, kreuzberg::AnnotationKind::Custom { name, value } => Self { annotation_type: "custom".to_string(), name: Some(name), title: None, url: None, value, }, } } } impl From for kreuzberg::ExtractionMethod { fn from(val: WasmExtractionMethod) -> Self { match val { WasmExtractionMethod::Native => Self::Native, WasmExtractionMethod::Ocr => Self::Ocr, WasmExtractionMethod::Mixed => Self::Mixed, } } } impl From for WasmExtractionMethod { fn from(val: kreuzberg::ExtractionMethod) -> Self { match val { kreuzberg::ExtractionMethod::Native => Self::Native, kreuzberg::ExtractionMethod::Ocr => Self::Ocr, kreuzberg::ExtractionMethod::Mixed => Self::Mixed, } } } impl From for kreuzberg::ChunkType { fn from(val: WasmChunkType) -> Self { match val { WasmChunkType::Heading => Self::Heading, WasmChunkType::PartyList => Self::PartyList, WasmChunkType::Definitions => Self::Definitions, WasmChunkType::OperativeClause => Self::OperativeClause, WasmChunkType::SignatureBlock => Self::SignatureBlock, WasmChunkType::Schedule => Self::Schedule, WasmChunkType::TableLike => Self::TableLike, WasmChunkType::Formula => Self::Formula, WasmChunkType::CodeBlock => Self::CodeBlock, WasmChunkType::Image => Self::Image, WasmChunkType::OrgChart => Self::OrgChart, WasmChunkType::Diagram => Self::Diagram, WasmChunkType::Unknown => Self::Unknown, } } } impl From for WasmChunkType { fn from(val: kreuzberg::ChunkType) -> Self { match val { kreuzberg::ChunkType::Heading => Self::Heading, kreuzberg::ChunkType::PartyList => Self::PartyList, kreuzberg::ChunkType::Definitions => Self::Definitions, kreuzberg::ChunkType::OperativeClause => Self::OperativeClause, kreuzberg::ChunkType::SignatureBlock => Self::SignatureBlock, kreuzberg::ChunkType::Schedule => Self::Schedule, kreuzberg::ChunkType::TableLike => Self::TableLike, kreuzberg::ChunkType::Formula => Self::Formula, kreuzberg::ChunkType::CodeBlock => Self::CodeBlock, 
kreuzberg::ChunkType::Image => Self::Image, kreuzberg::ChunkType::OrgChart => Self::OrgChart, kreuzberg::ChunkType::Diagram => Self::Diagram, kreuzberg::ChunkType::Unknown => Self::Unknown, } } } impl From for kreuzberg::ImageKind { fn from(val: WasmImageKind) -> Self { match val { WasmImageKind::Photograph => Self::Photograph, WasmImageKind::Diagram => Self::Diagram, WasmImageKind::Chart => Self::Chart, WasmImageKind::Drawing => Self::Drawing, WasmImageKind::TextBlock => Self::TextBlock, WasmImageKind::Decoration => Self::Decoration, WasmImageKind::Logo => Self::Logo, WasmImageKind::Icon => Self::Icon, WasmImageKind::TileFragment => Self::TileFragment, WasmImageKind::Mask => Self::Mask, WasmImageKind::PageRaster => Self::PageRaster, WasmImageKind::Unknown => Self::Unknown, } } } impl From for WasmImageKind { fn from(val: kreuzberg::ImageKind) -> Self { match val { kreuzberg::ImageKind::Photograph => Self::Photograph, kreuzberg::ImageKind::Diagram => Self::Diagram, kreuzberg::ImageKind::Chart => Self::Chart, kreuzberg::ImageKind::Drawing => Self::Drawing, kreuzberg::ImageKind::TextBlock => Self::TextBlock, kreuzberg::ImageKind::Decoration => Self::Decoration, kreuzberg::ImageKind::Logo => Self::Logo, kreuzberg::ImageKind::Icon => Self::Icon, kreuzberg::ImageKind::TileFragment => Self::TileFragment, kreuzberg::ImageKind::Mask => Self::Mask, kreuzberg::ImageKind::PageRaster => Self::PageRaster, kreuzberg::ImageKind::Unknown => Self::Unknown, } } } impl From for kreuzberg::ResultFormat { fn from(val: WasmResultFormat) -> Self { match val { WasmResultFormat::Unified => Self::Unified, WasmResultFormat::ElementBased => Self::ElementBased, } } } impl From for WasmResultFormat { fn from(val: kreuzberg::ResultFormat) -> Self { match val { kreuzberg::ResultFormat::Unified => Self::Unified, kreuzberg::ResultFormat::ElementBased => Self::ElementBased, } } } impl From for kreuzberg::ElementType { fn from(val: WasmElementType) -> Self { match val { WasmElementType::Title => 
Self::Title, WasmElementType::NarrativeText => Self::NarrativeText, WasmElementType::Heading => Self::Heading, WasmElementType::ListItem => Self::ListItem, WasmElementType::Table => Self::Table, WasmElementType::Image => Self::Image, WasmElementType::PageBreak => Self::PageBreak, WasmElementType::CodeBlock => Self::CodeBlock, WasmElementType::BlockQuote => Self::BlockQuote, WasmElementType::Footer => Self::Footer, WasmElementType::Header => Self::Header, } } } impl From for WasmElementType { fn from(val: kreuzberg::ElementType) -> Self { match val { kreuzberg::ElementType::Title => Self::Title, kreuzberg::ElementType::NarrativeText => Self::NarrativeText, kreuzberg::ElementType::Heading => Self::Heading, kreuzberg::ElementType::ListItem => Self::ListItem, kreuzberg::ElementType::Table => Self::Table, kreuzberg::ElementType::Image => Self::Image, kreuzberg::ElementType::PageBreak => Self::PageBreak, kreuzberg::ElementType::CodeBlock => Self::CodeBlock, kreuzberg::ElementType::BlockQuote => Self::BlockQuote, kreuzberg::ElementType::Footer => Self::Footer, kreuzberg::ElementType::Header => Self::Header, } } } impl From for kreuzberg::FormatMetadata { fn from(val: WasmFormatMetadata) -> Self { match val.format_type.as_str() { "pdf" => Self::Pdf( val._0 .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::(v.clone()).ok()) .unwrap_or_default(), ), "docx" => Self::Docx(Box::new( val._0 .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::(v.clone()).ok()) .unwrap_or_default(), )), "excel" => Self::Excel( val._0 .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::(v.clone()).ok()) .unwrap_or_default(), ), "email" => Self::Email( val._0 .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::(v.clone()).ok()) .unwrap_or_default(), ), "pptx" => Self::Pptx( val._0 .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::(v.clone()).ok()) .unwrap_or_default(), ), "archive" => Self::Archive( val._0 .as_ref() .and_then(|v| 
serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "image" => Self::Image(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "xml" => Self::Xml(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "text" => Self::Text(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "html" => Self::Html(Box::new(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            )),
            "ocr" => Self::Ocr(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "csv" => Self::Csv(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "bibtex" => Self::Bibtex(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "citation" => Self::Citation(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "fiction_book" => Self::FictionBook(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "dbf" => Self::Dbf(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "jats" => Self::Jats(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "epub" => Self::Epub(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            "pst" => Self::Pst(
                val._0
                    .as_ref()
                    .and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok())
                    .unwrap_or_default(),
            ),
            // NOTE(review): unlike every other tag, "code" ignores `_0` and
            // always yields the default payload, although the reverse
            // conversion does serialize it — confirm this is intended.
            "code" => Self::Code(Default::default()),
            // Unrecognized tags fall back to `Pdf` with a default payload.
            _ => Self::Pdf(Default::default()),
        }
    }
}

/// Flattens a core `kreuzberg::FormatMetadata` into a `WasmFormatMetadata`.
///
/// The variant name is stored in `format_type` and the payload is serialized
/// into `_0`; a serialization failure leaves `_0` as `None`.
impl From<kreuzberg::FormatMetadata> for WasmFormatMetadata {
    fn from(val: kreuzberg::FormatMetadata) -> Self {
        // Each arm yields the tag and the serialized payload; the struct is
        // assembled once after the match.
        let (tag, payload) = match val {
            kreuzberg::FormatMetadata::Pdf(field0) => ("pdf", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Docx(field0) => ("docx", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Excel(field0) => ("excel", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Email(field0) => ("email", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Pptx(field0) => ("pptx", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Archive(field0) => ("archive", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Image(field0) => ("image", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Xml(field0) => ("xml", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Text(field0) => ("text", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Html(field0) => ("html", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Ocr(field0) => ("ocr", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Csv(field0) => ("csv", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Bibtex(field0) => ("bibtex", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Citation(field0) => ("citation", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::FictionBook(field0) => {
                ("fiction_book", serde_wasm_bindgen::to_value(&field0).ok())
            }
            kreuzberg::FormatMetadata::Dbf(field0) => ("dbf", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Jats(field0) => ("jats", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Epub(field0) => ("epub", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Pst(field0) => ("pst", serde_wasm_bindgen::to_value(&field0).ok()),
            kreuzberg::FormatMetadata::Code(field0) => ("code", serde_wasm_bindgen::to_value(&field0).ok()),
        };

        Self {
            format_type: tag.to_string(),
            _0: payload,
        }
    }
}

/// Maps each `WasmTextDirection` variant to the core variant of the same name.
impl From<WasmTextDirection> for kreuzberg::TextDirection {
    fn from(val: WasmTextDirection) -> Self {
        match val {
            WasmTextDirection::LeftToRight => Self::LeftToRight,
            WasmTextDirection::RightToLeft => Self::RightToLeft,
            WasmTextDirection::Auto => Self::Auto,
        }
    }
}

/// Maps each core `kreuzberg::TextDirection` variant to the WASM variant of the same name.
impl From<kreuzberg::TextDirection> for WasmTextDirection {
    fn from(val: kreuzberg::TextDirection) -> Self {
        match val {
            kreuzberg::TextDirection::LeftToRight => Self::LeftToRight,
            kreuzberg::TextDirection::RightToLeft => Self::RightToLeft,
            kreuzberg::TextDirection::Auto => Self::Auto,
        }
    }
}

/// Maps each `WasmLinkType` variant to the core variant of the same name.
impl From<WasmLinkType> for kreuzberg::LinkType {
    fn from(val: WasmLinkType) -> Self {
        match val {
            WasmLinkType::Anchor => Self::Anchor,
            WasmLinkType::Internal => Self::Internal,
            WasmLinkType::External => Self::External,
            WasmLinkType::Email => Self::Email,
            WasmLinkType::Phone => Self::Phone,
            WasmLinkType::Other => Self::Other,
        }
    }
}

/// Maps each core `kreuzberg::LinkType` variant to the WASM variant of the same name.
impl From<kreuzberg::LinkType> for WasmLinkType {
    fn from(val: kreuzberg::LinkType) -> Self {
        match val {
            kreuzberg::LinkType::Anchor => Self::Anchor,
            kreuzberg::LinkType::Internal => Self::Internal,
            kreuzberg::LinkType::External => Self::External,
            kreuzberg::LinkType::Email => Self::Email,
            kreuzberg::LinkType::Phone => Self::Phone,
            kreuzberg::LinkType::Other => Self::Other,
        }
    }
}

impl From for kreuzberg::ImageType { fn from(val: WasmImageType) -> Self { match val { WasmImageType::DataUri => Self::DataUri,
WasmImageType::InlineSvg => Self::InlineSvg, WasmImageType::External => Self::External, WasmImageType::Relative => Self::Relative, } } } impl From for WasmImageType { fn from(val: kreuzberg::ImageType) -> Self { match val { kreuzberg::ImageType::DataUri => Self::DataUri, kreuzberg::ImageType::InlineSvg => Self::InlineSvg, kreuzberg::ImageType::External => Self::External, kreuzberg::ImageType::Relative => Self::Relative, } } } impl From for kreuzberg::StructuredDataType { fn from(val: WasmStructuredDataType) -> Self { match val { WasmStructuredDataType::JsonLd => Self::JsonLd, WasmStructuredDataType::Microdata => Self::Microdata, WasmStructuredDataType::RDFa => Self::RDFa, } } } impl From for WasmStructuredDataType { fn from(val: kreuzberg::StructuredDataType) -> Self { match val { kreuzberg::StructuredDataType::JsonLd => Self::JsonLd, kreuzberg::StructuredDataType::Microdata => Self::Microdata, kreuzberg::StructuredDataType::RDFa => Self::RDFa, } } } impl From for kreuzberg::OcrBoundingGeometry { fn from(val: WasmOcrBoundingGeometry) -> Self { match val.r#type.as_str() { "rectangle" => Self::Rectangle { left: val.left.clone().unwrap_or_default(), top: val.top.clone().unwrap_or_default(), width: val.width.clone().unwrap_or_default(), height: val.height.clone().unwrap_or_default(), }, "quadrilateral" => Self::Quadrilateral { points: val .points .as_ref() .and_then(|v| serde_wasm_bindgen::from_value::<[(u32, u32); 4]>(v.clone()).ok()) .unwrap_or_default(), }, _ => Self::Rectangle { left: Default::default(), top: Default::default(), width: Default::default(), height: Default::default(), }, } } } impl From for WasmOcrBoundingGeometry { fn from(val: kreuzberg::OcrBoundingGeometry) -> Self { match val { kreuzberg::OcrBoundingGeometry::Rectangle { left, top, width, height, } => Self { r#type: "rectangle".to_string(), height: Some(height), left: Some(left), points: None, top: Some(top), width: Some(width), }, kreuzberg::OcrBoundingGeometry::Quadrilateral { points } => Self 
{ r#type: "quadrilateral".to_string(), height: None, left: None, points: serde_wasm_bindgen::to_value(&points).ok(), top: None, width: None, }, } } } impl From for kreuzberg::OcrElementLevel { fn from(val: WasmOcrElementLevel) -> Self { match val { WasmOcrElementLevel::Word => Self::Word, WasmOcrElementLevel::Line => Self::Line, WasmOcrElementLevel::Block => Self::Block, WasmOcrElementLevel::Page => Self::Page, } } } impl From for WasmOcrElementLevel { fn from(val: kreuzberg::OcrElementLevel) -> Self { match val { kreuzberg::OcrElementLevel::Word => Self::Word, kreuzberg::OcrElementLevel::Line => Self::Line, kreuzberg::OcrElementLevel::Block => Self::Block, kreuzberg::OcrElementLevel::Page => Self::Page, } } } impl From for kreuzberg::PageUnitType { fn from(val: WasmPageUnitType) -> Self { match val { WasmPageUnitType::Page => Self::Page, WasmPageUnitType::Slide => Self::Slide, WasmPageUnitType::Sheet => Self::Sheet, } } } impl From for WasmPageUnitType { fn from(val: kreuzberg::PageUnitType) -> Self { match val { kreuzberg::PageUnitType::Page => Self::Page, kreuzberg::PageUnitType::Slide => Self::Slide, kreuzberg::PageUnitType::Sheet => Self::Sheet, } } } impl From for kreuzberg::DiffLine { fn from(val: WasmDiffLine) -> Self { match val.kind.as_str() { "context" => Self::Context(val._0.clone().unwrap_or_default()), "added" => Self::Added(val._0.clone().unwrap_or_default()), "removed" => Self::Removed(val._0.clone().unwrap_or_default()), _ => Self::Context(Default::default()), } } } impl From for WasmDiffLine { fn from(val: kreuzberg::DiffLine) -> Self { match val { kreuzberg::DiffLine::Context(field0) => Self { kind: "context".to_string(), _0: Some(field0), }, kreuzberg::DiffLine::Added(field0) => Self { kind: "added".to_string(), _0: Some(field0), }, kreuzberg::DiffLine::Removed(field0) => Self { kind: "removed".to_string(), _0: Some(field0), }, } } } impl From for kreuzberg::RevisionKind { fn from(val: WasmRevisionKind) -> Self { match val { 
WasmRevisionKind::Insertion => Self::Insertion, WasmRevisionKind::Deletion => Self::Deletion, WasmRevisionKind::FormatChange => Self::FormatChange, WasmRevisionKind::Comment => Self::Comment, } } } impl From for WasmRevisionKind { fn from(val: kreuzberg::RevisionKind) -> Self { match val { kreuzberg::RevisionKind::Insertion => Self::Insertion, kreuzberg::RevisionKind::Deletion => Self::Deletion, kreuzberg::RevisionKind::FormatChange => Self::FormatChange, kreuzberg::RevisionKind::Comment => Self::Comment, } } } impl From for kreuzberg::RevisionAnchor { fn from(val: WasmRevisionAnchor) -> Self { match val.r#type.as_str() { "paragraph" => Self::Paragraph { index: val.index.clone().unwrap_or_default(), }, "table_cell" => Self::TableCell { row: val.row.clone().unwrap_or_default(), col: val.col.clone().unwrap_or_default(), table_index: val.table_index.clone().unwrap_or_default(), }, "page" => Self::Page { index: val.index.clone().unwrap_or_default(), }, "slide" => Self::Slide { index: val.index.clone().unwrap_or_default(), }, "sheet" => Self::Sheet { index: val.index.clone().unwrap_or_default(), name: val.name.clone(), }, _ => Self::Paragraph { index: Default::default(), }, } } } impl From for WasmRevisionAnchor { fn from(val: kreuzberg::RevisionAnchor) -> Self { match val { kreuzberg::RevisionAnchor::Paragraph { index } => Self { r#type: "paragraph".to_string(), col: None, index: Some(index), name: None, row: None, table_index: None, }, kreuzberg::RevisionAnchor::TableCell { row, col, table_index } => Self { r#type: "table_cell".to_string(), col: Some(col), index: None, name: None, row: Some(row), table_index: Some(table_index), }, kreuzberg::RevisionAnchor::Page { index } => Self { r#type: "page".to_string(), col: None, index: Some(index), name: None, row: None, table_index: None, }, kreuzberg::RevisionAnchor::Slide { index } => Self { r#type: "slide".to_string(), col: None, index: Some(index), name: None, row: None, table_index: None, }, 
kreuzberg::RevisionAnchor::Sheet { index, name } => Self { r#type: "sheet".to_string(), col: None, index: Some(index), name, row: None, table_index: None, }, } } } impl From for kreuzberg::UriKind { fn from(val: WasmUriKind) -> Self { match val { WasmUriKind::Hyperlink => Self::Hyperlink, WasmUriKind::Image => Self::Image, WasmUriKind::Anchor => Self::Anchor, WasmUriKind::Citation => Self::Citation, WasmUriKind::Reference => Self::Reference, WasmUriKind::Email => Self::Email, } } } impl From for WasmUriKind { fn from(val: kreuzberg::UriKind) -> Self { match val { kreuzberg::UriKind::Hyperlink => Self::Hyperlink, kreuzberg::UriKind::Image => Self::Image, kreuzberg::UriKind::Anchor => Self::Anchor, kreuzberg::UriKind::Citation => Self::Citation, kreuzberg::UriKind::Reference => Self::Reference, kreuzberg::UriKind::Email => Self::Email, } } } /// Return the error code string for a `kreuzberg::error::KreuzbergError` variant. #[allow(dead_code)] fn kreuzberg_error_error_code(e: &kreuzberg::error::KreuzbergError) -> &'static str { #[allow(unreachable_patterns)] match e { kreuzberg::error::KreuzbergError::Io(..) => "io", kreuzberg::error::KreuzbergError::Parsing { .. } => "parsing", kreuzberg::error::KreuzbergError::Ocr { .. } => "ocr", kreuzberg::error::KreuzbergError::Validation { .. } => "validation", kreuzberg::error::KreuzbergError::Cache { .. } => "cache", kreuzberg::error::KreuzbergError::ImageProcessing { .. } => "image_processing", kreuzberg::error::KreuzbergError::Serialization { .. } => "serialization", kreuzberg::error::KreuzbergError::MissingDependency(..) => "missing_dependency", kreuzberg::error::KreuzbergError::Plugin { .. } => "plugin", kreuzberg::error::KreuzbergError::LockPoisoned(..) => "lock_poisoned", kreuzberg::error::KreuzbergError::UnsupportedFormat(..) => "unsupported_format", kreuzberg::error::KreuzbergError::Embedding { .. } => "embedding", kreuzberg::error::KreuzbergError::Timeout { .. 
} => "timeout", kreuzberg::error::KreuzbergError::Cancelled => "cancelled", kreuzberg::error::KreuzbergError::Security { .. } => "security", kreuzberg::error::KreuzbergError::Other(..) => "other", _ => "kreuzberg_error", } } /// Convert a `kreuzberg::error::KreuzbergError` error to a `JsValue` object with `code` and `message` fields. #[allow(dead_code)] fn kreuzberg_error_to_js_value(e: kreuzberg::error::KreuzbergError) -> wasm_bindgen::JsValue { let code = kreuzberg_error_error_code(&e); let message = e.to_string(); let obj = js_sys::Object::new(); js_sys::Reflect::set(&obj, &"code".into(), &code.into()).ok(); js_sys::Reflect::set(&obj, &"message".into(), &message.into()).ok(); obj.into() }