Files
fil/crates/kreuzberg-wasm/src/lib.rs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

19849 lines
611 KiB
Rust
Generated

// This file is auto-generated by alef. DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// Re-generate with: alef generate
#![allow(dead_code, unused_imports, unused_variables)]
#![allow(
clippy::too_many_arguments,
clippy::let_unit_value,
clippy::needless_borrow,
clippy::map_identity,
clippy::just_underscores_and_digits,
clippy::unused_unit,
clippy::unnecessary_cast,
clippy::unwrap_or_default,
clippy::derivable_impls,
clippy::needless_borrows_for_generic_args,
clippy::unnecessary_fallible_conversions,
clippy::useless_conversion,
clippy::arc_with_non_send_sync,
clippy::collapsible_if,
clippy::clone_on_copy,
clippy::should_implement_trait,
clippy::await_holding_refcell_ref
)]
use std::sync::Arc;
use std::sync::Mutex;
use wasm_bindgen::prelude::*;
/// Cache statistics value object exported to JavaScript via `wasm_bindgen`.
///
/// Every field is private; JavaScript reads and writes them through the
/// camelCase getter/setter pairs on the `#[wasm_bindgen] impl` block.
/// The derived `Default` yields zero for every field.
///
/// NOTE(review): the units (megabytes, days) are implied by the field names
/// only — confirm against the code that populates this struct.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCacheStats {
// Exposed to JavaScript as `totalFiles`.
total_files: usize,
// Exposed to JavaScript as `totalSizeMb`.
total_size_mb: f64,
// Exposed to JavaScript as `availableSpaceMb`.
available_space_mb: f64,
// Exposed to JavaScript as `oldestFileAgeDays`.
oldest_file_age_days: f64,
// Exposed to JavaScript as `newestFileAgeDays`.
newest_file_age_days: f64,
}
#[wasm_bindgen]
impl WasmCacheStats {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
totalFiles: usize,
totalSizeMb: f64,
availableSpaceMb: f64,
oldestFileAgeDays: f64,
newestFileAgeDays: f64,
) -> WasmCacheStats {
WasmCacheStats {
total_files: totalFiles,
total_size_mb: totalSizeMb,
available_space_mb: availableSpaceMb,
oldest_file_age_days: oldestFileAgeDays,
newest_file_age_days: newestFileAgeDays,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmCacheStats {
<WasmCacheStats as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "totalFiles")]
pub fn total_files(&self) -> usize {
self.total_files
}
#[wasm_bindgen(setter, js_name = "totalFiles")]
pub fn set_total_files(&mut self, value: usize) {
self.total_files = value;
}
#[wasm_bindgen(getter, js_name = "totalSizeMb")]
pub fn total_size_mb(&self) -> f64 {
self.total_size_mb
}
#[wasm_bindgen(setter, js_name = "totalSizeMb")]
pub fn set_total_size_mb(&mut self, value: f64) {
self.total_size_mb = value;
}
#[wasm_bindgen(getter, js_name = "availableSpaceMb")]
pub fn available_space_mb(&self) -> f64 {
self.available_space_mb
}
#[wasm_bindgen(setter, js_name = "availableSpaceMb")]
pub fn set_available_space_mb(&mut self, value: f64) {
self.available_space_mb = value;
}
#[wasm_bindgen(getter, js_name = "oldestFileAgeDays")]
pub fn oldest_file_age_days(&self) -> f64 {
self.oldest_file_age_days
}
#[wasm_bindgen(setter, js_name = "oldestFileAgeDays")]
pub fn set_oldest_file_age_days(&mut self, value: f64) {
self.oldest_file_age_days = value;
}
#[wasm_bindgen(getter, js_name = "newestFileAgeDays")]
pub fn newest_file_age_days(&self) -> f64 {
self.newest_file_age_days
}
#[wasm_bindgen(setter, js_name = "newestFileAgeDays")]
pub fn set_newest_file_age_days(&mut self, value: f64) {
self.newest_file_age_days = value;
}
}
/// Hardware acceleration configuration for ONNX Runtime models.
///
/// Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used
/// for inference in layout detection and embedding generation.
///
/// Both fields are private and reached from JavaScript through the accessors
/// on the `#[wasm_bindgen] impl` block. The `provider` getter returns the
/// provider's API string, while the setter accepts the enum value.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmAccelerationConfig {
// Selected execution provider; defaults to the enum's `Default` value.
provider: WasmExecutionProviderType,
// Exposed to JavaScript as `deviceId`; defaults to 0.
device_id: u32,
}
#[wasm_bindgen]
impl WasmAccelerationConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(provider: Option<WasmExecutionProviderType>, deviceId: Option<u32>) -> WasmAccelerationConfig {
WasmAccelerationConfig {
provider: provider.unwrap_or_default(),
device_id: deviceId.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmAccelerationConfig {
<WasmAccelerationConfig as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn provider(&self) -> String {
self.provider.to_api_str().to_owned()
}
#[wasm_bindgen(setter)]
pub fn set_provider(&mut self, value: WasmExecutionProviderType) {
self.provider = value;
}
#[wasm_bindgen(getter, js_name = "deviceId")]
pub fn device_id(&self) -> u32 {
self.device_id
}
#[wasm_bindgen(setter, js_name = "deviceId")]
pub fn set_device_id(&mut self, value: u32) {
self.device_id = value;
}
}
/// Cross-extractor content filtering configuration.
///
/// Controls whether "furniture" content (headers, footers, page numbers,
/// watermarks, repeating text) is included in or stripped from extraction
/// results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.)
/// with format-specific implementation.
///
/// When `None` on `ExtractionConfig`, each extractor uses its current
/// default behavior unchanged.
///
/// `Default` is implemented by hand rather than derived: a derived impl
/// would set `strip_repeating_text` to `false`, while the JavaScript
/// constructor falls back to `true` when the argument is omitted. The manual
/// impl keeps `Default::default()`, `unwrap_or_default()` and
/// `..Default::default()` in agreement with the constructor.
#[derive(Clone)]
#[wasm_bindgen]
pub struct WasmContentFilterConfig {
    // Exposed to JavaScript as `includeHeaders`; constructor fallback `false`.
    include_headers: bool,
    // Exposed to JavaScript as `includeFooters`; constructor fallback `false`.
    include_footers: bool,
    // Exposed to JavaScript as `stripRepeatingText`; constructor fallback `true`.
    strip_repeating_text: bool,
    // Exposed to JavaScript as `includeWatermarks`; constructor fallback `false`.
    include_watermarks: bool,
}

impl Default for WasmContentFilterConfig {
    /// Returns the same values the constructor produces when every argument
    /// is omitted.
    fn default() -> Self {
        Self {
            include_headers: false,
            include_footers: false,
            strip_repeating_text: true,
            include_watermarks: false,
        }
    }
}
#[wasm_bindgen]
impl WasmContentFilterConfig {
    /// JavaScript constructor. Omitted arguments fall back to:
    /// `includeHeaders = false`, `includeFooters = false`,
    /// `stripRepeatingText = true`, `includeWatermarks = false`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        includeHeaders: Option<bool>,
        includeFooters: Option<bool>,
        stripRepeatingText: Option<bool>,
        includeWatermarks: Option<bool>,
    ) -> WasmContentFilterConfig {
        let include_headers = includeHeaders.unwrap_or(false);
        let include_footers = includeFooters.unwrap_or(false);
        let strip_repeating_text = stripRepeatingText.unwrap_or(true);
        let include_watermarks = includeWatermarks.unwrap_or(false);
        Self {
            include_headers,
            include_footers,
            strip_repeating_text,
            include_watermarks,
        }
    }

    /// Reads `includeHeaders`.
    #[wasm_bindgen(getter, js_name = "includeHeaders")]
    pub fn include_headers(&self) -> bool {
        self.include_headers
    }

    /// Overwrites `includeHeaders`.
    #[wasm_bindgen(setter, js_name = "includeHeaders")]
    pub fn set_include_headers(&mut self, value: bool) {
        self.include_headers = value;
    }

    /// Reads `includeFooters`.
    #[wasm_bindgen(getter, js_name = "includeFooters")]
    pub fn include_footers(&self) -> bool {
        self.include_footers
    }

    /// Overwrites `includeFooters`.
    #[wasm_bindgen(setter, js_name = "includeFooters")]
    pub fn set_include_footers(&mut self, value: bool) {
        self.include_footers = value;
    }

    /// Reads `stripRepeatingText`.
    #[wasm_bindgen(getter, js_name = "stripRepeatingText")]
    pub fn strip_repeating_text(&self) -> bool {
        self.strip_repeating_text
    }

    /// Overwrites `stripRepeatingText`.
    #[wasm_bindgen(setter, js_name = "stripRepeatingText")]
    pub fn set_strip_repeating_text(&mut self, value: bool) {
        self.strip_repeating_text = value;
    }

    /// Reads `includeWatermarks`.
    #[wasm_bindgen(getter, js_name = "includeWatermarks")]
    pub fn include_watermarks(&self) -> bool {
        self.include_watermarks
    }

    /// Overwrites `includeWatermarks`.
    #[wasm_bindgen(setter, js_name = "includeWatermarks")]
    pub fn set_include_watermarks(&mut self, value: bool) {
        self.include_watermarks = value;
    }

    /// Static factory that converts the core crate's default
    /// `kreuzberg::ContentFilterConfig` into this wrapper type.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmContentFilterConfig {
        let core_defaults = kreuzberg::ContentFilterConfig::default();
        core_defaults.into()
    }
}
/// Configuration for email extraction.
///
/// Holds a single optional setting; the derived `Default` leaves it `None`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailConfig {
// Exposed to JavaScript as `msgFallbackCodepage`.
// NOTE(review): presumably the code page used when a `.msg` file does not
// declare one — confirm against the core crate.
msg_fallback_codepage: Option<u32>,
}
#[wasm_bindgen]
impl WasmEmailConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(msgFallbackCodepage: Option<u32>) -> WasmEmailConfig {
WasmEmailConfig {
msg_fallback_codepage: msgFallbackCodepage,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmEmailConfig {
<WasmEmailConfig as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "msgFallbackCodepage")]
pub fn msg_fallback_codepage(&self) -> Option<u32> {
self.msg_fallback_codepage
}
#[wasm_bindgen(setter, js_name = "msgFallbackCodepage")]
pub fn set_msg_fallback_codepage(&mut self, value: Option<u32>) {
self.msg_fallback_codepage = value;
}
}
/// Main extraction configuration.
///
/// This struct contains all configuration options for the extraction process.
/// It can be loaded from TOML, YAML, or JSON files, or created programmatically.
///
/// `Default` is implemented by hand rather than derived: a derived impl
/// would report `use_cache` and `enable_quality_processing` as `false`,
/// while the JavaScript constructor falls back to `true` for both when the
/// argument is omitted. The manual impl keeps `Default::default()`,
/// `unwrap_or_default()` and `..Default::default()` in agreement with the
/// constructor.
#[derive(Clone)]
#[wasm_bindgen]
pub struct WasmExtractionConfig {
    use_cache: bool,
    enable_quality_processing: bool,
    ocr: Option<WasmOcrConfig>,
    force_ocr: bool,
    force_ocr_pages: Option<Vec<u32>>,
    disable_ocr: bool,
    chunking: Option<WasmChunkingConfig>,
    content_filter: Option<WasmContentFilterConfig>,
    images: Option<WasmImageExtractionConfig>,
    token_reduction: Option<WasmTokenReductionOptions>,
    language_detection: Option<WasmLanguageDetectionConfig>,
    pages: Option<WasmPageConfig>,
    postprocessor: Option<WasmPostProcessorConfig>,
    // Not a constructor argument; assigned through the `htmlOptions` setter.
    html_options: Option<String>,
    extraction_timeout_secs: Option<u64>,
    max_concurrent_extractions: Option<usize>,
    result_format: WasmResultFormat,
    security_limits: Option<WasmSecurityLimits>,
    max_embedded_file_bytes: Option<u64>,
    output_format: WasmOutputFormat,
    use_layout_for_markdown: bool,
    include_document_structure: bool,
    acceleration: Option<WasmAccelerationConfig>,
    cache_namespace: Option<String>,
    cache_ttl_secs: Option<u64>,
    email: Option<WasmEmailConfig>,
    concurrency: Option<String>,
    max_archive_depth: usize,
    structured_extraction: Option<WasmStructuredExtractionConfig>,
    cancel_token: Option<String>,
}

impl Default for WasmExtractionConfig {
    /// Returns the same values the constructor produces when every argument
    /// is omitted.
    fn default() -> Self {
        Self {
            use_cache: true,
            enable_quality_processing: true,
            ocr: None,
            force_ocr: false,
            force_ocr_pages: None,
            disable_ocr: false,
            chunking: None,
            content_filter: None,
            images: None,
            token_reduction: None,
            language_detection: None,
            pages: None,
            postprocessor: None,
            html_options: None,
            extraction_timeout_secs: None,
            max_concurrent_extractions: None,
            result_format: Default::default(),
            security_limits: None,
            max_embedded_file_bytes: None,
            output_format: Default::default(),
            use_layout_for_markdown: false,
            include_document_structure: false,
            acceleration: None,
            cache_namespace: None,
            cache_ttl_secs: None,
            email: None,
            concurrency: None,
            // NOTE(review): 0 mirrors the constructor's `unwrap_or_default()`;
            // confirm it matches `kreuzberg::ExtractionConfig::default()`.
            max_archive_depth: 0,
            structured_extraction: None,
            cancel_token: None,
        }
    }
}
#[wasm_bindgen]
impl WasmExtractionConfig {
    /// JavaScript constructor.
    ///
    /// Omitted flags fall back to: `useCache = true`,
    /// `enableQualityProcessing = true`, `forceOcr = false`,
    /// `disableOcr = false`, `useLayoutForMarkdown = false`,
    /// `includeDocumentStructure = false`. `resultFormat`, `outputFormat` and
    /// `maxArchiveDepth` fall back to their type's `Default` value. All other
    /// arguments are stored as given. `htmlOptions` is not a constructor
    /// argument: it starts out as `None` and is assigned through its setter.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        useCache: Option<bool>,
        enableQualityProcessing: Option<bool>,
        forceOcr: Option<bool>,
        disableOcr: Option<bool>,
        resultFormat: Option<WasmResultFormat>,
        outputFormat: Option<WasmOutputFormat>,
        useLayoutForMarkdown: Option<bool>,
        includeDocumentStructure: Option<bool>,
        maxArchiveDepth: Option<usize>,
        ocr: Option<WasmOcrConfig>,
        forceOcrPages: Option<Vec<u32>>,
        chunking: Option<WasmChunkingConfig>,
        contentFilter: Option<WasmContentFilterConfig>,
        images: Option<WasmImageExtractionConfig>,
        tokenReduction: Option<WasmTokenReductionOptions>,
        languageDetection: Option<WasmLanguageDetectionConfig>,
        pages: Option<WasmPageConfig>,
        postprocessor: Option<WasmPostProcessorConfig>,
        extractionTimeoutSecs: Option<u64>,
        maxConcurrentExtractions: Option<usize>,
        securityLimits: Option<WasmSecurityLimits>,
        maxEmbeddedFileBytes: Option<u64>,
        acceleration: Option<WasmAccelerationConfig>,
        cacheNamespace: Option<String>,
        cacheTtlSecs: Option<u64>,
        email: Option<WasmEmailConfig>,
        concurrency: Option<String>,
        structuredExtraction: Option<WasmStructuredExtractionConfig>,
        cancelToken: Option<String>,
    ) -> WasmExtractionConfig {
        Self {
            // Flags with an explicit fallback.
            use_cache: useCache.unwrap_or(true),
            enable_quality_processing: enableQualityProcessing.unwrap_or(true),
            force_ocr: forceOcr.unwrap_or(false),
            disable_ocr: disableOcr.unwrap_or(false),
            use_layout_for_markdown: useLayoutForMarkdown.unwrap_or(false),
            include_document_structure: includeDocumentStructure.unwrap_or(false),
            // Values that fall back to their type's `Default`.
            result_format: resultFormat.unwrap_or_default(),
            output_format: outputFormat.unwrap_or_default(),
            max_archive_depth: maxArchiveDepth.unwrap_or_default(),
            // Not settable from the constructor.
            html_options: None,
            // Everything else is stored exactly as passed.
            ocr,
            force_ocr_pages: forceOcrPages,
            chunking,
            content_filter: contentFilter,
            images,
            token_reduction: tokenReduction,
            language_detection: languageDetection,
            pages,
            postprocessor,
            extraction_timeout_secs: extractionTimeoutSecs,
            max_concurrent_extractions: maxConcurrentExtractions,
            security_limits: securityLimits,
            max_embedded_file_bytes: maxEmbeddedFileBytes,
            acceleration,
            cache_namespace: cacheNamespace,
            cache_ttl_secs: cacheTtlSecs,
            email,
            concurrency,
            structured_extraction: structuredExtraction,
            cancel_token: cancelToken,
        }
    }

    /// Reads `useCache`.
    #[wasm_bindgen(getter, js_name = "useCache")]
    pub fn use_cache(&self) -> bool {
        self.use_cache
    }

    /// Overwrites `useCache`.
    #[wasm_bindgen(setter, js_name = "useCache")]
    pub fn set_use_cache(&mut self, value: bool) {
        self.use_cache = value;
    }

    /// Reads `enableQualityProcessing`.
    #[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
    pub fn enable_quality_processing(&self) -> bool {
        self.enable_quality_processing
    }

    /// Overwrites `enableQualityProcessing`.
    #[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
    pub fn set_enable_quality_processing(&mut self, value: bool) {
        self.enable_quality_processing = value;
    }

    /// Returns a clone of `ocr`.
    #[wasm_bindgen(getter)]
    pub fn ocr(&self) -> Option<WasmOcrConfig> {
        self.ocr.clone()
    }

    /// Overwrites `ocr`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_ocr(&mut self, value: Option<WasmOcrConfig>) {
        self.ocr = value;
    }

    /// Reads `forceOcr`.
    #[wasm_bindgen(getter, js_name = "forceOcr")]
    pub fn force_ocr(&self) -> bool {
        self.force_ocr
    }

    /// Overwrites `forceOcr`.
    #[wasm_bindgen(setter, js_name = "forceOcr")]
    pub fn set_force_ocr(&mut self, value: bool) {
        self.force_ocr = value;
    }

    /// Returns a clone of `forceOcrPages`.
    #[wasm_bindgen(getter, js_name = "forceOcrPages")]
    pub fn force_ocr_pages(&self) -> Option<Vec<u32>> {
        self.force_ocr_pages.clone()
    }

    /// Overwrites `forceOcrPages`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "forceOcrPages")]
    pub fn set_force_ocr_pages(&mut self, value: Option<Vec<u32>>) {
        self.force_ocr_pages = value;
    }

    /// Reads `disableOcr`.
    #[wasm_bindgen(getter, js_name = "disableOcr")]
    pub fn disable_ocr(&self) -> bool {
        self.disable_ocr
    }

    /// Overwrites `disableOcr`.
    #[wasm_bindgen(setter, js_name = "disableOcr")]
    pub fn set_disable_ocr(&mut self, value: bool) {
        self.disable_ocr = value;
    }

    /// Returns a clone of `chunking`.
    #[wasm_bindgen(getter)]
    pub fn chunking(&self) -> Option<WasmChunkingConfig> {
        self.chunking.clone()
    }

    /// Overwrites `chunking`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_chunking(&mut self, value: Option<WasmChunkingConfig>) {
        self.chunking = value;
    }

    /// Returns a clone of `contentFilter`.
    #[wasm_bindgen(getter, js_name = "contentFilter")]
    pub fn content_filter(&self) -> Option<WasmContentFilterConfig> {
        self.content_filter.clone()
    }

    /// Overwrites `contentFilter`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "contentFilter")]
    pub fn set_content_filter(&mut self, value: Option<WasmContentFilterConfig>) {
        self.content_filter = value;
    }

    /// Returns a clone of `images`.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Option<WasmImageExtractionConfig> {
        self.images.clone()
    }

    /// Overwrites `images`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Option<WasmImageExtractionConfig>) {
        self.images = value;
    }

    /// Returns a clone of `tokenReduction`.
    #[wasm_bindgen(getter, js_name = "tokenReduction")]
    pub fn token_reduction(&self) -> Option<WasmTokenReductionOptions> {
        self.token_reduction.clone()
    }

    /// Overwrites `tokenReduction`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "tokenReduction")]
    pub fn set_token_reduction(&mut self, value: Option<WasmTokenReductionOptions>) {
        self.token_reduction = value;
    }

    /// Returns a clone of `languageDetection`.
    #[wasm_bindgen(getter, js_name = "languageDetection")]
    pub fn language_detection(&self) -> Option<WasmLanguageDetectionConfig> {
        self.language_detection.clone()
    }

    /// Overwrites `languageDetection`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "languageDetection")]
    pub fn set_language_detection(&mut self, value: Option<WasmLanguageDetectionConfig>) {
        self.language_detection = value;
    }

    /// Returns a clone of `pages`.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option<WasmPageConfig> {
        self.pages.clone()
    }

    /// Overwrites `pages`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option<WasmPageConfig>) {
        self.pages = value;
    }

    /// Returns a clone of `postprocessor`.
    #[wasm_bindgen(getter)]
    pub fn postprocessor(&self) -> Option<WasmPostProcessorConfig> {
        self.postprocessor.clone()
    }

    /// Overwrites `postprocessor`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_postprocessor(&mut self, value: Option<WasmPostProcessorConfig>) {
        self.postprocessor = value;
    }

    /// Returns a clone of `htmlOptions`.
    #[wasm_bindgen(getter, js_name = "htmlOptions")]
    pub fn html_options(&self) -> Option<String> {
        self.html_options.clone()
    }

    /// Overwrites `htmlOptions`; this setter is the only way to assign it
    /// from JavaScript, since the constructor does not take it.
    #[wasm_bindgen(setter, js_name = "htmlOptions")]
    pub fn set_html_options(&mut self, value: Option<String>) {
        self.html_options = value;
    }

    /// Reads `extractionTimeoutSecs`.
    #[wasm_bindgen(getter, js_name = "extractionTimeoutSecs")]
    pub fn extraction_timeout_secs(&self) -> Option<u64> {
        self.extraction_timeout_secs
    }

    /// Overwrites `extractionTimeoutSecs`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "extractionTimeoutSecs")]
    pub fn set_extraction_timeout_secs(&mut self, value: Option<u64>) {
        self.extraction_timeout_secs = value;
    }

    /// Reads `maxConcurrentExtractions`.
    #[wasm_bindgen(getter, js_name = "maxConcurrentExtractions")]
    pub fn max_concurrent_extractions(&self) -> Option<usize> {
        self.max_concurrent_extractions
    }

    /// Overwrites `maxConcurrentExtractions`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "maxConcurrentExtractions")]
    pub fn set_max_concurrent_extractions(&mut self, value: Option<usize>) {
        self.max_concurrent_extractions = value;
    }

    /// Reads `resultFormat` as an owned copy of its API string.
    #[wasm_bindgen(getter, js_name = "resultFormat")]
    pub fn result_format(&self) -> String {
        let api_name = self.result_format.to_api_str();
        api_name.to_owned()
    }

    /// Overwrites `resultFormat` with the given enum value.
    #[wasm_bindgen(setter, js_name = "resultFormat")]
    pub fn set_result_format(&mut self, value: WasmResultFormat) {
        self.result_format = value;
    }

    /// Returns a clone of `securityLimits`.
    #[wasm_bindgen(getter, js_name = "securityLimits")]
    pub fn security_limits(&self) -> Option<WasmSecurityLimits> {
        self.security_limits.clone()
    }

    /// Overwrites `securityLimits`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "securityLimits")]
    pub fn set_security_limits(&mut self, value: Option<WasmSecurityLimits>) {
        self.security_limits = value;
    }

    /// Reads `maxEmbeddedFileBytes`.
    #[wasm_bindgen(getter, js_name = "maxEmbeddedFileBytes")]
    pub fn max_embedded_file_bytes(&self) -> Option<u64> {
        self.max_embedded_file_bytes
    }

    /// Overwrites `maxEmbeddedFileBytes`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "maxEmbeddedFileBytes")]
    pub fn set_max_embedded_file_bytes(&mut self, value: Option<u64>) {
        self.max_embedded_file_bytes = value;
    }

    /// Reads `outputFormat` as an owned copy of its API string.
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        let api_name = self.output_format.to_api_str();
        api_name.to_owned()
    }

    /// Overwrites `outputFormat` with the given enum value.
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: WasmOutputFormat) {
        self.output_format = value;
    }

    /// Reads `useLayoutForMarkdown`.
    #[wasm_bindgen(getter, js_name = "useLayoutForMarkdown")]
    pub fn use_layout_for_markdown(&self) -> bool {
        self.use_layout_for_markdown
    }

    /// Overwrites `useLayoutForMarkdown`.
    #[wasm_bindgen(setter, js_name = "useLayoutForMarkdown")]
    pub fn set_use_layout_for_markdown(&mut self, value: bool) {
        self.use_layout_for_markdown = value;
    }

    /// Reads `includeDocumentStructure`.
    #[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
    pub fn include_document_structure(&self) -> bool {
        self.include_document_structure
    }

    /// Overwrites `includeDocumentStructure`.
    #[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
    pub fn set_include_document_structure(&mut self, value: bool) {
        self.include_document_structure = value;
    }

    /// Returns a clone of `acceleration`.
    #[wasm_bindgen(getter)]
    pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
        self.acceleration.clone()
    }

    /// Overwrites `acceleration`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
        self.acceleration = value;
    }

    /// Returns a clone of `cacheNamespace`.
    #[wasm_bindgen(getter, js_name = "cacheNamespace")]
    pub fn cache_namespace(&self) -> Option<String> {
        self.cache_namespace.clone()
    }

    /// Overwrites `cacheNamespace`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "cacheNamespace")]
    pub fn set_cache_namespace(&mut self, value: Option<String>) {
        self.cache_namespace = value;
    }

    /// Reads `cacheTtlSecs`.
    #[wasm_bindgen(getter, js_name = "cacheTtlSecs")]
    pub fn cache_ttl_secs(&self) -> Option<u64> {
        self.cache_ttl_secs
    }

    /// Overwrites `cacheTtlSecs`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "cacheTtlSecs")]
    pub fn set_cache_ttl_secs(&mut self, value: Option<u64>) {
        self.cache_ttl_secs = value;
    }

    /// Returns a clone of `email`.
    #[wasm_bindgen(getter)]
    pub fn email(&self) -> Option<WasmEmailConfig> {
        self.email.clone()
    }

    /// Overwrites `email`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_email(&mut self, value: Option<WasmEmailConfig>) {
        self.email = value;
    }

    /// Returns a clone of `concurrency`.
    #[wasm_bindgen(getter)]
    pub fn concurrency(&self) -> Option<String> {
        self.concurrency.clone()
    }

    /// Overwrites `concurrency`; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_concurrency(&mut self, value: Option<String>) {
        self.concurrency = value;
    }

    /// Reads `maxArchiveDepth`.
    #[wasm_bindgen(getter, js_name = "maxArchiveDepth")]
    pub fn max_archive_depth(&self) -> usize {
        self.max_archive_depth
    }

    /// Overwrites `maxArchiveDepth`.
    #[wasm_bindgen(setter, js_name = "maxArchiveDepth")]
    pub fn set_max_archive_depth(&mut self, value: usize) {
        self.max_archive_depth = value;
    }

    /// Returns a clone of `structuredExtraction`.
    #[wasm_bindgen(getter, js_name = "structuredExtraction")]
    pub fn structured_extraction(&self) -> Option<WasmStructuredExtractionConfig> {
        self.structured_extraction.clone()
    }

    /// Overwrites `structuredExtraction`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "structuredExtraction")]
    pub fn set_structured_extraction(&mut self, value: Option<WasmStructuredExtractionConfig>) {
        self.structured_extraction = value;
    }

    /// Returns a clone of `cancelToken`.
    #[wasm_bindgen(getter, js_name = "cancelToken")]
    pub fn cancel_token(&self) -> Option<String> {
        self.cancel_token.clone()
    }

    /// Overwrites `cancelToken`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "cancelToken")]
    pub fn set_cancel_token(&mut self, value: Option<String>) {
        self.cancel_token = value;
    }

    /// Static factory that converts the core crate's default
    /// `kreuzberg::ExtractionConfig` into this wrapper type.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmExtractionConfig {
        let core_defaults = kreuzberg::ExtractionConfig::default();
        core_defaults.into()
    }

    /// Reports whether image processing is needed for this configuration.
    ///
    /// The answer comes from the core crate: a clone of `self` is converted
    /// into `kreuzberg::ExtractionConfig` and its `needs_image_processing`
    /// method is called. As documented for that method, it returns `true` if
    /// either OCR is enabled or image extraction is configured, and `false`
    /// if both are disabled, so that text-only extraction workflows can skip
    /// unnecessary image decompression.
    ///
    /// # Optimization Impact
    /// For text-only extractions (no OCR, no image extraction), skipping image
    /// decompression can improve CPU utilization by 5-10% by avoiding wasteful
    /// image I/O and processing when results won't be used.
    #[wasm_bindgen(js_name = "needsImageProcessing")]
    pub fn needs_image_processing(&self) -> bool {
        let core_config = kreuzberg::ExtractionConfig::from(self.clone());
        core_config.needs_image_processing()
    }
}
/// Per-file extraction configuration overrides for batch processing.
///
/// All fields are `Option<T>` — `None` means "use the batch-level default."
/// This type is used with `batch_extract_files` and
/// `batch_extract_bytes` to allow heterogeneous
/// extraction settings within a single batch.
///
/// # Excluded Fields
///
/// The following `ExtractionConfig` fields are batch-level only and
/// cannot be overridden per file:
/// - `max_concurrent_extractions` — controls batch parallelism
/// - `use_cache` — global caching policy
/// - `acceleration` — shared ONNX execution provider
/// - `security_limits` — global archive security policy
///
/// The derived `Default` leaves every field `None`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFileExtractionConfig {
enable_quality_processing: Option<bool>,
ocr: Option<WasmOcrConfig>,
force_ocr: Option<bool>,
force_ocr_pages: Option<Vec<u32>>,
disable_ocr: Option<bool>,
chunking: Option<WasmChunkingConfig>,
content_filter: Option<WasmContentFilterConfig>,
images: Option<WasmImageExtractionConfig>,
token_reduction: Option<WasmTokenReductionOptions>,
language_detection: Option<WasmLanguageDetectionConfig>,
pages: Option<WasmPageConfig>,
postprocessor: Option<WasmPostProcessorConfig>,
// Not a constructor argument; assigned through the `htmlOptions` setter.
html_options: Option<String>,
result_format: Option<WasmResultFormat>,
output_format: Option<WasmOutputFormat>,
include_document_structure: Option<bool>,
// Exposed to JavaScript as `timeoutSecs`.
timeout_secs: Option<u64>,
structured_extraction: Option<WasmStructuredExtractionConfig>,
}
#[wasm_bindgen]
impl WasmFileExtractionConfig {
    /// JavaScript constructor. Every argument is optional and stored exactly
    /// as passed. `htmlOptions` is not a constructor argument: it starts out
    /// as `None` and is assigned through its setter.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enableQualityProcessing: Option<bool>,
        ocr: Option<WasmOcrConfig>,
        forceOcr: Option<bool>,
        forceOcrPages: Option<Vec<u32>>,
        disableOcr: Option<bool>,
        chunking: Option<WasmChunkingConfig>,
        contentFilter: Option<WasmContentFilterConfig>,
        images: Option<WasmImageExtractionConfig>,
        tokenReduction: Option<WasmTokenReductionOptions>,
        languageDetection: Option<WasmLanguageDetectionConfig>,
        pages: Option<WasmPageConfig>,
        postprocessor: Option<WasmPostProcessorConfig>,
        resultFormat: Option<WasmResultFormat>,
        outputFormat: Option<WasmOutputFormat>,
        includeDocumentStructure: Option<bool>,
        timeoutSecs: Option<u64>,
        structuredExtraction: Option<WasmStructuredExtractionConfig>,
    ) -> WasmFileExtractionConfig {
        Self {
            // Not settable from the constructor.
            html_options: None,
            enable_quality_processing: enableQualityProcessing,
            ocr,
            force_ocr: forceOcr,
            force_ocr_pages: forceOcrPages,
            disable_ocr: disableOcr,
            chunking,
            content_filter: contentFilter,
            images,
            token_reduction: tokenReduction,
            language_detection: languageDetection,
            pages,
            postprocessor,
            result_format: resultFormat,
            output_format: outputFormat,
            include_document_structure: includeDocumentStructure,
            timeout_secs: timeoutSecs,
            structured_extraction: structuredExtraction,
        }
    }

    /// Static factory returning the trait-level `Default` value (all `None`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFileExtractionConfig {
        // Explicit trait path: `Self::default()` would call this function again.
        <Self as Default>::default()
    }

    /// Reads `enableQualityProcessing`.
    #[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
    pub fn enable_quality_processing(&self) -> Option<bool> {
        self.enable_quality_processing
    }

    /// Overwrites `enableQualityProcessing`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
    pub fn set_enable_quality_processing(&mut self, value: Option<bool>) {
        self.enable_quality_processing = value;
    }

    /// Returns a clone of `ocr`.
    #[wasm_bindgen(getter)]
    pub fn ocr(&self) -> Option<WasmOcrConfig> {
        self.ocr.clone()
    }

    /// Overwrites `ocr`; `None` clears the override.
    #[wasm_bindgen(setter)]
    pub fn set_ocr(&mut self, value: Option<WasmOcrConfig>) {
        self.ocr = value;
    }

    /// Reads `forceOcr`.
    #[wasm_bindgen(getter, js_name = "forceOcr")]
    pub fn force_ocr(&self) -> Option<bool> {
        self.force_ocr
    }

    /// Overwrites `forceOcr`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "forceOcr")]
    pub fn set_force_ocr(&mut self, value: Option<bool>) {
        self.force_ocr = value;
    }

    /// Returns a clone of `forceOcrPages`.
    #[wasm_bindgen(getter, js_name = "forceOcrPages")]
    pub fn force_ocr_pages(&self) -> Option<Vec<u32>> {
        self.force_ocr_pages.clone()
    }

    /// Overwrites `forceOcrPages`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "forceOcrPages")]
    pub fn set_force_ocr_pages(&mut self, value: Option<Vec<u32>>) {
        self.force_ocr_pages = value;
    }

    /// Reads `disableOcr`.
    #[wasm_bindgen(getter, js_name = "disableOcr")]
    pub fn disable_ocr(&self) -> Option<bool> {
        self.disable_ocr
    }

    /// Overwrites `disableOcr`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "disableOcr")]
    pub fn set_disable_ocr(&mut self, value: Option<bool>) {
        self.disable_ocr = value;
    }

    /// Returns a clone of `chunking`.
    #[wasm_bindgen(getter)]
    pub fn chunking(&self) -> Option<WasmChunkingConfig> {
        self.chunking.clone()
    }

    /// Overwrites `chunking`; `None` clears the override.
    #[wasm_bindgen(setter)]
    pub fn set_chunking(&mut self, value: Option<WasmChunkingConfig>) {
        self.chunking = value;
    }

    /// Returns a clone of `contentFilter`.
    #[wasm_bindgen(getter, js_name = "contentFilter")]
    pub fn content_filter(&self) -> Option<WasmContentFilterConfig> {
        self.content_filter.clone()
    }

    /// Overwrites `contentFilter`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "contentFilter")]
    pub fn set_content_filter(&mut self, value: Option<WasmContentFilterConfig>) {
        self.content_filter = value;
    }

    /// Returns a clone of `images`.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Option<WasmImageExtractionConfig> {
        self.images.clone()
    }

    /// Overwrites `images`; `None` clears the override.
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Option<WasmImageExtractionConfig>) {
        self.images = value;
    }

    /// Returns a clone of `tokenReduction`.
    #[wasm_bindgen(getter, js_name = "tokenReduction")]
    pub fn token_reduction(&self) -> Option<WasmTokenReductionOptions> {
        self.token_reduction.clone()
    }

    /// Overwrites `tokenReduction`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "tokenReduction")]
    pub fn set_token_reduction(&mut self, value: Option<WasmTokenReductionOptions>) {
        self.token_reduction = value;
    }

    /// Returns a clone of `languageDetection`.
    #[wasm_bindgen(getter, js_name = "languageDetection")]
    pub fn language_detection(&self) -> Option<WasmLanguageDetectionConfig> {
        self.language_detection.clone()
    }

    /// Overwrites `languageDetection`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "languageDetection")]
    pub fn set_language_detection(&mut self, value: Option<WasmLanguageDetectionConfig>) {
        self.language_detection = value;
    }

    /// Returns a clone of `pages`.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option<WasmPageConfig> {
        self.pages.clone()
    }

    /// Overwrites `pages`; `None` clears the override.
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option<WasmPageConfig>) {
        self.pages = value;
    }

    /// Returns a clone of `postprocessor`.
    #[wasm_bindgen(getter)]
    pub fn postprocessor(&self) -> Option<WasmPostProcessorConfig> {
        self.postprocessor.clone()
    }

    /// Overwrites `postprocessor`; `None` clears the override.
    #[wasm_bindgen(setter)]
    pub fn set_postprocessor(&mut self, value: Option<WasmPostProcessorConfig>) {
        self.postprocessor = value;
    }

    /// Returns a clone of `htmlOptions`.
    #[wasm_bindgen(getter, js_name = "htmlOptions")]
    pub fn html_options(&self) -> Option<String> {
        self.html_options.clone()
    }

    /// Overwrites `htmlOptions`; this setter is the only way to assign it
    /// from JavaScript, since the constructor does not take it.
    #[wasm_bindgen(setter, js_name = "htmlOptions")]
    pub fn set_html_options(&mut self, value: Option<String>) {
        self.html_options = value;
    }

    /// Reads `resultFormat` as an owned copy of its API string, or `None`
    /// when no override is set.
    #[wasm_bindgen(getter, js_name = "resultFormat")]
    pub fn result_format(&self) -> Option<String> {
        self.result_format.map(|format| format.to_api_str().to_owned())
    }

    /// Overwrites `resultFormat`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "resultFormat")]
    pub fn set_result_format(&mut self, value: Option<WasmResultFormat>) {
        self.result_format = value;
    }

    /// Reads `outputFormat` as an owned copy of its API string, or `None`
    /// when no override is set.
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> Option<String> {
        self.output_format.map(|format| format.to_api_str().to_owned())
    }

    /// Overwrites `outputFormat`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: Option<WasmOutputFormat>) {
        self.output_format = value;
    }

    /// Reads `includeDocumentStructure`.
    #[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
    pub fn include_document_structure(&self) -> Option<bool> {
        self.include_document_structure
    }

    /// Overwrites `includeDocumentStructure`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
    pub fn set_include_document_structure(&mut self, value: Option<bool>) {
        self.include_document_structure = value;
    }

    /// Reads `timeoutSecs`.
    #[wasm_bindgen(getter, js_name = "timeoutSecs")]
    pub fn timeout_secs(&self) -> Option<u64> {
        self.timeout_secs
    }

    /// Overwrites `timeoutSecs`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "timeoutSecs")]
    pub fn set_timeout_secs(&mut self, value: Option<u64>) {
        self.timeout_secs = value;
    }

    /// Returns a clone of `structuredExtraction`.
    #[wasm_bindgen(getter, js_name = "structuredExtraction")]
    pub fn structured_extraction(&self) -> Option<WasmStructuredExtractionConfig> {
        self.structured_extraction.clone()
    }

    /// Overwrites `structuredExtraction`; `None` clears the override.
    #[wasm_bindgen(setter, js_name = "structuredExtraction")]
    pub fn set_structured_extraction(&mut self, value: Option<WasmStructuredExtractionConfig>) {
        self.structured_extraction = value;
    }
}
/// Batch item for byte array extraction.
///
/// Used with `batch_extract_bytes` and `batch_extract_bytes_sync`
/// to represent a single item in a batch extraction job.
///
/// The derived `Default` yields empty `content`, an empty `mime_type` and no
/// per-item `config`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBatchBytesItem {
// Raw bytes of the item; the getter hands JavaScript a copy.
content: Vec<u8>,
// Exposed to JavaScript as `mimeType`.
mime_type: String,
// Optional per-item overrides.
config: Option<WasmFileExtractionConfig>,
}
#[wasm_bindgen]
impl WasmBatchBytesItem {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(content: Vec<u8>, mimeType: String, config: Option<WasmFileExtractionConfig>) -> WasmBatchBytesItem {
WasmBatchBytesItem {
content,
mime_type: mimeType,
config,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmBatchBytesItem {
<WasmBatchBytesItem as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> Vec<u8> {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: Vec<u8>) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "mimeType")]
pub fn mime_type(&self) -> String {
self.mime_type.clone()
}
#[wasm_bindgen(setter, js_name = "mimeType")]
pub fn set_mime_type(&mut self, value: String) {
self.mime_type = value;
}
#[wasm_bindgen(getter)]
pub fn config(&self) -> Option<WasmFileExtractionConfig> {
self.config.clone()
}
#[wasm_bindgen(setter)]
pub fn set_config(&mut self, value: Option<WasmFileExtractionConfig>) {
self.config = value;
}
}
/// Batch item for file extraction.
///
/// Used with `batch_extract_files` and `batch_extract_files_sync`
/// to represent a single file in a batch extraction job.
///
/// The derived `Default` yields an empty `path` and no per-item `config`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBatchFileItem {
// Path of the file, stored as a plain string.
path: String,
// Optional per-item overrides.
config: Option<WasmFileExtractionConfig>,
}
#[wasm_bindgen]
impl WasmBatchFileItem {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(path: String, config: Option<WasmFileExtractionConfig>) -> WasmBatchFileItem {
WasmBatchFileItem { path, config }
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmBatchFileItem {
<WasmBatchFileItem as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn path(&self) -> String {
self.path.clone()
}
#[wasm_bindgen(setter)]
pub fn set_path(&mut self, value: String) {
self.path = value;
}
#[wasm_bindgen(getter)]
pub fn config(&self) -> Option<WasmFileExtractionConfig> {
self.config.clone()
}
#[wasm_bindgen(setter)]
pub fn set_config(&mut self, value: Option<WasmFileExtractionConfig>) {
self.config = value;
}
}
/// Image extraction configuration.
///
/// `Default` is implemented by hand rather than derived: a derived impl
/// would zero every field (`target_dpi = 0`, `max_dpi = 0`,
/// `extract_images = false`, ...), while the JavaScript constructor falls
/// back to non-zero values when arguments are omitted. The manual impl keeps
/// `Default::default()`, `unwrap_or_default()` and `..Default::default()` in
/// agreement with the constructor.
#[derive(Clone)]
#[wasm_bindgen]
pub struct WasmImageExtractionConfig {
    extract_images: bool,
    target_dpi: i32,
    max_image_dimension: i32,
    inject_placeholders: bool,
    auto_adjust_dpi: bool,
    min_dpi: i32,
    max_dpi: i32,
    max_images_per_page: Option<u32>,
    classify: bool,
    include_page_rasters: bool,
    run_ocr_on_images: bool,
    ocr_text_only: bool,
    append_ocr_text: bool,
}

impl Default for WasmImageExtractionConfig {
    /// Returns the same values the constructor produces when every argument
    /// is omitted.
    fn default() -> Self {
        Self {
            extract_images: true,
            target_dpi: 300,
            max_image_dimension: 4096,
            inject_placeholders: true,
            auto_adjust_dpi: true,
            min_dpi: 72,
            max_dpi: 600,
            max_images_per_page: None,
            classify: true,
            include_page_rasters: false,
            run_ocr_on_images: true,
            ocr_text_only: false,
            append_ocr_text: false,
        }
    }
}
#[wasm_bindgen]
impl WasmImageExtractionConfig {
    /// Builds a configuration, substituting a fixed fallback for every omitted argument.
    ///
    /// Fallbacks: `extractImages = true`, `targetDpi = 300`,
    /// `maxImageDimension = 4096`, `injectPlaceholders = true`,
    /// `autoAdjustDpi = true`, `minDpi = 72`, `maxDpi = 600`,
    /// `classify = true`, `includePageRasters = false`,
    /// `runOcrOnImages = true`, `ocrTextOnly = false`,
    /// `appendOcrText = false`. `maxImagesPerPage` is stored as given.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        extractImages: Option<bool>,
        targetDpi: Option<i32>,
        maxImageDimension: Option<i32>,
        injectPlaceholders: Option<bool>,
        autoAdjustDpi: Option<bool>,
        minDpi: Option<i32>,
        maxDpi: Option<i32>,
        classify: Option<bool>,
        includePageRasters: Option<bool>,
        runOcrOnImages: Option<bool>,
        ocrTextOnly: Option<bool>,
        appendOcrText: Option<bool>,
        maxImagesPerPage: Option<u32>,
    ) -> WasmImageExtractionConfig {
        // Resolve each optional argument first, then assemble the struct.
        let extract_images = extractImages.unwrap_or(true);
        let target_dpi = targetDpi.unwrap_or(300);
        let max_image_dimension = maxImageDimension.unwrap_or(4096);
        let inject_placeholders = injectPlaceholders.unwrap_or(true);
        let auto_adjust_dpi = autoAdjustDpi.unwrap_or(true);
        let min_dpi = minDpi.unwrap_or(72);
        let max_dpi = maxDpi.unwrap_or(600);
        let classify = classify.unwrap_or(true);
        let include_page_rasters = includePageRasters.unwrap_or(false);
        let run_ocr_on_images = runOcrOnImages.unwrap_or(true);
        let ocr_text_only = ocrTextOnly.unwrap_or(false);
        let append_ocr_text = appendOcrText.unwrap_or(false);

        Self {
            extract_images,
            target_dpi,
            max_image_dimension,
            inject_placeholders,
            auto_adjust_dpi,
            min_dpi,
            max_dpi,
            max_images_per_page: maxImagesPerPage,
            classify,
            include_page_rasters,
            run_ocr_on_images,
            ocr_text_only,
            append_ocr_text,
        }
    }

    /// Returns the `extractImages` flag.
    #[wasm_bindgen(getter, js_name = "extractImages")]
    pub fn extract_images(&self) -> bool {
        self.extract_images
    }

    /// Sets the `extractImages` flag.
    #[wasm_bindgen(setter, js_name = "extractImages")]
    pub fn set_extract_images(&mut self, value: bool) {
        self.extract_images = value;
    }

    /// Returns the `targetDpi` value.
    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    /// Sets the `targetDpi` value.
    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    /// Returns the `maxImageDimension` value.
    #[wasm_bindgen(getter, js_name = "maxImageDimension")]
    pub fn max_image_dimension(&self) -> i32 {
        self.max_image_dimension
    }

    /// Sets the `maxImageDimension` value.
    #[wasm_bindgen(setter, js_name = "maxImageDimension")]
    pub fn set_max_image_dimension(&mut self, value: i32) {
        self.max_image_dimension = value;
    }

    /// Returns the `injectPlaceholders` flag.
    #[wasm_bindgen(getter, js_name = "injectPlaceholders")]
    pub fn inject_placeholders(&self) -> bool {
        self.inject_placeholders
    }

    /// Sets the `injectPlaceholders` flag.
    #[wasm_bindgen(setter, js_name = "injectPlaceholders")]
    pub fn set_inject_placeholders(&mut self, value: bool) {
        self.inject_placeholders = value;
    }

    /// Returns the `autoAdjustDpi` flag.
    #[wasm_bindgen(getter, js_name = "autoAdjustDpi")]
    pub fn auto_adjust_dpi(&self) -> bool {
        self.auto_adjust_dpi
    }

    /// Sets the `autoAdjustDpi` flag.
    #[wasm_bindgen(setter, js_name = "autoAdjustDpi")]
    pub fn set_auto_adjust_dpi(&mut self, value: bool) {
        self.auto_adjust_dpi = value;
    }

    /// Returns the `minDpi` value.
    #[wasm_bindgen(getter, js_name = "minDpi")]
    pub fn min_dpi(&self) -> i32 {
        self.min_dpi
    }

    /// Sets the `minDpi` value.
    #[wasm_bindgen(setter, js_name = "minDpi")]
    pub fn set_min_dpi(&mut self, value: i32) {
        self.min_dpi = value;
    }

    /// Returns the `maxDpi` value.
    #[wasm_bindgen(getter, js_name = "maxDpi")]
    pub fn max_dpi(&self) -> i32 {
        self.max_dpi
    }

    /// Sets the `maxDpi` value.
    #[wasm_bindgen(setter, js_name = "maxDpi")]
    pub fn set_max_dpi(&mut self, value: i32) {
        self.max_dpi = value;
    }

    /// Returns the optional `maxImagesPerPage` value.
    #[wasm_bindgen(getter, js_name = "maxImagesPerPage")]
    pub fn max_images_per_page(&self) -> Option<u32> {
        self.max_images_per_page
    }

    /// Sets the optional `maxImagesPerPage` value; `None` clears it.
    #[wasm_bindgen(setter, js_name = "maxImagesPerPage")]
    pub fn set_max_images_per_page(&mut self, value: Option<u32>) {
        self.max_images_per_page = value;
    }

    /// Returns the `classify` flag.
    #[wasm_bindgen(getter)]
    pub fn classify(&self) -> bool {
        self.classify
    }

    /// Sets the `classify` flag.
    #[wasm_bindgen(setter)]
    pub fn set_classify(&mut self, value: bool) {
        self.classify = value;
    }

    /// Returns the `includePageRasters` flag.
    #[wasm_bindgen(getter, js_name = "includePageRasters")]
    pub fn include_page_rasters(&self) -> bool {
        self.include_page_rasters
    }

    /// Sets the `includePageRasters` flag.
    #[wasm_bindgen(setter, js_name = "includePageRasters")]
    pub fn set_include_page_rasters(&mut self, value: bool) {
        self.include_page_rasters = value;
    }

    /// Returns the `runOcrOnImages` flag.
    #[wasm_bindgen(getter, js_name = "runOcrOnImages")]
    pub fn run_ocr_on_images(&self) -> bool {
        self.run_ocr_on_images
    }

    /// Sets the `runOcrOnImages` flag.
    #[wasm_bindgen(setter, js_name = "runOcrOnImages")]
    pub fn set_run_ocr_on_images(&mut self, value: bool) {
        self.run_ocr_on_images = value;
    }

    /// Returns the `ocrTextOnly` flag.
    #[wasm_bindgen(getter, js_name = "ocrTextOnly")]
    pub fn ocr_text_only(&self) -> bool {
        self.ocr_text_only
    }

    /// Sets the `ocrTextOnly` flag.
    #[wasm_bindgen(setter, js_name = "ocrTextOnly")]
    pub fn set_ocr_text_only(&mut self, value: bool) {
        self.ocr_text_only = value;
    }

    /// Returns the `appendOcrText` flag.
    #[wasm_bindgen(getter, js_name = "appendOcrText")]
    pub fn append_ocr_text(&self) -> bool {
        self.append_ocr_text
    }

    /// Sets the `appendOcrText` flag.
    #[wasm_bindgen(setter, js_name = "appendOcrText")]
    pub fn set_append_ocr_text(&mut self, value: bool) {
        self.append_ocr_text = value;
    }

    /// Returns the defaults of `kreuzberg::ImageExtractionConfig`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmImageExtractionConfig {
        let core_defaults = kreuzberg::ImageExtractionConfig::default();
        core_defaults.into()
    }
}
/// Token reduction configuration.
// NOTE(review): the derived `Default` gives `preserve_important_words = false`,
// whereas `new` falls back to `true` — confirm trait-based callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTokenReductionOptions {
/// Mode name as a free-form string; the constructor falls back to an empty string.
mode: String,
/// Constructor fallback: `true`.
preserve_important_words: bool,
}
#[wasm_bindgen]
impl WasmTokenReductionOptions {
    /// Builds the options; an omitted `mode` becomes an empty string and an
    /// omitted `preserveImportantWords` becomes `true`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(mode: Option<String>, preserveImportantWords: Option<bool>) -> WasmTokenReductionOptions {
        let mode = mode.unwrap_or_default();
        let preserve_important_words = preserveImportantWords.unwrap_or(true);
        Self {
            mode,
            preserve_important_words,
        }
    }

    /// Returns a clone of the `mode` string.
    #[wasm_bindgen(getter)]
    pub fn mode(&self) -> String {
        let Self { mode, .. } = self;
        mode.clone()
    }

    /// Replaces the `mode` string.
    #[wasm_bindgen(setter)]
    pub fn set_mode(&mut self, value: String) {
        self.mode = value;
    }

    /// Returns the `preserveImportantWords` flag.
    #[wasm_bindgen(getter, js_name = "preserveImportantWords")]
    pub fn preserve_important_words(&self) -> bool {
        self.preserve_important_words
    }

    /// Sets the `preserveImportantWords` flag.
    #[wasm_bindgen(setter, js_name = "preserveImportantWords")]
    pub fn set_preserve_important_words(&mut self, value: bool) {
        self.preserve_important_words = value;
    }

    /// Returns the defaults of `kreuzberg::TokenReductionOptions`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmTokenReductionOptions {
        let core_defaults = kreuzberg::TokenReductionOptions::default();
        core_defaults.into()
    }
}
/// Language detection configuration.
// NOTE(review): the derived `Default` yields `enabled = false` and
// `min_confidence = 0.0`, unlike the `new` fallbacks (`true`, `0.8`) — confirm
// trait-based callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLanguageDetectionConfig {
/// Constructor fallback: `true`.
enabled: bool,
/// Constructor fallback: `0.8`.
min_confidence: f64,
/// Constructor fallback: `false`.
detect_multiple: bool,
}
#[wasm_bindgen]
impl WasmLanguageDetectionConfig {
    /// Builds the configuration, using `enabled = true`, `minConfidence = 0.8`
    /// and `detectMultiple = false` for omitted arguments.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enabled: Option<bool>,
        minConfidence: Option<f64>,
        detectMultiple: Option<bool>,
    ) -> WasmLanguageDetectionConfig {
        let enabled = enabled.unwrap_or(true);
        let min_confidence = minConfidence.unwrap_or(0.8);
        let detect_multiple = detectMultiple.unwrap_or(false);
        Self {
            enabled,
            min_confidence,
            detect_multiple,
        }
    }

    /// Returns the `enabled` flag.
    #[wasm_bindgen(getter)]
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Sets the `enabled` flag.
    #[wasm_bindgen(setter)]
    pub fn set_enabled(&mut self, value: bool) {
        self.enabled = value;
    }

    /// Returns the `minConfidence` value.
    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }

    /// Sets the `minConfidence` value; no range check is applied here.
    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }

    /// Returns the `detectMultiple` flag.
    #[wasm_bindgen(getter, js_name = "detectMultiple")]
    pub fn detect_multiple(&self) -> bool {
        self.detect_multiple
    }

    /// Sets the `detectMultiple` flag.
    #[wasm_bindgen(setter, js_name = "detectMultiple")]
    pub fn set_detect_multiple(&mut self, value: bool) {
        self.detect_multiple = value;
    }

    /// Returns the defaults of `kreuzberg::LanguageDetectionConfig`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmLanguageDetectionConfig {
        let core_defaults = kreuzberg::LanguageDetectionConfig::default();
        core_defaults.into()
    }
}
/// Configuration for an LLM provider/model via liter-llm.
///
/// Each feature (VLM OCR, VLM embeddings, structured extraction) carries
/// its own `LlmConfig`, allowing different providers per feature.
///
/// # Example
///
/// ```toml
/// [structured_extraction.llm]
/// model = "openai/gpt-4o"
/// api_key = "sk-..." # or use KREUZBERG_LLM_API_KEY env var
/// ```
// The derived `Default` yields an empty `model` and `None` for every optional
// field, which matches what `new` produces when all arguments are omitted.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLlmConfig {
/// Model identifier string (e.g. `"openai/gpt-4o"` in the example above);
/// empty when omitted in the constructor.
model: String,
/// Optional API key; `None` when not supplied.
api_key: Option<String>,
/// Optional base URL override; `None` when not supplied.
base_url: Option<String>,
/// Optional timeout, presumably in seconds given the field name; `None` when not supplied.
timeout_secs: Option<u64>,
/// Optional retry limit; `None` when not supplied.
max_retries: Option<u32>,
/// Optional sampling temperature; `None` when not supplied.
temperature: Option<f64>,
/// Optional token limit; `None` when not supplied.
max_tokens: Option<u64>,
}
#[wasm_bindgen]
impl WasmLlmConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
model: Option<String>,
apiKey: Option<String>,
baseUrl: Option<String>,
timeoutSecs: Option<u64>,
maxRetries: Option<u32>,
temperature: Option<f64>,
maxTokens: Option<u64>,
) -> WasmLlmConfig {
WasmLlmConfig {
model: model.unwrap_or_default(),
api_key: apiKey,
base_url: baseUrl,
timeout_secs: timeoutSecs,
max_retries: maxRetries,
temperature,
max_tokens: maxTokens,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmLlmConfig {
<WasmLlmConfig as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn model(&self) -> String {
self.model.clone()
}
#[wasm_bindgen(setter)]
pub fn set_model(&mut self, value: String) {
self.model = value;
}
#[wasm_bindgen(getter, js_name = "apiKey")]
pub fn api_key(&self) -> Option<String> {
self.api_key.clone()
}
#[wasm_bindgen(setter, js_name = "apiKey")]
pub fn set_api_key(&mut self, value: Option<String>) {
self.api_key = value;
}
#[wasm_bindgen(getter, js_name = "baseUrl")]
pub fn base_url(&self) -> Option<String> {
self.base_url.clone()
}
#[wasm_bindgen(setter, js_name = "baseUrl")]
pub fn set_base_url(&mut self, value: Option<String>) {
self.base_url = value;
}
#[wasm_bindgen(getter, js_name = "timeoutSecs")]
pub fn timeout_secs(&self) -> Option<u64> {
self.timeout_secs
}
#[wasm_bindgen(setter, js_name = "timeoutSecs")]
pub fn set_timeout_secs(&mut self, value: Option<u64>) {
self.timeout_secs = value;
}
#[wasm_bindgen(getter, js_name = "maxRetries")]
pub fn max_retries(&self) -> Option<u32> {
self.max_retries
}
#[wasm_bindgen(setter, js_name = "maxRetries")]
pub fn set_max_retries(&mut self, value: Option<u32>) {
self.max_retries = value;
}
#[wasm_bindgen(getter)]
pub fn temperature(&self) -> Option<f64> {
self.temperature
}
#[wasm_bindgen(setter)]
pub fn set_temperature(&mut self, value: Option<f64>) {
self.temperature = value;
}
#[wasm_bindgen(getter, js_name = "maxTokens")]
pub fn max_tokens(&self) -> Option<u64> {
self.max_tokens
}
#[wasm_bindgen(setter, js_name = "maxTokens")]
pub fn set_max_tokens(&mut self, value: Option<u64>) {
self.max_tokens = value;
}
}
/// Configuration for LLM-based structured data extraction.
///
/// Sends extracted document content to a VLM with a JSON schema,
/// returning structured data that conforms to the schema.
///
/// # Example
///
/// ```toml
/// [structured_extraction]
/// schema_name = "invoice_data"
/// strict = true
///
/// [structured_extraction.schema]
/// type = "object"
/// properties.vendor = { type = "string" }
/// properties.total = { type = "number" }
/// required = ["vendor", "total"]
///
/// [structured_extraction.llm]
/// model = "openai/gpt-4o"
/// ```
// The derived `Default` relies on the `Default` impls of `JsValue` and
// `WasmLlmConfig` for `schema` and `llm`; the remaining fields become empty,
// `None`, or `false`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmStructuredExtractionConfig {
/// Schema held as an opaque JavaScript value; it is not validated by this wrapper.
schema: JsValue,
/// Name associated with the schema (e.g. `"invoice_data"` in the example above).
schema_name: String,
/// Optional description of the schema; `None` when not supplied.
schema_description: Option<String>,
/// Strictness flag, stored exactly as passed to the constructor.
strict: bool,
/// Optional prompt text; `None` when not supplied.
prompt: Option<String>,
/// LLM provider/model settings used for this extraction.
llm: WasmLlmConfig,
}
#[wasm_bindgen]
impl WasmStructuredExtractionConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
schema: JsValue,
schemaName: String,
strict: bool,
llm: WasmLlmConfig,
schemaDescription: Option<String>,
prompt: Option<String>,
) -> WasmStructuredExtractionConfig {
WasmStructuredExtractionConfig {
schema,
schema_name: schemaName,
schema_description: schemaDescription,
strict,
prompt,
llm,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmStructuredExtractionConfig {
<WasmStructuredExtractionConfig as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn schema(&self) -> JsValue {
self.schema.clone()
}
#[wasm_bindgen(setter)]
pub fn set_schema(&mut self, value: JsValue) {
self.schema = value;
}
#[wasm_bindgen(getter, js_name = "schemaName")]
pub fn schema_name(&self) -> String {
self.schema_name.clone()
}
#[wasm_bindgen(setter, js_name = "schemaName")]
pub fn set_schema_name(&mut self, value: String) {
self.schema_name = value;
}
#[wasm_bindgen(getter, js_name = "schemaDescription")]
pub fn schema_description(&self) -> Option<String> {
self.schema_description.clone()
}
#[wasm_bindgen(setter, js_name = "schemaDescription")]
pub fn set_schema_description(&mut self, value: Option<String>) {
self.schema_description = value;
}
#[wasm_bindgen(getter)]
pub fn strict(&self) -> bool {
self.strict
}
#[wasm_bindgen(setter)]
pub fn set_strict(&mut self, value: bool) {
self.strict = value;
}
#[wasm_bindgen(getter)]
pub fn prompt(&self) -> Option<String> {
self.prompt.clone()
}
#[wasm_bindgen(setter)]
pub fn set_prompt(&mut self, value: Option<String>) {
self.prompt = value;
}
#[wasm_bindgen(getter)]
pub fn llm(&self) -> WasmLlmConfig {
self.llm.clone()
}
#[wasm_bindgen(setter)]
pub fn set_llm(&mut self, value: WasmLlmConfig) {
self.llm = value;
}
}
/// Quality thresholds for OCR fallback decisions and pipeline quality gating.
///
/// All fields default to the values that match the previous hardcoded behavior,
/// so `OcrQualityThresholds.default()` preserves existing semantics exactly.
// NOTE(review): the derived `Default` zero-initialises every threshold, which
// differs from the constructor fallbacks listed per field below — confirm
// that trait-based `Default::default()` callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrQualityThresholds {
/// Constructor fallback: `64`.
min_total_non_whitespace: usize,
/// Constructor fallback: `32.0`.
min_non_whitespace_per_page: f64,
/// Constructor fallback: `4`.
min_meaningful_word_len: usize,
/// Constructor fallback: `3`.
min_meaningful_words: usize,
/// Constructor fallback: `0.3`.
min_alnum_ratio: f64,
/// Constructor fallback: `5`.
min_garbage_chars: usize,
/// Constructor fallback: `0.6`.
max_fragmented_word_ratio: f64,
/// Constructor fallback: `0.8`.
critical_fragmented_word_ratio: f64,
/// Constructor fallback: `2.0`.
min_avg_word_length: f64,
/// Constructor fallback: `50`.
min_words_for_avg_length_check: usize,
/// Constructor fallback: `0.08`.
min_consecutive_repeat_ratio: f64,
/// Constructor fallback: `50`.
min_words_for_repeat_check: usize,
/// Constructor fallback: `100`.
substantive_min_chars: usize,
/// Constructor fallback: `20`.
non_text_min_chars: usize,
/// Constructor fallback: `0.4`.
alnum_ws_ratio_threshold: f64,
/// Constructor fallback: `0.5`.
pipeline_min_quality: f64,
}
#[wasm_bindgen]
impl WasmOcrQualityThresholds {
    /// Builds a threshold set, substituting a fixed fallback for every omitted argument.
    ///
    /// Fallbacks: `minTotalNonWhitespace = 64`, `minNonWhitespacePerPage = 32.0`,
    /// `minMeaningfulWordLen = 4`, `minMeaningfulWords = 3`, `minAlnumRatio = 0.3`,
    /// `minGarbageChars = 5`, `maxFragmentedWordRatio = 0.6`,
    /// `criticalFragmentedWordRatio = 0.8`, `minAvgWordLength = 2.0`,
    /// `minWordsForAvgLengthCheck = 50`, `minConsecutiveRepeatRatio = 0.08`,
    /// `minWordsForRepeatCheck = 50`, `substantiveMinChars = 100`,
    /// `nonTextMinChars = 20`, `alnumWsRatioThreshold = 0.4`,
    /// `pipelineMinQuality = 0.5`.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        minTotalNonWhitespace: Option<usize>,
        minNonWhitespacePerPage: Option<f64>,
        minMeaningfulWordLen: Option<usize>,
        minMeaningfulWords: Option<usize>,
        minAlnumRatio: Option<f64>,
        minGarbageChars: Option<usize>,
        maxFragmentedWordRatio: Option<f64>,
        criticalFragmentedWordRatio: Option<f64>,
        minAvgWordLength: Option<f64>,
        minWordsForAvgLengthCheck: Option<usize>,
        minConsecutiveRepeatRatio: Option<f64>,
        minWordsForRepeatCheck: Option<usize>,
        substantiveMinChars: Option<usize>,
        nonTextMinChars: Option<usize>,
        alnumWsRatioThreshold: Option<f64>,
        pipelineMinQuality: Option<f64>,
    ) -> WasmOcrQualityThresholds {
        // Resolve each optional argument first, then assemble the struct.
        let min_total_non_whitespace = minTotalNonWhitespace.unwrap_or(64);
        let min_non_whitespace_per_page = minNonWhitespacePerPage.unwrap_or(32.0);
        let min_meaningful_word_len = minMeaningfulWordLen.unwrap_or(4);
        let min_meaningful_words = minMeaningfulWords.unwrap_or(3);
        let min_alnum_ratio = minAlnumRatio.unwrap_or(0.3);
        let min_garbage_chars = minGarbageChars.unwrap_or(5);
        let max_fragmented_word_ratio = maxFragmentedWordRatio.unwrap_or(0.6);
        let critical_fragmented_word_ratio = criticalFragmentedWordRatio.unwrap_or(0.8);
        let min_avg_word_length = minAvgWordLength.unwrap_or(2.0);
        let min_words_for_avg_length_check = minWordsForAvgLengthCheck.unwrap_or(50);
        let min_consecutive_repeat_ratio = minConsecutiveRepeatRatio.unwrap_or(0.08);
        let min_words_for_repeat_check = minWordsForRepeatCheck.unwrap_or(50);
        let substantive_min_chars = substantiveMinChars.unwrap_or(100);
        let non_text_min_chars = nonTextMinChars.unwrap_or(20);
        let alnum_ws_ratio_threshold = alnumWsRatioThreshold.unwrap_or(0.4);
        let pipeline_min_quality = pipelineMinQuality.unwrap_or(0.5);

        Self {
            min_total_non_whitespace,
            min_non_whitespace_per_page,
            min_meaningful_word_len,
            min_meaningful_words,
            min_alnum_ratio,
            min_garbage_chars,
            max_fragmented_word_ratio,
            critical_fragmented_word_ratio,
            min_avg_word_length,
            min_words_for_avg_length_check,
            min_consecutive_repeat_ratio,
            min_words_for_repeat_check,
            substantive_min_chars,
            non_text_min_chars,
            alnum_ws_ratio_threshold,
            pipeline_min_quality,
        }
    }

    /// Returns the `minTotalNonWhitespace` threshold.
    #[wasm_bindgen(getter, js_name = "minTotalNonWhitespace")]
    pub fn min_total_non_whitespace(&self) -> usize {
        self.min_total_non_whitespace
    }

    /// Sets the `minTotalNonWhitespace` threshold.
    #[wasm_bindgen(setter, js_name = "minTotalNonWhitespace")]
    pub fn set_min_total_non_whitespace(&mut self, value: usize) {
        self.min_total_non_whitespace = value;
    }

    /// Returns the `minNonWhitespacePerPage` threshold.
    #[wasm_bindgen(getter, js_name = "minNonWhitespacePerPage")]
    pub fn min_non_whitespace_per_page(&self) -> f64 {
        self.min_non_whitespace_per_page
    }

    /// Sets the `minNonWhitespacePerPage` threshold.
    #[wasm_bindgen(setter, js_name = "minNonWhitespacePerPage")]
    pub fn set_min_non_whitespace_per_page(&mut self, value: f64) {
        self.min_non_whitespace_per_page = value;
    }

    /// Returns the `minMeaningfulWordLen` threshold.
    #[wasm_bindgen(getter, js_name = "minMeaningfulWordLen")]
    pub fn min_meaningful_word_len(&self) -> usize {
        self.min_meaningful_word_len
    }

    /// Sets the `minMeaningfulWordLen` threshold.
    #[wasm_bindgen(setter, js_name = "minMeaningfulWordLen")]
    pub fn set_min_meaningful_word_len(&mut self, value: usize) {
        self.min_meaningful_word_len = value;
    }

    /// Returns the `minMeaningfulWords` threshold.
    #[wasm_bindgen(getter, js_name = "minMeaningfulWords")]
    pub fn min_meaningful_words(&self) -> usize {
        self.min_meaningful_words
    }

    /// Sets the `minMeaningfulWords` threshold.
    #[wasm_bindgen(setter, js_name = "minMeaningfulWords")]
    pub fn set_min_meaningful_words(&mut self, value: usize) {
        self.min_meaningful_words = value;
    }

    /// Returns the `minAlnumRatio` threshold.
    #[wasm_bindgen(getter, js_name = "minAlnumRatio")]
    pub fn min_alnum_ratio(&self) -> f64 {
        self.min_alnum_ratio
    }

    /// Sets the `minAlnumRatio` threshold.
    #[wasm_bindgen(setter, js_name = "minAlnumRatio")]
    pub fn set_min_alnum_ratio(&mut self, value: f64) {
        self.min_alnum_ratio = value;
    }

    /// Returns the `minGarbageChars` threshold.
    #[wasm_bindgen(getter, js_name = "minGarbageChars")]
    pub fn min_garbage_chars(&self) -> usize {
        self.min_garbage_chars
    }

    /// Sets the `minGarbageChars` threshold.
    #[wasm_bindgen(setter, js_name = "minGarbageChars")]
    pub fn set_min_garbage_chars(&mut self, value: usize) {
        self.min_garbage_chars = value;
    }

    /// Returns the `maxFragmentedWordRatio` threshold.
    #[wasm_bindgen(getter, js_name = "maxFragmentedWordRatio")]
    pub fn max_fragmented_word_ratio(&self) -> f64 {
        self.max_fragmented_word_ratio
    }

    /// Sets the `maxFragmentedWordRatio` threshold.
    #[wasm_bindgen(setter, js_name = "maxFragmentedWordRatio")]
    pub fn set_max_fragmented_word_ratio(&mut self, value: f64) {
        self.max_fragmented_word_ratio = value;
    }

    /// Returns the `criticalFragmentedWordRatio` threshold.
    #[wasm_bindgen(getter, js_name = "criticalFragmentedWordRatio")]
    pub fn critical_fragmented_word_ratio(&self) -> f64 {
        self.critical_fragmented_word_ratio
    }

    /// Sets the `criticalFragmentedWordRatio` threshold.
    #[wasm_bindgen(setter, js_name = "criticalFragmentedWordRatio")]
    pub fn set_critical_fragmented_word_ratio(&mut self, value: f64) {
        self.critical_fragmented_word_ratio = value;
    }

    /// Returns the `minAvgWordLength` threshold.
    #[wasm_bindgen(getter, js_name = "minAvgWordLength")]
    pub fn min_avg_word_length(&self) -> f64 {
        self.min_avg_word_length
    }

    /// Sets the `minAvgWordLength` threshold.
    #[wasm_bindgen(setter, js_name = "minAvgWordLength")]
    pub fn set_min_avg_word_length(&mut self, value: f64) {
        self.min_avg_word_length = value;
    }

    /// Returns the `minWordsForAvgLengthCheck` threshold.
    #[wasm_bindgen(getter, js_name = "minWordsForAvgLengthCheck")]
    pub fn min_words_for_avg_length_check(&self) -> usize {
        self.min_words_for_avg_length_check
    }

    /// Sets the `minWordsForAvgLengthCheck` threshold.
    #[wasm_bindgen(setter, js_name = "minWordsForAvgLengthCheck")]
    pub fn set_min_words_for_avg_length_check(&mut self, value: usize) {
        self.min_words_for_avg_length_check = value;
    }

    /// Returns the `minConsecutiveRepeatRatio` threshold.
    #[wasm_bindgen(getter, js_name = "minConsecutiveRepeatRatio")]
    pub fn min_consecutive_repeat_ratio(&self) -> f64 {
        self.min_consecutive_repeat_ratio
    }

    /// Sets the `minConsecutiveRepeatRatio` threshold.
    #[wasm_bindgen(setter, js_name = "minConsecutiveRepeatRatio")]
    pub fn set_min_consecutive_repeat_ratio(&mut self, value: f64) {
        self.min_consecutive_repeat_ratio = value;
    }

    /// Returns the `minWordsForRepeatCheck` threshold.
    #[wasm_bindgen(getter, js_name = "minWordsForRepeatCheck")]
    pub fn min_words_for_repeat_check(&self) -> usize {
        self.min_words_for_repeat_check
    }

    /// Sets the `minWordsForRepeatCheck` threshold.
    #[wasm_bindgen(setter, js_name = "minWordsForRepeatCheck")]
    pub fn set_min_words_for_repeat_check(&mut self, value: usize) {
        self.min_words_for_repeat_check = value;
    }

    /// Returns the `substantiveMinChars` threshold.
    #[wasm_bindgen(getter, js_name = "substantiveMinChars")]
    pub fn substantive_min_chars(&self) -> usize {
        self.substantive_min_chars
    }

    /// Sets the `substantiveMinChars` threshold.
    #[wasm_bindgen(setter, js_name = "substantiveMinChars")]
    pub fn set_substantive_min_chars(&mut self, value: usize) {
        self.substantive_min_chars = value;
    }

    /// Returns the `nonTextMinChars` threshold.
    #[wasm_bindgen(getter, js_name = "nonTextMinChars")]
    pub fn non_text_min_chars(&self) -> usize {
        self.non_text_min_chars
    }

    /// Sets the `nonTextMinChars` threshold.
    #[wasm_bindgen(setter, js_name = "nonTextMinChars")]
    pub fn set_non_text_min_chars(&mut self, value: usize) {
        self.non_text_min_chars = value;
    }

    /// Returns the `alnumWsRatioThreshold` threshold.
    #[wasm_bindgen(getter, js_name = "alnumWsRatioThreshold")]
    pub fn alnum_ws_ratio_threshold(&self) -> f64 {
        self.alnum_ws_ratio_threshold
    }

    /// Sets the `alnumWsRatioThreshold` threshold.
    #[wasm_bindgen(setter, js_name = "alnumWsRatioThreshold")]
    pub fn set_alnum_ws_ratio_threshold(&mut self, value: f64) {
        self.alnum_ws_ratio_threshold = value;
    }

    /// Returns the `pipelineMinQuality` threshold.
    #[wasm_bindgen(getter, js_name = "pipelineMinQuality")]
    pub fn pipeline_min_quality(&self) -> f64 {
        self.pipeline_min_quality
    }

    /// Sets the `pipelineMinQuality` threshold.
    #[wasm_bindgen(setter, js_name = "pipelineMinQuality")]
    pub fn set_pipeline_min_quality(&mut self, value: f64) {
        self.pipeline_min_quality = value;
    }

    /// Returns the defaults of `kreuzberg::OcrQualityThresholds`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmOcrQualityThresholds {
        let core_defaults = kreuzberg::OcrQualityThresholds::default();
        core_defaults.into()
    }
}
/// A single backend stage in the OCR pipeline.
// The derived `Default` yields an empty `backend`, `priority == 0`, and
// `None` for every optional field.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrPipelineStage {
/// Backend name as a free-form string.
backend: String,
/// Priority value for this stage, stored as passed to the constructor.
priority: u32,
/// Optional language override for this stage; `None` when not supplied.
language: Option<String>,
/// Optional Tesseract settings for this stage.
tesseract_config: Option<WasmTesseractConfig>,
/// Optional PaddleOCR settings, held as an opaque JavaScript value.
paddle_ocr_config: Option<JsValue>,
/// Optional VLM (LLM) settings for this stage.
vlm_config: Option<WasmLlmConfig>,
/// Optional backend-specific options, held as an opaque JavaScript value.
backend_options: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmOcrPipelineStage {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
backend: String,
priority: u32,
language: Option<String>,
tesseractConfig: Option<WasmTesseractConfig>,
paddleOcrConfig: Option<JsValue>,
vlmConfig: Option<WasmLlmConfig>,
backendOptions: Option<JsValue>,
) -> WasmOcrPipelineStage {
WasmOcrPipelineStage {
backend,
priority,
language,
tesseract_config: tesseractConfig,
paddle_ocr_config: paddleOcrConfig,
vlm_config: vlmConfig,
backend_options: backendOptions,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmOcrPipelineStage {
<WasmOcrPipelineStage as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn backend(&self) -> String {
self.backend.clone()
}
#[wasm_bindgen(setter)]
pub fn set_backend(&mut self, value: String) {
self.backend = value;
}
#[wasm_bindgen(getter)]
pub fn priority(&self) -> u32 {
self.priority
}
#[wasm_bindgen(setter)]
pub fn set_priority(&mut self, value: u32) {
self.priority = value;
}
#[wasm_bindgen(getter)]
pub fn language(&self) -> Option<String> {
self.language.clone()
}
#[wasm_bindgen(setter)]
pub fn set_language(&mut self, value: Option<String>) {
self.language = value;
}
#[wasm_bindgen(getter, js_name = "tesseractConfig")]
pub fn tesseract_config(&self) -> Option<WasmTesseractConfig> {
self.tesseract_config.clone()
}
#[wasm_bindgen(setter, js_name = "tesseractConfig")]
pub fn set_tesseract_config(&mut self, value: Option<WasmTesseractConfig>) {
self.tesseract_config = value;
}
#[wasm_bindgen(getter, js_name = "paddleOcrConfig")]
pub fn paddle_ocr_config(&self) -> Option<JsValue> {
self.paddle_ocr_config.clone()
}
#[wasm_bindgen(setter, js_name = "paddleOcrConfig")]
pub fn set_paddle_ocr_config(&mut self, value: Option<JsValue>) {
self.paddle_ocr_config = value;
}
#[wasm_bindgen(getter, js_name = "vlmConfig")]
pub fn vlm_config(&self) -> Option<WasmLlmConfig> {
self.vlm_config.clone()
}
#[wasm_bindgen(setter, js_name = "vlmConfig")]
pub fn set_vlm_config(&mut self, value: Option<WasmLlmConfig>) {
self.vlm_config = value;
}
#[wasm_bindgen(getter, js_name = "backendOptions")]
pub fn backend_options(&self) -> Option<JsValue> {
self.backend_options.clone()
}
#[wasm_bindgen(setter, js_name = "backendOptions")]
pub fn set_backend_options(&mut self, value: Option<JsValue>) {
self.backend_options = value;
}
}
/// Multi-backend OCR pipeline with quality-based fallback.
///
/// Backends are tried in priority order (highest first). After each backend
/// produces output, quality is evaluated. If it meets `quality_thresholds.pipeline_min_quality`,
/// the result is accepted. Otherwise the next backend is tried.
// NOTE(review): the derived `Default` builds `quality_thresholds` through the
// derived `Default` of `WasmOcrQualityThresholds`, i.e. every threshold is
// zero (including `pipeline_min_quality`) — confirm this is intended.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrPipelineConfig {
/// Stages of the pipeline, stored in the order supplied by the caller.
stages: Vec<WasmOcrPipelineStage>,
/// Thresholds used for quality gating between stages.
quality_thresholds: WasmOcrQualityThresholds,
}
#[wasm_bindgen]
impl WasmOcrPipelineConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
stages: Vec<WasmOcrPipelineStage>,
qualityThresholds: WasmOcrQualityThresholds,
) -> WasmOcrPipelineConfig {
WasmOcrPipelineConfig {
stages,
quality_thresholds: qualityThresholds,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmOcrPipelineConfig {
<WasmOcrPipelineConfig as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn stages(&self) -> Vec<WasmOcrPipelineStage> {
self.stages.clone()
}
#[wasm_bindgen(setter)]
pub fn set_stages(&mut self, value: Vec<WasmOcrPipelineStage>) {
self.stages = value;
}
#[wasm_bindgen(getter, js_name = "qualityThresholds")]
pub fn quality_thresholds(&self) -> WasmOcrQualityThresholds {
self.quality_thresholds.clone()
}
#[wasm_bindgen(setter, js_name = "qualityThresholds")]
pub fn set_quality_thresholds(&mut self, value: WasmOcrQualityThresholds) {
self.quality_thresholds = value;
}
}
/// OCR configuration.
// NOTE(review): the derived `Default` gives `enabled = false`, whereas `new`
// falls back to `true` — confirm that trait-based `Default::default()`
// callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrConfig {
/// Constructor fallback: `true`.
enabled: bool,
/// Backend name as a free-form string; the constructor falls back to an empty string.
backend: String,
/// Language string; the constructor falls back to an empty string.
language: String,
/// Optional Tesseract settings.
tesseract_config: Option<WasmTesseractConfig>,
/// Optional output format; the getter exposes it as its API string form.
output_format: Option<WasmOutputFormat>,
/// Optional PaddleOCR settings, held as an opaque JavaScript value.
paddle_ocr_config: Option<JsValue>,
/// Optional backend-specific options, held as an opaque JavaScript value.
backend_options: Option<JsValue>,
/// Optional OCR element settings.
element_config: Option<WasmOcrElementConfig>,
/// Optional quality thresholds; `None` when not supplied.
quality_thresholds: Option<WasmOcrQualityThresholds>,
/// Optional multi-backend pipeline configuration.
pipeline: Option<WasmOcrPipelineConfig>,
/// Constructor fallback: `false`.
auto_rotate: bool,
/// Optional VLM (LLM) settings.
vlm_config: Option<WasmLlmConfig>,
/// Optional prompt text for the VLM.
vlm_prompt: Option<String>,
/// Optional acceleration settings.
acceleration: Option<WasmAccelerationConfig>,
/// Optional tessdata payload, held as an opaque JavaScript value.
tessdata_bytes: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmOcrConfig {
    /// Builds an OCR configuration.
    ///
    /// Fallbacks for omitted arguments: `enabled = true`, `backend = ""`,
    /// `language = ""`, `autoRotate = false`. Every other argument is stored
    /// exactly as passed (including `None`).
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enabled: Option<bool>,
        backend: Option<String>,
        language: Option<String>,
        autoRotate: Option<bool>,
        tesseractConfig: Option<WasmTesseractConfig>,
        outputFormat: Option<WasmOutputFormat>,
        paddleOcrConfig: Option<JsValue>,
        backendOptions: Option<JsValue>,
        elementConfig: Option<WasmOcrElementConfig>,
        qualityThresholds: Option<WasmOcrQualityThresholds>,
        pipeline: Option<WasmOcrPipelineConfig>,
        vlmConfig: Option<WasmLlmConfig>,
        vlmPrompt: Option<String>,
        acceleration: Option<WasmAccelerationConfig>,
        tessdataBytes: Option<JsValue>,
    ) -> WasmOcrConfig {
        // Only these four arguments have fallbacks; resolve them up front.
        let enabled = enabled.unwrap_or(true);
        let backend = backend.unwrap_or_default();
        let language = language.unwrap_or_default();
        let auto_rotate = autoRotate.unwrap_or(false);

        Self {
            enabled,
            backend,
            language,
            tesseract_config: tesseractConfig,
            output_format: outputFormat,
            paddle_ocr_config: paddleOcrConfig,
            backend_options: backendOptions,
            element_config: elementConfig,
            quality_thresholds: qualityThresholds,
            pipeline,
            auto_rotate,
            vlm_config: vlmConfig,
            vlm_prompt: vlmPrompt,
            acceleration,
            tessdata_bytes: tessdataBytes,
        }
    }

    /// Returns the `enabled` flag.
    #[wasm_bindgen(getter)]
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Sets the `enabled` flag.
    #[wasm_bindgen(setter)]
    pub fn set_enabled(&mut self, value: bool) {
        self.enabled = value;
    }

    /// Returns a clone of the `backend` string.
    #[wasm_bindgen(getter)]
    pub fn backend(&self) -> String {
        let Self { backend, .. } = self;
        backend.clone()
    }

    /// Replaces the `backend` string.
    #[wasm_bindgen(setter)]
    pub fn set_backend(&mut self, value: String) {
        self.backend = value;
    }

    /// Returns a clone of the `language` string.
    #[wasm_bindgen(getter)]
    pub fn language(&self) -> String {
        let Self { language, .. } = self;
        language.clone()
    }

    /// Replaces the `language` string.
    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: String) {
        self.language = value;
    }

    /// Returns a clone of the optional `tesseractConfig`.
    #[wasm_bindgen(getter, js_name = "tesseractConfig")]
    pub fn tesseract_config(&self) -> Option<WasmTesseractConfig> {
        let Self { tesseract_config, .. } = self;
        tesseract_config.clone()
    }

    /// Replaces the optional `tesseractConfig`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "tesseractConfig")]
    pub fn set_tesseract_config(&mut self, value: Option<WasmTesseractConfig>) {
        self.tesseract_config = value;
    }

    /// Returns the output format as its API string, or `None` when unset.
    ///
    /// Note the asymmetry with the setter, which takes a `WasmOutputFormat`.
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> Option<String> {
        let format = self.output_format?;
        Some(format.to_api_str().to_owned())
    }

    /// Replaces the optional `outputFormat`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: Option<WasmOutputFormat>) {
        self.output_format = value;
    }

    /// Returns a clone of the optional `paddleOcrConfig` value.
    #[wasm_bindgen(getter, js_name = "paddleOcrConfig")]
    pub fn paddle_ocr_config(&self) -> Option<JsValue> {
        let Self { paddle_ocr_config, .. } = self;
        paddle_ocr_config.clone()
    }

    /// Replaces the optional `paddleOcrConfig` value; `None` clears it.
    #[wasm_bindgen(setter, js_name = "paddleOcrConfig")]
    pub fn set_paddle_ocr_config(&mut self, value: Option<JsValue>) {
        self.paddle_ocr_config = value;
    }

    /// Returns a clone of the optional `backendOptions` value.
    #[wasm_bindgen(getter, js_name = "backendOptions")]
    pub fn backend_options(&self) -> Option<JsValue> {
        let Self { backend_options, .. } = self;
        backend_options.clone()
    }

    /// Replaces the optional `backendOptions` value; `None` clears it.
    #[wasm_bindgen(setter, js_name = "backendOptions")]
    pub fn set_backend_options(&mut self, value: Option<JsValue>) {
        self.backend_options = value;
    }

    /// Returns a clone of the optional `elementConfig`.
    #[wasm_bindgen(getter, js_name = "elementConfig")]
    pub fn element_config(&self) -> Option<WasmOcrElementConfig> {
        let Self { element_config, .. } = self;
        element_config.clone()
    }

    /// Replaces the optional `elementConfig`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "elementConfig")]
    pub fn set_element_config(&mut self, value: Option<WasmOcrElementConfig>) {
        self.element_config = value;
    }

    /// Returns a clone of the optional `qualityThresholds`.
    #[wasm_bindgen(getter, js_name = "qualityThresholds")]
    pub fn quality_thresholds(&self) -> Option<WasmOcrQualityThresholds> {
        let Self { quality_thresholds, .. } = self;
        quality_thresholds.clone()
    }

    /// Replaces the optional `qualityThresholds`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "qualityThresholds")]
    pub fn set_quality_thresholds(&mut self, value: Option<WasmOcrQualityThresholds>) {
        self.quality_thresholds = value;
    }

    /// Returns a clone of the optional `pipeline` configuration.
    #[wasm_bindgen(getter)]
    pub fn pipeline(&self) -> Option<WasmOcrPipelineConfig> {
        let Self { pipeline, .. } = self;
        pipeline.clone()
    }

    /// Replaces the optional `pipeline` configuration; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_pipeline(&mut self, value: Option<WasmOcrPipelineConfig>) {
        self.pipeline = value;
    }

    /// Returns the `autoRotate` flag.
    #[wasm_bindgen(getter, js_name = "autoRotate")]
    pub fn auto_rotate(&self) -> bool {
        self.auto_rotate
    }

    /// Sets the `autoRotate` flag.
    #[wasm_bindgen(setter, js_name = "autoRotate")]
    pub fn set_auto_rotate(&mut self, value: bool) {
        self.auto_rotate = value;
    }

    /// Returns a clone of the optional `vlmConfig`.
    #[wasm_bindgen(getter, js_name = "vlmConfig")]
    pub fn vlm_config(&self) -> Option<WasmLlmConfig> {
        let Self { vlm_config, .. } = self;
        vlm_config.clone()
    }

    /// Replaces the optional `vlmConfig`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "vlmConfig")]
    pub fn set_vlm_config(&mut self, value: Option<WasmLlmConfig>) {
        self.vlm_config = value;
    }

    /// Returns a clone of the optional `vlmPrompt`.
    #[wasm_bindgen(getter, js_name = "vlmPrompt")]
    pub fn vlm_prompt(&self) -> Option<String> {
        let Self { vlm_prompt, .. } = self;
        vlm_prompt.clone()
    }

    /// Replaces the optional `vlmPrompt`; `None` clears it.
    #[wasm_bindgen(setter, js_name = "vlmPrompt")]
    pub fn set_vlm_prompt(&mut self, value: Option<String>) {
        self.vlm_prompt = value;
    }

    /// Returns a clone of the optional `acceleration` settings.
    #[wasm_bindgen(getter)]
    pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
        let Self { acceleration, .. } = self;
        acceleration.clone()
    }

    /// Replaces the optional `acceleration` settings; `None` clears them.
    #[wasm_bindgen(setter)]
    pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
        self.acceleration = value;
    }

    /// Returns a clone of the optional `tessdataBytes` value.
    #[wasm_bindgen(getter, js_name = "tessdataBytes")]
    pub fn tessdata_bytes(&self) -> Option<JsValue> {
        let Self { tessdata_bytes, .. } = self;
        tessdata_bytes.clone()
    }

    /// Replaces the optional `tessdataBytes` value; `None` clears it.
    #[wasm_bindgen(setter, js_name = "tessdataBytes")]
    pub fn set_tessdata_bytes(&mut self, value: Option<JsValue>) {
        self.tessdata_bytes = value;
    }

    /// Returns the defaults of `kreuzberg::OcrConfig`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmOcrConfig {
        let core_defaults = kreuzberg::OcrConfig::default();
        core_defaults.into()
    }
}
/// Page extraction and tracking configuration.
///
/// Controls how pages are extracted, tracked, and represented in the extraction results.
/// When `None`, page tracking is disabled.
///
/// Page range tracking in chunk metadata (first_page/last_page) is automatically enabled
/// when page boundaries are available and chunking is configured.
// NOTE(review): the derived `Default` gives an empty `marker_format`, whereas
// `new` falls back to the `<!-- PAGE {page_num} -->` template — confirm that
// trait-based `Default::default()` callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageConfig {
/// Constructor fallback: `false`.
extract_pages: bool,
/// Constructor fallback: `false`.
insert_page_markers: bool,
/// Marker template; the constructor falls back to
/// `"\n\n<!-- PAGE {page_num} -->\n\n"`.
marker_format: String,
}
#[wasm_bindgen]
impl WasmPageConfig {
    /// Builds a page configuration.
    ///
    /// Fallbacks for omitted arguments: `extractPages = false`,
    /// `insertPageMarkers = false`, and a `markerFormat` of
    /// `"\n\n<!-- PAGE {page_num} -->\n\n"`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        extractPages: Option<bool>,
        insertPageMarkers: Option<bool>,
        markerFormat: Option<String>,
    ) -> WasmPageConfig {
        let extract_pages = extractPages.unwrap_or(false);
        let insert_page_markers = insertPageMarkers.unwrap_or(false);
        // Only allocate the fallback template when no format was supplied.
        let marker_format = match markerFormat {
            Some(format) => format,
            None => "\n\n<!-- PAGE {page_num} -->\n\n".to_string(),
        };
        Self {
            extract_pages,
            insert_page_markers,
            marker_format,
        }
    }

    /// Returns the `extractPages` flag.
    #[wasm_bindgen(getter, js_name = "extractPages")]
    pub fn extract_pages(&self) -> bool {
        self.extract_pages
    }

    /// Sets the `extractPages` flag.
    #[wasm_bindgen(setter, js_name = "extractPages")]
    pub fn set_extract_pages(&mut self, value: bool) {
        self.extract_pages = value;
    }

    /// Returns the `insertPageMarkers` flag.
    #[wasm_bindgen(getter, js_name = "insertPageMarkers")]
    pub fn insert_page_markers(&self) -> bool {
        self.insert_page_markers
    }

    /// Sets the `insertPageMarkers` flag.
    #[wasm_bindgen(setter, js_name = "insertPageMarkers")]
    pub fn set_insert_page_markers(&mut self, value: bool) {
        self.insert_page_markers = value;
    }

    /// Returns a clone of the `markerFormat` template.
    #[wasm_bindgen(getter, js_name = "markerFormat")]
    pub fn marker_format(&self) -> String {
        let Self { marker_format, .. } = self;
        marker_format.clone()
    }

    /// Replaces the `markerFormat` template.
    #[wasm_bindgen(setter, js_name = "markerFormat")]
    pub fn set_marker_format(&mut self, value: String) {
        self.marker_format = value;
    }

    /// Returns the defaults of `kreuzberg::PageConfig`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmPageConfig {
        let core_defaults = kreuzberg::PageConfig::default();
        core_defaults.into()
    }
}
/// Post-processor configuration.
// NOTE(review): the derived `Default` gives `enabled = false`, whereas `new`
// falls back to `true` — confirm that trait-based `Default::default()`
// callers expect that.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPostProcessorConfig {
/// Constructor fallback: `true`.
enabled: bool,
/// Optional list of processor names to enable; `None` when not supplied.
enabled_processors: Option<Vec<String>>,
/// Optional list of processor names to disable; `None` when not supplied.
disabled_processors: Option<Vec<String>>,
/// Optional list stored as passed to the constructor.
/// NOTE(review): relationship to `enabled_processors` is not visible here — confirm.
enabled_set: Option<Vec<String>>,
/// Optional list stored as passed to the constructor.
/// NOTE(review): relationship to `disabled_processors` is not visible here — confirm.
disabled_set: Option<Vec<String>>,
}
#[wasm_bindgen]
impl WasmPostProcessorConfig {
    /// Builds a post-processor configuration.
    ///
    /// An omitted `enabled` becomes `true`; the four list arguments are
    /// stored exactly as passed (including `None`).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        enabled: Option<bool>,
        enabledProcessors: Option<Vec<String>>,
        disabledProcessors: Option<Vec<String>>,
        enabledSet: Option<Vec<String>>,
        disabledSet: Option<Vec<String>>,
    ) -> WasmPostProcessorConfig {
        let enabled = enabled.unwrap_or(true);
        Self {
            enabled,
            enabled_processors: enabledProcessors,
            disabled_processors: disabledProcessors,
            enabled_set: enabledSet,
            disabled_set: disabledSet,
        }
    }

    /// Returns the `enabled` flag.
    #[wasm_bindgen(getter)]
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Sets the `enabled` flag.
    #[wasm_bindgen(setter)]
    pub fn set_enabled(&mut self, value: bool) {
        self.enabled = value;
    }

    /// Returns a clone of the optional `enabledProcessors` list.
    #[wasm_bindgen(getter, js_name = "enabledProcessors")]
    pub fn enabled_processors(&self) -> Option<Vec<String>> {
        let Self { enabled_processors, .. } = self;
        enabled_processors.clone()
    }

    /// Replaces the optional `enabledProcessors` list; `None` clears it.
    #[wasm_bindgen(setter, js_name = "enabledProcessors")]
    pub fn set_enabled_processors(&mut self, value: Option<Vec<String>>) {
        self.enabled_processors = value;
    }

    /// Returns a clone of the optional `disabledProcessors` list.
    #[wasm_bindgen(getter, js_name = "disabledProcessors")]
    pub fn disabled_processors(&self) -> Option<Vec<String>> {
        let Self { disabled_processors, .. } = self;
        disabled_processors.clone()
    }

    /// Replaces the optional `disabledProcessors` list; `None` clears it.
    #[wasm_bindgen(setter, js_name = "disabledProcessors")]
    pub fn set_disabled_processors(&mut self, value: Option<Vec<String>>) {
        self.disabled_processors = value;
    }

    /// Returns a clone of the optional `enabledSet` list.
    #[wasm_bindgen(getter, js_name = "enabledSet")]
    pub fn enabled_set(&self) -> Option<Vec<String>> {
        let Self { enabled_set, .. } = self;
        enabled_set.clone()
    }

    /// Replaces the optional `enabledSet` list; `None` clears it.
    #[wasm_bindgen(setter, js_name = "enabledSet")]
    pub fn set_enabled_set(&mut self, value: Option<Vec<String>>) {
        self.enabled_set = value;
    }

    /// Returns a clone of the optional `disabledSet` list.
    #[wasm_bindgen(getter, js_name = "disabledSet")]
    pub fn disabled_set(&self) -> Option<Vec<String>> {
        let Self { disabled_set, .. } = self;
        disabled_set.clone()
    }

    /// Replaces the optional `disabledSet` list; `None` clears it.
    #[wasm_bindgen(setter, js_name = "disabledSet")]
    pub fn set_disabled_set(&mut self, value: Option<Vec<String>>) {
        self.disabled_set = value;
    }

    /// Returns the defaults of `kreuzberg::PostProcessorConfig`, converted into this wrapper.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmPostProcessorConfig {
        let core_defaults = kreuzberg::PostProcessorConfig::default();
        core_defaults.into()
    }
}
/// Chunking configuration.
///
/// Configures text chunking for document content, including chunk size,
/// overlap, trimming behavior, and optional embeddings.
///
/// Use `..Default::default()` when constructing to allow for future field additions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmChunkingConfig {
/// Exposed to JS as `maxCharacters`; `new` falls back to `1000` when omitted.
max_characters: usize,
/// Exposed to JS as `overlap`; `new` falls back to `200` when omitted.
overlap: usize,
/// Exposed to JS as `trim`; `new` falls back to `true` when omitted.
trim: bool,
/// Exposed to JS as `chunkerType`; the getter returns its `to_api_str()` string form.
chunker_type: WasmChunkerType,
/// Exposed to JS as `embedding`; `None` when not supplied.
embedding: Option<WasmEmbeddingConfig>,
/// Exposed to JS as `preset`; `None` when not supplied.
preset: Option<String>,
/// Exposed to JS as `sizing`; an opaque JS value stored as-is.
sizing: JsValue,
/// Exposed to JS as `prependHeadingContext`; `new` falls back to `false` when omitted.
prepend_heading_context: bool,
/// Exposed to JS as `topicThreshold`; `None` when not supplied.
topic_threshold: Option<f32>,
}
#[wasm_bindgen]
impl WasmChunkingConfig {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
maxCharacters: Option<usize>,
overlap: Option<usize>,
trim: Option<bool>,
chunkerType: Option<WasmChunkerType>,
sizing: Option<JsValue>,
prependHeadingContext: Option<bool>,
embedding: Option<WasmEmbeddingConfig>,
preset: Option<String>,
topicThreshold: Option<f32>,
) -> WasmChunkingConfig {
WasmChunkingConfig {
max_characters: maxCharacters.unwrap_or(1000),
overlap: overlap.unwrap_or(200),
trim: trim.unwrap_or(true),
chunker_type: chunkerType.unwrap_or_default(),
embedding,
preset,
sizing: sizing.unwrap_or_default(),
prepend_heading_context: prependHeadingContext.unwrap_or(false),
topic_threshold: topicThreshold,
}
}
#[wasm_bindgen(getter, js_name = "maxCharacters")]
pub fn max_characters(&self) -> usize {
self.max_characters
}
#[wasm_bindgen(setter, js_name = "maxCharacters")]
pub fn set_max_characters(&mut self, value: usize) {
self.max_characters = value;
}
#[wasm_bindgen(getter)]
pub fn overlap(&self) -> usize {
self.overlap
}
#[wasm_bindgen(setter)]
pub fn set_overlap(&mut self, value: usize) {
self.overlap = value;
}
#[wasm_bindgen(getter)]
pub fn trim(&self) -> bool {
self.trim
}
#[wasm_bindgen(setter)]
pub fn set_trim(&mut self, value: bool) {
self.trim = value;
}
#[wasm_bindgen(getter, js_name = "chunkerType")]
pub fn chunker_type(&self) -> String {
self.chunker_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "chunkerType")]
pub fn set_chunker_type(&mut self, value: WasmChunkerType) {
self.chunker_type = value;
}
#[wasm_bindgen(getter)]
pub fn embedding(&self) -> Option<WasmEmbeddingConfig> {
self.embedding.clone()
}
#[wasm_bindgen(setter)]
pub fn set_embedding(&mut self, value: Option<WasmEmbeddingConfig>) {
self.embedding = value;
}
#[wasm_bindgen(getter)]
pub fn preset(&self) -> Option<String> {
self.preset.clone()
}
#[wasm_bindgen(setter)]
pub fn set_preset(&mut self, value: Option<String>) {
self.preset = value;
}
#[wasm_bindgen(getter)]
pub fn sizing(&self) -> JsValue {
self.sizing.clone()
}
#[wasm_bindgen(setter)]
pub fn set_sizing(&mut self, value: JsValue) {
self.sizing = value;
}
#[wasm_bindgen(getter, js_name = "prependHeadingContext")]
pub fn prepend_heading_context(&self) -> bool {
self.prepend_heading_context
}
#[wasm_bindgen(setter, js_name = "prependHeadingContext")]
pub fn set_prepend_heading_context(&mut self, value: bool) {
self.prepend_heading_context = value;
}
#[wasm_bindgen(getter, js_name = "topicThreshold")]
pub fn topic_threshold(&self) -> Option<f32> {
self.topic_threshold
}
#[wasm_bindgen(setter, js_name = "topicThreshold")]
pub fn set_topic_threshold(&mut self, value: Option<f32>) {
self.topic_threshold = value;
}
#[allow(clippy::should_implement_trait)]
#[wasm_bindgen]
pub fn default() -> WasmChunkingConfig {
kreuzberg::ChunkingConfig::default().into()
}
}
/// Embedding configuration for text chunks.
///
/// Configures embedding generation using ONNX models via the vendored embedding engine.
/// Requires the `embeddings` feature to be enabled.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmbeddingConfig {
/// Exposed to JS as `model`; an opaque JS value stored as-is.
model: JsValue,
/// Exposed to JS as `normalize`; `new` falls back to `true` when omitted.
normalize: bool,
/// Exposed to JS as `batchSize`; `new` falls back to `32` when omitted.
batch_size: usize,
/// Exposed to JS as `showDownloadProgress`; `new` falls back to `false` when omitted.
show_download_progress: bool,
/// Exposed to JS as `cacheDir`; `None` when not supplied.
cache_dir: Option<String>,
/// Exposed to JS as `acceleration`; `None` when not supplied.
acceleration: Option<WasmAccelerationConfig>,
/// Exposed to JS as `maxEmbedDurationSecs`; `None` when not supplied.
max_embed_duration_secs: Option<u64>,
}
#[wasm_bindgen]
impl WasmEmbeddingConfig {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
model: Option<JsValue>,
normalize: Option<bool>,
batchSize: Option<usize>,
showDownloadProgress: Option<bool>,
cacheDir: Option<String>,
acceleration: Option<WasmAccelerationConfig>,
maxEmbedDurationSecs: Option<u64>,
) -> WasmEmbeddingConfig {
WasmEmbeddingConfig {
model: model.unwrap_or_default(),
normalize: normalize.unwrap_or(true),
batch_size: batchSize.unwrap_or(32),
show_download_progress: showDownloadProgress.unwrap_or(false),
cache_dir: cacheDir,
acceleration,
max_embed_duration_secs: maxEmbedDurationSecs,
}
}
#[wasm_bindgen(getter)]
pub fn model(&self) -> JsValue {
self.model.clone()
}
#[wasm_bindgen(setter)]
pub fn set_model(&mut self, value: JsValue) {
self.model = value;
}
#[wasm_bindgen(getter)]
pub fn normalize(&self) -> bool {
self.normalize
}
#[wasm_bindgen(setter)]
pub fn set_normalize(&mut self, value: bool) {
self.normalize = value;
}
#[wasm_bindgen(getter, js_name = "batchSize")]
pub fn batch_size(&self) -> usize {
self.batch_size
}
#[wasm_bindgen(setter, js_name = "batchSize")]
pub fn set_batch_size(&mut self, value: usize) {
self.batch_size = value;
}
#[wasm_bindgen(getter, js_name = "showDownloadProgress")]
pub fn show_download_progress(&self) -> bool {
self.show_download_progress
}
#[wasm_bindgen(setter, js_name = "showDownloadProgress")]
pub fn set_show_download_progress(&mut self, value: bool) {
self.show_download_progress = value;
}
#[wasm_bindgen(getter, js_name = "cacheDir")]
pub fn cache_dir(&self) -> Option<String> {
self.cache_dir.clone()
}
#[wasm_bindgen(setter, js_name = "cacheDir")]
pub fn set_cache_dir(&mut self, value: Option<String>) {
self.cache_dir = value;
}
#[wasm_bindgen(getter)]
pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
self.acceleration.clone()
}
#[wasm_bindgen(setter)]
pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
self.acceleration = value;
}
#[wasm_bindgen(getter, js_name = "maxEmbedDurationSecs")]
pub fn max_embed_duration_secs(&self) -> Option<u64> {
self.max_embed_duration_secs
}
#[wasm_bindgen(setter, js_name = "maxEmbedDurationSecs")]
pub fn set_max_embed_duration_secs(&mut self, value: Option<u64>) {
self.max_embed_duration_secs = value;
}
#[allow(clippy::should_implement_trait)]
#[wasm_bindgen]
pub fn default() -> WasmEmbeddingConfig {
kreuzberg::EmbeddingConfig::default().into()
}
}
/// A supported document format entry.
///
/// Represents a file extension and its corresponding MIME type that Kreuzberg can process.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmSupportedFormat {
/// Exposed to JS as `extension`; required by the constructor.
extension: String,
/// Exposed to JS as `mimeType`; required by the constructor.
mime_type: String,
}
#[wasm_bindgen]
impl WasmSupportedFormat {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(extension: String, mimeType: String) -> WasmSupportedFormat {
WasmSupportedFormat {
extension,
mime_type: mimeType,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmSupportedFormat {
<WasmSupportedFormat as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn extension(&self) -> String {
self.extension.clone()
}
#[wasm_bindgen(setter)]
pub fn set_extension(&mut self, value: String) {
self.extension = value;
}
#[wasm_bindgen(getter, js_name = "mimeType")]
pub fn mime_type(&self) -> String {
self.mime_type.clone()
}
#[wasm_bindgen(setter, js_name = "mimeType")]
pub fn set_mime_type(&mut self, value: String) {
self.mime_type = value;
}
}
/// JS-facing wrapper holding a structured-data result: content, format,
/// an opaque metadata value, and a list of text field names.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmStructuredDataResult {
/// Exposed to JS as `content`; required by the constructor.
content: String,
/// Exposed to JS as `format`; required by the constructor.
format: String,
/// Exposed to JS as `metadata`; an opaque JS value stored as-is.
metadata: JsValue,
/// Exposed to JS as `textFields`; required by the constructor.
text_fields: Vec<String>,
}
#[wasm_bindgen]
impl WasmStructuredDataResult {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
content: String,
format: String,
metadata: JsValue,
textFields: Vec<String>,
) -> WasmStructuredDataResult {
WasmStructuredDataResult {
content,
format,
metadata,
text_fields: textFields,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmStructuredDataResult {
<WasmStructuredDataResult as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter)]
pub fn format(&self) -> String {
self.format.clone()
}
#[wasm_bindgen(setter)]
pub fn set_format(&mut self, value: String) {
self.format = value;
}
#[wasm_bindgen(getter)]
pub fn metadata(&self) -> JsValue {
self.metadata.clone()
}
#[wasm_bindgen(setter)]
pub fn set_metadata(&mut self, value: JsValue) {
self.metadata = value;
}
#[wasm_bindgen(getter, js_name = "textFields")]
pub fn text_fields(&self) -> Vec<String> {
self.text_fields.clone()
}
#[wasm_bindgen(setter, js_name = "textFields")]
pub fn set_text_fields(&mut self, value: Vec<String>) {
self.text_fields = value;
}
}
/// Application properties from docProps/app.xml for XLSX
///
/// Contains Excel-specific document metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXlsxAppProperties {
/// Exposed to JS as `application`; `None` when not supplied.
application: Option<String>,
/// Exposed to JS as `appVersion`; `None` when not supplied.
app_version: Option<String>,
/// Exposed to JS as `docSecurity`; `None` when not supplied.
doc_security: Option<i32>,
/// Exposed to JS as `scaleCrop`; `None` when not supplied.
scale_crop: Option<bool>,
/// Exposed to JS as `linksUpToDate`; `None` when not supplied.
links_up_to_date: Option<bool>,
/// Exposed to JS as `sharedDoc`; `None` when not supplied.
shared_doc: Option<bool>,
/// Exposed to JS as `hyperlinksChanged`; `None` when not supplied.
hyperlinks_changed: Option<bool>,
/// Exposed to JS as `company`; `None` when not supplied.
company: Option<String>,
/// Exposed to JS as `worksheetNames`; `new` falls back to an empty list when omitted.
worksheet_names: Vec<String>,
}
#[wasm_bindgen]
impl WasmXlsxAppProperties {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
worksheetNames: Option<Vec<String>>,
application: Option<String>,
appVersion: Option<String>,
docSecurity: Option<i32>,
scaleCrop: Option<bool>,
linksUpToDate: Option<bool>,
sharedDoc: Option<bool>,
hyperlinksChanged: Option<bool>,
company: Option<String>,
) -> WasmXlsxAppProperties {
WasmXlsxAppProperties {
application,
app_version: appVersion,
doc_security: docSecurity,
scale_crop: scaleCrop,
links_up_to_date: linksUpToDate,
shared_doc: sharedDoc,
hyperlinks_changed: hyperlinksChanged,
company,
worksheet_names: worksheetNames.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmXlsxAppProperties {
<WasmXlsxAppProperties as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn application(&self) -> Option<String> {
self.application.clone()
}
#[wasm_bindgen(setter)]
pub fn set_application(&mut self, value: Option<String>) {
self.application = value;
}
#[wasm_bindgen(getter, js_name = "appVersion")]
pub fn app_version(&self) -> Option<String> {
self.app_version.clone()
}
#[wasm_bindgen(setter, js_name = "appVersion")]
pub fn set_app_version(&mut self, value: Option<String>) {
self.app_version = value;
}
#[wasm_bindgen(getter, js_name = "docSecurity")]
pub fn doc_security(&self) -> Option<i32> {
self.doc_security
}
#[wasm_bindgen(setter, js_name = "docSecurity")]
pub fn set_doc_security(&mut self, value: Option<i32>) {
self.doc_security = value;
}
#[wasm_bindgen(getter, js_name = "scaleCrop")]
pub fn scale_crop(&self) -> Option<bool> {
self.scale_crop
}
#[wasm_bindgen(setter, js_name = "scaleCrop")]
pub fn set_scale_crop(&mut self, value: Option<bool>) {
self.scale_crop = value;
}
#[wasm_bindgen(getter, js_name = "linksUpToDate")]
pub fn links_up_to_date(&self) -> Option<bool> {
self.links_up_to_date
}
#[wasm_bindgen(setter, js_name = "linksUpToDate")]
pub fn set_links_up_to_date(&mut self, value: Option<bool>) {
self.links_up_to_date = value;
}
#[wasm_bindgen(getter, js_name = "sharedDoc")]
pub fn shared_doc(&self) -> Option<bool> {
self.shared_doc
}
#[wasm_bindgen(setter, js_name = "sharedDoc")]
pub fn set_shared_doc(&mut self, value: Option<bool>) {
self.shared_doc = value;
}
#[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
pub fn hyperlinks_changed(&self) -> Option<bool> {
self.hyperlinks_changed
}
#[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
pub fn set_hyperlinks_changed(&mut self, value: Option<bool>) {
self.hyperlinks_changed = value;
}
#[wasm_bindgen(getter)]
pub fn company(&self) -> Option<String> {
self.company.clone()
}
#[wasm_bindgen(setter)]
pub fn set_company(&mut self, value: Option<String>) {
self.company = value;
}
#[wasm_bindgen(getter, js_name = "worksheetNames")]
pub fn worksheet_names(&self) -> Vec<String> {
self.worksheet_names.clone()
}
#[wasm_bindgen(setter, js_name = "worksheetNames")]
pub fn set_worksheet_names(&mut self, value: Vec<String>) {
self.worksheet_names = value;
}
}
/// Application properties from docProps/app.xml for PPTX
///
/// Contains PowerPoint-specific document metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxAppProperties {
/// Exposed to JS as `application`; `None` when not supplied.
application: Option<String>,
/// Exposed to JS as `appVersion`; `None` when not supplied.
app_version: Option<String>,
/// Exposed to JS as `totalTime`; `None` when not supplied.
total_time: Option<i32>,
/// Exposed to JS as `company`; `None` when not supplied.
company: Option<String>,
/// Exposed to JS as `docSecurity`; `None` when not supplied.
doc_security: Option<i32>,
/// Exposed to JS as `scaleCrop`; `None` when not supplied.
scale_crop: Option<bool>,
/// Exposed to JS as `linksUpToDate`; `None` when not supplied.
links_up_to_date: Option<bool>,
/// Exposed to JS as `sharedDoc`; `None` when not supplied.
shared_doc: Option<bool>,
/// Exposed to JS as `hyperlinksChanged`; `None` when not supplied.
hyperlinks_changed: Option<bool>,
/// Exposed to JS as `slides`; `None` when not supplied.
slides: Option<i32>,
/// Exposed to JS as `notes`; `None` when not supplied.
notes: Option<i32>,
/// Exposed to JS as `hiddenSlides`; `None` when not supplied.
hidden_slides: Option<i32>,
/// Exposed to JS as `multimediaClips`; `None` when not supplied.
multimedia_clips: Option<i32>,
/// Exposed to JS as `presentationFormat`; `None` when not supplied.
presentation_format: Option<String>,
/// Exposed to JS as `slideTitles`; `new` falls back to an empty list when omitted.
slide_titles: Vec<String>,
}
#[wasm_bindgen]
impl WasmPptxAppProperties {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
slideTitles: Option<Vec<String>>,
application: Option<String>,
appVersion: Option<String>,
totalTime: Option<i32>,
company: Option<String>,
docSecurity: Option<i32>,
scaleCrop: Option<bool>,
linksUpToDate: Option<bool>,
sharedDoc: Option<bool>,
hyperlinksChanged: Option<bool>,
slides: Option<i32>,
notes: Option<i32>,
hiddenSlides: Option<i32>,
multimediaClips: Option<i32>,
presentationFormat: Option<String>,
) -> WasmPptxAppProperties {
WasmPptxAppProperties {
application,
app_version: appVersion,
total_time: totalTime,
company,
doc_security: docSecurity,
scale_crop: scaleCrop,
links_up_to_date: linksUpToDate,
shared_doc: sharedDoc,
hyperlinks_changed: hyperlinksChanged,
slides,
notes,
hidden_slides: hiddenSlides,
multimedia_clips: multimediaClips,
presentation_format: presentationFormat,
slide_titles: slideTitles.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmPptxAppProperties {
<WasmPptxAppProperties as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn application(&self) -> Option<String> {
self.application.clone()
}
#[wasm_bindgen(setter)]
pub fn set_application(&mut self, value: Option<String>) {
self.application = value;
}
#[wasm_bindgen(getter, js_name = "appVersion")]
pub fn app_version(&self) -> Option<String> {
self.app_version.clone()
}
#[wasm_bindgen(setter, js_name = "appVersion")]
pub fn set_app_version(&mut self, value: Option<String>) {
self.app_version = value;
}
#[wasm_bindgen(getter, js_name = "totalTime")]
pub fn total_time(&self) -> Option<i32> {
self.total_time
}
#[wasm_bindgen(setter, js_name = "totalTime")]
pub fn set_total_time(&mut self, value: Option<i32>) {
self.total_time = value;
}
#[wasm_bindgen(getter)]
pub fn company(&self) -> Option<String> {
self.company.clone()
}
#[wasm_bindgen(setter)]
pub fn set_company(&mut self, value: Option<String>) {
self.company = value;
}
#[wasm_bindgen(getter, js_name = "docSecurity")]
pub fn doc_security(&self) -> Option<i32> {
self.doc_security
}
#[wasm_bindgen(setter, js_name = "docSecurity")]
pub fn set_doc_security(&mut self, value: Option<i32>) {
self.doc_security = value;
}
#[wasm_bindgen(getter, js_name = "scaleCrop")]
pub fn scale_crop(&self) -> Option<bool> {
self.scale_crop
}
#[wasm_bindgen(setter, js_name = "scaleCrop")]
pub fn set_scale_crop(&mut self, value: Option<bool>) {
self.scale_crop = value;
}
#[wasm_bindgen(getter, js_name = "linksUpToDate")]
pub fn links_up_to_date(&self) -> Option<bool> {
self.links_up_to_date
}
#[wasm_bindgen(setter, js_name = "linksUpToDate")]
pub fn set_links_up_to_date(&mut self, value: Option<bool>) {
self.links_up_to_date = value;
}
#[wasm_bindgen(getter, js_name = "sharedDoc")]
pub fn shared_doc(&self) -> Option<bool> {
self.shared_doc
}
#[wasm_bindgen(setter, js_name = "sharedDoc")]
pub fn set_shared_doc(&mut self, value: Option<bool>) {
self.shared_doc = value;
}
#[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
pub fn hyperlinks_changed(&self) -> Option<bool> {
self.hyperlinks_changed
}
#[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
pub fn set_hyperlinks_changed(&mut self, value: Option<bool>) {
self.hyperlinks_changed = value;
}
#[wasm_bindgen(getter)]
pub fn slides(&self) -> Option<i32> {
self.slides
}
#[wasm_bindgen(setter)]
pub fn set_slides(&mut self, value: Option<i32>) {
self.slides = value;
}
#[wasm_bindgen(getter)]
pub fn notes(&self) -> Option<i32> {
self.notes
}
#[wasm_bindgen(setter)]
pub fn set_notes(&mut self, value: Option<i32>) {
self.notes = value;
}
#[wasm_bindgen(getter, js_name = "hiddenSlides")]
pub fn hidden_slides(&self) -> Option<i32> {
self.hidden_slides
}
#[wasm_bindgen(setter, js_name = "hiddenSlides")]
pub fn set_hidden_slides(&mut self, value: Option<i32>) {
self.hidden_slides = value;
}
#[wasm_bindgen(getter, js_name = "multimediaClips")]
pub fn multimedia_clips(&self) -> Option<i32> {
self.multimedia_clips
}
#[wasm_bindgen(setter, js_name = "multimediaClips")]
pub fn set_multimedia_clips(&mut self, value: Option<i32>) {
self.multimedia_clips = value;
}
#[wasm_bindgen(getter, js_name = "presentationFormat")]
pub fn presentation_format(&self) -> Option<String> {
self.presentation_format.clone()
}
#[wasm_bindgen(setter, js_name = "presentationFormat")]
pub fn set_presentation_format(&mut self, value: Option<String>) {
self.presentation_format = value;
}
#[wasm_bindgen(getter, js_name = "slideTitles")]
pub fn slide_titles(&self) -> Vec<String> {
self.slide_titles.clone()
}
#[wasm_bindgen(setter, js_name = "slideTitles")]
pub fn set_slide_titles(&mut self, value: Vec<String>) {
self.slide_titles = value;
}
}
/// Configuration for security limits across extractors.
///
/// All limits are intentionally conservative to prevent DoS attacks
/// while still supporting legitimate documents.
///
/// NOTE(review): the derived `Default` zeroes every field, whereas `new` applies
/// non-zero fallbacks — confirm that nothing relies on the derived value as a usable limit set.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmSecurityLimits {
/// Exposed to JS as `maxArchiveSize`; `new` falls back to `524288000` when omitted.
max_archive_size: usize,
/// Exposed to JS as `maxCompressionRatio`; `new` falls back to `100` when omitted.
max_compression_ratio: usize,
/// Exposed to JS as `maxFilesInArchive`; `new` falls back to `10000` when omitted.
max_files_in_archive: usize,
/// Exposed to JS as `maxNestingDepth`; `new` falls back to `1024` when omitted.
max_nesting_depth: usize,
/// Exposed to JS as `maxEntityLength`; `new` falls back to `1048576` when omitted.
max_entity_length: usize,
/// Exposed to JS as `maxContentSize`; `new` falls back to `104857600` when omitted.
max_content_size: usize,
/// Exposed to JS as `maxIterations`; `new` falls back to `10000000` when omitted.
max_iterations: usize,
/// Exposed to JS as `maxXmlDepth`; `new` falls back to `1024` when omitted.
max_xml_depth: usize,
/// Exposed to JS as `maxTableCells`; `new` falls back to `100000` when omitted.
max_table_cells: usize,
}
#[wasm_bindgen]
impl WasmSecurityLimits {
    /// Creates a set of security limits; every argument may be omitted.
    ///
    /// Each omitted limit falls back to the constant shown in the body.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        maxArchiveSize: Option<usize>,
        maxCompressionRatio: Option<usize>,
        maxFilesInArchive: Option<usize>,
        maxNestingDepth: Option<usize>,
        maxEntityLength: Option<usize>,
        maxContentSize: Option<usize>,
        maxIterations: Option<usize>,
        maxXmlDepth: Option<usize>,
        maxTableCells: Option<usize>,
    ) -> Self {
        Self {
            // 500 * 1024 * 1024 == 524288000
            max_archive_size: maxArchiveSize.unwrap_or(500 * 1024 * 1024),
            max_compression_ratio: maxCompressionRatio.unwrap_or(100),
            max_files_in_archive: maxFilesInArchive.unwrap_or(10_000),
            max_nesting_depth: maxNestingDepth.unwrap_or(1_024),
            // 1024 * 1024 == 1048576
            max_entity_length: maxEntityLength.unwrap_or(1024 * 1024),
            // 100 * 1024 * 1024 == 104857600
            max_content_size: maxContentSize.unwrap_or(100 * 1024 * 1024),
            max_iterations: maxIterations.unwrap_or(10_000_000),
            max_xml_depth: maxXmlDepth.unwrap_or(1_024),
            max_table_cells: maxTableCells.unwrap_or(100_000),
        }
    }

    /// Returns `maxArchiveSize`.
    #[wasm_bindgen(getter, js_name = "maxArchiveSize")]
    pub fn max_archive_size(&self) -> usize {
        self.max_archive_size
    }

    /// Overwrites `maxArchiveSize`.
    #[wasm_bindgen(setter, js_name = "maxArchiveSize")]
    pub fn set_max_archive_size(&mut self, value: usize) {
        self.max_archive_size = value;
    }

    /// Returns `maxCompressionRatio`.
    #[wasm_bindgen(getter, js_name = "maxCompressionRatio")]
    pub fn max_compression_ratio(&self) -> usize {
        self.max_compression_ratio
    }

    /// Overwrites `maxCompressionRatio`.
    #[wasm_bindgen(setter, js_name = "maxCompressionRatio")]
    pub fn set_max_compression_ratio(&mut self, value: usize) {
        self.max_compression_ratio = value;
    }

    /// Returns `maxFilesInArchive`.
    #[wasm_bindgen(getter, js_name = "maxFilesInArchive")]
    pub fn max_files_in_archive(&self) -> usize {
        self.max_files_in_archive
    }

    /// Overwrites `maxFilesInArchive`.
    #[wasm_bindgen(setter, js_name = "maxFilesInArchive")]
    pub fn set_max_files_in_archive(&mut self, value: usize) {
        self.max_files_in_archive = value;
    }

    /// Returns `maxNestingDepth`.
    #[wasm_bindgen(getter, js_name = "maxNestingDepth")]
    pub fn max_nesting_depth(&self) -> usize {
        self.max_nesting_depth
    }

    /// Overwrites `maxNestingDepth`.
    #[wasm_bindgen(setter, js_name = "maxNestingDepth")]
    pub fn set_max_nesting_depth(&mut self, value: usize) {
        self.max_nesting_depth = value;
    }

    /// Returns `maxEntityLength`.
    #[wasm_bindgen(getter, js_name = "maxEntityLength")]
    pub fn max_entity_length(&self) -> usize {
        self.max_entity_length
    }

    /// Overwrites `maxEntityLength`.
    #[wasm_bindgen(setter, js_name = "maxEntityLength")]
    pub fn set_max_entity_length(&mut self, value: usize) {
        self.max_entity_length = value;
    }

    /// Returns `maxContentSize`.
    #[wasm_bindgen(getter, js_name = "maxContentSize")]
    pub fn max_content_size(&self) -> usize {
        self.max_content_size
    }

    /// Overwrites `maxContentSize`.
    #[wasm_bindgen(setter, js_name = "maxContentSize")]
    pub fn set_max_content_size(&mut self, value: usize) {
        self.max_content_size = value;
    }

    /// Returns `maxIterations`.
    #[wasm_bindgen(getter, js_name = "maxIterations")]
    pub fn max_iterations(&self) -> usize {
        self.max_iterations
    }

    /// Overwrites `maxIterations`.
    #[wasm_bindgen(setter, js_name = "maxIterations")]
    pub fn set_max_iterations(&mut self, value: usize) {
        self.max_iterations = value;
    }

    /// Returns `maxXmlDepth`.
    #[wasm_bindgen(getter, js_name = "maxXmlDepth")]
    pub fn max_xml_depth(&self) -> usize {
        self.max_xml_depth
    }

    /// Overwrites `maxXmlDepth`.
    #[wasm_bindgen(setter, js_name = "maxXmlDepth")]
    pub fn set_max_xml_depth(&mut self, value: usize) {
        self.max_xml_depth = value;
    }

    /// Returns `maxTableCells`.
    #[wasm_bindgen(getter, js_name = "maxTableCells")]
    pub fn max_table_cells(&self) -> usize {
        self.max_table_cells
    }

    /// Overwrites `maxTableCells`.
    #[wasm_bindgen(setter, js_name = "maxTableCells")]
    pub fn set_max_table_cells(&mut self, value: usize) {
        self.max_table_cells = value;
    }

    /// Converts `kreuzberg::SecurityLimits::default()` into the binding type.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> Self {
        let core_defaults = kreuzberg::SecurityLimits::default();
        core_defaults.into()
    }
}
/// A PDF annotation extracted from a document page.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPdfAnnotation {
/// Exposed to JS as `annotationType`; the getter returns its `to_api_str()` string form.
annotation_type: WasmPdfAnnotationType,
/// Exposed to JS as `content`; `None` when not supplied.
content: Option<String>,
/// Exposed to JS as `pageNumber`; required by the constructor.
page_number: u32,
/// Exposed to JS as `boundingBox`; `None` when not supplied.
bounding_box: Option<WasmBoundingBox>,
}
#[wasm_bindgen]
impl WasmPdfAnnotation {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
annotationType: WasmPdfAnnotationType,
pageNumber: u32,
content: Option<String>,
boundingBox: Option<WasmBoundingBox>,
) -> WasmPdfAnnotation {
WasmPdfAnnotation {
annotation_type: annotationType,
content,
page_number: pageNumber,
bounding_box: boundingBox,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmPdfAnnotation {
<WasmPdfAnnotation as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "annotationType")]
pub fn annotation_type(&self) -> String {
self.annotation_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "annotationType")]
pub fn set_annotation_type(&mut self, value: WasmPdfAnnotationType) {
self.annotation_type = value;
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> Option<String> {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: Option<String>) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "pageNumber")]
pub fn page_number(&self) -> u32 {
self.page_number
}
#[wasm_bindgen(setter, js_name = "pageNumber")]
pub fn set_page_number(&mut self, value: u32) {
self.page_number = value;
}
#[wasm_bindgen(getter, js_name = "boundingBox")]
pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
self.bounding_box.clone()
}
#[wasm_bindgen(setter, js_name = "boundingBox")]
pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
self.bounding_box = value;
}
}
/// Comprehensive Djot document structure with semantic preservation.
///
/// This type captures the full richness of Djot markup, including:
/// - Block-level structures (headings, lists, blockquotes, code blocks, etc.)
/// - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.)
/// - Attributes (classes, IDs, key-value pairs)
/// - Links, images, footnotes
/// - Math expressions (inline and display)
/// - Tables with full structure
///
/// Available when the `djot` feature is enabled.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotContent {
/// Exposed to JS as `plainText`; required by the constructor.
plain_text: String,
/// Exposed to JS as `blocks`; required by the constructor.
blocks: Vec<WasmFormattedBlock>,
/// Exposed to JS as `metadata`; required by the constructor.
metadata: WasmMetadata,
/// Exposed to JS as `tables`; required by the constructor.
tables: Vec<WasmTable>,
/// Exposed to JS as `images`; required by the constructor.
images: Vec<WasmDjotImage>,
/// Exposed to JS as `links`; required by the constructor.
links: Vec<WasmDjotLink>,
/// Exposed to JS as `footnotes`; required by the constructor.
footnotes: Vec<WasmFootnote>,
/// Exposed to JS as `attributes`; required by the constructor.
attributes: Vec<String>,
}
#[wasm_bindgen]
impl WasmDjotContent {
    /// Creates Djot content from its eight required parts, stored as passed.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        plainText: String,
        blocks: Vec<WasmFormattedBlock>,
        metadata: WasmMetadata,
        tables: Vec<WasmTable>,
        images: Vec<WasmDjotImage>,
        links: Vec<WasmDjotLink>,
        footnotes: Vec<WasmFootnote>,
        attributes: Vec<String>,
    ) -> Self {
        let plain_text = plainText;
        Self {
            plain_text,
            blocks,
            metadata,
            tables,
            images,
            links,
            footnotes,
            attributes,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> Self {
        // Fully qualified so the trait method is called, not this inherent one.
        <Self as Default>::default()
    }

    /// Returns a copy of the plain text.
    #[wasm_bindgen(getter, js_name = "plainText")]
    pub fn plain_text(&self) -> String {
        self.plain_text.as_str().to_owned()
    }

    /// Overwrites the plain text.
    #[wasm_bindgen(setter, js_name = "plainText")]
    pub fn set_plain_text(&mut self, value: String) {
        self.plain_text = value;
    }

    /// Returns a copy of the block list.
    #[wasm_bindgen(getter)]
    pub fn blocks(&self) -> Vec<WasmFormattedBlock> {
        self.blocks.to_vec()
    }

    /// Overwrites the block list.
    #[wasm_bindgen(setter)]
    pub fn set_blocks(&mut self, value: Vec<WasmFormattedBlock>) {
        self.blocks = value;
    }

    /// Returns a copy of the metadata.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmMetadata {
        self.metadata.clone()
    }

    /// Overwrites the metadata.
    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmMetadata) {
        self.metadata = value;
    }

    /// Returns a copy of the table list.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec<WasmTable> {
        self.tables.to_vec()
    }

    /// Overwrites the table list.
    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec<WasmTable>) {
        self.tables = value;
    }

    /// Returns a copy of the image list.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Vec<WasmDjotImage> {
        self.images.to_vec()
    }

    /// Overwrites the image list.
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Vec<WasmDjotImage>) {
        self.images = value;
    }

    /// Returns a copy of the link list.
    #[wasm_bindgen(getter)]
    pub fn links(&self) -> Vec<WasmDjotLink> {
        self.links.to_vec()
    }

    /// Overwrites the link list.
    #[wasm_bindgen(setter)]
    pub fn set_links(&mut self, value: Vec<WasmDjotLink>) {
        self.links = value;
    }

    /// Returns a copy of the footnote list.
    #[wasm_bindgen(getter)]
    pub fn footnotes(&self) -> Vec<WasmFootnote> {
        self.footnotes.to_vec()
    }

    /// Overwrites the footnote list.
    #[wasm_bindgen(setter)]
    pub fn set_footnotes(&mut self, value: Vec<WasmFootnote>) {
        self.footnotes = value;
    }

    /// Returns a copy of the attribute list.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Vec<String> {
        self.attributes.to_vec()
    }

    /// Overwrites the attribute list.
    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Vec<String>) {
        self.attributes = value;
    }
}
/// Block-level element in a Djot document.
///
/// Represents structural elements like headings, paragraphs, lists, code blocks, etc.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFormattedBlock {
/// Exposed to JS as `blockType`; the getter returns its `to_api_str()` string form.
block_type: WasmBlockType,
/// Exposed to JS as `level`; `None` when not supplied.
level: Option<usize>,
/// Exposed to JS as `inlineContent`; required by the constructor.
inline_content: Vec<WasmInlineElement>,
/// Exposed to JS as `attributes`; `None` when not supplied.
attributes: Option<String>,
/// Exposed to JS as `language`; `None` when not supplied.
language: Option<String>,
/// Exposed to JS as `code`; `None` when not supplied.
code: Option<String>,
/// Exposed to JS as `children`; nested blocks of the same type, required by the constructor.
children: Vec<WasmFormattedBlock>,
}
#[wasm_bindgen]
impl WasmFormattedBlock {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
blockType: WasmBlockType,
inlineContent: Vec<WasmInlineElement>,
children: Vec<WasmFormattedBlock>,
level: Option<usize>,
attributes: Option<String>,
language: Option<String>,
code: Option<String>,
) -> WasmFormattedBlock {
WasmFormattedBlock {
block_type: blockType,
level,
inline_content: inlineContent,
attributes,
language,
code,
children,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmFormattedBlock {
<WasmFormattedBlock as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "blockType")]
pub fn block_type(&self) -> String {
self.block_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "blockType")]
pub fn set_block_type(&mut self, value: WasmBlockType) {
self.block_type = value;
}
#[wasm_bindgen(getter)]
pub fn level(&self) -> Option<usize> {
self.level
}
#[wasm_bindgen(setter)]
pub fn set_level(&mut self, value: Option<usize>) {
self.level = value;
}
#[wasm_bindgen(getter, js_name = "inlineContent")]
pub fn inline_content(&self) -> Vec<WasmInlineElement> {
self.inline_content.clone()
}
#[wasm_bindgen(setter, js_name = "inlineContent")]
pub fn set_inline_content(&mut self, value: Vec<WasmInlineElement>) {
self.inline_content = value;
}
#[wasm_bindgen(getter)]
pub fn attributes(&self) -> Option<String> {
self.attributes.clone()
}
#[wasm_bindgen(setter)]
pub fn set_attributes(&mut self, value: Option<String>) {
self.attributes = value;
}
#[wasm_bindgen(getter)]
pub fn language(&self) -> Option<String> {
self.language.clone()
}
#[wasm_bindgen(setter)]
pub fn set_language(&mut self, value: Option<String>) {
self.language = value;
}
#[wasm_bindgen(getter)]
pub fn code(&self) -> Option<String> {
self.code.clone()
}
#[wasm_bindgen(setter)]
pub fn set_code(&mut self, value: Option<String>) {
self.code = value;
}
#[wasm_bindgen(getter)]
pub fn children(&self) -> Vec<WasmFormattedBlock> {
self.children.clone()
}
#[wasm_bindgen(setter)]
pub fn set_children(&mut self, value: Vec<WasmFormattedBlock>) {
self.children = value;
}
}
/// Inline element within a block.
///
/// Represents text with formatting, links, images, etc.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmInlineElement {
/// Exposed to JS as `elementType`; the getter returns its `to_api_str()` string form.
element_type: WasmInlineType,
/// Exposed to JS as `content`; required by the constructor.
content: String,
/// Exposed to JS as `attributes`; `None` when not supplied.
attributes: Option<String>,
/// Exposed to JS as `metadata`; an optional opaque JS value stored as-is.
metadata: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmInlineElement {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
elementType: WasmInlineType,
content: String,
attributes: Option<String>,
metadata: Option<JsValue>,
) -> WasmInlineElement {
WasmInlineElement {
element_type: elementType,
content,
attributes,
metadata,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmInlineElement {
<WasmInlineElement as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "elementType")]
pub fn element_type(&self) -> String {
self.element_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "elementType")]
pub fn set_element_type(&mut self, value: WasmInlineType) {
self.element_type = value;
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter)]
pub fn attributes(&self) -> Option<String> {
self.attributes.clone()
}
#[wasm_bindgen(setter)]
pub fn set_attributes(&mut self, value: Option<String>) {
self.attributes = value;
}
#[wasm_bindgen(getter)]
pub fn metadata(&self) -> Option<JsValue> {
self.metadata.clone()
}
#[wasm_bindgen(setter)]
pub fn set_metadata(&mut self, value: Option<JsValue>) {
self.metadata = value;
}
}
/// Image element in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotImage {
    src: String,
    alt: String,
    title: Option<String>,
    attributes: Option<String>,
}

#[wasm_bindgen]
impl WasmDjotImage {
    /// Builds an image element directly from its four field values.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(src: String, alt: String, title: Option<String>, attributes: Option<String>) -> WasmDjotImage {
        Self {
            src,
            alt,
            title,
            attributes,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDjotImage {
        <Self as Default>::default()
    }

    /// Returns a clone of the `src` string.
    #[wasm_bindgen(getter)]
    pub fn src(&self) -> String {
        String::clone(&self.src)
    }

    /// Replaces the stored `src` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_src(&mut self, value: String) {
        self.src = value;
    }

    /// Returns a clone of the `alt` string.
    #[wasm_bindgen(getter)]
    pub fn alt(&self) -> String {
        String::clone(&self.alt)
    }

    /// Replaces the stored `alt` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_alt(&mut self, value: String) {
        self.alt = value;
    }

    /// Returns a clone of the optional `title` string.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option<String> {
        Option::clone(&self.title)
    }

    /// Replaces the stored `title` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option<String>) {
        self.title = value;
    }

    /// Returns a clone of the optional `attributes` string.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option<String> {
        Option::clone(&self.attributes)
    }

    /// Replaces the stored `attributes` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option<String>) {
        self.attributes = value;
    }
}
/// Link element in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDjotLink {
    url: String,
    text: String,
    title: Option<String>,
    attributes: Option<String>,
}

#[wasm_bindgen]
impl WasmDjotLink {
    /// Builds a link element directly from its four field values.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(url: String, text: String, title: Option<String>, attributes: Option<String>) -> WasmDjotLink {
        Self {
            url,
            text,
            title,
            attributes,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDjotLink {
        <Self as Default>::default()
    }

    /// Returns a clone of the `url` string.
    #[wasm_bindgen(getter)]
    pub fn url(&self) -> String {
        String::clone(&self.url)
    }

    /// Replaces the stored `url` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_url(&mut self, value: String) {
        self.url = value;
    }

    /// Returns a clone of the `text` string.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        String::clone(&self.text)
    }

    /// Replaces the stored `text` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    /// Returns a clone of the optional `title` string.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option<String> {
        Option::clone(&self.title)
    }

    /// Replaces the stored `title` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option<String>) {
        self.title = value;
    }

    /// Returns a clone of the optional `attributes` string.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option<String> {
        Option::clone(&self.attributes)
    }

    /// Replaces the stored `attributes` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option<String>) {
        self.attributes = value;
    }
}
/// Footnote in Djot.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFootnote {
    label: String,
    content: Vec<WasmFormattedBlock>,
}

#[wasm_bindgen]
impl WasmFootnote {
    /// Builds a footnote from its label and its content blocks.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(label: String, content: Vec<WasmFormattedBlock>) -> WasmFootnote {
        Self { label, content }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFootnote {
        <Self as Default>::default()
    }

    /// Returns a clone of the `label` string.
    #[wasm_bindgen(getter)]
    pub fn label(&self) -> String {
        String::clone(&self.label)
    }

    /// Replaces the stored `label` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_label(&mut self, value: String) {
        self.label = value;
    }

    /// Returns a cloned copy of the footnote's content blocks.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> Vec<WasmFormattedBlock> {
        self.content.to_vec()
    }

    /// Replaces the stored content blocks with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: Vec<WasmFormattedBlock>) {
        self.content = value;
    }
}
/// Top-level structured document representation.
///
/// A flat array of nodes with index-based parent/child references forming a tree.
/// Root-level nodes have `parent: None`. Use `body_roots()` and `furniture_roots()`
/// to iterate over top-level content by layer.
///
/// # Validation
///
/// Call `validate()` after construction to verify all node indices are in bounds
/// and parent-child relationships are bidirectionally consistent.
///
/// NOTE(review): `body_roots()`, `furniture_roots()` and `validate()` are not among
/// the methods bound in this wrapper's impl block; presumably they live on the core
/// `kreuzberg::DocumentStructure` — confirm before relying on them from JS.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentStructure {
    nodes: Vec<WasmDocumentNode>,
    source_format: Option<String>,
    relationships: Vec<WasmDocumentRelationship>,
    node_types: Vec<String>,
}

#[wasm_bindgen]
impl WasmDocumentStructure {
    /// Builds a document structure; every omitted collection falls back to an
    /// empty vector and `sourceFormat` is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        nodes: Option<Vec<WasmDocumentNode>>,
        relationships: Option<Vec<WasmDocumentRelationship>>,
        nodeTypes: Option<Vec<String>>,
        sourceFormat: Option<String>,
    ) -> WasmDocumentStructure {
        WasmDocumentStructure {
            nodes: nodes.unwrap_or_default(),
            source_format: sourceFormat,
            relationships: relationships.unwrap_or_default(),
            node_types: nodeTypes.unwrap_or_default(),
        }
    }

    /// Returns a cloned copy of the node list.
    #[wasm_bindgen(getter)]
    pub fn nodes(&self) -> Vec<WasmDocumentNode> {
        self.nodes.clone()
    }

    /// Replaces the stored node list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_nodes(&mut self, value: Vec<WasmDocumentNode>) {
        self.nodes = value;
    }

    /// Returns a clone of the optional source format string.
    #[wasm_bindgen(getter, js_name = "sourceFormat")]
    pub fn source_format(&self) -> Option<String> {
        self.source_format.clone()
    }

    /// Replaces the stored source format with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "sourceFormat")]
    pub fn set_source_format(&mut self, value: Option<String>) {
        self.source_format = value;
    }

    /// Returns a cloned copy of the relationship list.
    #[wasm_bindgen(getter)]
    pub fn relationships(&self) -> Vec<WasmDocumentRelationship> {
        self.relationships.clone()
    }

    /// Replaces the stored relationship list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_relationships(&mut self, value: Vec<WasmDocumentRelationship>) {
        self.relationships = value;
    }

    /// Returns a cloned copy of the node type names.
    #[wasm_bindgen(getter, js_name = "nodeTypes")]
    pub fn node_types(&self) -> Vec<String> {
        self.node_types.clone()
    }

    /// Replaces the stored node type names with `value`.
    #[wasm_bindgen(setter, js_name = "nodeTypes")]
    pub fn set_node_types(&mut self, value: Vec<String>) {
        self.node_types = value;
    }

    /// Compute and populate the `node_types` field from the current `nodes`.
    ///
    /// Call this after all nodes have been added to the structure. Internal
    /// construction paths (builder, derivation) call this automatically.
    ///
    /// The computation is delegated to the core `kreuzberg::DocumentStructure`:
    /// a clone of this wrapper is converted, finalized, and the resulting
    /// `node_types` are copied back into `self`. Previously the finalized core
    /// value was dropped, so the wrapper was never updated.
    #[wasm_bindgen(js_name = "finalizeNodeTypes")]
    pub fn finalize_node_types(&mut self) {
        let mut core = kreuzberg::DocumentStructure::from(self.clone());
        core.finalize_node_types();
        // Only `node_types` is written back so the other fields keep the exact
        // values held by this wrapper instead of a round-tripped copy.
        let finalized: WasmDocumentStructure = core.into();
        self.node_types = finalized.node_types;
    }

    /// Check if the document structure is empty.
    ///
    /// Delegates to the core `is_empty()`; this clones and converts the whole
    /// structure on every call.
    #[wasm_bindgen(js_name = "isEmpty")]
    pub fn is_empty(&self) -> bool {
        kreuzberg::DocumentStructure::from(self.clone()).is_empty()
    }

    /// Returns the wrapper built from the core `kreuzberg::DocumentStructure`
    /// default value (not from the derived `Default` of this wrapper).
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmDocumentStructure {
        kreuzberg::DocumentStructure::default().into()
    }
}
/// A resolved relationship between two nodes in the document tree.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentRelationship {
    source: u32,
    target: u32,
    kind: WasmRelationshipKind,
}

#[wasm_bindgen]
impl WasmDocumentRelationship {
    /// Builds a relationship from its source, target and kind.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(source: u32, target: u32, kind: WasmRelationshipKind) -> WasmDocumentRelationship {
        Self { source, target, kind }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDocumentRelationship {
        <Self as Default>::default()
    }

    /// Returns the stored `source` value.
    #[wasm_bindgen(getter)]
    pub fn source(&self) -> u32 {
        self.source
    }

    /// Replaces the stored `source` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: u32) {
        self.source = value;
    }

    /// Returns the stored `target` value.
    #[wasm_bindgen(getter)]
    pub fn target(&self) -> u32 {
        self.target
    }

    /// Replaces the stored `target` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_target(&mut self, value: u32) {
        self.target = value;
    }

    /// Returns the relationship kind as an owned string built from `to_api_str()`.
    #[wasm_bindgen(getter)]
    pub fn kind(&self) -> String {
        let api_name = self.kind.to_api_str();
        api_name.to_owned()
    }

    /// Replaces the stored kind with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_kind(&mut self, value: WasmRelationshipKind) {
        self.kind = value;
    }
}
/// A single node in the document tree.
///
/// Each node has deterministic `id`, typed `content`, optional `parent`/`children`
/// for tree structure, and metadata like page number, bounding box, and content layer.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentNode {
    id: String,
    content: JsValue,
    parent: Option<u32>,
    children: Vec<u32>,
    content_layer: WasmContentLayer,
    page: Option<u32>,
    page_end: Option<u32>,
    bbox: Option<WasmBoundingBox>,
    annotations: Vec<WasmTextAnnotation>,
    attributes: Option<JsValue>,
}

#[wasm_bindgen]
impl WasmDocumentNode {
    /// Builds a node from its field values.
    ///
    /// The parameter order differs from the field order: the required values
    /// come first, followed by the optional ones.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        id: String,
        content: JsValue,
        children: Vec<u32>,
        contentLayer: WasmContentLayer,
        annotations: Vec<WasmTextAnnotation>,
        parent: Option<u32>,
        page: Option<u32>,
        pageEnd: Option<u32>,
        bbox: Option<WasmBoundingBox>,
        attributes: Option<JsValue>,
    ) -> WasmDocumentNode {
        Self {
            id,
            content,
            parent,
            children,
            content_layer: contentLayer,
            page,
            page_end: pageEnd,
            bbox,
            annotations,
            attributes,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDocumentNode {
        <Self as Default>::default()
    }

    /// Returns a clone of the `id` string.
    #[wasm_bindgen(getter)]
    pub fn id(&self) -> String {
        String::clone(&self.id)
    }

    /// Replaces the stored `id` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_id(&mut self, value: String) {
        self.id = value;
    }

    /// Returns a clone of the `content` value.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> JsValue {
        JsValue::clone(&self.content)
    }

    /// Replaces the stored `content` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: JsValue) {
        self.content = value;
    }

    /// Returns the stored parent index, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn parent(&self) -> Option<u32> {
        self.parent
    }

    /// Replaces the stored parent index with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_parent(&mut self, value: Option<u32>) {
        self.parent = value;
    }

    /// Returns a copy of the child index list.
    #[wasm_bindgen(getter)]
    pub fn children(&self) -> Vec<u32> {
        self.children.to_vec()
    }

    /// Replaces the stored child index list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_children(&mut self, value: Vec<u32>) {
        self.children = value;
    }

    /// Returns the content layer as an owned string built from `to_api_str()`.
    #[wasm_bindgen(getter, js_name = "contentLayer")]
    pub fn content_layer(&self) -> String {
        let api_name = self.content_layer.to_api_str();
        api_name.to_owned()
    }

    /// Replaces the stored content layer with `value`.
    #[wasm_bindgen(setter, js_name = "contentLayer")]
    pub fn set_content_layer(&mut self, value: WasmContentLayer) {
        self.content_layer = value;
    }

    /// Returns the stored `page`, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn page(&self) -> Option<u32> {
        self.page
    }

    /// Replaces the stored `page` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_page(&mut self, value: Option<u32>) {
        self.page = value;
    }

    /// Returns the stored `page_end`, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "pageEnd")]
    pub fn page_end(&self) -> Option<u32> {
        self.page_end
    }

    /// Replaces the stored `page_end` with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "pageEnd")]
    pub fn set_page_end(&mut self, value: Option<u32>) {
        self.page_end = value;
    }

    /// Returns a clone of the optional bounding box.
    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option<WasmBoundingBox> {
        Option::clone(&self.bbox)
    }

    /// Replaces the stored bounding box with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option<WasmBoundingBox>) {
        self.bbox = value;
    }

    /// Returns a cloned copy of the annotation list.
    #[wasm_bindgen(getter)]
    pub fn annotations(&self) -> Vec<WasmTextAnnotation> {
        self.annotations.to_vec()
    }

    /// Replaces the stored annotation list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_annotations(&mut self, value: Vec<WasmTextAnnotation>) {
        self.annotations = value;
    }

    /// Returns a clone of the optional `attributes` value.
    #[wasm_bindgen(getter)]
    pub fn attributes(&self) -> Option<JsValue> {
        Option::clone(&self.attributes)
    }

    /// Replaces the stored `attributes` with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_attributes(&mut self, value: Option<JsValue>) {
        self.attributes = value;
    }
}
/// Structured table grid with cell-level metadata.
///
/// Stores row/column dimensions and a flat list of cells with position info.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTableGrid {
    rows: u32,
    cols: u32,
    cells: Vec<WasmGridCell>,
}

#[wasm_bindgen]
impl WasmTableGrid {
    /// Builds a grid; omitted dimensions become `0` and omitted cells become
    /// an empty list.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(rows: Option<u32>, cols: Option<u32>, cells: Option<Vec<WasmGridCell>>) -> WasmTableGrid {
        let rows = rows.unwrap_or(0);
        let cols = cols.unwrap_or(0);
        let cells = cells.unwrap_or_else(Vec::new);
        Self { rows, cols, cells }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTableGrid {
        <Self as Default>::default()
    }

    /// Returns the stored row count.
    #[wasm_bindgen(getter)]
    pub fn rows(&self) -> u32 {
        self.rows
    }

    /// Replaces the stored row count with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_rows(&mut self, value: u32) {
        self.rows = value;
    }

    /// Returns the stored column count.
    #[wasm_bindgen(getter)]
    pub fn cols(&self) -> u32 {
        self.cols
    }

    /// Replaces the stored column count with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_cols(&mut self, value: u32) {
        self.cols = value;
    }

    /// Returns a cloned copy of the cell list.
    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> Vec<WasmGridCell> {
        self.cells.to_vec()
    }

    /// Replaces the stored cell list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: Vec<WasmGridCell>) {
        self.cells = value;
    }
}
/// Individual grid cell with position and span metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmGridCell {
    content: String,
    row: u32,
    col: u32,
    row_span: u32,
    col_span: u32,
    is_header: bool,
    bbox: Option<WasmBoundingBox>,
}

#[wasm_bindgen]
impl WasmGridCell {
    /// Builds a grid cell directly from its seven field values.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        row: u32,
        col: u32,
        rowSpan: u32,
        colSpan: u32,
        isHeader: bool,
        bbox: Option<WasmBoundingBox>,
    ) -> WasmGridCell {
        Self {
            content,
            row,
            col,
            row_span: rowSpan,
            col_span: colSpan,
            is_header: isHeader,
            bbox,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmGridCell {
        <Self as Default>::default()
    }

    /// Returns a clone of the `content` string.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        String::clone(&self.content)
    }

    /// Replaces the stored `content` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns the stored `row` value.
    #[wasm_bindgen(getter)]
    pub fn row(&self) -> u32 {
        self.row
    }

    /// Replaces the stored `row` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_row(&mut self, value: u32) {
        self.row = value;
    }

    /// Returns the stored `col` value.
    #[wasm_bindgen(getter)]
    pub fn col(&self) -> u32 {
        self.col
    }

    /// Replaces the stored `col` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_col(&mut self, value: u32) {
        self.col = value;
    }

    /// Returns the stored `row_span` value.
    #[wasm_bindgen(getter, js_name = "rowSpan")]
    pub fn row_span(&self) -> u32 {
        self.row_span
    }

    /// Replaces the stored `row_span` with `value`.
    #[wasm_bindgen(setter, js_name = "rowSpan")]
    pub fn set_row_span(&mut self, value: u32) {
        self.row_span = value;
    }

    /// Returns the stored `col_span` value.
    #[wasm_bindgen(getter, js_name = "colSpan")]
    pub fn col_span(&self) -> u32 {
        self.col_span
    }

    /// Replaces the stored `col_span` with `value`.
    #[wasm_bindgen(setter, js_name = "colSpan")]
    pub fn set_col_span(&mut self, value: u32) {
        self.col_span = value;
    }

    /// Returns the stored `is_header` flag.
    #[wasm_bindgen(getter, js_name = "isHeader")]
    pub fn is_header(&self) -> bool {
        self.is_header
    }

    /// Replaces the stored `is_header` flag with `value`.
    #[wasm_bindgen(setter, js_name = "isHeader")]
    pub fn set_is_header(&mut self, value: bool) {
        self.is_header = value;
    }

    /// Returns a clone of the optional bounding box.
    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option<WasmBoundingBox> {
        Option::clone(&self.bbox)
    }

    /// Replaces the stored bounding box with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option<WasmBoundingBox>) {
        self.bbox = value;
    }
}
/// Inline text annotation — byte-range based formatting and links.
///
/// Annotations reference byte offsets into the node's text content,
/// enabling precise identification of formatted regions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextAnnotation {
start: u32,
end: u32,
kind: JsValue,
}
#[wasm_bindgen]
impl WasmTextAnnotation {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(start: u32, end: u32, kind: JsValue) -> WasmTextAnnotation {
WasmTextAnnotation { start, end, kind }
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmTextAnnotation {
<WasmTextAnnotation as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn start(&self) -> u32 {
self.start
}
#[wasm_bindgen(setter)]
pub fn set_start(&mut self, value: u32) {
self.start = value;
}
#[wasm_bindgen(getter)]
pub fn end(&self) -> u32 {
self.end
}
#[wasm_bindgen(setter)]
pub fn set_end(&mut self, value: u32) {
self.end = value;
}
#[wasm_bindgen(getter)]
pub fn kind(&self) -> JsValue {
self.kind.clone()
}
#[wasm_bindgen(setter)]
pub fn set_kind(&mut self, value: JsValue) {
self.kind = value;
}
}
/// General extraction result used by the core extraction API.
///
/// This is the main result type returned by all extraction functions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExtractionResult {
    content: String,
    mime_type: String,
    metadata: WasmMetadata,
    extraction_method: Option<WasmExtractionMethod>,
    tables: Vec<WasmTable>,
    detected_languages: Option<Vec<String>>,
    chunks: Option<Vec<WasmChunk>>,
    images: Option<Vec<WasmExtractedImage>>,
    pages: Option<Vec<WasmPageContent>>,
    elements: Option<Vec<WasmElement>>,
    djot_content: Option<WasmDjotContent>,
    ocr_elements: Option<Vec<WasmOcrElement>>,
    document: Option<WasmDocumentStructure>,
    quality_score: Option<f64>,
    processing_warnings: Vec<WasmProcessingWarning>,
    annotations: Option<Vec<WasmPdfAnnotation>>,
    children: Option<Vec<WasmArchiveEntry>>,
    uris: Option<Vec<WasmExtractedUri>>,
    revisions: Option<Vec<WasmDocumentRevision>>,
    structured_output: Option<JsValue>,
    code_intelligence: Option<JsValue>,
    llm_usage: Option<Vec<WasmLlmUsage>>,
    formatted_content: Option<String>,
    ocr_internal_document: Option<String>,
}

#[wasm_bindgen]
impl WasmExtractionResult {
    /// Builds an extraction result.
    ///
    /// Omitted `content`, `mimeType`, `metadata`, `tables` and
    /// `processingWarnings` fall back to their default values; the remaining
    /// optional arguments are stored as given. `code_intelligence` has no
    /// constructor parameter and always starts as `None`; use the
    /// `codeIntelligence` setter to populate it.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: Option<String>,
        mimeType: Option<String>,
        metadata: Option<WasmMetadata>,
        tables: Option<Vec<WasmTable>>,
        processingWarnings: Option<Vec<WasmProcessingWarning>>,
        extractionMethod: Option<WasmExtractionMethod>,
        detectedLanguages: Option<Vec<String>>,
        chunks: Option<Vec<WasmChunk>>,
        images: Option<Vec<WasmExtractedImage>>,
        pages: Option<Vec<WasmPageContent>>,
        elements: Option<Vec<WasmElement>>,
        djotContent: Option<WasmDjotContent>,
        ocrElements: Option<Vec<WasmOcrElement>>,
        document: Option<WasmDocumentStructure>,
        qualityScore: Option<f64>,
        annotations: Option<Vec<WasmPdfAnnotation>>,
        children: Option<Vec<WasmArchiveEntry>>,
        uris: Option<Vec<WasmExtractedUri>>,
        revisions: Option<Vec<WasmDocumentRevision>>,
        structuredOutput: Option<JsValue>,
        llmUsage: Option<Vec<WasmLlmUsage>>,
        formattedContent: Option<String>,
        ocrInternalDocument: Option<String>,
    ) -> WasmExtractionResult {
        Self {
            content: content.unwrap_or_default(),
            mime_type: mimeType.unwrap_or_default(),
            metadata: metadata.unwrap_or_default(),
            extraction_method: extractionMethod,
            tables: tables.unwrap_or_default(),
            detected_languages: detectedLanguages,
            chunks,
            images,
            pages,
            elements,
            djot_content: djotContent,
            ocr_elements: ocrElements,
            document,
            quality_score: qualityScore,
            processing_warnings: processingWarnings.unwrap_or_default(),
            annotations,
            children,
            uris,
            revisions,
            structured_output: structuredOutput,
            code_intelligence: None,
            llm_usage: llmUsage,
            formatted_content: formattedContent,
            ocr_internal_document: ocrInternalDocument,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExtractionResult {
        <Self as Default>::default()
    }

    /// Returns a clone of the `content` string.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        String::clone(&self.content)
    }

    /// Replaces the stored `content` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a clone of the `mime_type` string.
    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        String::clone(&self.mime_type)
    }

    /// Replaces the stored `mime_type` with `value`.
    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }

    /// Returns a clone of the metadata.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmMetadata {
        WasmMetadata::clone(&self.metadata)
    }

    /// Replaces the stored metadata with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmMetadata) {
        self.metadata = value;
    }

    /// Returns the extraction method as an owned string built from
    /// `to_api_str()`, or `None` when no method is stored.
    #[wasm_bindgen(getter, js_name = "extractionMethod")]
    pub fn extraction_method(&self) -> Option<String> {
        self.extraction_method.map(|method| {
            let api_name = method.to_api_str();
            api_name.to_owned()
        })
    }

    /// Replaces the stored extraction method with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "extractionMethod")]
    pub fn set_extraction_method(&mut self, value: Option<WasmExtractionMethod>) {
        self.extraction_method = value;
    }

    /// Returns a cloned copy of the table list.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec<WasmTable> {
        self.tables.to_vec()
    }

    /// Replaces the stored table list with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec<WasmTable>) {
        self.tables = value;
    }

    /// Returns a clone of the optional detected-language list.
    #[wasm_bindgen(getter, js_name = "detectedLanguages")]
    pub fn detected_languages(&self) -> Option<Vec<String>> {
        Option::clone(&self.detected_languages)
    }

    /// Replaces the stored detected-language list with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "detectedLanguages")]
    pub fn set_detected_languages(&mut self, value: Option<Vec<String>>) {
        self.detected_languages = value;
    }

    /// Returns `None` when no chunks are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each chunk converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn chunks(&self) -> Option<js_sys::Array> {
        let stored = self.chunks.as_ref()?;
        let out = js_sys::Array::new();
        for chunk in stored.iter().cloned() {
            out.push(&JsValue::from(chunk));
        }
        Some(out)
    }

    /// Replaces the stored chunks with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_chunks(&mut self, value: Option<Vec<WasmChunk>>) {
        self.chunks = value;
    }

    /// Returns `None` when no images are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each image converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Option<js_sys::Array> {
        let stored = self.images.as_ref()?;
        let out = js_sys::Array::new();
        for image in stored.iter().cloned() {
            out.push(&JsValue::from(image));
        }
        Some(out)
    }

    /// Replaces the stored images with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Option<Vec<WasmExtractedImage>>) {
        self.images = value;
    }

    /// Returns `None` when no pages are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each page converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option<js_sys::Array> {
        let stored = self.pages.as_ref()?;
        let out = js_sys::Array::new();
        for page in stored.iter().cloned() {
            out.push(&JsValue::from(page));
        }
        Some(out)
    }

    /// Replaces the stored pages with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option<Vec<WasmPageContent>>) {
        self.pages = value;
    }

    /// Returns `None` when no elements are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each element converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn elements(&self) -> Option<js_sys::Array> {
        let stored = self.elements.as_ref()?;
        let out = js_sys::Array::new();
        for element in stored.iter().cloned() {
            out.push(&JsValue::from(element));
        }
        Some(out)
    }

    /// Replaces the stored elements with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_elements(&mut self, value: Option<Vec<WasmElement>>) {
        self.elements = value;
    }

    /// Returns a clone of the optional Djot content.
    #[wasm_bindgen(getter, js_name = "djotContent")]
    pub fn djot_content(&self) -> Option<WasmDjotContent> {
        Option::clone(&self.djot_content)
    }

    /// Replaces the stored Djot content with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "djotContent")]
    pub fn set_djot_content(&mut self, value: Option<WasmDjotContent>) {
        self.djot_content = value;
    }

    /// Returns `None` when no OCR elements are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each one converted to `JsValue`.
    #[wasm_bindgen(getter, js_name = "ocrElements")]
    pub fn ocr_elements(&self) -> Option<js_sys::Array> {
        let stored = self.ocr_elements.as_ref()?;
        let out = js_sys::Array::new();
        for element in stored.iter().cloned() {
            out.push(&JsValue::from(element));
        }
        Some(out)
    }

    /// Replaces the stored OCR elements with `value` (`None` clears them).
    #[wasm_bindgen(setter, js_name = "ocrElements")]
    pub fn set_ocr_elements(&mut self, value: Option<Vec<WasmOcrElement>>) {
        self.ocr_elements = value;
    }

    /// Returns a clone of the optional document structure.
    #[wasm_bindgen(getter)]
    pub fn document(&self) -> Option<WasmDocumentStructure> {
        Option::clone(&self.document)
    }

    /// Replaces the stored document structure with `value` (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_document(&mut self, value: Option<WasmDocumentStructure>) {
        self.document = value;
    }

    /// Returns the stored quality score, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "qualityScore")]
    pub fn quality_score(&self) -> Option<f64> {
        self.quality_score
    }

    /// Replaces the stored quality score with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "qualityScore")]
    pub fn set_quality_score(&mut self, value: Option<f64>) {
        self.quality_score = value;
    }

    /// Returns a cloned copy of the processing warnings.
    #[wasm_bindgen(getter, js_name = "processingWarnings")]
    pub fn processing_warnings(&self) -> Vec<WasmProcessingWarning> {
        self.processing_warnings.to_vec()
    }

    /// Replaces the stored processing warnings with `value`.
    #[wasm_bindgen(setter, js_name = "processingWarnings")]
    pub fn set_processing_warnings(&mut self, value: Vec<WasmProcessingWarning>) {
        self.processing_warnings = value;
    }

    /// Returns `None` when no annotations are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each one converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn annotations(&self) -> Option<js_sys::Array> {
        let stored = self.annotations.as_ref()?;
        let out = js_sys::Array::new();
        for annotation in stored.iter().cloned() {
            out.push(&JsValue::from(annotation));
        }
        Some(out)
    }

    /// Replaces the stored annotations with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_annotations(&mut self, value: Option<Vec<WasmPdfAnnotation>>) {
        self.annotations = value;
    }

    /// Returns `None` when no child entries are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each entry converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn children(&self) -> Option<js_sys::Array> {
        let stored = self.children.as_ref()?;
        let out = js_sys::Array::new();
        for entry in stored.iter().cloned() {
            out.push(&JsValue::from(entry));
        }
        Some(out)
    }

    /// Replaces the stored child entries with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_children(&mut self, value: Option<Vec<WasmArchiveEntry>>) {
        self.children = value;
    }

    /// Returns `None` when no URIs are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each URI converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn uris(&self) -> Option<js_sys::Array> {
        let stored = self.uris.as_ref()?;
        let out = js_sys::Array::new();
        for uri in stored.iter().cloned() {
            out.push(&JsValue::from(uri));
        }
        Some(out)
    }

    /// Replaces the stored URIs with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_uris(&mut self, value: Option<Vec<WasmExtractedUri>>) {
        self.uris = value;
    }

    /// Returns `None` when no revisions are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each revision converted to `JsValue`.
    #[wasm_bindgen(getter)]
    pub fn revisions(&self) -> Option<js_sys::Array> {
        let stored = self.revisions.as_ref()?;
        let out = js_sys::Array::new();
        for revision in stored.iter().cloned() {
            out.push(&JsValue::from(revision));
        }
        Some(out)
    }

    /// Replaces the stored revisions with `value` (`None` clears them).
    #[wasm_bindgen(setter)]
    pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
        self.revisions = value;
    }

    /// Returns a clone of the optional structured output value.
    #[wasm_bindgen(getter, js_name = "structuredOutput")]
    pub fn structured_output(&self) -> Option<JsValue> {
        Option::clone(&self.structured_output)
    }

    /// Replaces the stored structured output with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "structuredOutput")]
    pub fn set_structured_output(&mut self, value: Option<JsValue>) {
        self.structured_output = value;
    }

    /// Returns a clone of the optional code intelligence value.
    #[wasm_bindgen(getter, js_name = "codeIntelligence")]
    pub fn code_intelligence(&self) -> Option<JsValue> {
        Option::clone(&self.code_intelligence)
    }

    /// Replaces the stored code intelligence value with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "codeIntelligence")]
    pub fn set_code_intelligence(&mut self, value: Option<JsValue>) {
        self.code_intelligence = value;
    }

    /// Returns `None` when no LLM usage entries are stored; otherwise a new
    /// `js_sys::Array` holding a clone of each entry converted to `JsValue`.
    #[wasm_bindgen(getter, js_name = "llmUsage")]
    pub fn llm_usage(&self) -> Option<js_sys::Array> {
        let stored = self.llm_usage.as_ref()?;
        let out = js_sys::Array::new();
        for usage in stored.iter().cloned() {
            out.push(&JsValue::from(usage));
        }
        Some(out)
    }

    /// Replaces the stored LLM usage entries with `value` (`None` clears them).
    #[wasm_bindgen(setter, js_name = "llmUsage")]
    pub fn set_llm_usage(&mut self, value: Option<Vec<WasmLlmUsage>>) {
        self.llm_usage = value;
    }

    /// Returns a clone of the optional formatted content string.
    #[wasm_bindgen(getter, js_name = "formattedContent")]
    pub fn formatted_content(&self) -> Option<String> {
        Option::clone(&self.formatted_content)
    }

    /// Replaces the stored formatted content with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "formattedContent")]
    pub fn set_formatted_content(&mut self, value: Option<String>) {
        self.formatted_content = value;
    }

    /// Returns a clone of the optional OCR internal document string.
    #[wasm_bindgen(getter, js_name = "ocrInternalDocument")]
    pub fn ocr_internal_document(&self) -> Option<String> {
        Option::clone(&self.ocr_internal_document)
    }

    /// Replaces the stored OCR internal document with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "ocrInternalDocument")]
    pub fn set_ocr_internal_document(&mut self, value: Option<String>) {
        self.ocr_internal_document = value;
    }

    /// Convert from an OCR result.
    ///
    /// The wrapper is converted to the core OCR result, passed through
    /// `kreuzberg::ExtractionResult::from_ocr`, and converted back.
    #[wasm_bindgen(js_name = "fromOcr")]
    pub fn from_ocr(ocr: WasmOcrExtractionResult) -> WasmExtractionResult {
        let core_input: kreuzberg::OcrExtractionResult = ocr.into();
        let core_result = kreuzberg::ExtractionResult::from_ocr(core_input);
        core_result.into()
    }
}
/// A single file extracted from an archive.
///
/// When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction
/// enabled, each processable file produces its own full `ExtractionResult`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmArchiveEntry {
path: String,
mime_type: String,
result: WasmExtractionResult,
}
#[wasm_bindgen]
impl WasmArchiveEntry {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(path: String, mimeType: String, result: WasmExtractionResult) -> WasmArchiveEntry {
WasmArchiveEntry {
path,
mime_type: mimeType,
result,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmArchiveEntry {
<WasmArchiveEntry as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn path(&self) -> String {
self.path.clone()
}
#[wasm_bindgen(setter)]
pub fn set_path(&mut self, value: String) {
self.path = value;
}
#[wasm_bindgen(getter, js_name = "mimeType")]
pub fn mime_type(&self) -> String {
self.mime_type.clone()
}
#[wasm_bindgen(setter, js_name = "mimeType")]
pub fn set_mime_type(&mut self, value: String) {
self.mime_type = value;
}
#[wasm_bindgen(getter)]
pub fn result(&self) -> WasmExtractionResult {
self.result.clone()
}
#[wasm_bindgen(setter)]
pub fn set_result(&mut self, value: WasmExtractionResult) {
self.result = value;
}
}
/// A non-fatal warning from a processing pipeline stage.
///
/// Captures errors from optional features that don't prevent extraction
/// but may indicate degraded results.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmProcessingWarning {
    source: String,
    message: String,
}

#[wasm_bindgen]
impl WasmProcessingWarning {
    /// Builds a warning from its source and message strings.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(source: String, message: String) -> WasmProcessingWarning {
        Self { source, message }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmProcessingWarning {
        <Self as Default>::default()
    }

    /// Returns a clone of the `source` string.
    #[wasm_bindgen(getter)]
    pub fn source(&self) -> String {
        String::clone(&self.source)
    }

    /// Replaces the stored `source` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: String) {
        self.source = value;
    }

    /// Returns a clone of the `message` string.
    #[wasm_bindgen(getter)]
    pub fn message(&self) -> String {
        String::clone(&self.message)
    }

    /// Replaces the stored `message` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_message(&mut self, value: String) {
        self.message = value;
    }
}
/// Token usage and cost data for a single LLM call made during extraction.
///
/// Populated when VLM OCR, structured extraction, or LLM-based embeddings
/// are used. Multiple entries may be present when multiple LLM calls occur
/// within one extraction (e.g. VLM OCR + structured extraction).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLlmUsage {
    model: String,
    source: String,
    input_tokens: Option<u64>,
    output_tokens: Option<u64>,
    total_tokens: Option<u64>,
    estimated_cost: Option<f64>,
    finish_reason: Option<String>,
}

#[wasm_bindgen]
impl WasmLlmUsage {
    /// Builds a usage record; omitted `model` and `source` become empty
    /// strings, and the remaining optional values are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        model: Option<String>,
        source: Option<String>,
        inputTokens: Option<u64>,
        outputTokens: Option<u64>,
        totalTokens: Option<u64>,
        estimatedCost: Option<f64>,
        finishReason: Option<String>,
    ) -> WasmLlmUsage {
        let model = model.unwrap_or_default();
        let source = source.unwrap_or_default();
        Self {
            model,
            source,
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            total_tokens: totalTokens,
            estimated_cost: estimatedCost,
            finish_reason: finishReason,
        }
    }

    /// Returns the `Default` value of this type (forwards to the `Default` trait impl).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmLlmUsage {
        <Self as Default>::default()
    }

    /// Returns a clone of the `model` string.
    #[wasm_bindgen(getter)]
    pub fn model(&self) -> String {
        String::clone(&self.model)
    }

    /// Replaces the stored `model` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_model(&mut self, value: String) {
        self.model = value;
    }

    /// Returns a clone of the `source` string.
    #[wasm_bindgen(getter)]
    pub fn source(&self) -> String {
        String::clone(&self.source)
    }

    /// Replaces the stored `source` with `value`.
    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: String) {
        self.source = value;
    }

    /// Returns the stored input token count, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "inputTokens")]
    pub fn input_tokens(&self) -> Option<u64> {
        self.input_tokens
    }

    /// Replaces the stored input token count with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "inputTokens")]
    pub fn set_input_tokens(&mut self, value: Option<u64>) {
        self.input_tokens = value;
    }

    /// Returns the stored output token count, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "outputTokens")]
    pub fn output_tokens(&self) -> Option<u64> {
        self.output_tokens
    }

    /// Replaces the stored output token count with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "outputTokens")]
    pub fn set_output_tokens(&mut self, value: Option<u64>) {
        self.output_tokens = value;
    }

    /// Returns the stored total token count, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "totalTokens")]
    pub fn total_tokens(&self) -> Option<u64> {
        self.total_tokens
    }

    /// Replaces the stored total token count with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "totalTokens")]
    pub fn set_total_tokens(&mut self, value: Option<u64>) {
        self.total_tokens = value;
    }

    /// Returns the stored estimated cost, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "estimatedCost")]
    pub fn estimated_cost(&self) -> Option<f64> {
        self.estimated_cost
    }

    /// Replaces the stored estimated cost with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "estimatedCost")]
    pub fn set_estimated_cost(&mut self, value: Option<f64>) {
        self.estimated_cost = value;
    }

    /// Returns a clone of the optional finish reason string.
    #[wasm_bindgen(getter, js_name = "finishReason")]
    pub fn finish_reason(&self) -> Option<String> {
        Option::clone(&self.finish_reason)
    }

    /// Replaces the stored finish reason with `value` (`None` clears it).
    #[wasm_bindgen(setter, js_name = "finishReason")]
    pub fn set_finish_reason(&mut self, value: Option<String>) {
        self.finish_reason = value;
    }
}
/// A text chunk with optional embedding and metadata.
///
/// Chunks are created when chunking is enabled in `ExtractionConfig`. Each chunk
/// contains the text content, optional embedding vector (if embedding generation
/// is configured), and metadata about its position in the document.
///
/// All fields are private; they are read and written through the
/// `#[wasm_bindgen]` getters and setters on the accompanying `impl`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmChunk {
/// Text content of the chunk.
content: String,
/// Chunk classification; the `chunkType` getter reports it as a string.
chunk_type: WasmChunkType,
/// Embedding vector, if one was generated.
embedding: Option<Vec<f32>>,
/// Position metadata for the chunk.
metadata: WasmChunkMetadata,
}
#[wasm_bindgen]
impl WasmChunk {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
content: String,
chunkType: WasmChunkType,
metadata: WasmChunkMetadata,
embedding: Option<Vec<f32>>,
) -> WasmChunk {
WasmChunk {
content,
chunk_type: chunkType,
embedding,
metadata,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmChunk {
<WasmChunk as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "chunkType")]
pub fn chunk_type(&self) -> String {
self.chunk_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "chunkType")]
pub fn set_chunk_type(&mut self, value: WasmChunkType) {
self.chunk_type = value;
}
#[wasm_bindgen(getter)]
pub fn embedding(&self) -> Option<Vec<f32>> {
self.embedding.clone()
}
#[wasm_bindgen(setter)]
pub fn set_embedding(&mut self, value: Option<Vec<f32>>) {
self.embedding = value;
}
#[wasm_bindgen(getter)]
pub fn metadata(&self) -> WasmChunkMetadata {
self.metadata.clone()
}
#[wasm_bindgen(setter)]
pub fn set_metadata(&mut self, value: WasmChunkMetadata) {
self.metadata = value;
}
}
/// Heading context for a chunk within a Markdown document.
///
/// Contains the heading hierarchy from document root to this chunk's section.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHeadingContext {
/// Headings making up the hierarchy; empty in the derived default.
// NOTE(review): presumably ordered outermost-first — confirm against the producer.
headings: Vec<WasmHeadingLevel>,
}
#[wasm_bindgen]
impl WasmHeadingContext {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(headings: Vec<WasmHeadingLevel>) -> WasmHeadingContext {
WasmHeadingContext { headings }
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmHeadingContext {
<WasmHeadingContext as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn headings(&self) -> Vec<WasmHeadingLevel> {
self.headings.clone()
}
#[wasm_bindgen(setter)]
pub fn set_headings(&mut self, value: Vec<WasmHeadingLevel>) {
self.headings = value;
}
}
/// A single heading in the hierarchy.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHeadingLevel {
/// Heading level.
// NOTE(review): presumably 1-6 as in Markdown; the derived default is 0 — confirm.
level: u8,
/// Heading text.
text: String,
}
#[wasm_bindgen]
impl WasmHeadingLevel {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(level: u8, text: String) -> WasmHeadingLevel {
WasmHeadingLevel { level, text }
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmHeadingLevel {
<WasmHeadingLevel as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn level(&self) -> u8 {
self.level
}
#[wasm_bindgen(setter)]
pub fn set_level(&mut self, value: u8) {
self.level = value;
}
#[wasm_bindgen(getter)]
pub fn text(&self) -> String {
self.text.clone()
}
#[wasm_bindgen(setter)]
pub fn set_text(&mut self, value: String) {
self.text = value;
}
}
/// Metadata about a chunk's position in the original document.
///
/// All fields are private and surfaced through camelCase getters/setters on
/// the accompanying `impl`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmChunkMetadata {
/// Start of the chunk's byte range.
// NOTE(review): presumably an offset into the extracted text — confirm.
byte_start: usize,
/// End of the chunk's byte range.
// NOTE(review): inclusive vs. exclusive is not visible here — confirm.
byte_end: usize,
/// Token count for the chunk, when available.
token_count: Option<usize>,
/// Index of this chunk.
// NOTE(review): presumably zero-based — confirm.
chunk_index: usize,
/// Total number of chunks.
total_chunks: usize,
/// First page associated with the chunk, when known.
first_page: Option<u32>,
/// Last page associated with the chunk, when known.
last_page: Option<u32>,
/// Heading hierarchy for the chunk, when available.
heading_context: Option<WasmHeadingContext>,
/// Indices of images associated with the chunk.
// NOTE(review): which image list these index into is not visible here — confirm.
image_indices: Vec<u32>,
}
#[wasm_bindgen]
impl WasmChunkMetadata {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
byteStart: usize,
byteEnd: usize,
chunkIndex: usize,
totalChunks: usize,
imageIndices: Vec<u32>,
tokenCount: Option<usize>,
firstPage: Option<u32>,
lastPage: Option<u32>,
headingContext: Option<WasmHeadingContext>,
) -> WasmChunkMetadata {
WasmChunkMetadata {
byte_start: byteStart,
byte_end: byteEnd,
token_count: tokenCount,
chunk_index: chunkIndex,
total_chunks: totalChunks,
first_page: firstPage,
last_page: lastPage,
heading_context: headingContext,
image_indices: imageIndices,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmChunkMetadata {
<WasmChunkMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "byteStart")]
pub fn byte_start(&self) -> usize {
self.byte_start
}
#[wasm_bindgen(setter, js_name = "byteStart")]
pub fn set_byte_start(&mut self, value: usize) {
self.byte_start = value;
}
#[wasm_bindgen(getter, js_name = "byteEnd")]
pub fn byte_end(&self) -> usize {
self.byte_end
}
#[wasm_bindgen(setter, js_name = "byteEnd")]
pub fn set_byte_end(&mut self, value: usize) {
self.byte_end = value;
}
#[wasm_bindgen(getter, js_name = "tokenCount")]
pub fn token_count(&self) -> Option<usize> {
self.token_count
}
#[wasm_bindgen(setter, js_name = "tokenCount")]
pub fn set_token_count(&mut self, value: Option<usize>) {
self.token_count = value;
}
#[wasm_bindgen(getter, js_name = "chunkIndex")]
pub fn chunk_index(&self) -> usize {
self.chunk_index
}
#[wasm_bindgen(setter, js_name = "chunkIndex")]
pub fn set_chunk_index(&mut self, value: usize) {
self.chunk_index = value;
}
#[wasm_bindgen(getter, js_name = "totalChunks")]
pub fn total_chunks(&self) -> usize {
self.total_chunks
}
#[wasm_bindgen(setter, js_name = "totalChunks")]
pub fn set_total_chunks(&mut self, value: usize) {
self.total_chunks = value;
}
#[wasm_bindgen(getter, js_name = "firstPage")]
pub fn first_page(&self) -> Option<u32> {
self.first_page
}
#[wasm_bindgen(setter, js_name = "firstPage")]
pub fn set_first_page(&mut self, value: Option<u32>) {
self.first_page = value;
}
#[wasm_bindgen(getter, js_name = "lastPage")]
pub fn last_page(&self) -> Option<u32> {
self.last_page
}
#[wasm_bindgen(setter, js_name = "lastPage")]
pub fn set_last_page(&mut self, value: Option<u32>) {
self.last_page = value;
}
#[wasm_bindgen(getter, js_name = "headingContext")]
pub fn heading_context(&self) -> Option<WasmHeadingContext> {
self.heading_context.clone()
}
#[wasm_bindgen(setter, js_name = "headingContext")]
pub fn set_heading_context(&mut self, value: Option<WasmHeadingContext>) {
self.heading_context = value;
}
#[wasm_bindgen(getter, js_name = "imageIndices")]
pub fn image_indices(&self) -> Vec<u32> {
self.image_indices.clone()
}
#[wasm_bindgen(setter, js_name = "imageIndices")]
pub fn set_image_indices(&mut self, value: Vec<u32>) {
self.image_indices = value;
}
}
/// Extracted image from a document.
///
/// Contains raw image data, metadata, and optional nested OCR results.
/// Raw bytes allow cross-language compatibility - users can convert to
/// PIL.Image (Python), Sharp (Node.js), or other formats as needed.
///
/// All fields are private and surfaced through getters/setters on the
/// accompanying `impl`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExtractedImage {
/// Raw image bytes.
data: Vec<u8>,
/// Image format identifier.
// NOTE(review): the accepted values (e.g. "png", "jpeg") are not visible here — confirm.
format: String,
/// Index of this image.
image_index: u32,
/// Page the image came from, when known.
// NOTE(review): presumably 1-based — confirm.
page_number: Option<u32>,
/// Image width, when known.
// NOTE(review): presumably pixels — confirm.
width: Option<u32>,
/// Image height, when known.
height: Option<u32>,
/// Colorspace name, when known.
colorspace: Option<String>,
/// Bits per colour component, when known.
bits_per_component: Option<u32>,
/// Mask flag; `false` in the derived default.
is_mask: bool,
/// Optional textual description of the image.
description: Option<String>,
/// Optional nested OCR result for the image.
ocr_result: Option<WasmExtractionResult>,
/// Optional bounding box for the image.
bounding_box: Option<WasmBoundingBox>,
/// Optional source path of the image.
source_path: Option<String>,
/// Optional image classification; the `imageKind` getter reports it as a string.
image_kind: Option<WasmImageKind>,
/// Confidence attached to `image_kind`, when present.
// NOTE(review): presumably in [0, 1] — confirm.
kind_confidence: Option<f32>,
/// Optional cluster identifier.
cluster_id: Option<u32>,
}
#[wasm_bindgen]
impl WasmExtractedImage {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
data: Vec<u8>,
format: String,
imageIndex: u32,
isMask: bool,
pageNumber: Option<u32>,
width: Option<u32>,
height: Option<u32>,
colorspace: Option<String>,
bitsPerComponent: Option<u32>,
description: Option<String>,
ocrResult: Option<WasmExtractionResult>,
boundingBox: Option<WasmBoundingBox>,
sourcePath: Option<String>,
imageKind: Option<WasmImageKind>,
kindConfidence: Option<f32>,
clusterId: Option<u32>,
) -> WasmExtractedImage {
WasmExtractedImage {
data,
format,
image_index: imageIndex,
page_number: pageNumber,
width,
height,
colorspace,
bits_per_component: bitsPerComponent,
is_mask: isMask,
description,
ocr_result: ocrResult,
bounding_box: boundingBox,
source_path: sourcePath,
image_kind: imageKind,
kind_confidence: kindConfidence,
cluster_id: clusterId,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmExtractedImage {
<WasmExtractedImage as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn data(&self) -> Vec<u8> {
self.data.clone()
}
#[wasm_bindgen(setter)]
pub fn set_data(&mut self, value: Vec<u8>) {
self.data = value;
}
#[wasm_bindgen(getter)]
pub fn format(&self) -> String {
self.format.clone()
}
#[wasm_bindgen(setter)]
pub fn set_format(&mut self, value: String) {
self.format = value;
}
#[wasm_bindgen(getter, js_name = "imageIndex")]
pub fn image_index(&self) -> u32 {
self.image_index
}
#[wasm_bindgen(setter, js_name = "imageIndex")]
pub fn set_image_index(&mut self, value: u32) {
self.image_index = value;
}
#[wasm_bindgen(getter, js_name = "pageNumber")]
pub fn page_number(&self) -> Option<u32> {
self.page_number
}
#[wasm_bindgen(setter, js_name = "pageNumber")]
pub fn set_page_number(&mut self, value: Option<u32>) {
self.page_number = value;
}
#[wasm_bindgen(getter)]
pub fn width(&self) -> Option<u32> {
self.width
}
#[wasm_bindgen(setter)]
pub fn set_width(&mut self, value: Option<u32>) {
self.width = value;
}
#[wasm_bindgen(getter)]
pub fn height(&self) -> Option<u32> {
self.height
}
#[wasm_bindgen(setter)]
pub fn set_height(&mut self, value: Option<u32>) {
self.height = value;
}
#[wasm_bindgen(getter)]
pub fn colorspace(&self) -> Option<String> {
self.colorspace.clone()
}
#[wasm_bindgen(setter)]
pub fn set_colorspace(&mut self, value: Option<String>) {
self.colorspace = value;
}
#[wasm_bindgen(getter, js_name = "bitsPerComponent")]
pub fn bits_per_component(&self) -> Option<u32> {
self.bits_per_component
}
#[wasm_bindgen(setter, js_name = "bitsPerComponent")]
pub fn set_bits_per_component(&mut self, value: Option<u32>) {
self.bits_per_component = value;
}
#[wasm_bindgen(getter, js_name = "isMask")]
pub fn is_mask(&self) -> bool {
self.is_mask
}
#[wasm_bindgen(setter, js_name = "isMask")]
pub fn set_is_mask(&mut self, value: bool) {
self.is_mask = value;
}
#[wasm_bindgen(getter)]
pub fn description(&self) -> Option<String> {
self.description.clone()
}
#[wasm_bindgen(setter)]
pub fn set_description(&mut self, value: Option<String>) {
self.description = value;
}
#[wasm_bindgen(getter, js_name = "ocrResult")]
pub fn ocr_result(&self) -> Option<WasmExtractionResult> {
self.ocr_result.clone()
}
#[wasm_bindgen(setter, js_name = "ocrResult")]
pub fn set_ocr_result(&mut self, value: Option<WasmExtractionResult>) {
self.ocr_result = value;
}
#[wasm_bindgen(getter, js_name = "boundingBox")]
pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
self.bounding_box.clone()
}
#[wasm_bindgen(setter, js_name = "boundingBox")]
pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
self.bounding_box = value;
}
#[wasm_bindgen(getter, js_name = "sourcePath")]
pub fn source_path(&self) -> Option<String> {
self.source_path.clone()
}
#[wasm_bindgen(setter, js_name = "sourcePath")]
pub fn set_source_path(&mut self, value: Option<String>) {
self.source_path = value;
}
#[wasm_bindgen(getter, js_name = "imageKind")]
pub fn image_kind(&self) -> Option<String> {
self.image_kind.map(|v| v.to_api_str().to_owned())
}
#[wasm_bindgen(setter, js_name = "imageKind")]
pub fn set_image_kind(&mut self, value: Option<WasmImageKind>) {
self.image_kind = value;
}
#[wasm_bindgen(getter, js_name = "kindConfidence")]
pub fn kind_confidence(&self) -> Option<f32> {
self.kind_confidence
}
#[wasm_bindgen(setter, js_name = "kindConfidence")]
pub fn set_kind_confidence(&mut self, value: Option<f32>) {
self.kind_confidence = value;
}
#[wasm_bindgen(getter, js_name = "clusterId")]
pub fn cluster_id(&self) -> Option<u32> {
self.cluster_id
}
#[wasm_bindgen(setter, js_name = "clusterId")]
pub fn set_cluster_id(&mut self, value: Option<u32>) {
self.cluster_id = value;
}
}
/// Bounding box coordinates for element positioning.
///
/// The derived `Default` sets all four coordinates to `0.0`.
// NOTE(review): units and axis orientation are not visible here; presumably
// (x0, y0) and (x1, y1) are opposite corners — confirm.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBoundingBox {
/// First x coordinate.
x0: f64,
/// First y coordinate.
y0: f64,
/// Second x coordinate.
x1: f64,
/// Second y coordinate.
y1: f64,
}
#[wasm_bindgen]
impl WasmBoundingBox {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(x0: Option<f64>, y0: Option<f64>, x1: Option<f64>, y1: Option<f64>) -> WasmBoundingBox {
WasmBoundingBox {
x0: x0.unwrap_or_default(),
y0: y0.unwrap_or_default(),
x1: x1.unwrap_or_default(),
y1: y1.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmBoundingBox {
<WasmBoundingBox as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn x0(&self) -> f64 {
self.x0
}
#[wasm_bindgen(setter)]
pub fn set_x0(&mut self, value: f64) {
self.x0 = value;
}
#[wasm_bindgen(getter)]
pub fn y0(&self) -> f64 {
self.y0
}
#[wasm_bindgen(setter)]
pub fn set_y0(&mut self, value: f64) {
self.y0 = value;
}
#[wasm_bindgen(getter)]
pub fn x1(&self) -> f64 {
self.x1
}
#[wasm_bindgen(setter)]
pub fn set_x1(&mut self, value: f64) {
self.x1 = value;
}
#[wasm_bindgen(getter)]
pub fn y1(&self) -> f64 {
self.y1
}
#[wasm_bindgen(setter)]
pub fn set_y1(&mut self, value: f64) {
self.y1 = value;
}
}
/// Metadata for a semantic element.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmElementMetadata {
/// Page the element came from, when known.
page_number: Option<u32>,
/// Optional file name associated with the element.
filename: Option<String>,
/// Optional bounding box for the element.
coordinates: Option<WasmBoundingBox>,
/// Optional index of the element.
element_index: Option<usize>,
/// Extra data held as an opaque JS value; it is stored and returned as-is.
// NOTE(review): presumably a JS object/map of additional keys — confirm the expected shape.
additional: JsValue,
}
#[wasm_bindgen]
impl WasmElementMetadata {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
additional: JsValue,
pageNumber: Option<u32>,
filename: Option<String>,
coordinates: Option<WasmBoundingBox>,
elementIndex: Option<usize>,
) -> WasmElementMetadata {
WasmElementMetadata {
page_number: pageNumber,
filename,
coordinates,
element_index: elementIndex,
additional,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmElementMetadata {
<WasmElementMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "pageNumber")]
pub fn page_number(&self) -> Option<u32> {
self.page_number
}
#[wasm_bindgen(setter, js_name = "pageNumber")]
pub fn set_page_number(&mut self, value: Option<u32>) {
self.page_number = value;
}
#[wasm_bindgen(getter)]
pub fn filename(&self) -> Option<String> {
self.filename.clone()
}
#[wasm_bindgen(setter)]
pub fn set_filename(&mut self, value: Option<String>) {
self.filename = value;
}
#[wasm_bindgen(getter)]
pub fn coordinates(&self) -> Option<WasmBoundingBox> {
self.coordinates.clone()
}
#[wasm_bindgen(setter)]
pub fn set_coordinates(&mut self, value: Option<WasmBoundingBox>) {
self.coordinates = value;
}
#[wasm_bindgen(getter, js_name = "elementIndex")]
pub fn element_index(&self) -> Option<usize> {
self.element_index
}
#[wasm_bindgen(setter, js_name = "elementIndex")]
pub fn set_element_index(&mut self, value: Option<usize>) {
self.element_index = value;
}
#[wasm_bindgen(getter)]
pub fn additional(&self) -> JsValue {
self.additional.clone()
}
#[wasm_bindgen(setter)]
pub fn set_additional(&mut self, value: JsValue) {
self.additional = value;
}
}
/// Semantic element extracted from document.
///
/// Represents a logical unit of content with semantic classification,
/// unique identifier, and metadata for tracking origin and position.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmElement {
/// Identifier of the element.
element_id: String,
/// Semantic classification; the `elementType` getter reports it as a string.
element_type: WasmElementType,
/// Text of the element.
text: String,
/// Origin and position metadata.
metadata: WasmElementMetadata,
}
#[wasm_bindgen]
impl WasmElement {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
elementId: String,
elementType: WasmElementType,
text: String,
metadata: WasmElementMetadata,
) -> WasmElement {
WasmElement {
element_id: elementId,
element_type: elementType,
text,
metadata,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmElement {
<WasmElement as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "elementId")]
pub fn element_id(&self) -> String {
self.element_id.clone()
}
#[wasm_bindgen(setter, js_name = "elementId")]
pub fn set_element_id(&mut self, value: String) {
self.element_id = value;
}
#[wasm_bindgen(getter, js_name = "elementType")]
pub fn element_type(&self) -> String {
self.element_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "elementType")]
pub fn set_element_type(&mut self, value: WasmElementType) {
self.element_type = value;
}
#[wasm_bindgen(getter)]
pub fn text(&self) -> String {
self.text.clone()
}
#[wasm_bindgen(setter)]
pub fn set_text(&mut self, value: String) {
self.text = value;
}
#[wasm_bindgen(getter)]
pub fn metadata(&self) -> WasmElementMetadata {
self.metadata.clone()
}
#[wasm_bindgen(setter)]
pub fn set_metadata(&mut self, value: WasmElementMetadata) {
self.metadata = value;
}
}
/// Excel workbook representation.
///
/// Contains all sheets from an Excel file (.xlsx, .xls, etc.) with
/// extracted content and metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelWorkbook {
/// Sheets of the workbook.
sheets: Vec<WasmExcelSheet>,
/// Workbook metadata held as an opaque JS value; stored and returned as-is.
// NOTE(review): expected shape of the JS value is not visible here — confirm.
metadata: JsValue,
/// Optional list of document revisions; the getter exposes it as a JS array.
revisions: Option<Vec<WasmDocumentRevision>>,
}
#[wasm_bindgen]
impl WasmExcelWorkbook {
    /// Assembles a workbook from its parts; exported as the constructor.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        sheets: Vec<WasmExcelSheet>,
        metadata: JsValue,
        revisions: Option<Vec<WasmDocumentRevision>>,
    ) -> WasmExcelWorkbook {
        Self {
            sheets,
            metadata,
            revisions,
        }
    }

    /// Returns a workbook built by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExcelWorkbook {
        // Explicit trait path: `Self::default()` would call this inherent function.
        <Self as Default>::default()
    }

    /// Returns a copy of the sheet list.
    #[wasm_bindgen(getter)]
    pub fn sheets(&self) -> Vec<WasmExcelSheet> {
        self.sheets.clone()
    }

    /// Replaces the sheet list.
    #[wasm_bindgen(setter)]
    pub fn set_sheets(&mut self, value: Vec<WasmExcelSheet>) {
        self.sheets = value;
    }

    /// Returns a clone of the metadata JS value.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    /// Replaces the metadata JS value.
    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }

    /// Returns the revisions as a freshly built JS array, or `None` when unset.
    ///
    /// Each revision is cloned and converted to a `JsValue` before being
    /// pushed, so the returned array holds copies of the stored items.
    #[wasm_bindgen(getter)]
    pub fn revisions(&self) -> Option<js_sys::Array> {
        let revisions = self.revisions.as_ref()?;
        let js_array = js_sys::Array::new();
        revisions.iter().cloned().map(JsValue::from).for_each(|entry| {
            js_array.push(&entry);
        });
        Some(js_array)
    }

    /// Replaces the revision list (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
        self.revisions = value;
    }
}
/// Single Excel worksheet.
///
/// Represents one sheet from an Excel workbook with its content
/// converted to Markdown format and dimensional statistics.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelSheet {
/// Sheet name.
name: String,
/// Sheet content converted to Markdown.
markdown: String,
/// Number of rows.
row_count: usize,
/// Number of columns.
col_count: usize,
/// Number of cells.
// NOTE(review): whether empty cells are counted is not visible here — confirm.
cell_count: usize,
/// Optional table cell data held as an opaque JS value.
// NOTE(review): presumably a nested array of cell strings — confirm the expected shape.
table_cells: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmExcelSheet {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
name: String,
markdown: String,
rowCount: usize,
colCount: usize,
cellCount: usize,
tableCells: Option<JsValue>,
) -> WasmExcelSheet {
WasmExcelSheet {
name,
markdown,
row_count: rowCount,
col_count: colCount,
cell_count: cellCount,
table_cells: tableCells,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmExcelSheet {
<WasmExcelSheet as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn name(&self) -> String {
self.name.clone()
}
#[wasm_bindgen(setter)]
pub fn set_name(&mut self, value: String) {
self.name = value;
}
#[wasm_bindgen(getter)]
pub fn markdown(&self) -> String {
self.markdown.clone()
}
#[wasm_bindgen(setter)]
pub fn set_markdown(&mut self, value: String) {
self.markdown = value;
}
#[wasm_bindgen(getter, js_name = "rowCount")]
pub fn row_count(&self) -> usize {
self.row_count
}
#[wasm_bindgen(setter, js_name = "rowCount")]
pub fn set_row_count(&mut self, value: usize) {
self.row_count = value;
}
#[wasm_bindgen(getter, js_name = "colCount")]
pub fn col_count(&self) -> usize {
self.col_count
}
#[wasm_bindgen(setter, js_name = "colCount")]
pub fn set_col_count(&mut self, value: usize) {
self.col_count = value;
}
#[wasm_bindgen(getter, js_name = "cellCount")]
pub fn cell_count(&self) -> usize {
self.cell_count
}
#[wasm_bindgen(setter, js_name = "cellCount")]
pub fn set_cell_count(&mut self, value: usize) {
self.cell_count = value;
}
#[wasm_bindgen(getter, js_name = "tableCells")]
pub fn table_cells(&self) -> Option<JsValue> {
self.table_cells.clone()
}
#[wasm_bindgen(setter, js_name = "tableCells")]
pub fn set_table_cells(&mut self, value: Option<JsValue>) {
self.table_cells = value;
}
}
/// XML extraction result.
///
/// Contains extracted text content from XML files along with
/// structural statistics about the XML document.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXmlExtractionResult {
/// Extracted text content.
content: String,
/// Number of elements.
element_count: usize,
/// Names of the unique elements.
// NOTE(review): ordering and deduplication rules are not visible here — confirm.
unique_elements: Vec<String>,
}
#[wasm_bindgen]
impl WasmXmlExtractionResult {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(content: String, elementCount: usize, uniqueElements: Vec<String>) -> WasmXmlExtractionResult {
WasmXmlExtractionResult {
content,
element_count: elementCount,
unique_elements: uniqueElements,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmXmlExtractionResult {
<WasmXmlExtractionResult as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "elementCount")]
pub fn element_count(&self) -> usize {
self.element_count
}
#[wasm_bindgen(setter, js_name = "elementCount")]
pub fn set_element_count(&mut self, value: usize) {
self.element_count = value;
}
#[wasm_bindgen(getter, js_name = "uniqueElements")]
pub fn unique_elements(&self) -> Vec<String> {
self.unique_elements.clone()
}
#[wasm_bindgen(setter, js_name = "uniqueElements")]
pub fn set_unique_elements(&mut self, value: Vec<String>) {
self.unique_elements = value;
}
}
/// Plain text and Markdown extraction result.
///
/// Contains the extracted text along with statistics and,
/// for Markdown files, structural elements like headers and links.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextExtractionResult {
/// Extracted text.
content: String,
/// Number of lines.
line_count: usize,
/// Number of words.
word_count: usize,
/// Number of characters.
// NOTE(review): bytes vs. Unicode scalar values is not visible here — confirm.
character_count: usize,
/// Optional list of headers.
headers: Option<Vec<String>>,
/// Optional links held as an opaque JS value.
// NOTE(review): expected shape of the JS value is not visible here — confirm.
links: Option<JsValue>,
/// Optional code blocks held as an opaque JS value.
// NOTE(review): expected shape of the JS value is not visible here — confirm.
code_blocks: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmTextExtractionResult {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
content: String,
lineCount: usize,
wordCount: usize,
characterCount: usize,
headers: Option<Vec<String>>,
links: Option<JsValue>,
codeBlocks: Option<JsValue>,
) -> WasmTextExtractionResult {
WasmTextExtractionResult {
content,
line_count: lineCount,
word_count: wordCount,
character_count: characterCount,
headers,
links,
code_blocks: codeBlocks,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmTextExtractionResult {
<WasmTextExtractionResult as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "lineCount")]
pub fn line_count(&self) -> usize {
self.line_count
}
#[wasm_bindgen(setter, js_name = "lineCount")]
pub fn set_line_count(&mut self, value: usize) {
self.line_count = value;
}
#[wasm_bindgen(getter, js_name = "wordCount")]
pub fn word_count(&self) -> usize {
self.word_count
}
#[wasm_bindgen(setter, js_name = "wordCount")]
pub fn set_word_count(&mut self, value: usize) {
self.word_count = value;
}
#[wasm_bindgen(getter, js_name = "characterCount")]
pub fn character_count(&self) -> usize {
self.character_count
}
#[wasm_bindgen(setter, js_name = "characterCount")]
pub fn set_character_count(&mut self, value: usize) {
self.character_count = value;
}
#[wasm_bindgen(getter)]
pub fn headers(&self) -> Option<Vec<String>> {
self.headers.clone()
}
#[wasm_bindgen(setter)]
pub fn set_headers(&mut self, value: Option<Vec<String>>) {
self.headers = value;
}
#[wasm_bindgen(getter)]
pub fn links(&self) -> Option<JsValue> {
self.links.clone()
}
#[wasm_bindgen(setter)]
pub fn set_links(&mut self, value: Option<JsValue>) {
self.links = value;
}
#[wasm_bindgen(getter, js_name = "codeBlocks")]
pub fn code_blocks(&self) -> Option<JsValue> {
self.code_blocks.clone()
}
#[wasm_bindgen(setter, js_name = "codeBlocks")]
pub fn set_code_blocks(&mut self, value: Option<JsValue>) {
self.code_blocks = value;
}
}
/// PowerPoint (PPTX) extraction result.
///
/// Contains extracted slide content, metadata, and embedded images/tables.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxExtractionResult {
/// Extracted slide content.
content: String,
/// Presentation metadata.
metadata: WasmPptxMetadata,
/// Number of slides.
slide_count: usize,
/// Number of images.
// NOTE(review): not derived from `images.len()` in this file — confirm they are kept in sync upstream.
image_count: usize,
/// Number of tables.
table_count: usize,
/// Extracted images.
images: Vec<WasmExtractedImage>,
/// Optional page structure.
page_structure: Option<WasmPageStructure>,
/// Optional per-page contents; the getter exposes them as a JS array.
page_contents: Option<Vec<WasmPageContent>>,
/// Optional document structure.
document: Option<WasmDocumentStructure>,
/// Hyperlinks as strings.
hyperlinks: Vec<String>,
/// Office metadata held as an opaque JS value; stored and returned as-is.
// NOTE(review): expected shape of the JS value is not visible here — confirm.
office_metadata: JsValue,
/// Optional list of document revisions; the getter exposes it as a JS array.
revisions: Option<Vec<WasmDocumentRevision>>,
}
#[wasm_bindgen]
impl WasmPptxExtractionResult {
    /// Assembles a PPTX extraction result; exported as the constructor.
    ///
    /// The eight required values come first, followed by the four optional
    /// ones, so the parameter order differs from the field order.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        metadata: WasmPptxMetadata,
        slideCount: usize,
        imageCount: usize,
        tableCount: usize,
        images: Vec<WasmExtractedImage>,
        hyperlinks: Vec<String>,
        officeMetadata: JsValue,
        pageStructure: Option<WasmPageStructure>,
        pageContents: Option<Vec<WasmPageContent>>,
        document: Option<WasmDocumentStructure>,
        revisions: Option<Vec<WasmDocumentRevision>>,
    ) -> WasmPptxExtractionResult {
        Self {
            content,
            metadata,
            slide_count: slideCount,
            image_count: imageCount,
            table_count: tableCount,
            images,
            page_structure: pageStructure,
            page_contents: pageContents,
            document,
            hyperlinks,
            office_metadata: officeMetadata,
            revisions,
        }
    }

    /// Returns a result built by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPptxExtractionResult {
        // Explicit trait path: `Self::default()` would call this inherent function.
        <Self as Default>::default()
    }

    /// Returns a copy of the extracted content.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    /// Replaces the extracted content.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a copy of the presentation metadata.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> WasmPptxMetadata {
        self.metadata.clone()
    }

    /// Replaces the presentation metadata.
    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: WasmPptxMetadata) {
        self.metadata = value;
    }

    /// Returns the stored slide count.
    #[wasm_bindgen(getter, js_name = "slideCount")]
    pub fn slide_count(&self) -> usize {
        self.slide_count
    }

    /// Replaces the slide count.
    #[wasm_bindgen(setter, js_name = "slideCount")]
    pub fn set_slide_count(&mut self, value: usize) {
        self.slide_count = value;
    }

    /// Returns the stored image count.
    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> usize {
        self.image_count
    }

    /// Replaces the image count.
    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: usize) {
        self.image_count = value;
    }

    /// Returns the stored table count.
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> usize {
        self.table_count
    }

    /// Replaces the table count.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: usize) {
        self.table_count = value;
    }

    /// Returns a copy of the image list.
    #[wasm_bindgen(getter)]
    pub fn images(&self) -> Vec<WasmExtractedImage> {
        self.images.clone()
    }

    /// Replaces the image list.
    #[wasm_bindgen(setter)]
    pub fn set_images(&mut self, value: Vec<WasmExtractedImage>) {
        self.images = value;
    }

    /// Returns a copy of the page structure, if any.
    #[wasm_bindgen(getter, js_name = "pageStructure")]
    pub fn page_structure(&self) -> Option<WasmPageStructure> {
        self.page_structure.clone()
    }

    /// Replaces the page structure (`None` clears it).
    #[wasm_bindgen(setter, js_name = "pageStructure")]
    pub fn set_page_structure(&mut self, value: Option<WasmPageStructure>) {
        self.page_structure = value;
    }

    /// Returns the page contents as a freshly built JS array, or `None` when unset.
    ///
    /// Each item is cloned and converted to a `JsValue` before being pushed.
    #[wasm_bindgen(getter, js_name = "pageContents")]
    pub fn page_contents(&self) -> Option<js_sys::Array> {
        let pages = self.page_contents.as_ref()?;
        let js_array = js_sys::Array::new();
        pages.iter().cloned().map(JsValue::from).for_each(|entry| {
            js_array.push(&entry);
        });
        Some(js_array)
    }

    /// Replaces the page contents (`None` clears them).
    #[wasm_bindgen(setter, js_name = "pageContents")]
    pub fn set_page_contents(&mut self, value: Option<Vec<WasmPageContent>>) {
        self.page_contents = value;
    }

    /// Returns a copy of the document structure, if any.
    #[wasm_bindgen(getter)]
    pub fn document(&self) -> Option<WasmDocumentStructure> {
        self.document.clone()
    }

    /// Replaces the document structure (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_document(&mut self, value: Option<WasmDocumentStructure>) {
        self.document = value;
    }

    /// Returns a copy of the hyperlink list.
    #[wasm_bindgen(getter)]
    pub fn hyperlinks(&self) -> Vec<String> {
        self.hyperlinks.clone()
    }

    /// Replaces the hyperlink list.
    #[wasm_bindgen(setter)]
    pub fn set_hyperlinks(&mut self, value: Vec<String>) {
        self.hyperlinks = value;
    }

    /// Returns a clone of the office-metadata JS value.
    #[wasm_bindgen(getter, js_name = "officeMetadata")]
    pub fn office_metadata(&self) -> JsValue {
        self.office_metadata.clone()
    }

    /// Replaces the office-metadata JS value.
    #[wasm_bindgen(setter, js_name = "officeMetadata")]
    pub fn set_office_metadata(&mut self, value: JsValue) {
        self.office_metadata = value;
    }

    /// Returns the revisions as a freshly built JS array, or `None` when unset.
    ///
    /// Each revision is cloned and converted to a `JsValue` before being pushed.
    #[wasm_bindgen(getter)]
    pub fn revisions(&self) -> Option<js_sys::Array> {
        let revisions = self.revisions.as_ref()?;
        let js_array = js_sys::Array::new();
        revisions.iter().cloned().map(JsValue::from).for_each(|entry| {
            js_array.push(&entry);
        });
        Some(js_array)
    }

    /// Replaces the revision list (`None` clears it).
    #[wasm_bindgen(setter)]
    pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
        self.revisions = value;
    }
}
/// Email extraction result.
///
/// Complete representation of an extracted email message (.eml or .msg)
/// including headers, body content, and attachments.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailExtractionResult {
/// Subject, when present.
subject: Option<String>,
/// Sender address, when present.
from_email: Option<String>,
/// "To" recipient addresses.
to_emails: Vec<String>,
/// "Cc" recipient addresses.
cc_emails: Vec<String>,
/// "Bcc" recipient addresses.
bcc_emails: Vec<String>,
/// Date as a string, when present.
// NOTE(review): the date format is not visible here — confirm.
date: Option<String>,
/// Message id, when present.
message_id: Option<String>,
/// Plain-text body, when present.
plain_text: Option<String>,
/// HTML body, when present.
html_content: Option<String>,
/// Extracted content.
// NOTE(review): how this relates to `plain_text`/`html_content` is not visible here — confirm.
content: String,
/// Attachments of the message.
attachments: Vec<WasmEmailAttachment>,
/// Metadata held as an opaque JS value; stored and returned as-is.
metadata: JsValue,
}
#[wasm_bindgen]
impl WasmEmailExtractionResult {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
toEmails: Vec<String>,
ccEmails: Vec<String>,
bccEmails: Vec<String>,
content: String,
attachments: Vec<WasmEmailAttachment>,
metadata: JsValue,
subject: Option<String>,
fromEmail: Option<String>,
date: Option<String>,
messageId: Option<String>,
plainText: Option<String>,
htmlContent: Option<String>,
) -> WasmEmailExtractionResult {
WasmEmailExtractionResult {
subject,
from_email: fromEmail,
to_emails: toEmails,
cc_emails: ccEmails,
bcc_emails: bccEmails,
date,
message_id: messageId,
plain_text: plainText,
html_content: htmlContent,
content,
attachments,
metadata,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmEmailExtractionResult {
<WasmEmailExtractionResult as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn subject(&self) -> Option<String> {
self.subject.clone()
}
#[wasm_bindgen(setter)]
pub fn set_subject(&mut self, value: Option<String>) {
self.subject = value;
}
#[wasm_bindgen(getter, js_name = "fromEmail")]
pub fn from_email(&self) -> Option<String> {
self.from_email.clone()
}
#[wasm_bindgen(setter, js_name = "fromEmail")]
pub fn set_from_email(&mut self, value: Option<String>) {
self.from_email = value;
}
#[wasm_bindgen(getter, js_name = "toEmails")]
pub fn to_emails(&self) -> Vec<String> {
self.to_emails.clone()
}
#[wasm_bindgen(setter, js_name = "toEmails")]
pub fn set_to_emails(&mut self, value: Vec<String>) {
self.to_emails = value;
}
#[wasm_bindgen(getter, js_name = "ccEmails")]
pub fn cc_emails(&self) -> Vec<String> {
self.cc_emails.clone()
}
#[wasm_bindgen(setter, js_name = "ccEmails")]
pub fn set_cc_emails(&mut self, value: Vec<String>) {
self.cc_emails = value;
}
#[wasm_bindgen(getter, js_name = "bccEmails")]
pub fn bcc_emails(&self) -> Vec<String> {
self.bcc_emails.clone()
}
#[wasm_bindgen(setter, js_name = "bccEmails")]
pub fn set_bcc_emails(&mut self, value: Vec<String>) {
self.bcc_emails = value;
}
#[wasm_bindgen(getter)]
pub fn date(&self) -> Option<String> {
self.date.clone()
}
#[wasm_bindgen(setter)]
pub fn set_date(&mut self, value: Option<String>) {
self.date = value;
}
#[wasm_bindgen(getter, js_name = "messageId")]
pub fn message_id(&self) -> Option<String> {
self.message_id.clone()
}
#[wasm_bindgen(setter, js_name = "messageId")]
pub fn set_message_id(&mut self, value: Option<String>) {
self.message_id = value;
}
#[wasm_bindgen(getter, js_name = "plainText")]
pub fn plain_text(&self) -> Option<String> {
self.plain_text.clone()
}
#[wasm_bindgen(setter, js_name = "plainText")]
pub fn set_plain_text(&mut self, value: Option<String>) {
self.plain_text = value;
}
#[wasm_bindgen(getter, js_name = "htmlContent")]
pub fn html_content(&self) -> Option<String> {
self.html_content.clone()
}
#[wasm_bindgen(setter, js_name = "htmlContent")]
pub fn set_html_content(&mut self, value: Option<String>) {
self.html_content = value;
}
#[wasm_bindgen(getter)]
pub fn content(&self) -> String {
self.content.clone()
}
#[wasm_bindgen(setter)]
pub fn set_content(&mut self, value: String) {
self.content = value;
}
#[wasm_bindgen(getter)]
pub fn attachments(&self) -> Vec<WasmEmailAttachment> {
self.attachments.clone()
}
#[wasm_bindgen(setter)]
pub fn set_attachments(&mut self, value: Vec<WasmEmailAttachment>) {
self.attachments = value;
}
#[wasm_bindgen(getter)]
pub fn metadata(&self) -> JsValue {
self.metadata.clone()
}
#[wasm_bindgen(setter)]
pub fn set_metadata(&mut self, value: JsValue) {
self.metadata = value;
}
}
/// A single attachment belonging to an email.
///
/// Carries descriptive metadata and, optionally, the attachment's raw bytes.
/// All fields are private; JavaScript reaches them through the accessors
/// defined on the impl block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailAttachment {
    name: Option<String>,
    filename: Option<String>,
    mime_type: Option<String>,
    size: Option<usize>,
    is_image: bool,
    data: Option<Vec<u8>>,
}

#[wasm_bindgen]
impl WasmEmailAttachment {
    /// Creates an attachment from its individual parts.
    ///
    /// `isImage` is the only mandatory argument; every other argument may be
    /// omitted, in which case the corresponding field is stored as `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        isImage: bool,
        name: Option<String>,
        filename: Option<String>,
        mimeType: Option<String>,
        size: Option<usize>,
        data: Option<Vec<u8>>,
    ) -> WasmEmailAttachment {
        Self {
            is_image: isImage,
            mime_type: mimeType,
            name,
            filename,
            size,
            data,
        }
    }

    /// Returns the value produced by the derived `Default` implementation
    /// (all optional fields `None`, `is_image == false`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmailAttachment {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the `name` field.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> Option<String> {
        self.name.clone()
    }

    /// Replaces the `name` field.
    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: Option<String>) {
        self.name = value;
    }

    /// Returns a copy of the `filename` field.
    #[wasm_bindgen(getter)]
    pub fn filename(&self) -> Option<String> {
        self.filename.clone()
    }

    /// Replaces the `filename` field.
    #[wasm_bindgen(setter)]
    pub fn set_filename(&mut self, value: Option<String>) {
        self.filename = value;
    }

    /// Returns a copy of the `mime_type` field (JavaScript property `mimeType`).
    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> Option<String> {
        self.mime_type.clone()
    }

    /// Replaces the `mime_type` field (JavaScript property `mimeType`).
    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: Option<String>) {
        self.mime_type = value;
    }

    /// Returns the `size` field.
    #[wasm_bindgen(getter)]
    pub fn size(&self) -> Option<usize> {
        self.size
    }

    /// Replaces the `size` field.
    #[wasm_bindgen(setter)]
    pub fn set_size(&mut self, value: Option<usize>) {
        self.size = value;
    }

    /// Returns the `is_image` flag (JavaScript property `isImage`).
    #[wasm_bindgen(getter, js_name = "isImage")]
    pub fn is_image(&self) -> bool {
        self.is_image
    }

    /// Replaces the `is_image` flag (JavaScript property `isImage`).
    #[wasm_bindgen(setter, js_name = "isImage")]
    pub fn set_is_image(&mut self, value: bool) {
        self.is_image = value;
    }

    /// Returns a copy of the stored bytes, or `None` when no data is held.
    #[wasm_bindgen(getter)]
    pub fn data(&self) -> Option<Vec<u8>> {
        self.data.clone()
    }

    /// Replaces the stored bytes; passing `None` clears them.
    #[wasm_bindgen(setter)]
    pub fn set_data(&mut self, value: Option<Vec<u8>>) {
        self.data = value;
    }
}
/// Outcome of running OCR on an image or scanned document.
///
/// Bundles the recognized text with its MIME type, a metadata value, the
/// detected tables and, optionally, OCR elements and an internal document
/// string.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrExtractionResult {
    content: String,
    mime_type: String,
    metadata: JsValue,
    tables: Vec<WasmOcrTable>,
    ocr_elements: Option<Vec<WasmOcrElement>>,
    internal_document: Option<String>,
}

#[wasm_bindgen]
impl WasmOcrExtractionResult {
    /// Creates a result from its parts.
    ///
    /// `ocrElements` and `internalDocument` are optional; the remaining
    /// arguments are required.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: String,
        mimeType: String,
        metadata: JsValue,
        tables: Vec<WasmOcrTable>,
        ocrElements: Option<Vec<WasmOcrElement>>,
        internalDocument: Option<String>,
    ) -> WasmOcrExtractionResult {
        Self {
            mime_type: mimeType,
            ocr_elements: ocrElements,
            internal_document: internalDocument,
            content,
            metadata,
            tables,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrExtractionResult {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the `content` field.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    /// Replaces the `content` field.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns a copy of the `mime_type` field (JavaScript property `mimeType`).
    #[wasm_bindgen(getter, js_name = "mimeType")]
    pub fn mime_type(&self) -> String {
        self.mime_type.clone()
    }

    /// Replaces the `mime_type` field (JavaScript property `mimeType`).
    #[wasm_bindgen(setter, js_name = "mimeType")]
    pub fn set_mime_type(&mut self, value: String) {
        self.mime_type = value;
    }

    /// Returns a clone of the `metadata` value.
    #[wasm_bindgen(getter)]
    pub fn metadata(&self) -> JsValue {
        self.metadata.clone()
    }

    /// Replaces the `metadata` value.
    #[wasm_bindgen(setter)]
    pub fn set_metadata(&mut self, value: JsValue) {
        self.metadata = value;
    }

    /// Returns cloned copies of the stored tables.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec<WasmOcrTable> {
        self.tables.to_vec()
    }

    /// Replaces the stored tables.
    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec<WasmOcrTable>) {
        self.tables = value;
    }

    /// Returns the OCR elements as a freshly built JavaScript array, or `None`
    /// when no elements are stored (JavaScript property `ocrElements`).
    ///
    /// Each element is cloned and converted to a `JsValue` before being pushed.
    #[wasm_bindgen(getter, js_name = "ocrElements")]
    pub fn ocr_elements(&self) -> Option<js_sys::Array> {
        let elements = self.ocr_elements.as_ref()?;
        let array = js_sys::Array::new();
        for element in elements.iter().cloned() {
            array.push(&JsValue::from(element));
        }
        Some(array)
    }

    /// Replaces the stored OCR elements (JavaScript property `ocrElements`).
    #[wasm_bindgen(setter, js_name = "ocrElements")]
    pub fn set_ocr_elements(&mut self, value: Option<Vec<WasmOcrElement>>) {
        self.ocr_elements = value;
    }

    /// Returns a copy of the `internal_document` field
    /// (JavaScript property `internalDocument`).
    #[wasm_bindgen(getter, js_name = "internalDocument")]
    pub fn internal_document(&self) -> Option<String> {
        self.internal_document.clone()
    }

    /// Replaces the `internal_document` field
    /// (JavaScript property `internalDocument`).
    #[wasm_bindgen(setter, js_name = "internalDocument")]
    pub fn set_internal_document(&mut self, value: Option<String>) {
        self.internal_document = value;
    }
}
/// A table structure recognized during OCR processing.
///
/// Stores the cells as an opaque `JsValue`, a Markdown rendering, the page
/// number and an optional bounding box.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrTable {
    cells: JsValue,
    markdown: String,
    page_number: u32,
    bounding_box: Option<WasmOcrTableBoundingBox>,
}

#[wasm_bindgen]
impl WasmOcrTable {
    /// Creates a table from its parts; only `boundingBox` is optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        cells: JsValue,
        markdown: String,
        pageNumber: u32,
        boundingBox: Option<WasmOcrTableBoundingBox>,
    ) -> WasmOcrTable {
        Self {
            page_number: pageNumber,
            bounding_box: boundingBox,
            cells,
            markdown,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrTable {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns a clone of the `cells` value.
    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> JsValue {
        self.cells.clone()
    }

    /// Replaces the `cells` value.
    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: JsValue) {
        self.cells = value;
    }

    /// Returns a copy of the `markdown` field.
    #[wasm_bindgen(getter)]
    pub fn markdown(&self) -> String {
        self.markdown.clone()
    }

    /// Replaces the `markdown` field.
    #[wasm_bindgen(setter)]
    pub fn set_markdown(&mut self, value: String) {
        self.markdown = value;
    }

    /// Returns the `page_number` field (JavaScript property `pageNumber`).
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Replaces the `page_number` field (JavaScript property `pageNumber`).
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of the bounding box, if one is stored
    /// (JavaScript property `boundingBox`).
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option<WasmOcrTableBoundingBox> {
        self.bounding_box.clone()
    }

    /// Replaces the bounding box; passing `None` clears it
    /// (JavaScript property `boundingBox`).
    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option<WasmOcrTableBoundingBox>) {
        self.bounding_box = value;
    }
}
/// Pixel-coordinate bounding box of a table detected by OCR.
///
/// The four edges are stored as unsigned 32-bit integers. No ordering between
/// `left`/`right` or `top`/`bottom` is enforced by this type.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrTableBoundingBox {
    left: u32,
    top: u32,
    right: u32,
    bottom: u32,
}

#[wasm_bindgen]
impl WasmOcrTableBoundingBox {
    /// Creates a bounding box from its four edges, stored exactly as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(left: u32, top: u32, right: u32, bottom: u32) -> WasmOcrTableBoundingBox {
        Self { left, top, right, bottom }
    }

    /// Returns the derived `Default` value (all four edges `0`).
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrTableBoundingBox {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns the `left` edge.
    #[wasm_bindgen(getter)]
    pub fn left(&self) -> u32 {
        self.left
    }

    /// Replaces the `left` edge.
    #[wasm_bindgen(setter)]
    pub fn set_left(&mut self, value: u32) {
        self.left = value;
    }

    /// Returns the `top` edge.
    #[wasm_bindgen(getter)]
    pub fn top(&self) -> u32 {
        self.top
    }

    /// Replaces the `top` edge.
    #[wasm_bindgen(setter)]
    pub fn set_top(&mut self, value: u32) {
        self.top = value;
    }

    /// Returns the `right` edge.
    #[wasm_bindgen(getter)]
    pub fn right(&self) -> u32 {
        self.right
    }

    /// Replaces the `right` edge.
    #[wasm_bindgen(setter)]
    pub fn set_right(&mut self, value: u32) {
        self.right = value;
    }

    /// Returns the `bottom` edge.
    #[wasm_bindgen(getter)]
    pub fn bottom(&self) -> u32 {
        self.bottom
    }

    /// Replaces the `bottom` edge.
    #[wasm_bindgen(setter)]
    pub fn set_bottom(&mut self, value: u32) {
        self.bottom = value;
    }
}
/// Settings controlling how an image is preprocessed before OCR.
///
/// Preprocessing is meant to improve text recognition quality; different
/// strategies suit different document types.
///
/// NOTE(review): the derived Rust `Default` zero-initialises every field
/// (`target_dpi == 0`, all flags `false`, empty `binarization_method`), which
/// is not the same as the fallbacks applied by the constructor. The exported
/// `default()` function instead converts
/// `kreuzberg::ImagePreprocessingConfig::default()`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImagePreprocessingConfig {
    target_dpi: i32,
    auto_rotate: bool,
    deskew: bool,
    denoise: bool,
    contrast_enhance: bool,
    binarization_method: String,
    invert_colors: bool,
}

#[wasm_bindgen]
impl WasmImagePreprocessingConfig {
    /// Creates a configuration, substituting a fallback for every omitted argument.
    ///
    /// Fallbacks: `targetDpi` = 300, `autoRotate` = true, `deskew` = true,
    /// `denoise` = false, `contrastEnhance` = false,
    /// `binarizationMethod` = `"otsu"`, `invertColors` = false.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        targetDpi: Option<i32>,
        autoRotate: Option<bool>,
        deskew: Option<bool>,
        denoise: Option<bool>,
        contrastEnhance: Option<bool>,
        binarizationMethod: Option<String>,
        invertColors: Option<bool>,
    ) -> WasmImagePreprocessingConfig {
        let binarization_method = binarizationMethod.unwrap_or_else(|| String::from("otsu"));
        Self {
            target_dpi: targetDpi.unwrap_or(300),
            auto_rotate: autoRotate.unwrap_or(true),
            deskew: deskew.unwrap_or(true),
            denoise: denoise.unwrap_or(false),
            contrast_enhance: contrastEnhance.unwrap_or(false),
            binarization_method,
            invert_colors: invertColors.unwrap_or(false),
        }
    }

    /// Returns the `target_dpi` field (JavaScript property `targetDpi`).
    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    /// Replaces the `target_dpi` field (JavaScript property `targetDpi`).
    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    /// Returns the `auto_rotate` flag (JavaScript property `autoRotate`).
    #[wasm_bindgen(getter, js_name = "autoRotate")]
    pub fn auto_rotate(&self) -> bool {
        self.auto_rotate
    }

    /// Replaces the `auto_rotate` flag (JavaScript property `autoRotate`).
    #[wasm_bindgen(setter, js_name = "autoRotate")]
    pub fn set_auto_rotate(&mut self, value: bool) {
        self.auto_rotate = value;
    }

    /// Returns the `deskew` flag.
    #[wasm_bindgen(getter)]
    pub fn deskew(&self) -> bool {
        self.deskew
    }

    /// Replaces the `deskew` flag.
    #[wasm_bindgen(setter)]
    pub fn set_deskew(&mut self, value: bool) {
        self.deskew = value;
    }

    /// Returns the `denoise` flag.
    #[wasm_bindgen(getter)]
    pub fn denoise(&self) -> bool {
        self.denoise
    }

    /// Replaces the `denoise` flag.
    #[wasm_bindgen(setter)]
    pub fn set_denoise(&mut self, value: bool) {
        self.denoise = value;
    }

    /// Returns the `contrast_enhance` flag (JavaScript property `contrastEnhance`).
    #[wasm_bindgen(getter, js_name = "contrastEnhance")]
    pub fn contrast_enhance(&self) -> bool {
        self.contrast_enhance
    }

    /// Replaces the `contrast_enhance` flag (JavaScript property `contrastEnhance`).
    #[wasm_bindgen(setter, js_name = "contrastEnhance")]
    pub fn set_contrast_enhance(&mut self, value: bool) {
        self.contrast_enhance = value;
    }

    /// Returns a copy of the `binarization_method` field
    /// (JavaScript property `binarizationMethod`).
    #[wasm_bindgen(getter, js_name = "binarizationMethod")]
    pub fn binarization_method(&self) -> String {
        self.binarization_method.clone()
    }

    /// Replaces the `binarization_method` field
    /// (JavaScript property `binarizationMethod`). The value is not validated here.
    #[wasm_bindgen(setter, js_name = "binarizationMethod")]
    pub fn set_binarization_method(&mut self, value: String) {
        self.binarization_method = value;
    }

    /// Returns the `invert_colors` flag (JavaScript property `invertColors`).
    #[wasm_bindgen(getter, js_name = "invertColors")]
    pub fn invert_colors(&self) -> bool {
        self.invert_colors
    }

    /// Replaces the `invert_colors` flag (JavaScript property `invertColors`).
    #[wasm_bindgen(setter, js_name = "invertColors")]
    pub fn set_invert_colors(&mut self, value: bool) {
        self.invert_colors = value;
    }

    /// Returns the configuration obtained by converting
    /// `kreuzberg::ImagePreprocessingConfig::default()` into this wrapper type.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmImagePreprocessingConfig {
        let core_defaults = kreuzberg::ImagePreprocessingConfig::default();
        core_defaults.into()
    }
}
/// Configuration for the Tesseract OCR engine.
///
/// Exposes fine-grained engine parameters. Most callers can rely on the
/// fallbacks applied by the constructor; the individual settings allow tuning
/// for specific document types.
///
/// NOTE(review): the derived Rust `Default` zero-initialises every field and
/// therefore differs from the constructor fallbacks (for example `language`
/// is empty rather than `"eng"`). The exported `default()` function instead
/// converts `kreuzberg::TesseractConfig::default()`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTesseractConfig {
    language: String,
    psm: i32,
    output_format: String,
    oem: i32,
    min_confidence: f64,
    preprocessing: Option<WasmImagePreprocessingConfig>,
    enable_table_detection: bool,
    table_min_confidence: f64,
    table_column_threshold: i32,
    table_row_threshold_ratio: f64,
    use_cache: bool,
    classify_use_pre_adapted_templates: bool,
    language_model_ngram_on: bool,
    tessedit_dont_blkrej_good_wds: bool,
    tessedit_dont_rowrej_good_wds: bool,
    tessedit_enable_dict_correction: bool,
    tessedit_char_whitelist: String,
    tessedit_char_blacklist: String,
    tessedit_use_primary_params_model: bool,
    textord_space_size_is_variable: bool,
    thresholding_method: bool,
}

#[wasm_bindgen]
impl WasmTesseractConfig {
    /// Creates a configuration, substituting a fallback for every omitted argument.
    ///
    /// Fallbacks: `language` = `"eng"`, `psm` = 3, `outputFormat` = `"markdown"`,
    /// `oem` = 3, `minConfidence` = 0.0, `enableTableDetection` = true,
    /// `tableMinConfidence` = 0.0, `tableColumnThreshold` = 50,
    /// `tableRowThresholdRatio` = 0.5, `useCache` = true,
    /// `classifyUsePreAdaptedTemplates` = true, `languageModelNgramOn` = false,
    /// `tesseditDontBlkrejGoodWds` = true, `tesseditDontRowrejGoodWds` = true,
    /// `tesseditEnableDictCorrection` = true, both character lists = empty string,
    /// `tesseditUsePrimaryParamsModel` = true, `textordSpaceSizeIsVariable` = true,
    /// `thresholdingMethod` = false. `preprocessing` is stored as given
    /// (`None` when omitted).
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        language: Option<String>,
        psm: Option<i32>,
        outputFormat: Option<String>,
        oem: Option<i32>,
        minConfidence: Option<f64>,
        enableTableDetection: Option<bool>,
        tableMinConfidence: Option<f64>,
        tableColumnThreshold: Option<i32>,
        tableRowThresholdRatio: Option<f64>,
        useCache: Option<bool>,
        classifyUsePreAdaptedTemplates: Option<bool>,
        languageModelNgramOn: Option<bool>,
        tesseditDontBlkrejGoodWds: Option<bool>,
        tesseditDontRowrejGoodWds: Option<bool>,
        tesseditEnableDictCorrection: Option<bool>,
        tesseditCharWhitelist: Option<String>,
        tesseditCharBlacklist: Option<String>,
        tesseditUsePrimaryParamsModel: Option<bool>,
        textordSpaceSizeIsVariable: Option<bool>,
        thresholdingMethod: Option<bool>,
        preprocessing: Option<WasmImagePreprocessingConfig>,
    ) -> WasmTesseractConfig {
        Self {
            language: language.unwrap_or_else(|| String::from("eng")),
            psm: psm.unwrap_or(3),
            output_format: outputFormat.unwrap_or_else(|| String::from("markdown")),
            oem: oem.unwrap_or(3),
            min_confidence: minConfidence.unwrap_or(0.0),
            preprocessing,
            enable_table_detection: enableTableDetection.unwrap_or(true),
            table_min_confidence: tableMinConfidence.unwrap_or(0.0),
            table_column_threshold: tableColumnThreshold.unwrap_or(50),
            table_row_threshold_ratio: tableRowThresholdRatio.unwrap_or(0.5),
            use_cache: useCache.unwrap_or(true),
            classify_use_pre_adapted_templates: classifyUsePreAdaptedTemplates.unwrap_or(true),
            language_model_ngram_on: languageModelNgramOn.unwrap_or(false),
            tessedit_dont_blkrej_good_wds: tesseditDontBlkrejGoodWds.unwrap_or(true),
            tessedit_dont_rowrej_good_wds: tesseditDontRowrejGoodWds.unwrap_or(true),
            tessedit_enable_dict_correction: tesseditEnableDictCorrection.unwrap_or(true),
            // An omitted list is stored as the empty string.
            tessedit_char_whitelist: tesseditCharWhitelist.unwrap_or_default(),
            tessedit_char_blacklist: tesseditCharBlacklist.unwrap_or_default(),
            tessedit_use_primary_params_model: tesseditUsePrimaryParamsModel.unwrap_or(true),
            textord_space_size_is_variable: textordSpaceSizeIsVariable.unwrap_or(true),
            thresholding_method: thresholdingMethod.unwrap_or(false),
        }
    }

    /// Returns a copy of the `language` field.
    #[wasm_bindgen(getter)]
    pub fn language(&self) -> String {
        self.language.clone()
    }

    /// Replaces the `language` field.
    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: String) {
        self.language = value;
    }

    /// Returns the `psm` field.
    #[wasm_bindgen(getter)]
    pub fn psm(&self) -> i32 {
        self.psm
    }

    /// Replaces the `psm` field.
    #[wasm_bindgen(setter)]
    pub fn set_psm(&mut self, value: i32) {
        self.psm = value;
    }

    /// Returns a copy of the `output_format` field (JavaScript property `outputFormat`).
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        self.output_format.clone()
    }

    /// Replaces the `output_format` field (JavaScript property `outputFormat`).
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: String) {
        self.output_format = value;
    }

    /// Returns the `oem` field.
    #[wasm_bindgen(getter)]
    pub fn oem(&self) -> i32 {
        self.oem
    }

    /// Replaces the `oem` field.
    #[wasm_bindgen(setter)]
    pub fn set_oem(&mut self, value: i32) {
        self.oem = value;
    }

    /// Returns the `min_confidence` field (JavaScript property `minConfidence`).
    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }

    /// Replaces the `min_confidence` field (JavaScript property `minConfidence`).
    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }

    /// Returns a clone of the nested preprocessing configuration, if any.
    #[wasm_bindgen(getter)]
    pub fn preprocessing(&self) -> Option<WasmImagePreprocessingConfig> {
        self.preprocessing.clone()
    }

    /// Replaces the nested preprocessing configuration; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_preprocessing(&mut self, value: Option<WasmImagePreprocessingConfig>) {
        self.preprocessing = value;
    }

    /// Returns the `enable_table_detection` flag
    /// (JavaScript property `enableTableDetection`).
    #[wasm_bindgen(getter, js_name = "enableTableDetection")]
    pub fn enable_table_detection(&self) -> bool {
        self.enable_table_detection
    }

    /// Replaces the `enable_table_detection` flag
    /// (JavaScript property `enableTableDetection`).
    #[wasm_bindgen(setter, js_name = "enableTableDetection")]
    pub fn set_enable_table_detection(&mut self, value: bool) {
        self.enable_table_detection = value;
    }

    /// Returns the `table_min_confidence` field
    /// (JavaScript property `tableMinConfidence`).
    #[wasm_bindgen(getter, js_name = "tableMinConfidence")]
    pub fn table_min_confidence(&self) -> f64 {
        self.table_min_confidence
    }

    /// Replaces the `table_min_confidence` field
    /// (JavaScript property `tableMinConfidence`).
    #[wasm_bindgen(setter, js_name = "tableMinConfidence")]
    pub fn set_table_min_confidence(&mut self, value: f64) {
        self.table_min_confidence = value;
    }

    /// Returns the `table_column_threshold` field
    /// (JavaScript property `tableColumnThreshold`).
    #[wasm_bindgen(getter, js_name = "tableColumnThreshold")]
    pub fn table_column_threshold(&self) -> i32 {
        self.table_column_threshold
    }

    /// Replaces the `table_column_threshold` field
    /// (JavaScript property `tableColumnThreshold`).
    #[wasm_bindgen(setter, js_name = "tableColumnThreshold")]
    pub fn set_table_column_threshold(&mut self, value: i32) {
        self.table_column_threshold = value;
    }

    /// Returns the `table_row_threshold_ratio` field
    /// (JavaScript property `tableRowThresholdRatio`).
    #[wasm_bindgen(getter, js_name = "tableRowThresholdRatio")]
    pub fn table_row_threshold_ratio(&self) -> f64 {
        self.table_row_threshold_ratio
    }

    /// Replaces the `table_row_threshold_ratio` field
    /// (JavaScript property `tableRowThresholdRatio`).
    #[wasm_bindgen(setter, js_name = "tableRowThresholdRatio")]
    pub fn set_table_row_threshold_ratio(&mut self, value: f64) {
        self.table_row_threshold_ratio = value;
    }

    /// Returns the `use_cache` flag (JavaScript property `useCache`).
    #[wasm_bindgen(getter, js_name = "useCache")]
    pub fn use_cache(&self) -> bool {
        self.use_cache
    }

    /// Replaces the `use_cache` flag (JavaScript property `useCache`).
    #[wasm_bindgen(setter, js_name = "useCache")]
    pub fn set_use_cache(&mut self, value: bool) {
        self.use_cache = value;
    }

    /// Returns the `classify_use_pre_adapted_templates` flag
    /// (JavaScript property `classifyUsePreAdaptedTemplates`).
    #[wasm_bindgen(getter, js_name = "classifyUsePreAdaptedTemplates")]
    pub fn classify_use_pre_adapted_templates(&self) -> bool {
        self.classify_use_pre_adapted_templates
    }

    /// Replaces the `classify_use_pre_adapted_templates` flag
    /// (JavaScript property `classifyUsePreAdaptedTemplates`).
    #[wasm_bindgen(setter, js_name = "classifyUsePreAdaptedTemplates")]
    pub fn set_classify_use_pre_adapted_templates(&mut self, value: bool) {
        self.classify_use_pre_adapted_templates = value;
    }

    /// Returns the `language_model_ngram_on` flag
    /// (JavaScript property `languageModelNgramOn`).
    #[wasm_bindgen(getter, js_name = "languageModelNgramOn")]
    pub fn language_model_ngram_on(&self) -> bool {
        self.language_model_ngram_on
    }

    /// Replaces the `language_model_ngram_on` flag
    /// (JavaScript property `languageModelNgramOn`).
    #[wasm_bindgen(setter, js_name = "languageModelNgramOn")]
    pub fn set_language_model_ngram_on(&mut self, value: bool) {
        self.language_model_ngram_on = value;
    }

    /// Returns the `tessedit_dont_blkrej_good_wds` flag
    /// (JavaScript property `tesseditDontBlkrejGoodWds`).
    #[wasm_bindgen(getter, js_name = "tesseditDontBlkrejGoodWds")]
    pub fn tessedit_dont_blkrej_good_wds(&self) -> bool {
        self.tessedit_dont_blkrej_good_wds
    }

    /// Replaces the `tessedit_dont_blkrej_good_wds` flag
    /// (JavaScript property `tesseditDontBlkrejGoodWds`).
    #[wasm_bindgen(setter, js_name = "tesseditDontBlkrejGoodWds")]
    pub fn set_tessedit_dont_blkrej_good_wds(&mut self, value: bool) {
        self.tessedit_dont_blkrej_good_wds = value;
    }

    /// Returns the `tessedit_dont_rowrej_good_wds` flag
    /// (JavaScript property `tesseditDontRowrejGoodWds`).
    #[wasm_bindgen(getter, js_name = "tesseditDontRowrejGoodWds")]
    pub fn tessedit_dont_rowrej_good_wds(&self) -> bool {
        self.tessedit_dont_rowrej_good_wds
    }

    /// Replaces the `tessedit_dont_rowrej_good_wds` flag
    /// (JavaScript property `tesseditDontRowrejGoodWds`).
    #[wasm_bindgen(setter, js_name = "tesseditDontRowrejGoodWds")]
    pub fn set_tessedit_dont_rowrej_good_wds(&mut self, value: bool) {
        self.tessedit_dont_rowrej_good_wds = value;
    }

    /// Returns the `tessedit_enable_dict_correction` flag
    /// (JavaScript property `tesseditEnableDictCorrection`).
    #[wasm_bindgen(getter, js_name = "tesseditEnableDictCorrection")]
    pub fn tessedit_enable_dict_correction(&self) -> bool {
        self.tessedit_enable_dict_correction
    }

    /// Replaces the `tessedit_enable_dict_correction` flag
    /// (JavaScript property `tesseditEnableDictCorrection`).
    #[wasm_bindgen(setter, js_name = "tesseditEnableDictCorrection")]
    pub fn set_tessedit_enable_dict_correction(&mut self, value: bool) {
        self.tessedit_enable_dict_correction = value;
    }

    /// Returns a copy of the `tessedit_char_whitelist` field
    /// (JavaScript property `tesseditCharWhitelist`).
    #[wasm_bindgen(getter, js_name = "tesseditCharWhitelist")]
    pub fn tessedit_char_whitelist(&self) -> String {
        self.tessedit_char_whitelist.clone()
    }

    /// Replaces the `tessedit_char_whitelist` field
    /// (JavaScript property `tesseditCharWhitelist`).
    #[wasm_bindgen(setter, js_name = "tesseditCharWhitelist")]
    pub fn set_tessedit_char_whitelist(&mut self, value: String) {
        self.tessedit_char_whitelist = value;
    }

    /// Returns a copy of the `tessedit_char_blacklist` field
    /// (JavaScript property `tesseditCharBlacklist`).
    #[wasm_bindgen(getter, js_name = "tesseditCharBlacklist")]
    pub fn tessedit_char_blacklist(&self) -> String {
        self.tessedit_char_blacklist.clone()
    }

    /// Replaces the `tessedit_char_blacklist` field
    /// (JavaScript property `tesseditCharBlacklist`).
    #[wasm_bindgen(setter, js_name = "tesseditCharBlacklist")]
    pub fn set_tessedit_char_blacklist(&mut self, value: String) {
        self.tessedit_char_blacklist = value;
    }

    /// Returns the `tessedit_use_primary_params_model` flag
    /// (JavaScript property `tesseditUsePrimaryParamsModel`).
    #[wasm_bindgen(getter, js_name = "tesseditUsePrimaryParamsModel")]
    pub fn tessedit_use_primary_params_model(&self) -> bool {
        self.tessedit_use_primary_params_model
    }

    /// Replaces the `tessedit_use_primary_params_model` flag
    /// (JavaScript property `tesseditUsePrimaryParamsModel`).
    #[wasm_bindgen(setter, js_name = "tesseditUsePrimaryParamsModel")]
    pub fn set_tessedit_use_primary_params_model(&mut self, value: bool) {
        self.tessedit_use_primary_params_model = value;
    }

    /// Returns the `textord_space_size_is_variable` flag
    /// (JavaScript property `textordSpaceSizeIsVariable`).
    #[wasm_bindgen(getter, js_name = "textordSpaceSizeIsVariable")]
    pub fn textord_space_size_is_variable(&self) -> bool {
        self.textord_space_size_is_variable
    }

    /// Replaces the `textord_space_size_is_variable` flag
    /// (JavaScript property `textordSpaceSizeIsVariable`).
    #[wasm_bindgen(setter, js_name = "textordSpaceSizeIsVariable")]
    pub fn set_textord_space_size_is_variable(&mut self, value: bool) {
        self.textord_space_size_is_variable = value;
    }

    /// Returns the `thresholding_method` flag
    /// (JavaScript property `thresholdingMethod`).
    #[wasm_bindgen(getter, js_name = "thresholdingMethod")]
    pub fn thresholding_method(&self) -> bool {
        self.thresholding_method
    }

    /// Replaces the `thresholding_method` flag
    /// (JavaScript property `thresholdingMethod`).
    #[wasm_bindgen(setter, js_name = "thresholdingMethod")]
    pub fn set_thresholding_method(&mut self, value: bool) {
        self.thresholding_method = value;
    }

    /// Returns the configuration obtained by converting
    /// `kreuzberg::TesseractConfig::default()` into this wrapper type.
    #[allow(clippy::should_implement_trait)]
    #[wasm_bindgen]
    pub fn default() -> WasmTesseractConfig {
        let core_defaults = kreuzberg::TesseractConfig::default();
        core_defaults.into()
    }
}
/// Record of the transformations applied to an image during OCR preprocessing.
///
/// Covers DPI normalization, resizing and resampling. Dimensions and DPI are
/// held as vectors here; this type does not enforce their length.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImagePreprocessingMetadata {
    original_dimensions: Vec<usize>,
    original_dpi: Vec<f64>,
    target_dpi: i32,
    scale_factor: f64,
    auto_adjusted: bool,
    final_dpi: i32,
    new_dimensions: Option<Vec<usize>>,
    resample_method: String,
    dimension_clamped: bool,
    calculated_dpi: Option<i32>,
    skipped_resize: bool,
    resize_error: Option<String>,
}

#[wasm_bindgen]
impl WasmImagePreprocessingMetadata {
    /// Creates a metadata record from its parts.
    ///
    /// The required arguments come first; `newDimensions`, `calculatedDpi`
    /// and `resizeError` are optional and trail the list.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        originalDimensions: Vec<usize>,
        originalDpi: Vec<f64>,
        targetDpi: i32,
        scaleFactor: f64,
        autoAdjusted: bool,
        finalDpi: i32,
        resampleMethod: String,
        dimensionClamped: bool,
        skippedResize: bool,
        newDimensions: Option<Vec<usize>>,
        calculatedDpi: Option<i32>,
        resizeError: Option<String>,
    ) -> WasmImagePreprocessingMetadata {
        Self {
            // Required values.
            original_dimensions: originalDimensions,
            original_dpi: originalDpi,
            target_dpi: targetDpi,
            scale_factor: scaleFactor,
            auto_adjusted: autoAdjusted,
            final_dpi: finalDpi,
            resample_method: resampleMethod,
            dimension_clamped: dimensionClamped,
            skipped_resize: skippedResize,
            // Optional values.
            new_dimensions: newDimensions,
            calculated_dpi: calculatedDpi,
            resize_error: resizeError,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmImagePreprocessingMetadata {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of `original_dimensions` (JavaScript property `originalDimensions`).
    #[wasm_bindgen(getter, js_name = "originalDimensions")]
    pub fn original_dimensions(&self) -> Vec<usize> {
        self.original_dimensions.to_vec()
    }

    /// Replaces `original_dimensions` (JavaScript property `originalDimensions`).
    #[wasm_bindgen(setter, js_name = "originalDimensions")]
    pub fn set_original_dimensions(&mut self, value: Vec<usize>) {
        self.original_dimensions = value;
    }

    /// Returns a copy of `original_dpi` (JavaScript property `originalDpi`).
    #[wasm_bindgen(getter, js_name = "originalDpi")]
    pub fn original_dpi(&self) -> Vec<f64> {
        self.original_dpi.to_vec()
    }

    /// Replaces `original_dpi` (JavaScript property `originalDpi`).
    #[wasm_bindgen(setter, js_name = "originalDpi")]
    pub fn set_original_dpi(&mut self, value: Vec<f64>) {
        self.original_dpi = value;
    }

    /// Returns `target_dpi` (JavaScript property `targetDpi`).
    #[wasm_bindgen(getter, js_name = "targetDpi")]
    pub fn target_dpi(&self) -> i32 {
        self.target_dpi
    }

    /// Replaces `target_dpi` (JavaScript property `targetDpi`).
    #[wasm_bindgen(setter, js_name = "targetDpi")]
    pub fn set_target_dpi(&mut self, value: i32) {
        self.target_dpi = value;
    }

    /// Returns `scale_factor` (JavaScript property `scaleFactor`).
    #[wasm_bindgen(getter, js_name = "scaleFactor")]
    pub fn scale_factor(&self) -> f64 {
        self.scale_factor
    }

    /// Replaces `scale_factor` (JavaScript property `scaleFactor`).
    #[wasm_bindgen(setter, js_name = "scaleFactor")]
    pub fn set_scale_factor(&mut self, value: f64) {
        self.scale_factor = value;
    }

    /// Returns the `auto_adjusted` flag (JavaScript property `autoAdjusted`).
    #[wasm_bindgen(getter, js_name = "autoAdjusted")]
    pub fn auto_adjusted(&self) -> bool {
        self.auto_adjusted
    }

    /// Replaces the `auto_adjusted` flag (JavaScript property `autoAdjusted`).
    #[wasm_bindgen(setter, js_name = "autoAdjusted")]
    pub fn set_auto_adjusted(&mut self, value: bool) {
        self.auto_adjusted = value;
    }

    /// Returns `final_dpi` (JavaScript property `finalDpi`).
    #[wasm_bindgen(getter, js_name = "finalDpi")]
    pub fn final_dpi(&self) -> i32 {
        self.final_dpi
    }

    /// Replaces `final_dpi` (JavaScript property `finalDpi`).
    #[wasm_bindgen(setter, js_name = "finalDpi")]
    pub fn set_final_dpi(&mut self, value: i32) {
        self.final_dpi = value;
    }

    /// Returns a copy of `new_dimensions`, if set (JavaScript property `newDimensions`).
    #[wasm_bindgen(getter, js_name = "newDimensions")]
    pub fn new_dimensions(&self) -> Option<Vec<usize>> {
        self.new_dimensions.clone()
    }

    /// Replaces `new_dimensions`; `None` clears it (JavaScript property `newDimensions`).
    #[wasm_bindgen(setter, js_name = "newDimensions")]
    pub fn set_new_dimensions(&mut self, value: Option<Vec<usize>>) {
        self.new_dimensions = value;
    }

    /// Returns a copy of `resample_method` (JavaScript property `resampleMethod`).
    #[wasm_bindgen(getter, js_name = "resampleMethod")]
    pub fn resample_method(&self) -> String {
        self.resample_method.clone()
    }

    /// Replaces `resample_method` (JavaScript property `resampleMethod`).
    #[wasm_bindgen(setter, js_name = "resampleMethod")]
    pub fn set_resample_method(&mut self, value: String) {
        self.resample_method = value;
    }

    /// Returns the `dimension_clamped` flag (JavaScript property `dimensionClamped`).
    #[wasm_bindgen(getter, js_name = "dimensionClamped")]
    pub fn dimension_clamped(&self) -> bool {
        self.dimension_clamped
    }

    /// Replaces the `dimension_clamped` flag (JavaScript property `dimensionClamped`).
    #[wasm_bindgen(setter, js_name = "dimensionClamped")]
    pub fn set_dimension_clamped(&mut self, value: bool) {
        self.dimension_clamped = value;
    }

    /// Returns `calculated_dpi`, if set (JavaScript property `calculatedDpi`).
    #[wasm_bindgen(getter, js_name = "calculatedDpi")]
    pub fn calculated_dpi(&self) -> Option<i32> {
        self.calculated_dpi
    }

    /// Replaces `calculated_dpi`; `None` clears it (JavaScript property `calculatedDpi`).
    #[wasm_bindgen(setter, js_name = "calculatedDpi")]
    pub fn set_calculated_dpi(&mut self, value: Option<i32>) {
        self.calculated_dpi = value;
    }

    /// Returns the `skipped_resize` flag (JavaScript property `skippedResize`).
    #[wasm_bindgen(getter, js_name = "skippedResize")]
    pub fn skipped_resize(&self) -> bool {
        self.skipped_resize
    }

    /// Replaces the `skipped_resize` flag (JavaScript property `skippedResize`).
    #[wasm_bindgen(setter, js_name = "skippedResize")]
    pub fn set_skipped_resize(&mut self, value: bool) {
        self.skipped_resize = value;
    }

    /// Returns a copy of `resize_error`, if set (JavaScript property `resizeError`).
    #[wasm_bindgen(getter, js_name = "resizeError")]
    pub fn resize_error(&self) -> Option<String> {
        self.resize_error.clone()
    }

    /// Replaces `resize_error`; `None` clears it (JavaScript property `resizeError`).
    #[wasm_bindgen(setter, js_name = "resizeError")]
    pub fn set_resize_error(&mut self, value: Option<String>) {
        self.resize_error = value;
    }
}
/// Metadata attached to an extraction result.
///
/// Combines fields common to all formats, a format-specific payload (held as
/// an opaque `JsValue` in `format`), and additional custom fields supplied by
/// postprocessors (held in `additional`).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmMetadata {
    title: Option<String>,
    subject: Option<String>,
    authors: Option<Vec<String>>,
    keywords: Option<Vec<String>>,
    language: Option<String>,
    created_at: Option<String>,
    modified_at: Option<String>,
    created_by: Option<String>,
    modified_by: Option<String>,
    pages: Option<WasmPageStructure>,
    format: Option<JsValue>,
    image_preprocessing: Option<WasmImagePreprocessingMetadata>,
    json_schema: Option<JsValue>,
    error: Option<WasmErrorMetadata>,
    extraction_duration_ms: Option<u64>,
    category: Option<String>,
    tags: Option<Vec<String>>,
    document_version: Option<String>,
    abstract_text: Option<String>,
    output_format: Option<String>,
    ocr_used: bool,
    additional: JsValue,
}

#[wasm_bindgen]
impl WasmMetadata {
    /// Creates a metadata object; every argument is optional.
    ///
    /// An omitted `ocrUsed` is stored as `false` and an omitted `additional`
    /// as `JsValue::default()`. All other omitted arguments are stored as `None`.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        ocrUsed: Option<bool>,
        additional: Option<JsValue>,
        title: Option<String>,
        subject: Option<String>,
        authors: Option<Vec<String>>,
        keywords: Option<Vec<String>>,
        language: Option<String>,
        createdAt: Option<String>,
        modifiedAt: Option<String>,
        createdBy: Option<String>,
        modifiedBy: Option<String>,
        pages: Option<WasmPageStructure>,
        format: Option<JsValue>,
        imagePreprocessing: Option<WasmImagePreprocessingMetadata>,
        jsonSchema: Option<JsValue>,
        error: Option<WasmErrorMetadata>,
        extractionDurationMs: Option<u64>,
        category: Option<String>,
        tags: Option<Vec<String>>,
        documentVersion: Option<String>,
        abstractText: Option<String>,
        outputFormat: Option<String>,
    ) -> WasmMetadata {
        Self {
            // The two non-optional fields receive explicit fallbacks.
            ocr_used: ocrUsed.unwrap_or(false),
            additional: additional.unwrap_or_default(),
            // Renamed camelCase arguments.
            created_at: createdAt,
            modified_at: modifiedAt,
            created_by: createdBy,
            modified_by: modifiedBy,
            image_preprocessing: imagePreprocessing,
            json_schema: jsonSchema,
            extraction_duration_ms: extractionDurationMs,
            document_version: documentVersion,
            abstract_text: abstractText,
            output_format: outputFormat,
            // Arguments whose names already match the field.
            title,
            subject,
            authors,
            keywords,
            language,
            pages,
            format,
            error,
            category,
            tags,
        }
    }

    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmMetadata {
        // Fully qualified so the trait impl is called, not this inherent function.
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of `title`.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option<String> {
        self.title.clone()
    }

    /// Replaces `title`.
    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option<String>) {
        self.title = value;
    }

    /// Returns a copy of `subject`.
    #[wasm_bindgen(getter)]
    pub fn subject(&self) -> Option<String> {
        self.subject.clone()
    }

    /// Replaces `subject`.
    #[wasm_bindgen(setter)]
    pub fn set_subject(&mut self, value: Option<String>) {
        self.subject = value;
    }

    /// Returns a copy of `authors`.
    #[wasm_bindgen(getter)]
    pub fn authors(&self) -> Option<Vec<String>> {
        self.authors.clone()
    }

    /// Replaces `authors`.
    #[wasm_bindgen(setter)]
    pub fn set_authors(&mut self, value: Option<Vec<String>>) {
        self.authors = value;
    }

    /// Returns a copy of `keywords`.
    #[wasm_bindgen(getter)]
    pub fn keywords(&self) -> Option<Vec<String>> {
        self.keywords.clone()
    }

    /// Replaces `keywords`.
    #[wasm_bindgen(setter)]
    pub fn set_keywords(&mut self, value: Option<Vec<String>>) {
        self.keywords = value;
    }

    /// Returns a copy of `language`.
    #[wasm_bindgen(getter)]
    pub fn language(&self) -> Option<String> {
        self.language.clone()
    }

    /// Replaces `language`.
    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: Option<String>) {
        self.language = value;
    }

    /// Returns a copy of `created_at` (JavaScript property `createdAt`).
    #[wasm_bindgen(getter, js_name = "createdAt")]
    pub fn created_at(&self) -> Option<String> {
        self.created_at.clone()
    }

    /// Replaces `created_at` (JavaScript property `createdAt`).
    #[wasm_bindgen(setter, js_name = "createdAt")]
    pub fn set_created_at(&mut self, value: Option<String>) {
        self.created_at = value;
    }

    /// Returns a copy of `modified_at` (JavaScript property `modifiedAt`).
    #[wasm_bindgen(getter, js_name = "modifiedAt")]
    pub fn modified_at(&self) -> Option<String> {
        self.modified_at.clone()
    }

    /// Replaces `modified_at` (JavaScript property `modifiedAt`).
    #[wasm_bindgen(setter, js_name = "modifiedAt")]
    pub fn set_modified_at(&mut self, value: Option<String>) {
        self.modified_at = value;
    }

    /// Returns a copy of `created_by` (JavaScript property `createdBy`).
    #[wasm_bindgen(getter, js_name = "createdBy")]
    pub fn created_by(&self) -> Option<String> {
        self.created_by.clone()
    }

    /// Replaces `created_by` (JavaScript property `createdBy`).
    #[wasm_bindgen(setter, js_name = "createdBy")]
    pub fn set_created_by(&mut self, value: Option<String>) {
        self.created_by = value;
    }

    /// Returns a copy of `modified_by` (JavaScript property `modifiedBy`).
    #[wasm_bindgen(getter, js_name = "modifiedBy")]
    pub fn modified_by(&self) -> Option<String> {
        self.modified_by.clone()
    }

    /// Replaces `modified_by` (JavaScript property `modifiedBy`).
    #[wasm_bindgen(setter, js_name = "modifiedBy")]
    pub fn set_modified_by(&mut self, value: Option<String>) {
        self.modified_by = value;
    }

    /// Returns a clone of `pages`.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option<WasmPageStructure> {
        self.pages.clone()
    }

    /// Replaces `pages`.
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option<WasmPageStructure>) {
        self.pages = value;
    }

    /// Returns a clone of the format-specific payload.
    #[wasm_bindgen(getter)]
    pub fn format(&self) -> Option<JsValue> {
        self.format.clone()
    }

    /// Replaces the format-specific payload.
    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: Option<JsValue>) {
        self.format = value;
    }

    /// Returns a clone of `image_preprocessing`
    /// (JavaScript property `imagePreprocessing`).
    #[wasm_bindgen(getter, js_name = "imagePreprocessing")]
    pub fn image_preprocessing(&self) -> Option<WasmImagePreprocessingMetadata> {
        self.image_preprocessing.clone()
    }

    /// Replaces `image_preprocessing` (JavaScript property `imagePreprocessing`).
    #[wasm_bindgen(setter, js_name = "imagePreprocessing")]
    pub fn set_image_preprocessing(&mut self, value: Option<WasmImagePreprocessingMetadata>) {
        self.image_preprocessing = value;
    }

    /// Returns a clone of `json_schema` (JavaScript property `jsonSchema`).
    #[wasm_bindgen(getter, js_name = "jsonSchema")]
    pub fn json_schema(&self) -> Option<JsValue> {
        self.json_schema.clone()
    }

    /// Replaces `json_schema` (JavaScript property `jsonSchema`).
    #[wasm_bindgen(setter, js_name = "jsonSchema")]
    pub fn set_json_schema(&mut self, value: Option<JsValue>) {
        self.json_schema = value;
    }

    /// Returns a clone of `error`.
    #[wasm_bindgen(getter)]
    pub fn error(&self) -> Option<WasmErrorMetadata> {
        self.error.clone()
    }

    /// Replaces `error`.
    #[wasm_bindgen(setter)]
    pub fn set_error(&mut self, value: Option<WasmErrorMetadata>) {
        self.error = value;
    }

    /// Returns `extraction_duration_ms` (JavaScript property `extractionDurationMs`).
    #[wasm_bindgen(getter, js_name = "extractionDurationMs")]
    pub fn extraction_duration_ms(&self) -> Option<u64> {
        self.extraction_duration_ms
    }

    /// Replaces `extraction_duration_ms` (JavaScript property `extractionDurationMs`).
    #[wasm_bindgen(setter, js_name = "extractionDurationMs")]
    pub fn set_extraction_duration_ms(&mut self, value: Option<u64>) {
        self.extraction_duration_ms = value;
    }

    /// Returns a copy of `category`.
    #[wasm_bindgen(getter)]
    pub fn category(&self) -> Option<String> {
        self.category.clone()
    }

    /// Replaces `category`.
    #[wasm_bindgen(setter)]
    pub fn set_category(&mut self, value: Option<String>) {
        self.category = value;
    }

    /// Returns a copy of `tags`.
    #[wasm_bindgen(getter)]
    pub fn tags(&self) -> Option<Vec<String>> {
        self.tags.clone()
    }

    /// Replaces `tags`.
    #[wasm_bindgen(setter)]
    pub fn set_tags(&mut self, value: Option<Vec<String>>) {
        self.tags = value;
    }

    /// Returns a copy of `document_version` (JavaScript property `documentVersion`).
    #[wasm_bindgen(getter, js_name = "documentVersion")]
    pub fn document_version(&self) -> Option<String> {
        self.document_version.clone()
    }

    /// Replaces `document_version` (JavaScript property `documentVersion`).
    #[wasm_bindgen(setter, js_name = "documentVersion")]
    pub fn set_document_version(&mut self, value: Option<String>) {
        self.document_version = value;
    }

    /// Returns a copy of `abstract_text` (JavaScript property `abstractText`).
    #[wasm_bindgen(getter, js_name = "abstractText")]
    pub fn abstract_text(&self) -> Option<String> {
        self.abstract_text.clone()
    }

    /// Replaces `abstract_text` (JavaScript property `abstractText`).
    #[wasm_bindgen(setter, js_name = "abstractText")]
    pub fn set_abstract_text(&mut self, value: Option<String>) {
        self.abstract_text = value;
    }

    /// Returns a copy of `output_format` (JavaScript property `outputFormat`).
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> Option<String> {
        self.output_format.clone()
    }

    /// Replaces `output_format` (JavaScript property `outputFormat`).
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: Option<String>) {
        self.output_format = value;
    }

    /// Returns the `ocr_used` flag (JavaScript property `ocrUsed`).
    #[wasm_bindgen(getter, js_name = "ocrUsed")]
    pub fn ocr_used(&self) -> bool {
        self.ocr_used
    }

    /// Replaces the `ocr_used` flag (JavaScript property `ocrUsed`).
    #[wasm_bindgen(setter, js_name = "ocrUsed")]
    pub fn set_ocr_used(&mut self, value: bool) {
        self.ocr_used = value;
    }

    /// Returns a clone of the `additional` value.
    #[wasm_bindgen(getter)]
    pub fn additional(&self) -> JsValue {
        self.additional.clone()
    }

    /// Replaces the `additional` value.
    #[wasm_bindgen(setter)]
    pub fn set_additional(&mut self, value: JsValue) {
        self.additional = value;
    }

    /// Reports whether the metadata is empty: `true` when no metadata fields,
    /// format-specific metadata, or additional postprocessor fields are populated.
    ///
    /// The check is delegated to `kreuzberg::Metadata::is_empty` on a
    /// converted clone of `self`.
    #[wasm_bindgen(js_name = "isEmpty")]
    pub fn is_empty(&self) -> bool {
        let core_metadata: kreuzberg::Metadata = self.clone().into();
        core_metadata.is_empty()
    }
}
/// Excel/spreadsheet format metadata.
///
/// Identifies the document as a spreadsheet source via the `FormatMetadata.Excel`
/// discriminant. Sheet count and sheet names are stored inside this struct.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExcelMetadata {
// Optional sheet count; read and written from JS as `sheetCount`.
sheet_count: Option<u32>,
// Optional list of sheet names; read and written from JS as `sheetNames`.
sheet_names: Option<Vec<String>>,
}
#[wasm_bindgen]
impl WasmExcelMetadata {
    /// Builds the metadata from an optional sheet count and optional sheet names.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(sheetCount: Option<u32>, sheetNames: Option<Vec<String>>) -> WasmExcelMetadata {
        let sheet_count = sheetCount;
        let sheet_names = sheetNames;
        WasmExcelMetadata {
            sheet_count,
            sheet_names,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExcelMetadata {
        ::core::default::Default::default()
    }
    /// Returns the stored sheet count, if any.
    #[wasm_bindgen(getter, js_name = "sheetCount")]
    pub fn sheet_count(&self) -> Option<u32> {
        self.sheet_count
    }
    /// Replaces the stored sheet count.
    #[wasm_bindgen(setter, js_name = "sheetCount")]
    pub fn set_sheet_count(&mut self, value: Option<u32>) {
        self.sheet_count = value;
    }
    /// Returns a copy of the stored sheet names, if any.
    #[wasm_bindgen(getter, js_name = "sheetNames")]
    pub fn sheet_names(&self) -> Option<Vec<String>> {
        self.sheet_names.as_deref().map(<[String]>::to_vec)
    }
    /// Replaces the stored sheet names.
    #[wasm_bindgen(setter, js_name = "sheetNames")]
    pub fn set_sheet_names(&mut self, value: Option<Vec<String>>) {
        self.sheet_names = value;
    }
}
/// Email metadata extracted from .eml and .msg files.
///
/// Includes sender/recipient information, message ID, and attachment list.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEmailMetadata {
// Optional sender address; JS property `fromEmail`.
from_email: Option<String>,
// Optional sender display name; JS property `fromName`.
from_name: Option<String>,
// "To" recipient list; JS property `toEmails`. Empty when none were supplied.
to_emails: Vec<String>,
// "Cc" recipient list; JS property `ccEmails`. Empty when none were supplied.
cc_emails: Vec<String>,
// "Bcc" recipient list; JS property `bccEmails`. Empty when none were supplied.
bcc_emails: Vec<String>,
// Optional message ID; JS property `messageId`.
message_id: Option<String>,
// Attachment list as strings (presumably file names — confirm against the producer).
attachments: Vec<String>,
}
#[wasm_bindgen]
impl WasmEmailMetadata {
    /// Builds the metadata; any list argument that is omitted becomes an empty vector.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        toEmails: Option<Vec<String>>,
        ccEmails: Option<Vec<String>>,
        bccEmails: Option<Vec<String>>,
        attachments: Option<Vec<String>>,
        fromEmail: Option<String>,
        fromName: Option<String>,
        messageId: Option<String>,
    ) -> WasmEmailMetadata {
        let to_emails = toEmails.unwrap_or_default();
        let cc_emails = ccEmails.unwrap_or_default();
        let bcc_emails = bccEmails.unwrap_or_default();
        let attachments = attachments.unwrap_or_default();
        WasmEmailMetadata {
            from_email: fromEmail,
            from_name: fromName,
            to_emails,
            cc_emails,
            bcc_emails,
            message_id: messageId,
            attachments,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmailMetadata {
        ::core::default::Default::default()
    }
    /// Returns a copy of the sender address, if any.
    #[wasm_bindgen(getter, js_name = "fromEmail")]
    pub fn from_email(&self) -> Option<String> {
        self.from_email.as_deref().map(str::to_owned)
    }
    /// Replaces the sender address.
    #[wasm_bindgen(setter, js_name = "fromEmail")]
    pub fn set_from_email(&mut self, value: Option<String>) {
        self.from_email = value;
    }
    /// Returns a copy of the sender name, if any.
    #[wasm_bindgen(getter, js_name = "fromName")]
    pub fn from_name(&self) -> Option<String> {
        self.from_name.as_deref().map(str::to_owned)
    }
    /// Replaces the sender name.
    #[wasm_bindgen(setter, js_name = "fromName")]
    pub fn set_from_name(&mut self, value: Option<String>) {
        self.from_name = value;
    }
    /// Returns a copy of the "To" recipient list.
    #[wasm_bindgen(getter, js_name = "toEmails")]
    pub fn to_emails(&self) -> Vec<String> {
        self.to_emails.to_vec()
    }
    /// Replaces the "To" recipient list.
    #[wasm_bindgen(setter, js_name = "toEmails")]
    pub fn set_to_emails(&mut self, value: Vec<String>) {
        self.to_emails = value;
    }
    /// Returns a copy of the "Cc" recipient list.
    #[wasm_bindgen(getter, js_name = "ccEmails")]
    pub fn cc_emails(&self) -> Vec<String> {
        self.cc_emails.to_vec()
    }
    /// Replaces the "Cc" recipient list.
    #[wasm_bindgen(setter, js_name = "ccEmails")]
    pub fn set_cc_emails(&mut self, value: Vec<String>) {
        self.cc_emails = value;
    }
    /// Returns a copy of the "Bcc" recipient list.
    #[wasm_bindgen(getter, js_name = "bccEmails")]
    pub fn bcc_emails(&self) -> Vec<String> {
        self.bcc_emails.to_vec()
    }
    /// Replaces the "Bcc" recipient list.
    #[wasm_bindgen(setter, js_name = "bccEmails")]
    pub fn set_bcc_emails(&mut self, value: Vec<String>) {
        self.bcc_emails = value;
    }
    /// Returns a copy of the message ID, if any.
    #[wasm_bindgen(getter, js_name = "messageId")]
    pub fn message_id(&self) -> Option<String> {
        self.message_id.as_deref().map(str::to_owned)
    }
    /// Replaces the message ID.
    #[wasm_bindgen(setter, js_name = "messageId")]
    pub fn set_message_id(&mut self, value: Option<String>) {
        self.message_id = value;
    }
    /// Returns a copy of the attachment list.
    #[wasm_bindgen(getter)]
    pub fn attachments(&self) -> Vec<String> {
        self.attachments.to_vec()
    }
    /// Replaces the attachment list.
    #[wasm_bindgen(setter)]
    pub fn set_attachments(&mut self, value: Vec<String>) {
        self.attachments = value;
    }
}
/// Archive (ZIP/TAR/7Z) metadata.
///
/// Extracted from compressed archive files containing file lists and size information.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmArchiveMetadata {
// Archive format label; empty string when the constructor receives none.
format: String,
// Number of files; JS property `fileCount`. Defaults to 0.
file_count: u32,
// File list as strings; JS property `fileList`. Defaults to empty.
file_list: Vec<String>,
// Total size; JS property `totalSize`. NOTE(review): unit is presumably bytes — confirm.
total_size: u64,
// Optional compressed size; JS property `compressedSize`.
compressed_size: Option<u64>,
}
#[wasm_bindgen]
impl WasmArchiveMetadata {
    /// Builds the metadata; omitted `format`, `fileCount`, `fileList` and
    /// `totalSize` fall back to their `Default` values.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        format: Option<String>,
        fileCount: Option<u32>,
        fileList: Option<Vec<String>>,
        totalSize: Option<u64>,
        compressedSize: Option<u64>,
    ) -> WasmArchiveMetadata {
        let format = format.unwrap_or_default();
        let file_count = fileCount.unwrap_or_default();
        let file_list = fileList.unwrap_or_default();
        let total_size = totalSize.unwrap_or_default();
        WasmArchiveMetadata {
            format,
            file_count,
            file_list,
            total_size,
            compressed_size: compressedSize,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmArchiveMetadata {
        ::core::default::Default::default()
    }
    /// Returns a copy of the format label.
    #[wasm_bindgen(getter)]
    pub fn format(&self) -> String {
        String::from(self.format.as_str())
    }
    /// Replaces the format label.
    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: String) {
        self.format = value;
    }
    /// Returns the stored file count.
    #[wasm_bindgen(getter, js_name = "fileCount")]
    pub fn file_count(&self) -> u32 {
        self.file_count
    }
    /// Replaces the stored file count.
    #[wasm_bindgen(setter, js_name = "fileCount")]
    pub fn set_file_count(&mut self, value: u32) {
        self.file_count = value;
    }
    /// Returns a copy of the file list.
    #[wasm_bindgen(getter, js_name = "fileList")]
    pub fn file_list(&self) -> Vec<String> {
        self.file_list.to_vec()
    }
    /// Replaces the file list.
    #[wasm_bindgen(setter, js_name = "fileList")]
    pub fn set_file_list(&mut self, value: Vec<String>) {
        self.file_list = value;
    }
    /// Returns the stored total size.
    #[wasm_bindgen(getter, js_name = "totalSize")]
    pub fn total_size(&self) -> u64 {
        self.total_size
    }
    /// Replaces the stored total size.
    #[wasm_bindgen(setter, js_name = "totalSize")]
    pub fn set_total_size(&mut self, value: u64) {
        self.total_size = value;
    }
    /// Returns the stored compressed size, if any.
    #[wasm_bindgen(getter, js_name = "compressedSize")]
    pub fn compressed_size(&self) -> Option<u64> {
        self.compressed_size
    }
    /// Replaces the stored compressed size.
    #[wasm_bindgen(setter, js_name = "compressedSize")]
    pub fn set_compressed_size(&mut self, value: Option<u64>) {
        self.compressed_size = value;
    }
}
/// Image metadata extracted from image files.
///
/// Includes dimensions, format, and EXIF data.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImageMetadata {
// Image width; 0 when the constructor receives none. Presumably pixels — confirm.
width: u32,
// Image height; 0 when the constructor receives none. Presumably pixels — confirm.
height: u32,
// Image format label; empty string when the constructor receives none.
format: String,
// EXIF data held as an opaque JS value; its shape is not defined in this struct.
exif: JsValue,
}
#[wasm_bindgen]
impl WasmImageMetadata {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
width: Option<u32>,
height: Option<u32>,
format: Option<String>,
exif: Option<JsValue>,
) -> WasmImageMetadata {
WasmImageMetadata {
width: width.unwrap_or_default(),
height: height.unwrap_or_default(),
format: format.unwrap_or_default(),
exif: exif.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmImageMetadata {
<WasmImageMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn width(&self) -> u32 {
self.width
}
#[wasm_bindgen(setter)]
pub fn set_width(&mut self, value: u32) {
self.width = value;
}
#[wasm_bindgen(getter)]
pub fn height(&self) -> u32 {
self.height
}
#[wasm_bindgen(setter)]
pub fn set_height(&mut self, value: u32) {
self.height = value;
}
#[wasm_bindgen(getter)]
pub fn format(&self) -> String {
self.format.clone()
}
#[wasm_bindgen(setter)]
pub fn set_format(&mut self, value: String) {
self.format = value;
}
#[wasm_bindgen(getter)]
pub fn exif(&self) -> JsValue {
self.exif.clone()
}
#[wasm_bindgen(setter)]
pub fn set_exif(&mut self, value: JsValue) {
self.exif = value;
}
}
/// XML metadata extracted during XML parsing.
///
/// Provides statistics about XML document structure.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmXmlMetadata {
// Element count; JS property `elementCount`. Defaults to 0.
element_count: u32,
// Element names as strings; JS property `uniqueElements`. Defaults to empty.
unique_elements: Vec<String>,
}
#[wasm_bindgen]
impl WasmXmlMetadata {
    /// Builds the metadata; omitted arguments fall back to `0` and an empty list.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(elementCount: Option<u32>, uniqueElements: Option<Vec<String>>) -> WasmXmlMetadata {
        let element_count = elementCount.unwrap_or_default();
        let unique_elements = uniqueElements.unwrap_or_default();
        WasmXmlMetadata {
            element_count,
            unique_elements,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmXmlMetadata {
        ::core::default::Default::default()
    }
    /// Returns the stored element count.
    #[wasm_bindgen(getter, js_name = "elementCount")]
    pub fn element_count(&self) -> u32 {
        self.element_count
    }
    /// Replaces the stored element count.
    #[wasm_bindgen(setter, js_name = "elementCount")]
    pub fn set_element_count(&mut self, value: u32) {
        self.element_count = value;
    }
    /// Returns a copy of the unique-element list.
    #[wasm_bindgen(getter, js_name = "uniqueElements")]
    pub fn unique_elements(&self) -> Vec<String> {
        self.unique_elements.to_vec()
    }
    /// Replaces the unique-element list.
    #[wasm_bindgen(setter, js_name = "uniqueElements")]
    pub fn set_unique_elements(&mut self, value: Vec<String>) {
        self.unique_elements = value;
    }
}
/// Text/Markdown metadata.
///
/// Extracted from plain text and Markdown files. Includes word counts and,
/// for Markdown, structural elements like headers and links.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTextMetadata {
// Line count; JS property `lineCount`. Defaults to 0.
line_count: u32,
// Word count; JS property `wordCount`. Defaults to 0.
word_count: u32,
// Character count; JS property `characterCount`. Defaults to 0.
character_count: u32,
// Optional header strings.
headers: Option<Vec<String>>,
// Optional links held as an opaque JS value; shape not defined in this struct.
links: Option<JsValue>,
// Optional code blocks held as an opaque JS value; JS property `codeBlocks`.
code_blocks: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmTextMetadata {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
lineCount: Option<u32>,
wordCount: Option<u32>,
characterCount: Option<u32>,
headers: Option<Vec<String>>,
links: Option<JsValue>,
codeBlocks: Option<JsValue>,
) -> WasmTextMetadata {
WasmTextMetadata {
line_count: lineCount.unwrap_or_default(),
word_count: wordCount.unwrap_or_default(),
character_count: characterCount.unwrap_or_default(),
headers,
links,
code_blocks: codeBlocks,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmTextMetadata {
<WasmTextMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "lineCount")]
pub fn line_count(&self) -> u32 {
self.line_count
}
#[wasm_bindgen(setter, js_name = "lineCount")]
pub fn set_line_count(&mut self, value: u32) {
self.line_count = value;
}
#[wasm_bindgen(getter, js_name = "wordCount")]
pub fn word_count(&self) -> u32 {
self.word_count
}
#[wasm_bindgen(setter, js_name = "wordCount")]
pub fn set_word_count(&mut self, value: u32) {
self.word_count = value;
}
#[wasm_bindgen(getter, js_name = "characterCount")]
pub fn character_count(&self) -> u32 {
self.character_count
}
#[wasm_bindgen(setter, js_name = "characterCount")]
pub fn set_character_count(&mut self, value: u32) {
self.character_count = value;
}
#[wasm_bindgen(getter)]
pub fn headers(&self) -> Option<Vec<String>> {
self.headers.clone()
}
#[wasm_bindgen(setter)]
pub fn set_headers(&mut self, value: Option<Vec<String>>) {
self.headers = value;
}
#[wasm_bindgen(getter)]
pub fn links(&self) -> Option<JsValue> {
self.links.clone()
}
#[wasm_bindgen(setter)]
pub fn set_links(&mut self, value: Option<JsValue>) {
self.links = value;
}
#[wasm_bindgen(getter, js_name = "codeBlocks")]
pub fn code_blocks(&self) -> Option<JsValue> {
self.code_blocks.clone()
}
#[wasm_bindgen(setter, js_name = "codeBlocks")]
pub fn set_code_blocks(&mut self, value: Option<JsValue>) {
self.code_blocks = value;
}
}
/// Header/heading element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHeaderMetadata {
// Heading level. NOTE(review): presumably 1-6 as in HTML `h1`..`h6` — confirm.
level: u8,
// Heading text.
text: String,
// Optional element id.
id: Option<String>,
// Depth value; its reference point is not defined in this struct.
depth: u32,
// Offset into the HTML; JS property `htmlOffset`. Unit (bytes vs. chars) not shown here.
html_offset: u32,
}
#[wasm_bindgen]
impl WasmHeaderMetadata {
    /// Builds the metadata from the given values; only `id` is optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(level: u8, text: String, depth: u32, htmlOffset: u32, id: Option<String>) -> WasmHeaderMetadata {
        let html_offset = htmlOffset;
        WasmHeaderMetadata {
            level,
            text,
            id,
            depth,
            html_offset,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmHeaderMetadata {
        ::core::default::Default::default()
    }
    /// Returns the stored heading level.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> u8 {
        self.level
    }
    /// Replaces the stored heading level.
    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: u8) {
        self.level = value;
    }
    /// Returns a copy of the heading text.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        String::from(self.text.as_str())
    }
    /// Replaces the heading text.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }
    /// Returns a copy of the element id, if any.
    #[wasm_bindgen(getter)]
    pub fn id(&self) -> Option<String> {
        self.id.as_deref().map(str::to_owned)
    }
    /// Replaces the element id.
    #[wasm_bindgen(setter)]
    pub fn set_id(&mut self, value: Option<String>) {
        self.id = value;
    }
    /// Returns the stored depth.
    #[wasm_bindgen(getter)]
    pub fn depth(&self) -> u32 {
        self.depth
    }
    /// Replaces the stored depth.
    #[wasm_bindgen(setter)]
    pub fn set_depth(&mut self, value: u32) {
        self.depth = value;
    }
    /// Returns the stored HTML offset.
    #[wasm_bindgen(getter, js_name = "htmlOffset")]
    pub fn html_offset(&self) -> u32 {
        self.html_offset
    }
    /// Replaces the stored HTML offset.
    #[wasm_bindgen(setter, js_name = "htmlOffset")]
    pub fn set_html_offset(&mut self, value: u32) {
        self.html_offset = value;
    }
}
/// Link element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLinkMetadata {
// Link target.
href: String,
// Link text.
text: String,
// Optional title.
title: Option<String>,
// Link classification; the JS getter `linkType` exposes it as a string via `to_api_str`.
link_type: WasmLinkType,
// `rel` values as a list of strings.
rel: Vec<String>,
// Attributes held as an opaque JS value; shape not defined in this struct.
attributes: JsValue,
}
#[wasm_bindgen]
impl WasmLinkMetadata {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
href: String,
text: String,
linkType: WasmLinkType,
rel: Vec<String>,
attributes: JsValue,
title: Option<String>,
) -> WasmLinkMetadata {
WasmLinkMetadata {
href,
text,
title,
link_type: linkType,
rel,
attributes,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmLinkMetadata {
<WasmLinkMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn href(&self) -> String {
self.href.clone()
}
#[wasm_bindgen(setter)]
pub fn set_href(&mut self, value: String) {
self.href = value;
}
#[wasm_bindgen(getter)]
pub fn text(&self) -> String {
self.text.clone()
}
#[wasm_bindgen(setter)]
pub fn set_text(&mut self, value: String) {
self.text = value;
}
#[wasm_bindgen(getter)]
pub fn title(&self) -> Option<String> {
self.title.clone()
}
#[wasm_bindgen(setter)]
pub fn set_title(&mut self, value: Option<String>) {
self.title = value;
}
#[wasm_bindgen(getter, js_name = "linkType")]
pub fn link_type(&self) -> String {
self.link_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "linkType")]
pub fn set_link_type(&mut self, value: WasmLinkType) {
self.link_type = value;
}
#[wasm_bindgen(getter)]
pub fn rel(&self) -> Vec<String> {
self.rel.clone()
}
#[wasm_bindgen(setter)]
pub fn set_rel(&mut self, value: Vec<String>) {
self.rel = value;
}
#[wasm_bindgen(getter)]
pub fn attributes(&self) -> JsValue {
self.attributes.clone()
}
#[wasm_bindgen(setter)]
pub fn set_attributes(&mut self, value: JsValue) {
self.attributes = value;
}
}
/// Image element metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmImageMetadataType {
// Image source.
src: String,
// Optional alternative text.
alt: Option<String>,
// Optional title.
title: Option<String>,
// Optional dimensions. NOTE(review): presumably `[width, height]` — element order not shown here.
dimensions: Option<Vec<u32>>,
// Image classification; the JS getter `imageType` exposes it as a string via `to_api_str`.
image_type: WasmImageType,
// Attributes held as an opaque JS value; shape not defined in this struct.
attributes: JsValue,
}
#[wasm_bindgen]
impl WasmImageMetadataType {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
src: String,
imageType: WasmImageType,
attributes: JsValue,
alt: Option<String>,
title: Option<String>,
dimensions: Option<Vec<u32>>,
) -> WasmImageMetadataType {
WasmImageMetadataType {
src,
alt,
title,
dimensions,
image_type: imageType,
attributes,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmImageMetadataType {
<WasmImageMetadataType as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn src(&self) -> String {
self.src.clone()
}
#[wasm_bindgen(setter)]
pub fn set_src(&mut self, value: String) {
self.src = value;
}
#[wasm_bindgen(getter)]
pub fn alt(&self) -> Option<String> {
self.alt.clone()
}
#[wasm_bindgen(setter)]
pub fn set_alt(&mut self, value: Option<String>) {
self.alt = value;
}
#[wasm_bindgen(getter)]
pub fn title(&self) -> Option<String> {
self.title.clone()
}
#[wasm_bindgen(setter)]
pub fn set_title(&mut self, value: Option<String>) {
self.title = value;
}
#[wasm_bindgen(getter)]
pub fn dimensions(&self) -> Option<Vec<u32>> {
self.dimensions.clone()
}
#[wasm_bindgen(setter)]
pub fn set_dimensions(&mut self, value: Option<Vec<u32>>) {
self.dimensions = value;
}
#[wasm_bindgen(getter, js_name = "imageType")]
pub fn image_type(&self) -> String {
self.image_type.to_api_str().to_owned()
}
#[wasm_bindgen(setter, js_name = "imageType")]
pub fn set_image_type(&mut self, value: WasmImageType) {
self.image_type = value;
}
#[wasm_bindgen(getter)]
pub fn attributes(&self) -> JsValue {
self.attributes.clone()
}
#[wasm_bindgen(setter)]
pub fn set_attributes(&mut self, value: JsValue) {
self.attributes = value;
}
}
/// Structured data (Schema.org, microdata, RDFa) block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmStructuredData {
// Kind of structured data; the JS getter `dataType` exposes it as a string via `to_api_str`.
data_type: WasmStructuredDataType,
// Raw payload as a string; JS property `rawJson`. Not parsed or validated by this struct.
raw_json: String,
// Optional schema type; JS property `schemaType`.
schema_type: Option<String>,
}
#[wasm_bindgen]
impl WasmStructuredData {
    /// Builds the block from its data type, raw JSON string and optional schema type.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(dataType: WasmStructuredDataType, rawJson: String, schemaType: Option<String>) -> WasmStructuredData {
        let data_type = dataType;
        let raw_json = rawJson;
        let schema_type = schemaType;
        WasmStructuredData {
            data_type,
            raw_json,
            schema_type,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmStructuredData {
        ::core::default::Default::default()
    }
    /// Returns the data type as the string produced by `to_api_str`.
    #[wasm_bindgen(getter, js_name = "dataType")]
    pub fn data_type(&self) -> String {
        let label = self.data_type.to_api_str();
        label.to_owned()
    }
    /// Replaces the data type; takes the enum value, whereas the getter yields its string form.
    #[wasm_bindgen(setter, js_name = "dataType")]
    pub fn set_data_type(&mut self, value: WasmStructuredDataType) {
        self.data_type = value;
    }
    /// Returns a copy of the raw JSON string.
    #[wasm_bindgen(getter, js_name = "rawJson")]
    pub fn raw_json(&self) -> String {
        String::from(self.raw_json.as_str())
    }
    /// Replaces the raw JSON string.
    #[wasm_bindgen(setter, js_name = "rawJson")]
    pub fn set_raw_json(&mut self, value: String) {
        self.raw_json = value;
    }
    /// Returns a copy of the schema type, if any.
    #[wasm_bindgen(getter, js_name = "schemaType")]
    pub fn schema_type(&self) -> Option<String> {
        self.schema_type.as_deref().map(str::to_owned)
    }
    /// Replaces the schema type.
    #[wasm_bindgen(setter, js_name = "schemaType")]
    pub fn set_schema_type(&mut self, value: Option<String>) {
        self.schema_type = value;
    }
}
/// HTML metadata extracted from HTML documents.
///
/// Includes document-level metadata, Open Graph data, Twitter Card metadata,
/// and extracted structural elements (headers, links, images, structured data).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHtmlMetadata {
// Optional document title.
title: Option<String>,
// Optional document description.
description: Option<String>,
// Keyword list; empty when the constructor receives none.
keywords: Vec<String>,
// Optional author.
author: Option<String>,
// Optional canonical URL; JS property `canonicalUrl`.
canonical_url: Option<String>,
// Optional base href; JS property `baseHref`.
base_href: Option<String>,
// Optional language string.
language: Option<String>,
// Optional text direction; the JS getter `textDirection` exposes it as a string via `to_api_str`.
text_direction: Option<WasmTextDirection>,
// Open Graph data held as an opaque JS value; JS property `openGraph`.
open_graph: JsValue,
// Twitter Card data held as an opaque JS value; JS property `twitterCard`.
twitter_card: JsValue,
// Meta tags held as an opaque JS value; JS property `metaTags`.
meta_tags: JsValue,
// Extracted header elements; empty when the constructor receives none.
headers: Vec<WasmHeaderMetadata>,
// Extracted link elements; empty when the constructor receives none.
links: Vec<WasmLinkMetadata>,
// Extracted image elements; empty when the constructor receives none.
images: Vec<WasmImageMetadataType>,
// Extracted structured-data blocks; JS property `structuredData`.
structured_data: Vec<WasmStructuredData>,
}
#[wasm_bindgen]
impl WasmHtmlMetadata {
#[allow(clippy::too_many_arguments)]
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
keywords: Option<Vec<String>>,
openGraph: Option<JsValue>,
twitterCard: Option<JsValue>,
metaTags: Option<JsValue>,
headers: Option<Vec<WasmHeaderMetadata>>,
links: Option<Vec<WasmLinkMetadata>>,
images: Option<Vec<WasmImageMetadataType>>,
structuredData: Option<Vec<WasmStructuredData>>,
title: Option<String>,
description: Option<String>,
author: Option<String>,
canonicalUrl: Option<String>,
baseHref: Option<String>,
language: Option<String>,
textDirection: Option<WasmTextDirection>,
) -> WasmHtmlMetadata {
WasmHtmlMetadata {
title,
description,
keywords: keywords.unwrap_or_default(),
author,
canonical_url: canonicalUrl,
base_href: baseHref,
language,
text_direction: textDirection,
open_graph: openGraph.unwrap_or_default(),
twitter_card: twitterCard.unwrap_or_default(),
meta_tags: metaTags.unwrap_or_default(),
headers: headers.unwrap_or_default(),
links: links.unwrap_or_default(),
images: images.unwrap_or_default(),
structured_data: structuredData.unwrap_or_default(),
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmHtmlMetadata {
<WasmHtmlMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter)]
pub fn title(&self) -> Option<String> {
self.title.clone()
}
#[wasm_bindgen(setter)]
pub fn set_title(&mut self, value: Option<String>) {
self.title = value;
}
#[wasm_bindgen(getter)]
pub fn description(&self) -> Option<String> {
self.description.clone()
}
#[wasm_bindgen(setter)]
pub fn set_description(&mut self, value: Option<String>) {
self.description = value;
}
#[wasm_bindgen(getter)]
pub fn keywords(&self) -> Vec<String> {
self.keywords.clone()
}
#[wasm_bindgen(setter)]
pub fn set_keywords(&mut self, value: Vec<String>) {
self.keywords = value;
}
#[wasm_bindgen(getter)]
pub fn author(&self) -> Option<String> {
self.author.clone()
}
#[wasm_bindgen(setter)]
pub fn set_author(&mut self, value: Option<String>) {
self.author = value;
}
#[wasm_bindgen(getter, js_name = "canonicalUrl")]
pub fn canonical_url(&self) -> Option<String> {
self.canonical_url.clone()
}
#[wasm_bindgen(setter, js_name = "canonicalUrl")]
pub fn set_canonical_url(&mut self, value: Option<String>) {
self.canonical_url = value;
}
#[wasm_bindgen(getter, js_name = "baseHref")]
pub fn base_href(&self) -> Option<String> {
self.base_href.clone()
}
#[wasm_bindgen(setter, js_name = "baseHref")]
pub fn set_base_href(&mut self, value: Option<String>) {
self.base_href = value;
}
#[wasm_bindgen(getter)]
pub fn language(&self) -> Option<String> {
self.language.clone()
}
#[wasm_bindgen(setter)]
pub fn set_language(&mut self, value: Option<String>) {
self.language = value;
}
#[wasm_bindgen(getter, js_name = "textDirection")]
pub fn text_direction(&self) -> Option<String> {
self.text_direction.map(|v| v.to_api_str().to_owned())
}
#[wasm_bindgen(setter, js_name = "textDirection")]
pub fn set_text_direction(&mut self, value: Option<WasmTextDirection>) {
self.text_direction = value;
}
#[wasm_bindgen(getter, js_name = "openGraph")]
pub fn open_graph(&self) -> JsValue {
self.open_graph.clone()
}
#[wasm_bindgen(setter, js_name = "openGraph")]
pub fn set_open_graph(&mut self, value: JsValue) {
self.open_graph = value;
}
#[wasm_bindgen(getter, js_name = "twitterCard")]
pub fn twitter_card(&self) -> JsValue {
self.twitter_card.clone()
}
#[wasm_bindgen(setter, js_name = "twitterCard")]
pub fn set_twitter_card(&mut self, value: JsValue) {
self.twitter_card = value;
}
#[wasm_bindgen(getter, js_name = "metaTags")]
pub fn meta_tags(&self) -> JsValue {
self.meta_tags.clone()
}
#[wasm_bindgen(setter, js_name = "metaTags")]
pub fn set_meta_tags(&mut self, value: JsValue) {
self.meta_tags = value;
}
#[wasm_bindgen(getter)]
pub fn headers(&self) -> Vec<WasmHeaderMetadata> {
self.headers.clone()
}
#[wasm_bindgen(setter)]
pub fn set_headers(&mut self, value: Vec<WasmHeaderMetadata>) {
self.headers = value;
}
#[wasm_bindgen(getter)]
pub fn links(&self) -> Vec<WasmLinkMetadata> {
self.links.clone()
}
#[wasm_bindgen(setter)]
pub fn set_links(&mut self, value: Vec<WasmLinkMetadata>) {
self.links = value;
}
#[wasm_bindgen(getter)]
pub fn images(&self) -> Vec<WasmImageMetadataType> {
self.images.clone()
}
#[wasm_bindgen(setter)]
pub fn set_images(&mut self, value: Vec<WasmImageMetadataType>) {
self.images = value;
}
#[wasm_bindgen(getter, js_name = "structuredData")]
pub fn structured_data(&self) -> Vec<WasmStructuredData> {
self.structured_data.clone()
}
#[wasm_bindgen(setter, js_name = "structuredData")]
pub fn set_structured_data(&mut self, value: Vec<WasmStructuredData>) {
self.structured_data = value;
}
}
/// OCR processing metadata.
///
/// Captures information about OCR processing configuration and results.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrMetadata {
// Language string; empty when the constructor receives none.
language: String,
// PSM value; 0 when the constructor receives none.
// NOTE(review): presumably Tesseract's page segmentation mode — confirm.
psm: i32,
// Output format label; JS property `outputFormat`. Empty when omitted.
output_format: String,
// Table count; JS property `tableCount`. Defaults to 0.
table_count: u32,
// Optional table row count; JS property `tableRows`.
table_rows: Option<u32>,
// Optional table column count; JS property `tableCols`.
table_cols: Option<u32>,
}
#[wasm_bindgen]
impl WasmOcrMetadata {
    /// Builds the metadata; omitted `language`, `psm`, `outputFormat` and
    /// `tableCount` fall back to their `Default` values.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        language: Option<String>,
        psm: Option<i32>,
        outputFormat: Option<String>,
        tableCount: Option<u32>,
        tableRows: Option<u32>,
        tableCols: Option<u32>,
    ) -> WasmOcrMetadata {
        let language = language.unwrap_or_default();
        let psm = psm.unwrap_or_default();
        let output_format = outputFormat.unwrap_or_default();
        let table_count = tableCount.unwrap_or_default();
        WasmOcrMetadata {
            language,
            psm,
            output_format,
            table_count,
            table_rows: tableRows,
            table_cols: tableCols,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrMetadata {
        ::core::default::Default::default()
    }
    /// Returns a copy of the language string.
    #[wasm_bindgen(getter)]
    pub fn language(&self) -> String {
        String::from(self.language.as_str())
    }
    /// Replaces the language string.
    #[wasm_bindgen(setter)]
    pub fn set_language(&mut self, value: String) {
        self.language = value;
    }
    /// Returns the stored PSM value.
    #[wasm_bindgen(getter)]
    pub fn psm(&self) -> i32 {
        self.psm
    }
    /// Replaces the stored PSM value.
    #[wasm_bindgen(setter)]
    pub fn set_psm(&mut self, value: i32) {
        self.psm = value;
    }
    /// Returns a copy of the output format label.
    #[wasm_bindgen(getter, js_name = "outputFormat")]
    pub fn output_format(&self) -> String {
        String::from(self.output_format.as_str())
    }
    /// Replaces the output format label.
    #[wasm_bindgen(setter, js_name = "outputFormat")]
    pub fn set_output_format(&mut self, value: String) {
        self.output_format = value;
    }
    /// Returns the stored table count.
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> u32 {
        self.table_count
    }
    /// Replaces the stored table count.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: u32) {
        self.table_count = value;
    }
    /// Returns the stored table row count, if any.
    #[wasm_bindgen(getter, js_name = "tableRows")]
    pub fn table_rows(&self) -> Option<u32> {
        self.table_rows
    }
    /// Replaces the stored table row count.
    #[wasm_bindgen(setter, js_name = "tableRows")]
    pub fn set_table_rows(&mut self, value: Option<u32>) {
        self.table_rows = value;
    }
    /// Returns the stored table column count, if any.
    #[wasm_bindgen(getter, js_name = "tableCols")]
    pub fn table_cols(&self) -> Option<u32> {
        self.table_cols
    }
    /// Replaces the stored table column count.
    #[wasm_bindgen(setter, js_name = "tableCols")]
    pub fn set_table_cols(&mut self, value: Option<u32>) {
        self.table_cols = value;
    }
}
/// Error metadata (for batch operations).
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmErrorMetadata {
// Error type label; JS property `errorType`.
error_type: String,
// Error message text.
message: String,
}
#[wasm_bindgen]
impl WasmErrorMetadata {
    /// Builds the metadata from an error type label and a message.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(errorType: String, message: String) -> WasmErrorMetadata {
        let error_type = errorType;
        WasmErrorMetadata {
            error_type,
            message,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmErrorMetadata {
        ::core::default::Default::default()
    }
    /// Returns a copy of the error type label.
    #[wasm_bindgen(getter, js_name = "errorType")]
    pub fn error_type(&self) -> String {
        String::from(self.error_type.as_str())
    }
    /// Replaces the error type label.
    #[wasm_bindgen(setter, js_name = "errorType")]
    pub fn set_error_type(&mut self, value: String) {
        self.error_type = value;
    }
    /// Returns a copy of the error message.
    #[wasm_bindgen(getter)]
    pub fn message(&self) -> String {
        String::from(self.message.as_str())
    }
    /// Replaces the error message.
    #[wasm_bindgen(setter)]
    pub fn set_message(&mut self, value: String) {
        self.message = value;
    }
}
/// PowerPoint presentation metadata.
///
/// Extracted from PPTX files containing slide counts and presentation details.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPptxMetadata {
// Slide count; JS property `slideCount`. Defaults to 0.
slide_count: u32,
// Slide names; JS property `slideNames`. Defaults to empty.
slide_names: Vec<String>,
// Optional image count; JS property `imageCount`.
image_count: Option<u32>,
// Optional table count; JS property `tableCount`.
table_count: Option<u32>,
}
#[wasm_bindgen]
impl WasmPptxMetadata {
    /// Builds the metadata; omitted `slideCount` and `slideNames` fall back to
    /// `0` and an empty list, the optional counts are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        slideCount: Option<u32>,
        slideNames: Option<Vec<String>>,
        imageCount: Option<u32>,
        tableCount: Option<u32>,
    ) -> WasmPptxMetadata {
        let slide_count = slideCount.unwrap_or_default();
        let slide_names = slideNames.unwrap_or_default();
        WasmPptxMetadata {
            slide_count,
            slide_names,
            image_count: imageCount,
            table_count: tableCount,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPptxMetadata {
        ::core::default::Default::default()
    }
    /// Returns the stored slide count.
    #[wasm_bindgen(getter, js_name = "slideCount")]
    pub fn slide_count(&self) -> u32 {
        self.slide_count
    }
    /// Replaces the stored slide count.
    #[wasm_bindgen(setter, js_name = "slideCount")]
    pub fn set_slide_count(&mut self, value: u32) {
        self.slide_count = value;
    }
    /// Returns a copy of the slide names.
    #[wasm_bindgen(getter, js_name = "slideNames")]
    pub fn slide_names(&self) -> Vec<String> {
        self.slide_names.to_vec()
    }
    /// Replaces the slide names.
    #[wasm_bindgen(setter, js_name = "slideNames")]
    pub fn set_slide_names(&mut self, value: Vec<String>) {
        self.slide_names = value;
    }
    /// Returns the stored image count, if any.
    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> Option<u32> {
        self.image_count
    }
    /// Replaces the stored image count.
    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: Option<u32>) {
        self.image_count = value;
    }
    /// Returns the stored table count, if any.
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> Option<u32> {
        self.table_count
    }
    /// Replaces the stored table count.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: Option<u32>) {
        self.table_count = value;
    }
}
/// CSV/TSV file metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCsvMetadata {
// Row count; JS property `rowCount`. Defaults to 0.
row_count: u32,
// Column count; JS property `columnCount`. Defaults to 0.
column_count: u32,
// Optional delimiter, stored as a string.
delimiter: Option<String>,
// Header flag; JS property `hasHeader`. `false` when the constructor receives none.
has_header: bool,
// Optional column type labels; JS property `columnTypes`.
column_types: Option<Vec<String>>,
}
#[wasm_bindgen]
impl WasmCsvMetadata {
    /// Builds the metadata; omitted `rowCount`, `columnCount` and `hasHeader`
    /// fall back to `0`, `0` and `false`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        rowCount: Option<u32>,
        columnCount: Option<u32>,
        hasHeader: Option<bool>,
        delimiter: Option<String>,
        columnTypes: Option<Vec<String>>,
    ) -> WasmCsvMetadata {
        let row_count = rowCount.unwrap_or_default();
        let column_count = columnCount.unwrap_or_default();
        let has_header = hasHeader.unwrap_or_default();
        WasmCsvMetadata {
            row_count,
            column_count,
            delimiter,
            has_header,
            column_types: columnTypes,
        }
    }
    /// Returns the value produced by the derived `Default` implementation.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCsvMetadata {
        ::core::default::Default::default()
    }
    /// Returns the stored row count.
    #[wasm_bindgen(getter, js_name = "rowCount")]
    pub fn row_count(&self) -> u32 {
        self.row_count
    }
    /// Replaces the stored row count.
    #[wasm_bindgen(setter, js_name = "rowCount")]
    pub fn set_row_count(&mut self, value: u32) {
        self.row_count = value;
    }
    /// Returns the stored column count.
    #[wasm_bindgen(getter, js_name = "columnCount")]
    pub fn column_count(&self) -> u32 {
        self.column_count
    }
    /// Replaces the stored column count.
    #[wasm_bindgen(setter, js_name = "columnCount")]
    pub fn set_column_count(&mut self, value: u32) {
        self.column_count = value;
    }
    /// Returns a copy of the delimiter, if any.
    #[wasm_bindgen(getter)]
    pub fn delimiter(&self) -> Option<String> {
        self.delimiter.as_deref().map(str::to_owned)
    }
    /// Replaces the delimiter.
    #[wasm_bindgen(setter)]
    pub fn set_delimiter(&mut self, value: Option<String>) {
        self.delimiter = value;
    }
    /// Returns the stored header flag.
    #[wasm_bindgen(getter, js_name = "hasHeader")]
    pub fn has_header(&self) -> bool {
        self.has_header
    }
    /// Replaces the stored header flag.
    #[wasm_bindgen(setter, js_name = "hasHeader")]
    pub fn set_has_header(&mut self, value: bool) {
        self.has_header = value;
    }
    /// Returns a copy of the column type labels, if any.
    #[wasm_bindgen(getter, js_name = "columnTypes")]
    pub fn column_types(&self) -> Option<Vec<String>> {
        self.column_types.as_deref().map(<[String]>::to_vec)
    }
    /// Replaces the column type labels.
    #[wasm_bindgen(setter, js_name = "columnTypes")]
    pub fn set_column_types(&mut self, value: Option<Vec<String>>) {
        self.column_types = value;
    }
}
/// BibTeX bibliography metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmBibtexMetadata {
// Entry count; JS property `entryCount`. Defaults to 0.
entry_count: usize,
// Citation keys; JS property `citationKeys`. Defaults to empty.
citation_keys: Vec<String>,
// Author strings. Defaults to empty.
authors: Vec<String>,
// Optional year range; JS property `yearRange`.
year_range: Option<WasmYearRange>,
// Optional entry types held as an opaque JS value; JS property `entryTypes`.
entry_types: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmBibtexMetadata {
#[allow(non_snake_case)]
#[wasm_bindgen(constructor)]
pub fn new(
entryCount: Option<usize>,
citationKeys: Option<Vec<String>>,
authors: Option<Vec<String>>,
yearRange: Option<WasmYearRange>,
entryTypes: Option<JsValue>,
) -> WasmBibtexMetadata {
WasmBibtexMetadata {
entry_count: entryCount.unwrap_or_default(),
citation_keys: citationKeys.unwrap_or_default(),
authors: authors.unwrap_or_default(),
year_range: yearRange,
entry_types: entryTypes,
}
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmBibtexMetadata {
<WasmBibtexMetadata as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "entryCount")]
pub fn entry_count(&self) -> usize {
self.entry_count
}
#[wasm_bindgen(setter, js_name = "entryCount")]
pub fn set_entry_count(&mut self, value: usize) {
self.entry_count = value;
}
#[wasm_bindgen(getter, js_name = "citationKeys")]
pub fn citation_keys(&self) -> Vec<String> {
self.citation_keys.clone()
}
#[wasm_bindgen(setter, js_name = "citationKeys")]
pub fn set_citation_keys(&mut self, value: Vec<String>) {
self.citation_keys = value;
}
#[wasm_bindgen(getter)]
pub fn authors(&self) -> Vec<String> {
self.authors.clone()
}
#[wasm_bindgen(setter)]
pub fn set_authors(&mut self, value: Vec<String>) {
self.authors = value;
}
#[wasm_bindgen(getter, js_name = "yearRange")]
pub fn year_range(&self) -> Option<WasmYearRange> {
self.year_range.clone()
}
#[wasm_bindgen(setter, js_name = "yearRange")]
pub fn set_year_range(&mut self, value: Option<WasmYearRange>) {
self.year_range = value;
}
#[wasm_bindgen(getter, js_name = "entryTypes")]
pub fn entry_types(&self) -> Option<JsValue> {
self.entry_types.clone()
}
#[wasm_bindgen(setter, js_name = "entryTypes")]
pub fn set_entry_types(&mut self, value: Option<JsValue>) {
self.entry_types = value;
}
}
/// Citation file metadata (RIS, PubMed, EndNote).
///
/// All fields are private and reached from JS through the getter/setter
/// pairs on the `impl` block.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCitationMetadata {
    citation_count: usize,
    format: Option<String>,
    authors: Vec<String>,
    year_range: Option<WasmYearRange>,
    dois: Vec<String>,
    keywords: Vec<String>,
}
#[wasm_bindgen]
impl WasmCitationMetadata {
    /// Builds a `WasmCitationMetadata` from optional JS arguments.
    ///
    /// `citationCount`, `authors`, `dois` and `keywords` fall back to their
    /// `Default` values when omitted; `format` and `yearRange` stay optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        citationCount: Option<usize>,
        authors: Option<Vec<String>>,
        dois: Option<Vec<String>>,
        keywords: Option<Vec<String>>,
        format: Option<String>,
        yearRange: Option<WasmYearRange>,
    ) -> WasmCitationMetadata {
        let citation_count = citationCount.unwrap_or_default();
        let authors = authors.unwrap_or_default();
        let dois = dois.unwrap_or_default();
        let keywords = keywords.unwrap_or_default();
        Self {
            citation_count,
            authors,
            dois,
            keywords,
            format,
            year_range: yearRange,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCitationMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `citationCount` property.
    #[wasm_bindgen(getter, js_name = "citationCount")]
    pub fn citation_count(&self) -> usize {
        self.citation_count
    }
    /// Overwrites the `citationCount` property.
    #[wasm_bindgen(setter, js_name = "citationCount")]
    pub fn set_citation_count(&mut self, value: usize) {
        self.citation_count = value;
    }
    /// Returns a copy of the optional `format` string.
    #[wasm_bindgen(getter)]
    pub fn format(&self) -> Option<String> {
        self.format.clone()
    }
    /// Replaces the optional `format` string.
    #[wasm_bindgen(setter)]
    pub fn set_format(&mut self, value: Option<String>) {
        self.format = value;
    }
    /// Returns a copy of the `authors` list.
    #[wasm_bindgen(getter)]
    pub fn authors(&self) -> Vec<String> {
        self.authors.clone()
    }
    /// Replaces the `authors` list.
    #[wasm_bindgen(setter)]
    pub fn set_authors(&mut self, value: Vec<String>) {
        self.authors = value;
    }
    /// Returns a copy of the optional `yearRange`.
    #[wasm_bindgen(getter, js_name = "yearRange")]
    pub fn year_range(&self) -> Option<WasmYearRange> {
        self.year_range.clone()
    }
    /// Replaces the optional `yearRange`.
    #[wasm_bindgen(setter, js_name = "yearRange")]
    pub fn set_year_range(&mut self, value: Option<WasmYearRange>) {
        self.year_range = value;
    }
    /// Returns a copy of the `dois` list.
    #[wasm_bindgen(getter)]
    pub fn dois(&self) -> Vec<String> {
        self.dois.clone()
    }
    /// Replaces the `dois` list.
    #[wasm_bindgen(setter)]
    pub fn set_dois(&mut self, value: Vec<String>) {
        self.dois = value;
    }
    /// Returns a copy of the `keywords` list.
    #[wasm_bindgen(getter)]
    pub fn keywords(&self) -> Vec<String> {
        self.keywords.clone()
    }
    /// Replaces the `keywords` list.
    #[wasm_bindgen(setter)]
    pub fn set_keywords(&mut self, value: Vec<String>) {
        self.keywords = value;
    }
}
/// Year range for bibliographic metadata.
///
/// Holds optional `min`/`max` bounds alongside a list of individual years.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmYearRange {
    min: Option<u32>,
    max: Option<u32>,
    years: Vec<u32>,
}
#[wasm_bindgen]
impl WasmYearRange {
    /// Builds a `WasmYearRange`; every argument is stored unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(years: Vec<u32>, min: Option<u32>, max: Option<u32>) -> WasmYearRange {
        Self { years, min, max }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmYearRange {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the optional `min` bound.
    #[wasm_bindgen(getter)]
    pub fn min(&self) -> Option<u32> {
        self.min
    }
    /// Overwrites the optional `min` bound.
    #[wasm_bindgen(setter)]
    pub fn set_min(&mut self, value: Option<u32>) {
        self.min = value;
    }
    /// Reads the optional `max` bound.
    #[wasm_bindgen(getter)]
    pub fn max(&self) -> Option<u32> {
        self.max
    }
    /// Overwrites the optional `max` bound.
    #[wasm_bindgen(setter)]
    pub fn set_max(&mut self, value: Option<u32>) {
        self.max = value;
    }
    /// Returns a copy of the `years` list.
    #[wasm_bindgen(getter)]
    pub fn years(&self) -> Vec<u32> {
        self.years.clone()
    }
    /// Replaces the `years` list.
    #[wasm_bindgen(setter)]
    pub fn set_years(&mut self, value: Vec<u32>) {
        self.years = value;
    }
}
/// FictionBook (FB2) metadata.
///
/// Carries genre and sequence lists plus an optional annotation string.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmFictionBookMetadata {
    genres: Vec<String>,
    sequences: Vec<String>,
    annotation: Option<String>,
}
#[wasm_bindgen]
impl WasmFictionBookMetadata {
    /// Builds a `WasmFictionBookMetadata` from optional JS arguments.
    ///
    /// Omitted `genres` / `sequences` become empty lists; `annotation` stays
    /// optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        genres: Option<Vec<String>>,
        sequences: Option<Vec<String>>,
        annotation: Option<String>,
    ) -> WasmFictionBookMetadata {
        let genres = genres.unwrap_or_default();
        let sequences = sequences.unwrap_or_default();
        Self {
            genres,
            sequences,
            annotation,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFictionBookMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the `genres` list.
    #[wasm_bindgen(getter)]
    pub fn genres(&self) -> Vec<String> {
        self.genres.clone()
    }
    /// Replaces the `genres` list.
    #[wasm_bindgen(setter)]
    pub fn set_genres(&mut self, value: Vec<String>) {
        self.genres = value;
    }
    /// Returns a copy of the `sequences` list.
    #[wasm_bindgen(getter)]
    pub fn sequences(&self) -> Vec<String> {
        self.sequences.clone()
    }
    /// Replaces the `sequences` list.
    #[wasm_bindgen(setter)]
    pub fn set_sequences(&mut self, value: Vec<String>) {
        self.sequences = value;
    }
    /// Returns a copy of the optional `annotation`.
    #[wasm_bindgen(getter)]
    pub fn annotation(&self) -> Option<String> {
        self.annotation.clone()
    }
    /// Replaces the optional `annotation`.
    #[wasm_bindgen(setter)]
    pub fn set_annotation(&mut self, value: Option<String>) {
        self.annotation = value;
    }
}
/// dBASE (DBF) file metadata.
///
/// Stores record and field counts together with a list of
/// `WasmDbfFieldInfo` descriptors.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDbfMetadata {
    record_count: usize,
    field_count: usize,
    fields: Vec<WasmDbfFieldInfo>,
}
#[wasm_bindgen]
impl WasmDbfMetadata {
    /// Builds a `WasmDbfMetadata`; every omitted argument falls back to its
    /// `Default` value (zero counts, empty field list).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        recordCount: Option<usize>,
        fieldCount: Option<usize>,
        fields: Option<Vec<WasmDbfFieldInfo>>,
    ) -> WasmDbfMetadata {
        let record_count = recordCount.unwrap_or_default();
        let field_count = fieldCount.unwrap_or_default();
        let fields = fields.unwrap_or_default();
        Self {
            record_count,
            field_count,
            fields,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDbfMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `recordCount` property.
    #[wasm_bindgen(getter, js_name = "recordCount")]
    pub fn record_count(&self) -> usize {
        self.record_count
    }
    /// Overwrites the `recordCount` property.
    #[wasm_bindgen(setter, js_name = "recordCount")]
    pub fn set_record_count(&mut self, value: usize) {
        self.record_count = value;
    }
    /// Reads the `fieldCount` property.
    #[wasm_bindgen(getter, js_name = "fieldCount")]
    pub fn field_count(&self) -> usize {
        self.field_count
    }
    /// Overwrites the `fieldCount` property.
    #[wasm_bindgen(setter, js_name = "fieldCount")]
    pub fn set_field_count(&mut self, value: usize) {
        self.field_count = value;
    }
    /// Returns a copy of the `fields` list.
    #[wasm_bindgen(getter)]
    pub fn fields(&self) -> Vec<WasmDbfFieldInfo> {
        self.fields.clone()
    }
    /// Replaces the `fields` list.
    #[wasm_bindgen(setter)]
    pub fn set_fields(&mut self, value: Vec<WasmDbfFieldInfo>) {
        self.fields = value;
    }
}
/// dBASE field information.
///
/// A name / type pair, both held as plain strings.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDbfFieldInfo {
    name: String,
    field_type: String,
}
#[wasm_bindgen]
impl WasmDbfFieldInfo {
    /// Builds a `WasmDbfFieldInfo`; both arguments are required and stored
    /// unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(name: String, fieldType: String) -> WasmDbfFieldInfo {
        let field_type = fieldType;
        Self { name, field_type }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDbfFieldInfo {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the `name` string.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        self.name.clone()
    }
    /// Replaces the `name` string.
    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: String) {
        self.name = value;
    }
    /// Returns a copy of the `fieldType` string.
    #[wasm_bindgen(getter, js_name = "fieldType")]
    pub fn field_type(&self) -> String {
        self.field_type.clone()
    }
    /// Replaces the `fieldType` string.
    #[wasm_bindgen(setter, js_name = "fieldType")]
    pub fn set_field_type(&mut self, value: String) {
        self.field_type = value;
    }
}
/// JATS (Journal Article Tag Suite) metadata.
///
/// `history_dates` is kept as an opaque `JsValue`; its shape is not defined
/// by this binding.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmJatsMetadata {
    copyright: Option<String>,
    license: Option<String>,
    history_dates: JsValue,
    contributor_roles: Vec<WasmContributorRole>,
}
#[wasm_bindgen]
impl WasmJatsMetadata {
    /// Builds a `WasmJatsMetadata` from optional JS arguments.
    ///
    /// An omitted `historyDates` becomes `JsValue::default()` and an omitted
    /// `contributorRoles` becomes an empty list; `copyright` and `license`
    /// stay optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        historyDates: Option<JsValue>,
        contributorRoles: Option<Vec<WasmContributorRole>>,
        copyright: Option<String>,
        license: Option<String>,
    ) -> WasmJatsMetadata {
        let history_dates = historyDates.unwrap_or_default();
        let contributor_roles = contributorRoles.unwrap_or_default();
        Self {
            history_dates,
            contributor_roles,
            copyright,
            license,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmJatsMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the optional `copyright` string.
    #[wasm_bindgen(getter)]
    pub fn copyright(&self) -> Option<String> {
        self.copyright.clone()
    }
    /// Replaces the optional `copyright` string.
    #[wasm_bindgen(setter)]
    pub fn set_copyright(&mut self, value: Option<String>) {
        self.copyright = value;
    }
    /// Returns a copy of the optional `license` string.
    #[wasm_bindgen(getter)]
    pub fn license(&self) -> Option<String> {
        self.license.clone()
    }
    /// Replaces the optional `license` string.
    #[wasm_bindgen(setter)]
    pub fn set_license(&mut self, value: Option<String>) {
        self.license = value;
    }
    /// Returns a clone of the `historyDates` JS value.
    #[wasm_bindgen(getter, js_name = "historyDates")]
    pub fn history_dates(&self) -> JsValue {
        self.history_dates.clone()
    }
    /// Replaces the `historyDates` JS value.
    #[wasm_bindgen(setter, js_name = "historyDates")]
    pub fn set_history_dates(&mut self, value: JsValue) {
        self.history_dates = value;
    }
    /// Returns a copy of the `contributorRoles` list.
    #[wasm_bindgen(getter, js_name = "contributorRoles")]
    pub fn contributor_roles(&self) -> Vec<WasmContributorRole> {
        self.contributor_roles.clone()
    }
    /// Replaces the `contributorRoles` list.
    #[wasm_bindgen(setter, js_name = "contributorRoles")]
    pub fn set_contributor_roles(&mut self, value: Vec<WasmContributorRole>) {
        self.contributor_roles = value;
    }
}
/// JATS contributor with role.
///
/// The `name` is mandatory; the `role` may be absent.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmContributorRole {
    name: String,
    role: Option<String>,
}
#[wasm_bindgen]
impl WasmContributorRole {
    /// Builds a `WasmContributorRole`; both arguments are stored unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(name: String, role: Option<String>) -> WasmContributorRole {
        Self { role, name }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmContributorRole {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the `name` string.
    #[wasm_bindgen(getter)]
    pub fn name(&self) -> String {
        self.name.clone()
    }
    /// Replaces the `name` string.
    #[wasm_bindgen(setter)]
    pub fn set_name(&mut self, value: String) {
        self.name = value;
    }
    /// Returns a copy of the optional `role` string.
    #[wasm_bindgen(getter)]
    pub fn role(&self) -> Option<String> {
        self.role.clone()
    }
    /// Replaces the optional `role` string.
    #[wasm_bindgen(setter)]
    pub fn set_role(&mut self, value: Option<String>) {
        self.role = value;
    }
}
/// EPUB metadata (Dublin Core extensions).
///
/// Every field is an optional string.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmEpubMetadata {
    coverage: Option<String>,
    dc_format: Option<String>,
    relation: Option<String>,
    source: Option<String>,
    dc_type: Option<String>,
    cover_image: Option<String>,
}
#[wasm_bindgen]
impl WasmEpubMetadata {
    /// Builds a `WasmEpubMetadata`; each optional argument is stored
    /// unchanged in the field of the matching name.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        coverage: Option<String>,
        dcFormat: Option<String>,
        relation: Option<String>,
        source: Option<String>,
        dcType: Option<String>,
        coverImage: Option<String>,
    ) -> WasmEpubMetadata {
        let (dc_format, dc_type, cover_image) = (dcFormat, dcType, coverImage);
        Self {
            coverage,
            dc_format,
            relation,
            source,
            dc_type,
            cover_image,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEpubMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the optional `coverage` string.
    #[wasm_bindgen(getter)]
    pub fn coverage(&self) -> Option<String> {
        self.coverage.clone()
    }
    /// Replaces the optional `coverage` string.
    #[wasm_bindgen(setter)]
    pub fn set_coverage(&mut self, value: Option<String>) {
        self.coverage = value;
    }
    /// Returns a copy of the optional `dcFormat` string.
    #[wasm_bindgen(getter, js_name = "dcFormat")]
    pub fn dc_format(&self) -> Option<String> {
        self.dc_format.clone()
    }
    /// Replaces the optional `dcFormat` string.
    #[wasm_bindgen(setter, js_name = "dcFormat")]
    pub fn set_dc_format(&mut self, value: Option<String>) {
        self.dc_format = value;
    }
    /// Returns a copy of the optional `relation` string.
    #[wasm_bindgen(getter)]
    pub fn relation(&self) -> Option<String> {
        self.relation.clone()
    }
    /// Replaces the optional `relation` string.
    #[wasm_bindgen(setter)]
    pub fn set_relation(&mut self, value: Option<String>) {
        self.relation = value;
    }
    /// Returns a copy of the optional `source` string.
    #[wasm_bindgen(getter)]
    pub fn source(&self) -> Option<String> {
        self.source.clone()
    }
    /// Replaces the optional `source` string.
    #[wasm_bindgen(setter)]
    pub fn set_source(&mut self, value: Option<String>) {
        self.source = value;
    }
    /// Returns a copy of the optional `dcType` string.
    #[wasm_bindgen(getter, js_name = "dcType")]
    pub fn dc_type(&self) -> Option<String> {
        self.dc_type.clone()
    }
    /// Replaces the optional `dcType` string.
    #[wasm_bindgen(setter, js_name = "dcType")]
    pub fn set_dc_type(&mut self, value: Option<String>) {
        self.dc_type = value;
    }
    /// Returns a copy of the optional `coverImage` string.
    #[wasm_bindgen(getter, js_name = "coverImage")]
    pub fn cover_image(&self) -> Option<String> {
        self.cover_image.clone()
    }
    /// Replaces the optional `coverImage` string.
    #[wasm_bindgen(setter, js_name = "coverImage")]
    pub fn set_cover_image(&mut self, value: Option<String>) {
        self.cover_image = value;
    }
}
/// Outlook PST archive metadata.
///
/// Currently a single counter, `message_count`.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPstMetadata {
    message_count: usize,
}
#[wasm_bindgen]
impl WasmPstMetadata {
    /// Builds a `WasmPstMetadata`; an omitted `messageCount` becomes `0`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(messageCount: Option<usize>) -> WasmPstMetadata {
        let message_count = messageCount.unwrap_or_default();
        Self { message_count }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPstMetadata {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `messageCount` property.
    #[wasm_bindgen(getter, js_name = "messageCount")]
    pub fn message_count(&self) -> usize {
        self.message_count
    }
    /// Overwrites the `messageCount` property.
    #[wasm_bindgen(setter, js_name = "messageCount")]
    pub fn set_message_count(&mut self, value: usize) {
        self.message_count = value;
    }
}
/// Confidence scores for an OCR element.
///
/// Detection confidence (how sure we are that text exists at this location)
/// is tracked separately from recognition confidence (how sure we are about
/// the text content itself). Only the detection score is optional.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrConfidence {
    detection: Option<f64>,
    recognition: f64,
}
#[wasm_bindgen]
impl WasmOcrConfidence {
    /// Builds a `WasmOcrConfidence`.
    ///
    /// An omitted `recognition` becomes `0.0` (the `f64` default);
    /// `detection` stays optional.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(recognition: Option<f64>, detection: Option<f64>) -> WasmOcrConfidence {
        let recognition = recognition.unwrap_or_default();
        Self {
            recognition,
            detection,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrConfidence {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the optional `detection` score.
    #[wasm_bindgen(getter)]
    pub fn detection(&self) -> Option<f64> {
        self.detection
    }
    /// Overwrites the optional `detection` score.
    #[wasm_bindgen(setter)]
    pub fn set_detection(&mut self, value: Option<f64>) {
        self.detection = value;
    }
    /// Reads the `recognition` score.
    #[wasm_bindgen(getter)]
    pub fn recognition(&self) -> f64 {
        self.recognition
    }
    /// Overwrites the `recognition` score.
    #[wasm_bindgen(setter)]
    pub fn set_recognition(&mut self, value: f64) {
        self.recognition = value;
    }
}
/// Rotation information for an OCR element.
///
/// An angle in degrees plus an optional confidence value.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrRotation {
    angle_degrees: f64,
    confidence: Option<f64>,
}
#[wasm_bindgen]
impl WasmOcrRotation {
    /// Builds a `WasmOcrRotation`; both arguments are stored unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(angleDegrees: f64, confidence: Option<f64>) -> WasmOcrRotation {
        let angle_degrees = angleDegrees;
        Self {
            angle_degrees,
            confidence,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrRotation {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `angleDegrees` property.
    #[wasm_bindgen(getter, js_name = "angleDegrees")]
    pub fn angle_degrees(&self) -> f64 {
        self.angle_degrees
    }
    /// Overwrites the `angleDegrees` property.
    #[wasm_bindgen(setter, js_name = "angleDegrees")]
    pub fn set_angle_degrees(&mut self, value: f64) {
        self.angle_degrees = value;
    }
    /// Reads the optional `confidence` value.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> Option<f64> {
        self.confidence
    }
    /// Overwrites the optional `confidence` value.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: Option<f64>) {
        self.confidence = value;
    }
}
/// A unified OCR element representing detected text with full metadata.
///
/// This is the primary type for structured OCR output, preserving all
/// information from both Tesseract and PaddleOCR backends.
///
/// `geometry` and `backend_metadata` are held as opaque `JsValue`s; their
/// shape is not defined by this binding.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrElement {
    text: String,
    geometry: JsValue,
    confidence: WasmOcrConfidence,
    level: WasmOcrElementLevel,
    rotation: Option<WasmOcrRotation>,
    page_number: u32,
    parent_id: Option<String>,
    backend_metadata: JsValue,
}
#[wasm_bindgen]
impl WasmOcrElement {
    /// Builds a `WasmOcrElement` from optional JS arguments.
    ///
    /// `text`, `geometry`, `confidence`, `level`, `pageNumber` and
    /// `backendMetadata` fall back to their `Default` values when omitted;
    /// `rotation` and `parentId` stay optional.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        text: Option<String>,
        geometry: Option<JsValue>,
        confidence: Option<WasmOcrConfidence>,
        level: Option<WasmOcrElementLevel>,
        pageNumber: Option<u32>,
        backendMetadata: Option<JsValue>,
        rotation: Option<WasmOcrRotation>,
        parentId: Option<String>,
    ) -> WasmOcrElement {
        let text = text.unwrap_or_default();
        let geometry = geometry.unwrap_or_default();
        let confidence = confidence.unwrap_or_default();
        let level = level.unwrap_or_default();
        let page_number = pageNumber.unwrap_or_default();
        let backend_metadata = backendMetadata.unwrap_or_default();
        Self {
            text,
            geometry,
            confidence,
            level,
            page_number,
            backend_metadata,
            rotation,
            parent_id: parentId,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrElement {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the `text` string.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }
    /// Replaces the `text` string.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }
    /// Returns a clone of the `geometry` JS value.
    #[wasm_bindgen(getter)]
    pub fn geometry(&self) -> JsValue {
        self.geometry.clone()
    }
    /// Replaces the `geometry` JS value.
    #[wasm_bindgen(setter)]
    pub fn set_geometry(&mut self, value: JsValue) {
        self.geometry = value;
    }
    /// Returns a copy of the `confidence` scores.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> WasmOcrConfidence {
        self.confidence.clone()
    }
    /// Replaces the `confidence` scores.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: WasmOcrConfidence) {
        self.confidence = value;
    }
    /// Returns the level rendered through `to_api_str()` as an owned string.
    ///
    /// Note the asymmetry with the setter, which takes a
    /// `WasmOcrElementLevel` rather than a string.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> String {
        let label = self.level.to_api_str();
        label.to_owned()
    }
    /// Replaces the stored level.
    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: WasmOcrElementLevel) {
        self.level = value;
    }
    /// Returns a copy of the optional `rotation`.
    #[wasm_bindgen(getter)]
    pub fn rotation(&self) -> Option<WasmOcrRotation> {
        self.rotation.clone()
    }
    /// Replaces the optional `rotation`.
    #[wasm_bindgen(setter)]
    pub fn set_rotation(&mut self, value: Option<WasmOcrRotation>) {
        self.rotation = value;
    }
    /// Reads the `pageNumber` property.
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }
    /// Overwrites the `pageNumber` property.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }
    /// Returns a copy of the optional `parentId` string.
    #[wasm_bindgen(getter, js_name = "parentId")]
    pub fn parent_id(&self) -> Option<String> {
        self.parent_id.clone()
    }
    /// Replaces the optional `parentId` string.
    #[wasm_bindgen(setter, js_name = "parentId")]
    pub fn set_parent_id(&mut self, value: Option<String>) {
        self.parent_id = value;
    }
    /// Returns a clone of the `backendMetadata` JS value.
    #[wasm_bindgen(getter, js_name = "backendMetadata")]
    pub fn backend_metadata(&self) -> JsValue {
        self.backend_metadata.clone()
    }
    /// Replaces the `backendMetadata` JS value.
    #[wasm_bindgen(setter, js_name = "backendMetadata")]
    pub fn set_backend_metadata(&mut self, value: JsValue) {
        self.backend_metadata = value;
    }
}
/// Configuration for OCR element extraction.
///
/// Controls how OCR elements are extracted and filtered.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmOcrElementConfig {
    include_elements: bool,
    min_level: WasmOcrElementLevel,
    min_confidence: f64,
    build_hierarchy: bool,
}
#[wasm_bindgen]
impl WasmOcrElementConfig {
    /// Builds a `WasmOcrElementConfig`; every omitted argument falls back to
    /// the `Default` value of its type (`false`, `0.0`, or the default
    /// `WasmOcrElementLevel`).
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        includeElements: Option<bool>,
        minLevel: Option<WasmOcrElementLevel>,
        minConfidence: Option<f64>,
        buildHierarchy: Option<bool>,
    ) -> WasmOcrElementConfig {
        let include_elements = includeElements.unwrap_or_default();
        let min_level = minLevel.unwrap_or_default();
        let min_confidence = minConfidence.unwrap_or_default();
        let build_hierarchy = buildHierarchy.unwrap_or_default();
        Self {
            include_elements,
            min_level,
            min_confidence,
            build_hierarchy,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrElementConfig {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `includeElements` flag.
    #[wasm_bindgen(getter, js_name = "includeElements")]
    pub fn include_elements(&self) -> bool {
        self.include_elements
    }
    /// Overwrites the `includeElements` flag.
    #[wasm_bindgen(setter, js_name = "includeElements")]
    pub fn set_include_elements(&mut self, value: bool) {
        self.include_elements = value;
    }
    /// Returns the minimum level rendered through `to_api_str()` as an owned
    /// string.
    ///
    /// Note the asymmetry with the setter, which takes a
    /// `WasmOcrElementLevel` rather than a string.
    #[wasm_bindgen(getter, js_name = "minLevel")]
    pub fn min_level(&self) -> String {
        let label = self.min_level.to_api_str();
        label.to_owned()
    }
    /// Replaces the stored minimum level.
    #[wasm_bindgen(setter, js_name = "minLevel")]
    pub fn set_min_level(&mut self, value: WasmOcrElementLevel) {
        self.min_level = value;
    }
    /// Reads the `minConfidence` threshold.
    #[wasm_bindgen(getter, js_name = "minConfidence")]
    pub fn min_confidence(&self) -> f64 {
        self.min_confidence
    }
    /// Overwrites the `minConfidence` threshold.
    #[wasm_bindgen(setter, js_name = "minConfidence")]
    pub fn set_min_confidence(&mut self, value: f64) {
        self.min_confidence = value;
    }
    /// Reads the `buildHierarchy` flag.
    #[wasm_bindgen(getter, js_name = "buildHierarchy")]
    pub fn build_hierarchy(&self) -> bool {
        self.build_hierarchy
    }
    /// Overwrites the `buildHierarchy` flag.
    #[wasm_bindgen(setter, js_name = "buildHierarchy")]
    pub fn set_build_hierarchy(&mut self, value: bool) {
        self.build_hierarchy = value;
    }
}
/// Unified page structure for documents.
///
/// Supports different page types (PDF pages, PPTX slides, Excel sheets)
/// with per-page byte-offset boundaries (see `WasmPageBoundary`) for
/// chunk-to-page mapping.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageStructure {
    total_count: u32,
    unit_type: WasmPageUnitType,
    boundaries: Option<Vec<WasmPageBoundary>>,
    pages: Option<Vec<WasmPageInfo>>,
}
#[wasm_bindgen]
impl WasmPageStructure {
    /// Builds a `WasmPageStructure`; every argument is stored unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        totalCount: u32,
        unitType: WasmPageUnitType,
        boundaries: Option<Vec<WasmPageBoundary>>,
        pages: Option<Vec<WasmPageInfo>>,
    ) -> WasmPageStructure {
        let (total_count, unit_type) = (totalCount, unitType);
        Self {
            total_count,
            unit_type,
            boundaries,
            pages,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageStructure {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `totalCount` property.
    #[wasm_bindgen(getter, js_name = "totalCount")]
    pub fn total_count(&self) -> u32 {
        self.total_count
    }
    /// Overwrites the `totalCount` property.
    #[wasm_bindgen(setter, js_name = "totalCount")]
    pub fn set_total_count(&mut self, value: u32) {
        self.total_count = value;
    }
    /// Returns the unit type rendered through `to_api_str()` as an owned
    /// string.
    ///
    /// Note the asymmetry with the setter, which takes a `WasmPageUnitType`
    /// rather than a string.
    #[wasm_bindgen(getter, js_name = "unitType")]
    pub fn unit_type(&self) -> String {
        let label = self.unit_type.to_api_str();
        label.to_owned()
    }
    /// Replaces the stored unit type.
    #[wasm_bindgen(setter, js_name = "unitType")]
    pub fn set_unit_type(&mut self, value: WasmPageUnitType) {
        self.unit_type = value;
    }
    /// Returns the boundaries as a freshly built JS array, or `None` when no
    /// boundaries are stored.
    ///
    /// Each element is a clone of the stored `WasmPageBoundary` converted
    /// into a `JsValue`, pushed in the stored order.
    #[wasm_bindgen(getter)]
    pub fn boundaries(&self) -> Option<js_sys::Array> {
        let stored = self.boundaries.as_ref()?;
        let arr = js_sys::Array::new();
        stored.iter().cloned().map(JsValue::from).for_each(|entry| {
            arr.push(&entry);
        });
        Some(arr)
    }
    /// Replaces the optional boundary list.
    #[wasm_bindgen(setter)]
    pub fn set_boundaries(&mut self, value: Option<Vec<WasmPageBoundary>>) {
        self.boundaries = value;
    }
    /// Returns the pages as a freshly built JS array, or `None` when no
    /// pages are stored.
    ///
    /// Each element is a clone of the stored `WasmPageInfo` converted into a
    /// `JsValue`, pushed in the stored order.
    #[wasm_bindgen(getter)]
    pub fn pages(&self) -> Option<js_sys::Array> {
        let stored = self.pages.as_ref()?;
        let arr = js_sys::Array::new();
        stored.iter().cloned().map(JsValue::from).for_each(|entry| {
            arr.push(&entry);
        });
        Some(arr)
    }
    /// Replaces the optional page list.
    #[wasm_bindgen(setter)]
    pub fn set_pages(&mut self, value: Option<Vec<WasmPageInfo>>) {
        self.pages = value;
    }
}
/// Byte offset boundary for a page.
///
/// Tracks where a specific page's content starts and ends in the main content
/// string, enabling mapping from byte positions to page numbers. Offsets are
/// described upstream as falling on valid UTF-8 character boundaries when the
/// content is built with standard `String` methods (`push_str`, `push`, etc.).
///
/// NOTE(review): this binding stores whatever offsets it is given and performs
/// no validation itself.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageBoundary {
    byte_start: usize,
    byte_end: usize,
    page_number: u32,
}
#[wasm_bindgen]
impl WasmPageBoundary {
    /// Builds a `WasmPageBoundary`; all three arguments are required and
    /// stored unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(byteStart: usize, byteEnd: usize, pageNumber: u32) -> WasmPageBoundary {
        let (byte_start, byte_end, page_number) = (byteStart, byteEnd, pageNumber);
        Self {
            byte_start,
            byte_end,
            page_number,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageBoundary {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `byteStart` offset.
    #[wasm_bindgen(getter, js_name = "byteStart")]
    pub fn byte_start(&self) -> usize {
        self.byte_start
    }
    /// Overwrites the `byteStart` offset.
    #[wasm_bindgen(setter, js_name = "byteStart")]
    pub fn set_byte_start(&mut self, value: usize) {
        self.byte_start = value;
    }
    /// Reads the `byteEnd` offset.
    #[wasm_bindgen(getter, js_name = "byteEnd")]
    pub fn byte_end(&self) -> usize {
        self.byte_end
    }
    /// Overwrites the `byteEnd` offset.
    #[wasm_bindgen(setter, js_name = "byteEnd")]
    pub fn set_byte_end(&mut self, value: usize) {
        self.byte_end = value;
    }
    /// Reads the `pageNumber` property.
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }
    /// Overwrites the `pageNumber` property.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }
}
/// Metadata for individual page/slide/sheet.
///
/// Captures per-page information including dimensions, content counts,
/// and visibility state (for presentations). Only `number` and
/// `has_vector_graphics` are mandatory; everything else is optional.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageInfo {
    number: u32,
    title: Option<String>,
    dimensions: Option<Vec<f64>>,
    image_count: Option<u32>,
    table_count: Option<u32>,
    hidden: Option<bool>,
    is_blank: Option<bool>,
    has_vector_graphics: bool,
}
#[wasm_bindgen]
impl WasmPageInfo {
    /// Builds a `WasmPageInfo`; every argument is stored unchanged in the
    /// field of the matching name.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        number: u32,
        hasVectorGraphics: bool,
        title: Option<String>,
        dimensions: Option<Vec<f64>>,
        imageCount: Option<u32>,
        tableCount: Option<u32>,
        hidden: Option<bool>,
        isBlank: Option<bool>,
    ) -> WasmPageInfo {
        Self {
            number,
            has_vector_graphics: hasVectorGraphics,
            title,
            dimensions,
            image_count: imageCount,
            table_count: tableCount,
            hidden,
            is_blank: isBlank,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageInfo {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `number` property.
    #[wasm_bindgen(getter)]
    pub fn number(&self) -> u32 {
        self.number
    }
    /// Overwrites the `number` property.
    #[wasm_bindgen(setter)]
    pub fn set_number(&mut self, value: u32) {
        self.number = value;
    }
    /// Returns a copy of the optional `title` string.
    #[wasm_bindgen(getter)]
    pub fn title(&self) -> Option<String> {
        self.title.clone()
    }
    /// Replaces the optional `title` string.
    #[wasm_bindgen(setter)]
    pub fn set_title(&mut self, value: Option<String>) {
        self.title = value;
    }
    /// Returns a copy of the optional `dimensions` list.
    #[wasm_bindgen(getter)]
    pub fn dimensions(&self) -> Option<Vec<f64>> {
        self.dimensions.clone()
    }
    /// Replaces the optional `dimensions` list.
    #[wasm_bindgen(setter)]
    pub fn set_dimensions(&mut self, value: Option<Vec<f64>>) {
        self.dimensions = value;
    }
    /// Reads the optional `imageCount`.
    #[wasm_bindgen(getter, js_name = "imageCount")]
    pub fn image_count(&self) -> Option<u32> {
        self.image_count
    }
    /// Overwrites the optional `imageCount`.
    #[wasm_bindgen(setter, js_name = "imageCount")]
    pub fn set_image_count(&mut self, value: Option<u32>) {
        self.image_count = value;
    }
    /// Reads the optional `tableCount`.
    #[wasm_bindgen(getter, js_name = "tableCount")]
    pub fn table_count(&self) -> Option<u32> {
        self.table_count
    }
    /// Overwrites the optional `tableCount`.
    #[wasm_bindgen(setter, js_name = "tableCount")]
    pub fn set_table_count(&mut self, value: Option<u32>) {
        self.table_count = value;
    }
    /// Reads the optional `hidden` flag.
    #[wasm_bindgen(getter)]
    pub fn hidden(&self) -> Option<bool> {
        self.hidden
    }
    /// Overwrites the optional `hidden` flag.
    #[wasm_bindgen(setter)]
    pub fn set_hidden(&mut self, value: Option<bool>) {
        self.hidden = value;
    }
    /// Reads the optional `isBlank` flag.
    #[wasm_bindgen(getter, js_name = "isBlank")]
    pub fn is_blank(&self) -> Option<bool> {
        self.is_blank
    }
    /// Overwrites the optional `isBlank` flag.
    #[wasm_bindgen(setter, js_name = "isBlank")]
    pub fn set_is_blank(&mut self, value: Option<bool>) {
        self.is_blank = value;
    }
    /// Reads the `hasVectorGraphics` flag.
    #[wasm_bindgen(getter, js_name = "hasVectorGraphics")]
    pub fn has_vector_graphics(&self) -> bool {
        self.has_vector_graphics
    }
    /// Overwrites the `hasVectorGraphics` flag.
    #[wasm_bindgen(setter, js_name = "hasVectorGraphics")]
    pub fn set_has_vector_graphics(&mut self, value: bool) {
        self.has_vector_graphics = value;
    }
}
/// Content for a single page/slide.
///
/// When page extraction is enabled, documents are split into per-page content
/// with associated tables and images mapped to each page.
///
/// # Performance
///
/// The upstream description of this type mentions Arc-wrapped tables and
/// images for memory efficiency:
/// - `Vec<Arc<Table>>` enables zero-copy sharing of table data
/// - `Vec<Arc<ExtractedImage>>` enables zero-copy sharing of image data
/// - Maintains exact JSON compatibility via custom Serialize/Deserialize
///
/// NOTE(review): this binding itself stores owned `Vec<WasmTable>` values and
/// `Vec<u32>` image indices, and its getters hand out clones, so the note
/// above presumably describes the wrapped core type rather than this
/// struct; confirm against the generator input.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageContent {
    page_number: u32,
    content: String,
    tables: Vec<WasmTable>,
    image_indices: Vec<u32>,
    hierarchy: Option<WasmPageHierarchy>,
    is_blank: Option<bool>,
    layout_regions: Option<Vec<WasmLayoutRegion>>,
    speaker_notes: Option<String>,
    section_name: Option<String>,
    sheet_name: Option<String>,
}
#[wasm_bindgen]
impl WasmPageContent {
    /// Builds a `WasmPageContent`; every argument is stored unchanged in the
    /// field of the matching name.
    #[allow(clippy::too_many_arguments)]
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        pageNumber: u32,
        content: String,
        tables: Vec<WasmTable>,
        imageIndices: Vec<u32>,
        hierarchy: Option<WasmPageHierarchy>,
        isBlank: Option<bool>,
        layoutRegions: Option<Vec<WasmLayoutRegion>>,
        speakerNotes: Option<String>,
        sectionName: Option<String>,
        sheetName: Option<String>,
    ) -> WasmPageContent {
        let (page_number, image_indices, is_blank) = (pageNumber, imageIndices, isBlank);
        let (layout_regions, speaker_notes) = (layoutRegions, speakerNotes);
        let (section_name, sheet_name) = (sectionName, sheetName);
        Self {
            page_number,
            content,
            tables,
            image_indices,
            hierarchy,
            is_blank,
            layout_regions,
            speaker_notes,
            section_name,
            sheet_name,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageContent {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `pageNumber` property.
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }
    /// Overwrites the `pageNumber` property.
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }
    /// Returns a copy of the `content` string.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }
    /// Replaces the `content` string.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }
    /// Returns a copy of the `tables` list.
    #[wasm_bindgen(getter)]
    pub fn tables(&self) -> Vec<WasmTable> {
        self.tables.clone()
    }
    /// Replaces the `tables` list.
    #[wasm_bindgen(setter)]
    pub fn set_tables(&mut self, value: Vec<WasmTable>) {
        self.tables = value;
    }
    /// Returns a copy of the `imageIndices` list.
    #[wasm_bindgen(getter, js_name = "imageIndices")]
    pub fn image_indices(&self) -> Vec<u32> {
        self.image_indices.clone()
    }
    /// Replaces the `imageIndices` list.
    #[wasm_bindgen(setter, js_name = "imageIndices")]
    pub fn set_image_indices(&mut self, value: Vec<u32>) {
        self.image_indices = value;
    }
    /// Returns a copy of the optional `hierarchy`.
    #[wasm_bindgen(getter)]
    pub fn hierarchy(&self) -> Option<WasmPageHierarchy> {
        self.hierarchy.clone()
    }
    /// Replaces the optional `hierarchy`.
    #[wasm_bindgen(setter)]
    pub fn set_hierarchy(&mut self, value: Option<WasmPageHierarchy>) {
        self.hierarchy = value;
    }
    /// Reads the optional `isBlank` flag.
    #[wasm_bindgen(getter, js_name = "isBlank")]
    pub fn is_blank(&self) -> Option<bool> {
        self.is_blank
    }
    /// Overwrites the optional `isBlank` flag.
    #[wasm_bindgen(setter, js_name = "isBlank")]
    pub fn set_is_blank(&mut self, value: Option<bool>) {
        self.is_blank = value;
    }
    /// Returns the layout regions as a freshly built JS array, or `None`
    /// when no regions are stored.
    ///
    /// Each element is a clone of the stored `WasmLayoutRegion` converted
    /// into a `JsValue`, pushed in the stored order.
    #[wasm_bindgen(getter, js_name = "layoutRegions")]
    pub fn layout_regions(&self) -> Option<js_sys::Array> {
        let stored = self.layout_regions.as_ref()?;
        let arr = js_sys::Array::new();
        stored.iter().cloned().map(JsValue::from).for_each(|entry| {
            arr.push(&entry);
        });
        Some(arr)
    }
    /// Replaces the optional layout-region list.
    #[wasm_bindgen(setter, js_name = "layoutRegions")]
    pub fn set_layout_regions(&mut self, value: Option<Vec<WasmLayoutRegion>>) {
        self.layout_regions = value;
    }
    /// Returns a copy of the optional `speakerNotes` string.
    #[wasm_bindgen(getter, js_name = "speakerNotes")]
    pub fn speaker_notes(&self) -> Option<String> {
        self.speaker_notes.clone()
    }
    /// Replaces the optional `speakerNotes` string.
    #[wasm_bindgen(setter, js_name = "speakerNotes")]
    pub fn set_speaker_notes(&mut self, value: Option<String>) {
        self.speaker_notes = value;
    }
    /// Returns a copy of the optional `sectionName` string.
    #[wasm_bindgen(getter, js_name = "sectionName")]
    pub fn section_name(&self) -> Option<String> {
        self.section_name.clone()
    }
    /// Replaces the optional `sectionName` string.
    #[wasm_bindgen(setter, js_name = "sectionName")]
    pub fn set_section_name(&mut self, value: Option<String>) {
        self.section_name = value;
    }
    /// Returns a copy of the optional `sheetName` string.
    #[wasm_bindgen(getter, js_name = "sheetName")]
    pub fn sheet_name(&self) -> Option<String> {
        self.sheet_name.clone()
    }
    /// Replaces the optional `sheetName` string.
    #[wasm_bindgen(setter, js_name = "sheetName")]
    pub fn set_sheet_name(&mut self, value: Option<String>) {
        self.sheet_name = value;
    }
}
/// A detected layout region on a page.
///
/// When layout detection is enabled, each page may have layout regions
/// identifying different content types (text, pictures, tables, etc.)
/// with confidence scores and spatial positions.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmLayoutRegion {
    class_name: String,
    confidence: f64,
    bounding_box: WasmBoundingBox,
    area_fraction: f64,
}
#[wasm_bindgen]
impl WasmLayoutRegion {
    /// Builds a `WasmLayoutRegion`; every omitted argument falls back to the
    /// `Default` value of its type.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        className: Option<String>,
        confidence: Option<f64>,
        boundingBox: Option<WasmBoundingBox>,
        areaFraction: Option<f64>,
    ) -> WasmLayoutRegion {
        let class_name = className.unwrap_or_default();
        let confidence = confidence.unwrap_or_default();
        let bounding_box = boundingBox.unwrap_or_default();
        let area_fraction = areaFraction.unwrap_or_default();
        Self {
            class_name,
            confidence,
            bounding_box,
            area_fraction,
        }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmLayoutRegion {
        <Self as ::core::default::Default>::default()
    }
    /// Returns a copy of the `className` string.
    #[wasm_bindgen(getter, js_name = "className")]
    pub fn class_name(&self) -> String {
        self.class_name.clone()
    }
    /// Replaces the `className` string.
    #[wasm_bindgen(setter, js_name = "className")]
    pub fn set_class_name(&mut self, value: String) {
        self.class_name = value;
    }
    /// Reads the `confidence` score.
    #[wasm_bindgen(getter)]
    pub fn confidence(&self) -> f64 {
        self.confidence
    }
    /// Overwrites the `confidence` score.
    #[wasm_bindgen(setter)]
    pub fn set_confidence(&mut self, value: f64) {
        self.confidence = value;
    }
    /// Returns a copy of the `boundingBox`.
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> WasmBoundingBox {
        self.bounding_box.clone()
    }
    /// Replaces the `boundingBox`.
    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: WasmBoundingBox) {
        self.bounding_box = value;
    }
    /// Reads the `areaFraction` value.
    #[wasm_bindgen(getter, js_name = "areaFraction")]
    pub fn area_fraction(&self) -> f64 {
        self.area_fraction
    }
    /// Overwrites the `areaFraction` value.
    #[wasm_bindgen(setter, js_name = "areaFraction")]
    pub fn set_area_fraction(&mut self, value: f64) {
        self.area_fraction = value;
    }
}
/// Page hierarchy structure containing heading levels and block information.
///
/// Used when PDF text hierarchy extraction is enabled. Contains hierarchical
/// blocks with heading levels (H1-H6) for semantic document structure.
///
/// `block_count` is stored independently of `blocks`; this binding does not
/// keep the two in sync.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmPageHierarchy {
    block_count: u32,
    blocks: Vec<WasmHierarchicalBlock>,
}
#[wasm_bindgen]
impl WasmPageHierarchy {
    /// Builds a `WasmPageHierarchy`; both arguments are required and stored
    /// unchanged.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(blockCount: u32, blocks: Vec<WasmHierarchicalBlock>) -> WasmPageHierarchy {
        let block_count = blockCount;
        Self { block_count, blocks }
    }
    /// Returns the derived `Default` value of this type.
    ///
    /// The trait is named explicitly so the call cannot resolve back to this
    /// inherent method.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmPageHierarchy {
        <Self as ::core::default::Default>::default()
    }
    /// Reads the `blockCount` property.
    #[wasm_bindgen(getter, js_name = "blockCount")]
    pub fn block_count(&self) -> u32 {
        self.block_count
    }
    /// Overwrites the `blockCount` property.
    #[wasm_bindgen(setter, js_name = "blockCount")]
    pub fn set_block_count(&mut self, value: u32) {
        self.block_count = value;
    }
    /// Returns a copy of the `blocks` list.
    #[wasm_bindgen(getter)]
    pub fn blocks(&self) -> Vec<WasmHierarchicalBlock> {
        self.blocks.clone()
    }
    /// Replaces the `blocks` list.
    #[wasm_bindgen(setter)]
    pub fn set_blocks(&mut self, value: Vec<WasmHierarchicalBlock>) {
        self.blocks = value;
    }
}
/// A text block with hierarchy level assignment.
///
/// Represents a block of text with semantic heading information extracted from
/// font size clustering and hierarchical analysis.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmHierarchicalBlock {
    text: String,
    font_size: f32,
    level: String,
    // NOTE(review): the element count and coordinate order of `bbox` are not
    // visible from this file; confirm against the core type before relying on them.
    bbox: Option<Vec<f32>>,
}

#[wasm_bindgen]
impl WasmHierarchicalBlock {
    /// Builds a block from its text, font size, level label and optional bounding box.
    ///
    /// All arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(text: String, fontSize: f32, level: String, bbox: Option<Vec<f32>>) -> WasmHierarchicalBlock {
        let font_size = fontSize;
        Self {
            text,
            font_size,
            level,
            bbox,
        }
    }

    /// Returns a block whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmHierarchicalBlock {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the block text.
    #[wasm_bindgen(getter)]
    pub fn text(&self) -> String {
        self.text.clone()
    }

    /// Replaces the block text.
    #[wasm_bindgen(setter)]
    pub fn set_text(&mut self, value: String) {
        self.text = value;
    }

    /// Returns the stored font size (exported to JS as `fontSize`).
    #[wasm_bindgen(getter, js_name = "fontSize")]
    pub fn font_size(&self) -> f32 {
        self.font_size
    }

    /// Replaces the stored font size (exported to JS as `fontSize`).
    #[wasm_bindgen(setter, js_name = "fontSize")]
    pub fn set_font_size(&mut self, value: f32) {
        self.font_size = value;
    }

    /// Returns a copy of the level label.
    #[wasm_bindgen(getter)]
    pub fn level(&self) -> String {
        self.level.clone()
    }

    /// Replaces the level label.
    #[wasm_bindgen(setter)]
    pub fn set_level(&mut self, value: String) {
        self.level = value;
    }

    /// Returns a clone of the bounding box values, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn bbox(&self) -> Option<Vec<f32>> {
        self.bbox.clone()
    }

    /// Replaces the bounding box values; `None` clears them.
    #[wasm_bindgen(setter)]
    pub fn set_bbox(&mut self, value: Option<Vec<f32>>) {
        self.bbox = value;
    }
}
/// A single changed cell within a table.
///
/// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can
/// reference it unconditionally, without requiring the `diff` Cargo feature.
/// `crate.diff` re-exports this type verbatim.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmCellChange {
    row: usize,
    col: usize,
    from: String,
    to: String,
}

#[wasm_bindgen]
impl WasmCellChange {
    /// Builds a cell change from its row, column and the `from` / `to` strings.
    ///
    /// All arguments are stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(row: usize, col: usize, from: String, to: String) -> WasmCellChange {
        Self { row, col, from, to }
    }

    /// Returns a cell change whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmCellChange {
        <Self as ::core::default::Default>::default()
    }

    /// Returns the stored row value.
    #[wasm_bindgen(getter)]
    pub fn row(&self) -> usize {
        self.row
    }

    /// Replaces the stored row value.
    #[wasm_bindgen(setter)]
    pub fn set_row(&mut self, value: usize) {
        self.row = value;
    }

    /// Returns the stored column value.
    #[wasm_bindgen(getter)]
    pub fn col(&self) -> usize {
        self.col
    }

    /// Replaces the stored column value.
    #[wasm_bindgen(setter)]
    pub fn set_col(&mut self, value: usize) {
        self.col = value;
    }

    /// Returns a copy of the `from` string.
    #[wasm_bindgen(getter)]
    pub fn from(&self) -> String {
        self.from.clone()
    }

    /// Replaces the `from` string.
    #[wasm_bindgen(setter)]
    pub fn set_from(&mut self, value: String) {
        self.from = value;
    }

    /// Returns a copy of the `to` string.
    #[wasm_bindgen(getter)]
    pub fn to(&self) -> String {
        self.to.clone()
    }

    /// Replaces the `to` string.
    #[wasm_bindgen(setter)]
    pub fn set_to(&mut self, value: String) {
        self.to = value;
    }
}
/// A single tracked change embedded in a document.
///
/// Populated by per-format extractors that understand change-tracking metadata
/// (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Every
/// extractor defaults to `ExtractionResult.revisions = None` until a
/// format-specific implementation is added.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmDocumentRevision {
    revision_id: String,
    author: Option<String>,
    timestamp: Option<String>,
    kind: WasmRevisionKind,
    anchor: Option<JsValue>,
    delta: WasmRevisionDelta,
}

#[wasm_bindgen]
impl WasmDocumentRevision {
    /// Builds a revision.
    ///
    /// The required values (`revisionId`, `kind`, `delta`) come first, followed
    /// by the optional ones (`author`, `timestamp`, `anchor`). Every argument
    /// is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        revisionId: String,
        kind: WasmRevisionKind,
        delta: WasmRevisionDelta,
        author: Option<String>,
        timestamp: Option<String>,
        anchor: Option<JsValue>,
    ) -> WasmDocumentRevision {
        let revision_id = revisionId;
        Self {
            revision_id,
            kind,
            delta,
            author,
            timestamp,
            anchor,
        }
    }

    /// Returns a revision whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDocumentRevision {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the revision id (exported to JS as `revisionId`).
    #[wasm_bindgen(getter, js_name = "revisionId")]
    pub fn revision_id(&self) -> String {
        self.revision_id.clone()
    }

    /// Replaces the revision id (exported to JS as `revisionId`).
    #[wasm_bindgen(setter, js_name = "revisionId")]
    pub fn set_revision_id(&mut self, value: String) {
        self.revision_id = value;
    }

    /// Returns a copy of the author, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn author(&self) -> Option<String> {
        self.author.clone()
    }

    /// Replaces the author; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_author(&mut self, value: Option<String>) {
        self.author = value;
    }

    /// Returns a copy of the timestamp string, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn timestamp(&self) -> Option<String> {
        self.timestamp.clone()
    }

    /// Replaces the timestamp string; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_timestamp(&mut self, value: Option<String>) {
        self.timestamp = value;
    }

    /// Returns the revision kind as the string produced by its `to_api_str`.
    ///
    /// Note the asymmetry with [`Self::set_kind`], which takes the enum value.
    #[wasm_bindgen(getter)]
    pub fn kind(&self) -> String {
        String::from(self.kind.to_api_str())
    }

    /// Replaces the revision kind with the given enum value.
    #[wasm_bindgen(setter)]
    pub fn set_kind(&mut self, value: WasmRevisionKind) {
        self.kind = value;
    }

    /// Returns a clone of the anchor value, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn anchor(&self) -> Option<JsValue> {
        self.anchor.clone()
    }

    /// Replaces the anchor value; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_anchor(&mut self, value: Option<JsValue>) {
        self.anchor = value;
    }

    /// Returns a clone of the revision delta.
    #[wasm_bindgen(getter)]
    pub fn delta(&self) -> WasmRevisionDelta {
        self.delta.clone()
    }

    /// Replaces the revision delta.
    #[wasm_bindgen(setter)]
    pub fn set_delta(&mut self, value: WasmRevisionDelta) {
        self.delta = value;
    }
}
/// The content changes that make up a single revision.
///
/// For insertions and deletions the `content` field carries the added/removed
/// lines as `DiffLine.Added` / `DiffLine.Removed` entries. For format
/// changes, `content` is empty — the property diff is left as a TODO for a
/// later enrichment pass.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmRevisionDelta {
    content: JsValue,
    table_changes: Vec<WasmCellChange>,
}

#[wasm_bindgen]
impl WasmRevisionDelta {
    /// Builds a delta from optional parts.
    ///
    /// An omitted `content` becomes `JsValue::default()` and an omitted
    /// `tableChanges` becomes an empty list.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(content: Option<JsValue>, tableChanges: Option<Vec<WasmCellChange>>) -> WasmRevisionDelta {
        let content = content.unwrap_or_default();
        let table_changes = tableChanges.unwrap_or_default();
        Self { content, table_changes }
    }

    /// Returns a delta whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmRevisionDelta {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a clone of the content value.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> JsValue {
        self.content.clone()
    }

    /// Replaces the content value.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: JsValue) {
        self.content = value;
    }

    /// Returns a clone of the table cell changes (exported to JS as `tableChanges`).
    #[wasm_bindgen(getter, js_name = "tableChanges")]
    pub fn table_changes(&self) -> Vec<WasmCellChange> {
        self.table_changes.clone()
    }

    /// Replaces the table cell changes (exported to JS as `tableChanges`).
    #[wasm_bindgen(setter, js_name = "tableChanges")]
    pub fn set_table_changes(&mut self, value: Vec<WasmCellChange>) {
        self.table_changes = value;
    }
}
/// Extracted table structure.
///
/// Represents a table detected and extracted from a document (PDF, image, etc.).
/// Tables are converted to both structured cell data and Markdown format.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTable {
    cells: JsValue,
    markdown: String,
    page_number: u32,
    bounding_box: Option<WasmBoundingBox>,
}

#[wasm_bindgen]
impl WasmTable {
    /// Builds a table from optional parts.
    ///
    /// Omitted `cells`, `markdown` and `pageNumber` fall back to the `Default`
    /// value of their field type; `boundingBox` is stored as given, so an
    /// omitted bounding box stays `None`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        cells: Option<JsValue>,
        markdown: Option<String>,
        pageNumber: Option<u32>,
        boundingBox: Option<WasmBoundingBox>,
    ) -> WasmTable {
        let cells = cells.unwrap_or_default();
        let markdown = markdown.unwrap_or_default();
        let page_number = pageNumber.unwrap_or_default();
        let bounding_box = boundingBox;
        Self {
            cells,
            markdown,
            page_number,
            bounding_box,
        }
    }

    /// Returns a table whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTable {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a clone of the cell data value.
    #[wasm_bindgen(getter)]
    pub fn cells(&self) -> JsValue {
        self.cells.clone()
    }

    /// Replaces the cell data value.
    #[wasm_bindgen(setter)]
    pub fn set_cells(&mut self, value: JsValue) {
        self.cells = value;
    }

    /// Returns a copy of the Markdown text.
    #[wasm_bindgen(getter)]
    pub fn markdown(&self) -> String {
        self.markdown.clone()
    }

    /// Replaces the Markdown text.
    #[wasm_bindgen(setter)]
    pub fn set_markdown(&mut self, value: String) {
        self.markdown = value;
    }

    /// Returns the stored page number (exported to JS as `pageNumber`).
    #[wasm_bindgen(getter, js_name = "pageNumber")]
    pub fn page_number(&self) -> u32 {
        self.page_number
    }

    /// Replaces the stored page number (exported to JS as `pageNumber`).
    #[wasm_bindgen(setter, js_name = "pageNumber")]
    pub fn set_page_number(&mut self, value: u32) {
        self.page_number = value;
    }

    /// Returns a clone of the bounding box, or `None` when unset
    /// (exported to JS as `boundingBox`).
    #[wasm_bindgen(getter, js_name = "boundingBox")]
    pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
        self.bounding_box.clone()
    }

    /// Replaces the bounding box; `None` clears it (exported to JS as `boundingBox`).
    #[wasm_bindgen(setter, js_name = "boundingBox")]
    pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
        self.bounding_box = value;
    }
}
/// Individual table cell with content and optional styling.
///
/// Future extension point for rich table support with cell-level metadata.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmTableCell {
    content: String,
    row_span: u32,
    col_span: u32,
    is_header: bool,
}

#[wasm_bindgen]
impl WasmTableCell {
    /// Builds a cell from optional parts.
    ///
    /// Any argument may be omitted; an omitted argument is replaced by the
    /// `Default` value of its field type (empty string, `0`, `false`). Note
    /// that this makes an omitted span `0`, not `1`.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(
        content: Option<String>,
        rowSpan: Option<u32>,
        colSpan: Option<u32>,
        isHeader: Option<bool>,
    ) -> WasmTableCell {
        let content = content.unwrap_or_default();
        let row_span = rowSpan.unwrap_or_default();
        let col_span = colSpan.unwrap_or_default();
        let is_header = isHeader.unwrap_or_default();
        Self {
            content,
            row_span,
            col_span,
            is_header,
        }
    }

    /// Returns a cell whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmTableCell {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the cell content.
    #[wasm_bindgen(getter)]
    pub fn content(&self) -> String {
        self.content.clone()
    }

    /// Replaces the cell content.
    #[wasm_bindgen(setter)]
    pub fn set_content(&mut self, value: String) {
        self.content = value;
    }

    /// Returns the stored row span (exported to JS as `rowSpan`).
    #[wasm_bindgen(getter, js_name = "rowSpan")]
    pub fn row_span(&self) -> u32 {
        self.row_span
    }

    /// Replaces the stored row span (exported to JS as `rowSpan`).
    #[wasm_bindgen(setter, js_name = "rowSpan")]
    pub fn set_row_span(&mut self, value: u32) {
        self.row_span = value;
    }

    /// Returns the stored column span (exported to JS as `colSpan`).
    #[wasm_bindgen(getter, js_name = "colSpan")]
    pub fn col_span(&self) -> u32 {
        self.col_span
    }

    /// Replaces the stored column span (exported to JS as `colSpan`).
    #[wasm_bindgen(setter, js_name = "colSpan")]
    pub fn set_col_span(&mut self, value: u32) {
        self.col_span = value;
    }

    /// Returns the header flag (exported to JS as `isHeader`).
    #[wasm_bindgen(getter, js_name = "isHeader")]
    pub fn is_header(&self) -> bool {
        self.is_header
    }

    /// Replaces the header flag (exported to JS as `isHeader`).
    #[wasm_bindgen(setter, js_name = "isHeader")]
    pub fn set_is_header(&mut self, value: bool) {
        self.is_header = value;
    }
}
/// A URI extracted from a document.
///
/// Represents any link, reference, or resource pointer found during extraction.
/// The `kind` field classifies the URI semantically, while `label` carries
/// optional human-readable display text.
#[derive(Clone, Default)]
#[wasm_bindgen]
pub struct WasmExtractedUri {
    url: String,
    label: Option<String>,
    page: Option<u32>,
    kind: WasmUriKind,
}

#[wasm_bindgen]
impl WasmExtractedUri {
    /// Builds an extracted URI.
    ///
    /// The required values (`url`, `kind`) come first, followed by the optional
    /// `label` and `page`. Every argument is stored as given.
    #[allow(non_snake_case)]
    #[wasm_bindgen(constructor)]
    pub fn new(url: String, kind: WasmUriKind, label: Option<String>, page: Option<u32>) -> WasmExtractedUri {
        Self {
            url,
            kind,
            label,
            page,
        }
    }

    /// Returns a URI whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmExtractedUri {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a copy of the URL string.
    #[wasm_bindgen(getter)]
    pub fn url(&self) -> String {
        self.url.clone()
    }

    /// Replaces the URL string.
    #[wasm_bindgen(setter)]
    pub fn set_url(&mut self, value: String) {
        self.url = value;
    }

    /// Returns a copy of the display label, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn label(&self) -> Option<String> {
        self.label.clone()
    }

    /// Replaces the display label; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_label(&mut self, value: Option<String>) {
        self.label = value;
    }

    /// Returns the stored page value, or `None` when unset.
    #[wasm_bindgen(getter)]
    pub fn page(&self) -> Option<u32> {
        self.page
    }

    /// Replaces the stored page value; `None` clears it.
    #[wasm_bindgen(setter)]
    pub fn set_page(&mut self, value: Option<u32>) {
        self.page = value;
    }

    /// Returns the URI kind as the string produced by its `to_api_str`.
    ///
    /// Note the asymmetry with [`Self::set_kind`], which takes the enum value.
    #[wasm_bindgen(getter)]
    pub fn kind(&self) -> String {
        String::from(self.kind.to_api_str())
    }

    /// Replaces the URI kind with the given enum value.
    #[wasm_bindgen(setter)]
    pub fn set_kind(&mut self, value: WasmUriKind) {
        self.kind = value;
    }
}
/// ONNX Runtime execution provider type.
///
/// Determines which hardware backend is used for model inference.
/// `Auto` (default) selects the best available provider per platform.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmExecutionProviderType {
    Auto = 0,
    Cpu = 1,
    CoreMl = 2,
    Cuda = 3,
    TensorRt = 4,
}

// Hand-written rather than derived; the default variant is `Auto`.
#[allow(clippy::derivable_impls)]
impl Default for WasmExecutionProviderType {
    fn default() -> Self {
        WasmExecutionProviderType::Auto
    }
}

impl WasmExecutionProviderType {
    /// Returns the serde wire string for this variant (e.g. `"auto"`, `"tensorrt"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Cpu => "cpu",
            Self::CoreMl => "coreml",
            Self::Cuda => "cuda",
            Self::TensorRt => "tensorrt",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Auto, Self::Cpu, Self::CoreMl, Self::Cuda, Self::TensorRt];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Output format for extraction results.
///
/// Controls the format of the `content` field in `ExtractionResult`.
/// When set to `Markdown`, `Djot`, or `Html`, the output uses that format.
/// `Plain` returns the raw extracted text.
/// `Structured` returns JSON with full OCR element data including bounding
/// boxes and confidence scores.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmOutputFormat {
    Plain = 0,
    Markdown = 1,
    Djot = 2,
    Html = 3,
    Json = 4,
    Structured = 5,
    Custom = 6,
}

// Hand-written rather than derived; the default variant is `Plain`.
#[allow(clippy::derivable_impls)]
impl Default for WasmOutputFormat {
    fn default() -> Self {
        WasmOutputFormat::Plain
    }
}

impl WasmOutputFormat {
    /// Returns the serde wire string for this variant (e.g. `"plain"`, `"markdown"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Plain => "plain",
            Self::Markdown => "markdown",
            Self::Djot => "djot",
            Self::Html => "html",
            Self::Json => "json",
            Self::Structured => "structured",
            Self::Custom => "custom",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [
            Self::Plain,
            Self::Markdown,
            Self::Djot,
            Self::Html,
            Self::Json,
            Self::Structured,
            Self::Custom,
        ];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Type of text chunker to use.
///
/// # Variants
///
/// * `Text` - Generic text splitter, splits on whitespace and punctuation
/// * `Markdown` - Markdown-aware splitter, preserves formatting and structure
/// * `Yaml` - YAML-aware splitter, creates one chunk per top-level key
/// * `Semantic` - Topic-aware chunker. With an `EmbeddingConfig`, splits at
///   embedding-based topic shifts tuned by `topic_threshold` (default 0.75,
///   lower = more splits). Without an embedding, falls back to a
///   structural-boundary heuristic (ALL-CAPS headers, numbered sections,
///   blank-line paragraphs) and merges groups into chunks capped at
///   `max_characters` (default 1000). `topic_threshold` has no effect in the
///   fallback path. For best results, pair with an embedding model.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmChunkerType {
    Text = 0,
    Markdown = 1,
    Yaml = 2,
    Semantic = 3,
}

// Hand-written rather than derived; the default variant is `Text`.
#[allow(clippy::derivable_impls)]
impl Default for WasmChunkerType {
    fn default() -> Self {
        WasmChunkerType::Text
    }
}

impl WasmChunkerType {
    /// Returns the serde wire string for this variant (e.g. `"text"`, `"semantic"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::Markdown => "markdown",
            Self::Yaml => "yaml",
            Self::Semantic => "semantic",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Text, Self::Markdown, Self::Yaml, Self::Semantic];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// How chunk size is measured.
///
/// Defaults to `Characters` (Unicode character count). When using token-based sizing,
/// chunks are sized by token count according to the specified tokenizer.
///
/// Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer
/// available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers
/// (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`).
///
/// In this binding the value is a plain struct: a `type` string plus optional
/// `model` and `cache_dir` strings.
// NOTE(review): which `type` strings are accepted, and which optional fields
// each one requires, is decided by conversion code outside this view.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmChunkSizing {
    pub(crate) r#type: String,
    pub(crate) model: Option<String>,
    pub(crate) cache_dir: Option<String>,
}

#[wasm_bindgen]
impl WasmChunkSizing {
    /// Builds a value with every field at its `Default` (empty `type`, no
    /// `model`, no `cache_dir`); populate it through the setters.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmChunkSizing {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a value whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmChunkSizing {
        ::core::default::Default::default()
    }

    /// Returns a copy of the `type` string.
    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    /// Replaces the `type` string.
    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    /// Returns a copy of the model string, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "model")]
    pub fn model(&self) -> Option<String> {
        self.model.clone()
    }

    /// Replaces the model string; `None` clears it.
    #[wasm_bindgen(setter, js_name = "model")]
    pub fn set_model(&mut self, value: Option<String>) {
        self.model = value;
    }

    /// Returns a copy of the cache directory string, or `None` when unset
    /// (exported to JS as `cacheDir`).
    #[wasm_bindgen(getter, js_name = "cacheDir")]
    pub fn cache_dir(&self) -> Option<String> {
        self.cache_dir.clone()
    }

    /// Replaces the cache directory string; `None` clears it
    /// (exported to JS as `cacheDir`).
    #[wasm_bindgen(setter, js_name = "cacheDir")]
    pub fn set_cache_dir(&mut self, value: Option<String>) {
        self.cache_dir = value;
    }
}
/// Embedding model types supported by Kreuzberg.
///
/// In this binding the value is a plain struct: a `type` string plus a set of
/// optional fields.
// NOTE(review): which `type` strings are accepted, and which optional fields
// each one uses, is decided by conversion code outside this view.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmEmbeddingModelType {
    pub(crate) r#type: String,
    pub(crate) name: Option<String>,
    pub(crate) model_id: Option<String>,
    pub(crate) dimensions: Option<usize>,
    pub(crate) llm: Option<WasmLlmConfig>,
}

#[wasm_bindgen]
impl WasmEmbeddingModelType {
    /// Builds a value with every field at its `Default` (empty `type`, all
    /// optional fields `None`); populate it through the setters.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmEmbeddingModelType {
        <Self as ::core::default::Default>::default()
    }

    /// Returns a value whose fields all hold their `Default` values.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmEmbeddingModelType {
        ::core::default::Default::default()
    }

    /// Returns a copy of the `type` string.
    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    /// Replaces the `type` string.
    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    /// Returns a copy of the name, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "name")]
    pub fn name(&self) -> Option<String> {
        self.name.clone()
    }

    /// Replaces the name; `None` clears it.
    #[wasm_bindgen(setter, js_name = "name")]
    pub fn set_name(&mut self, value: Option<String>) {
        self.name = value;
    }

    /// Returns a copy of the model id, or `None` when unset (exported to JS as `modelId`).
    #[wasm_bindgen(getter, js_name = "modelId")]
    pub fn model_id(&self) -> Option<String> {
        self.model_id.clone()
    }

    /// Replaces the model id; `None` clears it (exported to JS as `modelId`).
    #[wasm_bindgen(setter, js_name = "modelId")]
    pub fn set_model_id(&mut self, value: Option<String>) {
        self.model_id = value;
    }

    /// Returns the stored dimensions value, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "dimensions")]
    pub fn dimensions(&self) -> Option<usize> {
        // `Option<usize>` is `Copy`, so the value is returned directly.
        self.dimensions
    }

    /// Replaces the stored dimensions value; `None` clears it.
    #[wasm_bindgen(setter, js_name = "dimensions")]
    pub fn set_dimensions(&mut self, value: Option<usize>) {
        self.dimensions = value;
    }

    /// Returns a clone of the LLM configuration, or `None` when unset.
    #[wasm_bindgen(getter, js_name = "llm")]
    pub fn llm(&self) -> Option<WasmLlmConfig> {
        self.llm.clone()
    }

    /// Replaces the LLM configuration; `None` clears it.
    #[wasm_bindgen(setter, js_name = "llm")]
    pub fn set_llm(&mut self, value: Option<WasmLlmConfig>) {
        self.llm = value;
    }
}
/// Type of list detection.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmListType {
    Bullet = 0,
    Numbered = 1,
    Lettered = 2,
    Indented = 3,
}

// Hand-written rather than derived; the default variant is `Bullet`.
#[allow(clippy::derivable_impls)]
impl Default for WasmListType {
    fn default() -> Self {
        WasmListType::Bullet
    }
}

impl WasmListType {
    /// Returns the serde wire string for this variant (e.g. `"Bullet"`, `"Numbered"`).
    ///
    /// The wire strings of this enum are capitalised exactly like the variant names.
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Bullet => "Bullet",
            Self::Numbered => "Numbered",
            Self::Lettered => "Lettered",
            Self::Indented => "Indented",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Bullet, Self::Numbered, Self::Lettered, Self::Indented];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// OCR backend types.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmOcrBackendType {
    Tesseract = 0,
    EasyOCR = 1,
    PaddleOCR = 2,
    Custom = 3,
}

// Hand-written rather than derived; the default variant is `Tesseract`.
#[allow(clippy::derivable_impls)]
impl Default for WasmOcrBackendType {
    fn default() -> Self {
        WasmOcrBackendType::Tesseract
    }
}

impl WasmOcrBackendType {
    /// Returns the serde wire string for this variant (e.g. `"Tesseract"`, `"EasyOCR"`).
    ///
    /// The wire strings of this enum are capitalised exactly like the variant names.
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Tesseract => "Tesseract",
            Self::EasyOCR => "EasyOCR",
            Self::PaddleOCR => "PaddleOCR",
            Self::Custom => "Custom",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Tesseract, Self::EasyOCR, Self::PaddleOCR, Self::Custom];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Processing stages for post-processors.
///
/// Post-processors are executed in stage order (Early → Middle → Late).
/// Use stages to control the order of post-processing operations.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmProcessingStage {
    Early = 0,
    Middle = 1,
    Late = 2,
}

// Hand-written rather than derived; the default variant is `Early`.
#[allow(clippy::derivable_impls)]
impl Default for WasmProcessingStage {
    fn default() -> Self {
        WasmProcessingStage::Early
    }
}

impl WasmProcessingStage {
    /// Returns the serde wire string for this variant (e.g. `"Early"`, `"Late"`).
    ///
    /// The wire strings of this enum are capitalised exactly like the variant names.
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Early => "Early",
            Self::Middle => "Middle",
            Self::Late => "Late",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Early, Self::Middle, Self::Late];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Type of PDF annotation.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmPdfAnnotationType {
    Text = 0,
    Highlight = 1,
    Link = 2,
    Stamp = 3,
    Underline = 4,
    StrikeOut = 5,
    Other = 6,
}

// Hand-written rather than derived; the default variant is `Text`.
#[allow(clippy::derivable_impls)]
impl Default for WasmPdfAnnotationType {
    fn default() -> Self {
        WasmPdfAnnotationType::Text
    }
}

impl WasmPdfAnnotationType {
    /// Returns the serde wire string for this variant (e.g. `"highlight"`, `"strike_out"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::Highlight => "highlight",
            Self::Link => "link",
            Self::Stamp => "stamp",
            Self::Underline => "underline",
            Self::StrikeOut => "strike_out",
            Self::Other => "other",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [
            Self::Text,
            Self::Highlight,
            Self::Link,
            Self::Stamp,
            Self::Underline,
            Self::StrikeOut,
            Self::Other,
        ];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Types of block-level elements in Djot.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmBlockType {
    Paragraph = 0,
    Heading = 1,
    Blockquote = 2,
    CodeBlock = 3,
    ListItem = 4,
    OrderedList = 5,
    BulletList = 6,
    TaskList = 7,
    DefinitionList = 8,
    DefinitionTerm = 9,
    DefinitionDescription = 10,
    Div = 11,
    Section = 12,
    ThematicBreak = 13,
    RawBlock = 14,
    MathDisplay = 15,
}

// Hand-written rather than derived; the default variant is `Paragraph`.
#[allow(clippy::derivable_impls)]
impl Default for WasmBlockType {
    fn default() -> Self {
        WasmBlockType::Paragraph
    }
}

impl WasmBlockType {
    /// Returns the serde wire string for this variant (e.g. `"paragraph"`, `"code_block"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Paragraph => "paragraph",
            Self::Heading => "heading",
            Self::Blockquote => "blockquote",
            Self::CodeBlock => "code_block",
            Self::ListItem => "list_item",
            Self::OrderedList => "ordered_list",
            Self::BulletList => "bullet_list",
            Self::TaskList => "task_list",
            Self::DefinitionList => "definition_list",
            Self::DefinitionTerm => "definition_term",
            Self::DefinitionDescription => "definition_description",
            Self::Div => "div",
            Self::Section => "section",
            Self::ThematicBreak => "thematic_break",
            Self::RawBlock => "raw_block",
            Self::MathDisplay => "math_display",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [
            Self::Paragraph,
            Self::Heading,
            Self::Blockquote,
            Self::CodeBlock,
            Self::ListItem,
            Self::OrderedList,
            Self::BulletList,
            Self::TaskList,
            Self::DefinitionList,
            Self::DefinitionTerm,
            Self::DefinitionDescription,
            Self::Div,
            Self::Section,
            Self::ThematicBreak,
            Self::RawBlock,
            Self::MathDisplay,
        ];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Types of inline elements in Djot.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmInlineType {
    Text = 0,
    Strong = 1,
    Emphasis = 2,
    Highlight = 3,
    Subscript = 4,
    Superscript = 5,
    Insert = 6,
    Delete = 7,
    Code = 8,
    Link = 9,
    Image = 10,
    Span = 11,
    Math = 12,
    RawInline = 13,
    FootnoteRef = 14,
    Symbol = 15,
}

// Hand-written rather than derived; the default variant is `Text`.
#[allow(clippy::derivable_impls)]
impl Default for WasmInlineType {
    fn default() -> Self {
        WasmInlineType::Text
    }
}

impl WasmInlineType {
    /// Returns the serde wire string for this variant (e.g. `"strong"`, `"footnote_ref"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::Strong => "strong",
            Self::Emphasis => "emphasis",
            Self::Highlight => "highlight",
            Self::Subscript => "subscript",
            Self::Superscript => "superscript",
            Self::Insert => "insert",
            Self::Delete => "delete",
            Self::Code => "code",
            Self::Link => "link",
            Self::Image => "image",
            Self::Span => "span",
            Self::Math => "math",
            Self::RawInline => "raw_inline",
            Self::FootnoteRef => "footnote_ref",
            Self::Symbol => "symbol",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [
            Self::Text,
            Self::Strong,
            Self::Emphasis,
            Self::Highlight,
            Self::Subscript,
            Self::Superscript,
            Self::Insert,
            Self::Delete,
            Self::Code,
            Self::Link,
            Self::Image,
            Self::Span,
            Self::Math,
            Self::RawInline,
            Self::FootnoteRef,
            Self::Symbol,
        ];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Semantic kind of a relationship between document elements.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmRelationshipKind {
    FootnoteReference = 0,
    CitationReference = 1,
    InternalLink = 2,
    Caption = 3,
    Label = 4,
    TocEntry = 5,
    CrossReference = 6,
}

// Hand-written rather than derived; the default variant is `FootnoteReference`.
#[allow(clippy::derivable_impls)]
impl Default for WasmRelationshipKind {
    fn default() -> Self {
        WasmRelationshipKind::FootnoteReference
    }
}

impl WasmRelationshipKind {
    /// Returns the serde wire string for this variant (e.g. `"caption"`, `"toc_entry"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::FootnoteReference => "footnote_reference",
            Self::CitationReference => "citation_reference",
            Self::InternalLink => "internal_link",
            Self::Caption => "caption",
            Self::Label => "label",
            Self::TocEntry => "toc_entry",
            Self::CrossReference => "cross_reference",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [
            Self::FootnoteReference,
            Self::CitationReference,
            Self::InternalLink,
            Self::Caption,
            Self::Label,
            Self::TocEntry,
            Self::CrossReference,
        ];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Content layer classification for document nodes.
///
/// Replaces separate body/furniture arrays with per-node granularity.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmContentLayer {
    Body = 0,
    Header = 1,
    Footer = 2,
    Footnote = 3,
}

// Hand-written rather than derived; the default variant is `Body`.
#[allow(clippy::derivable_impls)]
impl Default for WasmContentLayer {
    fn default() -> Self {
        WasmContentLayer::Body
    }
}

impl WasmContentLayer {
    /// Returns the serde wire string for this variant (e.g. `"body"`, `"footnote"`).
    pub fn to_api_str(self) -> &'static str {
        match self {
            Self::Body => "body",
            Self::Header => "header",
            Self::Footer => "footer",
            Self::Footnote => "footnote",
        }
    }

    /// Parses a serde wire string into the matching variant.
    ///
    /// The comparison is exact (case-sensitive); any string that no variant
    /// maps to via [`Self::to_api_str`] yields `None`.
    pub fn from_api_str(s: &str) -> Option<Self> {
        // Keep this list in sync with the enum: a variant missing here can
        // never be parsed.
        let candidates = [Self::Body, Self::Header, Self::Footer, Self::Footnote];
        candidates
            .iter()
            .copied()
            .find(|candidate| candidate.to_api_str() == s)
    }
}
/// Tagged enum for node content. Each variant carries only type-specific data.
///
/// Uses `#[serde(tag = "node_type")]` to avoid "type" keyword collision in
/// Go/Java/TypeScript bindings.
///
/// In this binding the enum is represented as one flat struct: `node_type`
/// holds the tag string and every other field is optional.
// NOTE(review): presumably only the fields belonging to the variant named by
// `node_type` are populated and the rest stay `None`; which fields belong to
// which tag is decided by conversion code outside this view. Confirm there.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmNodeContent {
// Tag string; the only non-optional field.
pub(crate) node_type: String,
// Optional payload fields. All default to `None` via the derived `Default`.
pub(crate) text: Option<String>,
pub(crate) level: Option<u8>,
pub(crate) ordered: Option<bool>,
pub(crate) grid: Option<WasmTableGrid>,
pub(crate) description: Option<String>,
pub(crate) image_index: Option<u32>,
pub(crate) src: Option<String>,
pub(crate) language: Option<String>,
pub(crate) label: Option<String>,
pub(crate) heading_level: Option<u8>,
pub(crate) heading_text: Option<String>,
pub(crate) number: Option<u32>,
pub(crate) title: Option<String>,
pub(crate) term: Option<String>,
pub(crate) definition: Option<String>,
pub(crate) key: Option<String>,
pub(crate) kind: Option<String>,
pub(crate) format: Option<String>,
pub(crate) content: Option<String>,
// Untyped JS value; its shape is not visible from this file.
pub(crate) entries: Option<JsValue>,
}
#[wasm_bindgen]
impl WasmNodeContent {
#[wasm_bindgen(constructor)]
pub fn new() -> WasmNodeContent {
Self::default()
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn default() -> WasmNodeContent {
<Self as ::core::default::Default>::default()
}
#[wasm_bindgen(getter, js_name = "nodeType")]
pub fn node_type(&self) -> String {
self.node_type.clone()
}
#[wasm_bindgen(setter, js_name = "nodeType")]
pub fn set_node_type(&mut self, value: String) {
self.node_type = value;
}
#[wasm_bindgen(getter, js_name = "text")]
pub fn text(&self) -> Option<String> {
self.text.clone()
}
#[wasm_bindgen(setter, js_name = "text")]
pub fn set_text(&mut self, value: Option<String>) {
self.text = value;
}
#[wasm_bindgen(getter, js_name = "level")]
pub fn level(&self) -> Option<u8> {
self.level.clone()
}
#[wasm_bindgen(setter, js_name = "level")]
pub fn set_level(&mut self, value: Option<u8>) {
self.level = value;
}
#[wasm_bindgen(getter, js_name = "ordered")]
pub fn ordered(&self) -> Option<bool> {
self.ordered.clone()
}
#[wasm_bindgen(setter, js_name = "ordered")]
pub fn set_ordered(&mut self, value: Option<bool>) {
self.ordered = value;
}
#[wasm_bindgen(getter, js_name = "grid")]
pub fn grid(&self) -> Option<WasmTableGrid> {
self.grid.clone()
}
#[wasm_bindgen(setter, js_name = "grid")]
pub fn set_grid(&mut self, value: Option<WasmTableGrid>) {
self.grid = value;
}
#[wasm_bindgen(getter, js_name = "description")]
pub fn description(&self) -> Option<String> {
self.description.clone()
}
#[wasm_bindgen(setter, js_name = "description")]
pub fn set_description(&mut self, value: Option<String>) {
self.description = value;
}
#[wasm_bindgen(getter, js_name = "imageIndex")]
pub fn image_index(&self) -> Option<u32> {
self.image_index.clone()
}
#[wasm_bindgen(setter, js_name = "imageIndex")]
pub fn set_image_index(&mut self, value: Option<u32>) {
self.image_index = value;
}
#[wasm_bindgen(getter, js_name = "src")]
pub fn src(&self) -> Option<String> {
self.src.clone()
}
#[wasm_bindgen(setter, js_name = "src")]
pub fn set_src(&mut self, value: Option<String>) {
self.src = value;
}
#[wasm_bindgen(getter, js_name = "language")]
pub fn language(&self) -> Option<String> {
self.language.clone()
}
#[wasm_bindgen(setter, js_name = "language")]
pub fn set_language(&mut self, value: Option<String>) {
self.language = value;
}
#[wasm_bindgen(getter, js_name = "label")]
pub fn label(&self) -> Option<String> {
self.label.clone()
}
#[wasm_bindgen(setter, js_name = "label")]
pub fn set_label(&mut self, value: Option<String>) {
self.label = value;
}
#[wasm_bindgen(getter, js_name = "headingLevel")]
pub fn heading_level(&self) -> Option<u8> {
self.heading_level.clone()
}
#[wasm_bindgen(setter, js_name = "headingLevel")]
pub fn set_heading_level(&mut self, value: Option<u8>) {
self.heading_level = value;
}
#[wasm_bindgen(getter, js_name = "headingText")]
pub fn heading_text(&self) -> Option<String> {
self.heading_text.clone()
}
#[wasm_bindgen(setter, js_name = "headingText")]
pub fn set_heading_text(&mut self, value: Option<String>) {
self.heading_text = value;
}
#[wasm_bindgen(getter, js_name = "number")]
pub fn number(&self) -> Option<u32> {
self.number.clone()
}
#[wasm_bindgen(setter, js_name = "number")]
pub fn set_number(&mut self, value: Option<u32>) {
self.number = value;
}
#[wasm_bindgen(getter, js_name = "title")]
pub fn title(&self) -> Option<String> {
self.title.clone()
}
#[wasm_bindgen(setter, js_name = "title")]
pub fn set_title(&mut self, value: Option<String>) {
self.title = value;
}
#[wasm_bindgen(getter, js_name = "term")]
pub fn term(&self) -> Option<String> {
self.term.clone()
}
#[wasm_bindgen(setter, js_name = "term")]
pub fn set_term(&mut self, value: Option<String>) {
self.term = value;
}
#[wasm_bindgen(getter, js_name = "definition")]
pub fn definition(&self) -> Option<String> {
self.definition.clone()
}
#[wasm_bindgen(setter, js_name = "definition")]
pub fn set_definition(&mut self, value: Option<String>) {
self.definition = value;
}
#[wasm_bindgen(getter, js_name = "key")]
pub fn key(&self) -> Option<String> {
self.key.clone()
}
#[wasm_bindgen(setter, js_name = "key")]
pub fn set_key(&mut self, value: Option<String>) {
self.key = value;
}
#[wasm_bindgen(getter, js_name = "kind")]
pub fn kind(&self) -> Option<String> {
self.kind.clone()
}
#[wasm_bindgen(setter, js_name = "kind")]
pub fn set_kind(&mut self, value: Option<String>) {
self.kind = value;
}
#[wasm_bindgen(getter, js_name = "format")]
pub fn format(&self) -> Option<String> {
self.format.clone()
}
#[wasm_bindgen(setter, js_name = "format")]
pub fn set_format(&mut self, value: Option<String>) {
self.format = value;
}
#[wasm_bindgen(getter, js_name = "content")]
pub fn content(&self) -> Option<String> {
self.content.clone()
}
#[wasm_bindgen(setter, js_name = "content")]
pub fn set_content(&mut self, value: Option<String>) {
self.content = value;
}
#[wasm_bindgen(getter, js_name = "entries")]
pub fn entries(&self) -> Option<JsValue> {
self.entries.clone()
}
#[wasm_bindgen(setter, js_name = "entries")]
pub fn set_entries(&mut self, value: Option<JsValue>) {
self.entries = value;
}
}
/// Inline text annotation descriptor exposed to JavaScript.
///
/// `annotation_type` names the kind of annotation; `url`, `title`, `value` and `name`
/// are optional payload fields.
/// NOTE(review): which payload fields accompany which annotation type is not visible
/// in this file — confirm against the core type.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmAnnotationKind {
    pub(crate) annotation_type: String,
    pub(crate) url: Option<String>,
    pub(crate) title: Option<String>,
    pub(crate) value: Option<String>,
    pub(crate) name: Option<String>,
}

#[wasm_bindgen]
impl WasmAnnotationKind {
    /// Builds an annotation with an empty `annotationType` and no payload fields set.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmAnnotationKind {
        <Self as ::core::default::Default>::default()
    }

    /// Static factory equivalent to the constructor; delegates to the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmAnnotationKind {
        ::core::default::Default::default()
    }

    /// Getter for `annotationType`; returns a copy of the stored string.
    #[wasm_bindgen(getter, js_name = "annotationType")]
    pub fn annotation_type(&self) -> String {
        self.annotation_type.clone()
    }

    /// Setter for `annotationType`; replaces the stored string.
    #[wasm_bindgen(setter, js_name = "annotationType")]
    pub fn set_annotation_type(&mut self, value: String) {
        self.annotation_type = value;
    }

    /// Getter for `url`; returns a copy of the stored value, if any.
    #[wasm_bindgen(getter, js_name = "url")]
    pub fn url(&self) -> Option<String> {
        self.url.clone()
    }

    /// Setter for `url`; replaces the stored value.
    #[wasm_bindgen(setter, js_name = "url")]
    pub fn set_url(&mut self, value: Option<String>) {
        self.url = value;
    }

    /// Getter for `title`; returns a copy of the stored value, if any.
    #[wasm_bindgen(getter, js_name = "title")]
    pub fn title(&self) -> Option<String> {
        self.title.clone()
    }

    /// Setter for `title`; replaces the stored value.
    #[wasm_bindgen(setter, js_name = "title")]
    pub fn set_title(&mut self, value: Option<String>) {
        self.title = value;
    }

    /// Getter for `value`; returns a copy of the stored value, if any.
    #[wasm_bindgen(getter, js_name = "value")]
    pub fn value(&self) -> Option<String> {
        self.value.clone()
    }

    /// Setter for `value`; replaces the stored value.
    #[wasm_bindgen(setter, js_name = "value")]
    pub fn set_value(&mut self, value: Option<String>) {
        self.value = value;
    }

    /// Getter for `name`; returns a copy of the stored value, if any.
    #[wasm_bindgen(getter, js_name = "name")]
    pub fn name(&self) -> Option<String> {
        self.name.clone()
    }

    /// Setter for `name`; replaces the stored value.
    #[wasm_bindgen(setter, js_name = "name")]
    pub fn set_name(&mut self, value: Option<String>) {
        self.name = value;
    }
}
/// How the extracted text was produced.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). `Native` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmExtractionMethod {
/// Wire string `"native"`. This is the default variant.
Native = 0,
/// Wire string `"ocr"`.
Ocr = 1,
/// Wire string `"mixed"`.
Mixed = 2,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmExtractionMethod {
fn default() -> Self {
Self::Native
}
}
impl WasmExtractionMethod {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Native => "native",
Self::Ocr => "ocr",
Self::Mixed => "mixed",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"native" => Some(Self::Native),
"ocr" => Some(Self::Ocr),
"mixed" => Some(Self::Mixed),
_ => None,
}
}
}
/// Semantic structural classification of a text chunk.
///
/// Assigned by the heuristic classifier in `chunking.classifier`.
/// Defaults to `Unknown` when no rule matches.
/// Designed to be extended in future versions without breaking changes.
///
/// Every variant has an explicit discriminant and a snake_case wire string
/// (see `to_api_str` / `from_api_str`).
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmChunkType {
/// Wire string `"heading"`.
Heading = 0,
/// Wire string `"party_list"`.
PartyList = 1,
/// Wire string `"definitions"`.
Definitions = 2,
/// Wire string `"operative_clause"`.
OperativeClause = 3,
/// Wire string `"signature_block"`.
SignatureBlock = 4,
/// Wire string `"schedule"`.
Schedule = 5,
/// Wire string `"table_like"`.
TableLike = 6,
/// Wire string `"formula"`.
Formula = 7,
/// Wire string `"code_block"`.
CodeBlock = 8,
/// Wire string `"image"`.
Image = 9,
/// Wire string `"org_chart"`.
OrgChart = 10,
/// Wire string `"diagram"`.
Diagram = 11,
/// Wire string `"unknown"`. This is the default variant.
Unknown = 12,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmChunkType {
fn default() -> Self {
Self::Unknown
}
}
impl WasmChunkType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Heading => "heading",
Self::PartyList => "party_list",
Self::Definitions => "definitions",
Self::OperativeClause => "operative_clause",
Self::SignatureBlock => "signature_block",
Self::Schedule => "schedule",
Self::TableLike => "table_like",
Self::Formula => "formula",
Self::CodeBlock => "code_block",
Self::Image => "image",
Self::OrgChart => "org_chart",
Self::Diagram => "diagram",
Self::Unknown => "unknown",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"heading" => Some(Self::Heading),
"party_list" => Some(Self::PartyList),
"definitions" => Some(Self::Definitions),
"operative_clause" => Some(Self::OperativeClause),
"signature_block" => Some(Self::SignatureBlock),
"schedule" => Some(Self::Schedule),
"table_like" => Some(Self::TableLike),
"formula" => Some(Self::Formula),
"code_block" => Some(Self::CodeBlock),
"image" => Some(Self::Image),
"org_chart" => Some(Self::OrgChart),
"diagram" => Some(Self::Diagram),
"unknown" => Some(Self::Unknown),
_ => None,
}
}
}
/// Heuristic classification of what an image likely depicts.
///
/// Every variant has an explicit discriminant and a snake_case wire string
/// (see `to_api_str` / `from_api_str`).
///
/// NOTE(review): the `Default` impl yields `Photograph`, not `Unknown` — confirm this
/// matches the default of the core type.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmImageKind {
/// Wire string `"photograph"`. This is the default variant.
Photograph = 0,
/// Wire string `"diagram"`.
Diagram = 1,
/// Wire string `"chart"`.
Chart = 2,
/// Wire string `"drawing"`.
Drawing = 3,
/// Wire string `"text_block"`.
TextBlock = 4,
/// Wire string `"decoration"`.
Decoration = 5,
/// Wire string `"logo"`.
Logo = 6,
/// Wire string `"icon"`.
Icon = 7,
/// Wire string `"tile_fragment"`.
TileFragment = 8,
/// Wire string `"mask"`.
Mask = 9,
/// Wire string `"page_raster"`.
PageRaster = 10,
/// Wire string `"unknown"`.
Unknown = 11,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmImageKind {
fn default() -> Self {
Self::Photograph
}
}
impl WasmImageKind {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Photograph => "photograph",
Self::Diagram => "diagram",
Self::Chart => "chart",
Self::Drawing => "drawing",
Self::TextBlock => "text_block",
Self::Decoration => "decoration",
Self::Logo => "logo",
Self::Icon => "icon",
Self::TileFragment => "tile_fragment",
Self::Mask => "mask",
Self::PageRaster => "page_raster",
Self::Unknown => "unknown",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"photograph" => Some(Self::Photograph),
"diagram" => Some(Self::Diagram),
"chart" => Some(Self::Chart),
"drawing" => Some(Self::Drawing),
"text_block" => Some(Self::TextBlock),
"decoration" => Some(Self::Decoration),
"logo" => Some(Self::Logo),
"icon" => Some(Self::Icon),
"tile_fragment" => Some(Self::TileFragment),
"mask" => Some(Self::Mask),
"page_raster" => Some(Self::PageRaster),
"unknown" => Some(Self::Unknown),
_ => None,
}
}
}
/// Result-shape selection for extraction results.
///
/// Distinct from `OutputFormat` (which controls rendering — Plain, Markdown,
/// HTML, etc.). `ResultFormat` controls the *shape* of the result: a unified content
/// blob vs. an element-based decomposition.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). `Unified` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmResultFormat {
/// Wire string `"unified"`. This is the default variant.
Unified = 0,
/// Wire string `"element_based"`.
ElementBased = 1,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmResultFormat {
fn default() -> Self {
Self::Unified
}
}
impl WasmResultFormat {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Unified => "unified",
Self::ElementBased => "element_based",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"unified" => Some(Self::Unified),
"element_based" => Some(Self::ElementBased),
_ => None,
}
}
}
/// Semantic element type classification.
///
/// Categorizes text content into semantic units for downstream processing.
/// Supports the element types commonly found in Unstructured documents.
///
/// Every variant has an explicit discriminant and a snake_case wire string
/// (see `to_api_str` / `from_api_str`). `Title` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmElementType {
/// Wire string `"title"`. This is the default variant.
Title = 0,
/// Wire string `"narrative_text"`.
NarrativeText = 1,
/// Wire string `"heading"`.
Heading = 2,
/// Wire string `"list_item"`.
ListItem = 3,
/// Wire string `"table"`.
Table = 4,
/// Wire string `"image"`.
Image = 5,
/// Wire string `"page_break"`.
PageBreak = 6,
/// Wire string `"code_block"`.
CodeBlock = 7,
/// Wire string `"block_quote"`.
BlockQuote = 8,
/// Wire string `"footer"`.
Footer = 9,
/// Wire string `"header"`.
Header = 10,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmElementType {
fn default() -> Self {
Self::Title
}
}
impl WasmElementType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Title => "title",
Self::NarrativeText => "narrative_text",
Self::Heading => "heading",
Self::ListItem => "list_item",
Self::Table => "table",
Self::Image => "image",
Self::PageBreak => "page_break",
Self::CodeBlock => "code_block",
Self::BlockQuote => "block_quote",
Self::Footer => "footer",
Self::Header => "header",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"title" => Some(Self::Title),
"narrative_text" => Some(Self::NarrativeText),
"heading" => Some(Self::Heading),
"list_item" => Some(Self::ListItem),
"table" => Some(Self::Table),
"image" => Some(Self::Image),
"page_break" => Some(Self::PageBreak),
"code_block" => Some(Self::CodeBlock),
"block_quote" => Some(Self::BlockQuote),
"footer" => Some(Self::Footer),
"header" => Some(Self::Header),
_ => None,
}
}
}
/// Format-specific metadata (discriminated union), flattened for JavaScript.
///
/// Only one format type can exist per extraction result. `format_type` holds the
/// discriminator and `_0` holds the associated payload as an opaque `JsValue`.
/// NOTE(review): the payload shape per `format_type` is not visible in this file.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmFormatMetadata {
    pub(crate) format_type: String,
    pub(crate) _0: Option<JsValue>,
}

#[wasm_bindgen]
impl WasmFormatMetadata {
    /// Builds an instance with an empty `formatType` and no payload.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmFormatMetadata {
        <Self as ::core::default::Default>::default()
    }

    /// Static factory equivalent to the constructor; delegates to the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmFormatMetadata {
        ::core::default::Default::default()
    }

    /// Getter for `formatType`; returns a copy of the stored string.
    #[wasm_bindgen(getter, js_name = "formatType")]
    pub fn format_type(&self) -> String {
        self.format_type.clone()
    }

    /// Setter for `formatType`; replaces the stored string.
    #[wasm_bindgen(setter, js_name = "formatType")]
    pub fn set_format_type(&mut self, value: String) {
        self.format_type = value;
    }

    /// Getter for the payload, exposed to JavaScript under the property name `0`.
    #[wasm_bindgen(getter, js_name = "0")]
    pub fn field_0(&self) -> Option<JsValue> {
        self._0.clone()
    }

    /// Setter for the payload, exposed to JavaScript under the property name `0`.
    #[wasm_bindgen(setter, js_name = "0")]
    pub fn set_field_0(&mut self, value: Option<JsValue>) {
        self._0 = value;
    }
}
/// Text direction enumeration for HTML documents.
///
/// Every variant has an explicit discriminant and a short wire string
/// (see `to_api_str` / `from_api_str`). `LeftToRight` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmTextDirection {
/// Wire string `"ltr"`. This is the default variant.
LeftToRight = 0,
/// Wire string `"rtl"`.
RightToLeft = 1,
/// Wire string `"auto"`.
Auto = 2,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmTextDirection {
fn default() -> Self {
Self::LeftToRight
}
}
impl WasmTextDirection {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::LeftToRight => "ltr",
Self::RightToLeft => "rtl",
Self::Auto => "auto",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"ltr" => Some(Self::LeftToRight),
"rtl" => Some(Self::RightToLeft),
"auto" => Some(Self::Auto),
_ => None,
}
}
}
/// Link type classification.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). `Anchor` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmLinkType {
/// Wire string `"anchor"`. This is the default variant.
Anchor = 0,
/// Wire string `"internal"`.
Internal = 1,
/// Wire string `"external"`.
External = 2,
/// Wire string `"email"`.
Email = 3,
/// Wire string `"phone"`.
Phone = 4,
/// Wire string `"other"`.
Other = 5,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmLinkType {
fn default() -> Self {
Self::Anchor
}
}
impl WasmLinkType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Anchor => "anchor",
Self::Internal => "internal",
Self::External => "external",
Self::Email => "email",
Self::Phone => "phone",
Self::Other => "other",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"anchor" => Some(Self::Anchor),
"internal" => Some(Self::Internal),
"external" => Some(Self::External),
"email" => Some(Self::Email),
"phone" => Some(Self::Phone),
"other" => Some(Self::Other),
_ => None,
}
}
}
/// Image type classification.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`); multi-word wire strings are hyphenated
/// (kebab-case). `DataUri` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmImageType {
/// Wire string `"data-uri"`. This is the default variant.
DataUri = 0,
/// Wire string `"inline-svg"`.
InlineSvg = 1,
/// Wire string `"external"`.
External = 2,
/// Wire string `"relative"`.
Relative = 3,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmImageType {
fn default() -> Self {
Self::DataUri
}
}
impl WasmImageType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::DataUri => "data-uri",
Self::InlineSvg => "inline-svg",
Self::External => "external",
Self::Relative => "relative",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"data-uri" => Some(Self::DataUri),
"inline-svg" => Some(Self::InlineSvg),
"external" => Some(Self::External),
"relative" => Some(Self::Relative),
_ => None,
}
}
}
/// Structured data type classification.
///
/// Every variant has an explicit discriminant and a lowercase wire string
/// (see `to_api_str` / `from_api_str`). `JsonLd` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmStructuredDataType {
/// Wire string `"json-ld"`. This is the default variant.
JsonLd = 0,
/// Wire string `"microdata"`.
Microdata = 1,
/// Wire string `"rdfa"`.
RDFa = 2,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmStructuredDataType {
fn default() -> Self {
Self::JsonLd
}
}
impl WasmStructuredDataType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::JsonLd => "json-ld",
Self::Microdata => "microdata",
Self::RDFa => "rdfa",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"json-ld" => Some(Self::JsonLd),
"microdata" => Some(Self::Microdata),
"rdfa" => Some(Self::RDFa),
_ => None,
}
}
}
/// Bounding geometry for an OCR element.
///
/// Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals
/// (from PaddleOCR and rotated text detection).
///
/// `r#type` holds the discriminator; the remaining fields are optional.
/// NOTE(review): presumably `left`/`top`/`width`/`height` describe the rectangle form and
/// `points` the quadrilateral form — confirm against the core type, as the mapping is not
/// visible in this file.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmOcrBoundingGeometry {
    pub(crate) r#type: String,
    pub(crate) left: Option<u32>,
    pub(crate) top: Option<u32>,
    pub(crate) width: Option<u32>,
    pub(crate) height: Option<u32>,
    pub(crate) points: Option<JsValue>,
}

// JS-facing accessors: getters return an owned value (a clone for heap-backed fields, a
// plain copy for `Copy` fields) and setters overwrite the stored value.
#[wasm_bindgen]
impl WasmOcrBoundingGeometry {
    /// Creates an instance with an empty `type` and every optional field unset.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmOcrBoundingGeometry {
        Self::default()
    }

    /// Static factory equivalent to the constructor; delegates to the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmOcrBoundingGeometry {
        <Self as ::core::default::Default>::default()
    }

    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    #[wasm_bindgen(getter, js_name = "left")]
    pub fn left(&self) -> Option<u32> {
        // `Option<u32>` is `Copy`; no clone needed.
        self.left
    }

    #[wasm_bindgen(setter, js_name = "left")]
    pub fn set_left(&mut self, value: Option<u32>) {
        self.left = value;
    }

    #[wasm_bindgen(getter, js_name = "top")]
    pub fn top(&self) -> Option<u32> {
        self.top
    }

    #[wasm_bindgen(setter, js_name = "top")]
    pub fn set_top(&mut self, value: Option<u32>) {
        self.top = value;
    }

    #[wasm_bindgen(getter, js_name = "width")]
    pub fn width(&self) -> Option<u32> {
        self.width
    }

    #[wasm_bindgen(setter, js_name = "width")]
    pub fn set_width(&mut self, value: Option<u32>) {
        self.width = value;
    }

    #[wasm_bindgen(getter, js_name = "height")]
    pub fn height(&self) -> Option<u32> {
        self.height
    }

    #[wasm_bindgen(setter, js_name = "height")]
    pub fn set_height(&mut self, value: Option<u32>) {
        self.height = value;
    }

    #[wasm_bindgen(getter, js_name = "points")]
    pub fn points(&self) -> Option<JsValue> {
        // `JsValue` is not `Copy`, so this one still needs a clone.
        self.points.clone()
    }

    #[wasm_bindgen(setter, js_name = "points")]
    pub fn set_points(&mut self, value: Option<JsValue>) {
        self.points = value;
    }
}
/// Hierarchical level of an OCR element.
///
/// Maps to Tesseract's page segmentation hierarchy and provides
/// equivalent semantics for PaddleOCR.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). Note that the `Default` value is `Line`,
/// not the first variant.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmOcrElementLevel {
/// Wire string `"word"`.
Word = 0,
/// Wire string `"line"`. This is the default variant.
Line = 1,
/// Wire string `"block"`.
Block = 2,
/// Wire string `"page"`.
Page = 3,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmOcrElementLevel {
fn default() -> Self {
Self::Line
}
}
impl WasmOcrElementLevel {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Word => "word",
Self::Line => "line",
Self::Block => "block",
Self::Page => "page",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"word" => Some(Self::Word),
"line" => Some(Self::Line),
"block" => Some(Self::Block),
"page" => Some(Self::Page),
_ => None,
}
}
}
/// Type of paginated unit in a document.
///
/// Distinguishes between different types of "pages" (PDF pages, presentation slides, spreadsheet sheets).
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). `Page` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmPageUnitType {
/// Wire string `"page"`. This is the default variant.
Page = 0,
/// Wire string `"slide"`.
Slide = 1,
/// Wire string `"sheet"`.
Sheet = 2,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmPageUnitType {
fn default() -> Self {
Self::Page
}
}
impl WasmPageUnitType {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Page => "page",
Self::Slide => "slide",
Self::Sheet => "sheet",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"page" => Some(Self::Page),
"slide" => Some(Self::Slide),
"sheet" => Some(Self::Sheet),
_ => None,
}
}
}
/// A single line in a unified-diff hunk, flattened for JavaScript.
///
/// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can
/// reference it unconditionally, without requiring the `diff` Cargo feature.
/// `crate.diff` re-exports this type verbatim.
///
/// `kind` holds the discriminator and `_0` the optional string payload.
/// NOTE(review): presumably `_0` is the text of the diff line — confirm against the
/// core type, as this file does not show it.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmDiffLine {
    pub(crate) kind: String,
    pub(crate) _0: Option<String>,
}

#[wasm_bindgen]
impl WasmDiffLine {
    /// Builds an instance with an empty `kind` and no payload.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmDiffLine {
        <Self as ::core::default::Default>::default()
    }

    /// Static factory equivalent to the constructor; delegates to the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmDiffLine {
        ::core::default::Default::default()
    }

    /// Getter for `kind`; returns a copy of the stored string.
    #[wasm_bindgen(getter, js_name = "kind")]
    pub fn kind(&self) -> String {
        self.kind.clone()
    }

    /// Setter for `kind`; replaces the stored string.
    #[wasm_bindgen(setter, js_name = "kind")]
    pub fn set_kind(&mut self, value: String) {
        self.kind = value;
    }

    /// Getter for the payload, exposed to JavaScript under the property name `0`.
    #[wasm_bindgen(getter, js_name = "0")]
    pub fn field_0(&self) -> Option<String> {
        self._0.clone()
    }

    /// Setter for the payload, exposed to JavaScript under the property name `0`.
    #[wasm_bindgen(setter, js_name = "0")]
    pub fn set_field_0(&mut self, value: Option<String>) {
        self._0 = value;
    }
}
/// Semantic classification of a tracked change.
///
/// Every variant has an explicit discriminant and a snake_case wire string
/// (see `to_api_str` / `from_api_str`). `Insertion` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmRevisionKind {
/// Wire string `"insertion"`. This is the default variant.
Insertion = 0,
/// Wire string `"deletion"`.
Deletion = 1,
/// Wire string `"format_change"`.
FormatChange = 2,
/// Wire string `"comment"`.
Comment = 3,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmRevisionKind {
fn default() -> Self {
Self::Insertion
}
}
impl WasmRevisionKind {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Insertion => "insertion",
Self::Deletion => "deletion",
Self::FormatChange => "format_change",
Self::Comment => "comment",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"insertion" => Some(Self::Insertion),
"deletion" => Some(Self::Deletion),
"format_change" => Some(Self::FormatChange),
"comment" => Some(Self::Comment),
_ => None,
}
}
}
/// Best-effort document location for a revision.
///
/// `r#type` holds the discriminator; `index`, `row`, `col`, `table_index` and `name` are
/// optional location components.
/// NOTE(review): which components are populated for which anchor type is not visible in
/// this file — confirm against the core type.
#[wasm_bindgen]
#[derive(Clone, Default)]
pub struct WasmRevisionAnchor {
    pub(crate) r#type: String,
    pub(crate) index: Option<usize>,
    pub(crate) row: Option<usize>,
    pub(crate) col: Option<usize>,
    pub(crate) table_index: Option<usize>,
    pub(crate) name: Option<String>,
}

// JS-facing accessors: getters return an owned value (a clone for heap-backed fields, a
// plain copy for `Copy` fields) and setters overwrite the stored value.
#[wasm_bindgen]
impl WasmRevisionAnchor {
    /// Creates an instance with an empty `type` and every optional field unset.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmRevisionAnchor {
        Self::default()
    }

    /// Static factory equivalent to the constructor; delegates to the derived `Default` impl.
    #[wasm_bindgen]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> WasmRevisionAnchor {
        <Self as ::core::default::Default>::default()
    }

    #[wasm_bindgen(getter, js_name = "type")]
    pub fn r#type(&self) -> String {
        self.r#type.clone()
    }

    #[wasm_bindgen(setter, js_name = "type")]
    pub fn set_type(&mut self, value: String) {
        self.r#type = value;
    }

    #[wasm_bindgen(getter, js_name = "index")]
    pub fn index(&self) -> Option<usize> {
        // `Option<usize>` is `Copy`; no clone needed.
        self.index
    }

    #[wasm_bindgen(setter, js_name = "index")]
    pub fn set_index(&mut self, value: Option<usize>) {
        self.index = value;
    }

    #[wasm_bindgen(getter, js_name = "row")]
    pub fn row(&self) -> Option<usize> {
        self.row
    }

    #[wasm_bindgen(setter, js_name = "row")]
    pub fn set_row(&mut self, value: Option<usize>) {
        self.row = value;
    }

    #[wasm_bindgen(getter, js_name = "col")]
    pub fn col(&self) -> Option<usize> {
        self.col
    }

    #[wasm_bindgen(setter, js_name = "col")]
    pub fn set_col(&mut self, value: Option<usize>) {
        self.col = value;
    }

    #[wasm_bindgen(getter, js_name = "tableIndex")]
    pub fn table_index(&self) -> Option<usize> {
        self.table_index
    }

    #[wasm_bindgen(setter, js_name = "tableIndex")]
    pub fn set_table_index(&mut self, value: Option<usize>) {
        self.table_index = value;
    }

    #[wasm_bindgen(getter, js_name = "name")]
    pub fn name(&self) -> Option<String> {
        // `String` is heap-backed, so this one still needs a clone.
        self.name.clone()
    }

    #[wasm_bindgen(setter, js_name = "name")]
    pub fn set_name(&mut self, value: Option<String>) {
        self.name = value;
    }
}
/// Semantic classification of an extracted URI.
///
/// Every variant has an explicit discriminant and a wire string
/// (see `to_api_str` / `from_api_str`). `Hyperlink` is the `Default` value.
#[wasm_bindgen]
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum WasmUriKind {
/// Wire string `"hyperlink"`. This is the default variant.
Hyperlink = 0,
/// Wire string `"image"`.
Image = 1,
/// Wire string `"anchor"`.
Anchor = 2,
/// Wire string `"citation"`.
Citation = 3,
/// Wire string `"reference"`.
Reference = 4,
/// Wire string `"email"`.
Email = 5,
}
#[allow(clippy::derivable_impls)]
impl Default for WasmUriKind {
fn default() -> Self {
Self::Hyperlink
}
}
impl WasmUriKind {
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
pub fn to_api_str(self) -> &'static str {
match self {
Self::Hyperlink => "hyperlink",
Self::Image => "image",
Self::Anchor => "anchor",
Self::Citation => "citation",
Self::Reference => "reference",
Self::Email => "email",
}
}
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
pub fn from_api_str(s: &str) -> Option<Self> {
match s {
"hyperlink" => Some(Self::Hyperlink),
"image" => Some(Self::Image),
"anchor" => Some(Self::Anchor),
"citation" => Some(Self::Citation),
"reference" => Some(Self::Reference),
"email" => Some(Self::Email),
_ => None,
}
}
}
#[derive(serde::Deserialize, Default)]
#[serde(default)]
pub struct ExtractionConfigInput {
#[serde(rename = "useCache")]
pub use_cache: Option<bool>,
#[serde(rename = "enableQualityProcessing")]
pub enable_quality_processing: Option<bool>,
#[serde(rename = "ocr")]
pub ocr: Option<kreuzberg::OcrConfig>,
#[serde(rename = "forceOcr")]
pub force_ocr: Option<bool>,
#[serde(rename = "forceOcrPages")]
pub force_ocr_pages: Option<Vec<u32>>,
#[serde(rename = "disableOcr")]
pub disable_ocr: Option<bool>,
#[serde(rename = "chunking")]
pub chunking: Option<kreuzberg::ChunkingConfig>,
#[serde(rename = "contentFilter")]
pub content_filter: Option<kreuzberg::ContentFilterConfig>,
#[serde(rename = "images")]
pub images: Option<kreuzberg::ImageExtractionConfig>,
#[cfg(feature = "pdf")]
#[serde(skip)]
pub pdf_options: Option<kreuzberg::PdfConfig>,
#[serde(rename = "tokenReduction")]
pub token_reduction: Option<kreuzberg::TokenReductionOptions>,
#[serde(rename = "languageDetection")]
pub language_detection: Option<kreuzberg::LanguageDetectionConfig>,
#[serde(rename = "pages")]
pub pages: Option<kreuzberg::PageConfig>,
#[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
#[serde(skip)]
pub keywords: Option<kreuzberg::KeywordConfig>,
#[serde(rename = "postprocessor")]
pub postprocessor: Option<kreuzberg::PostProcessorConfig>,
#[cfg(feature = "html")]
#[serde(skip)]
pub html_options: Option<String>,
#[cfg(feature = "html")]
#[serde(skip)]
pub html_output: Option<kreuzberg::HtmlOutputConfig>,
#[serde(rename = "extractionTimeoutSecs")]
pub extraction_timeout_secs: Option<u64>,
#[serde(rename = "maxConcurrentExtractions")]
pub max_concurrent_extractions: Option<usize>,
#[serde(rename = "resultFormat")]
pub result_format: Option<kreuzberg::ResultFormat>,
#[serde(rename = "securityLimits")]
pub security_limits: Option<kreuzberg::SecurityLimits>,
#[serde(rename = "maxEmbeddedFileBytes")]
pub max_embedded_file_bytes: Option<u64>,
#[serde(rename = "outputFormat")]
pub output_format: Option<kreuzberg::OutputFormat>,
#[cfg(feature = "layout-types")]
#[serde(skip)]
pub layout: Option<kreuzberg::LayoutDetectionConfig>,
#[serde(rename = "useLayoutForMarkdown")]
pub use_layout_for_markdown: Option<bool>,
#[serde(rename = "includeDocumentStructure")]
pub include_document_structure: Option<bool>,
#[serde(rename = "acceleration")]
pub acceleration: Option<kreuzberg::AccelerationConfig>,
#[serde(rename = "cacheNamespace")]
pub cache_namespace: Option<String>,
#[serde(rename = "cacheTtlSecs")]
pub cache_ttl_secs: Option<u64>,
#[serde(rename = "email")]
pub email: Option<kreuzberg::EmailConfig>,
#[serde(rename = "concurrency")]
pub concurrency: Option<String>,
#[serde(rename = "maxArchiveDepth")]
pub max_archive_depth: Option<usize>,
#[cfg(feature = "tree-sitter")]
#[serde(skip)]
pub tree_sitter: Option<kreuzberg::TreeSitterConfig>,
#[serde(rename = "structuredExtraction")]
pub structured_extraction: Option<kreuzberg::StructuredExtractionConfig>,
#[serde(rename = "cancelToken")]
pub cancel_token: Option<String>,
}
impl From<ExtractionConfigInput> for kreuzberg::ExtractionConfig {
    /// Layers the supplied overrides on top of `ExtractionConfig::default()`.
    ///
    /// Only fields that are `Some` in `val` are written; everything else keeps the default.
    /// The three string-encoded fields (`html_options`, `concurrency`, `cancel_token`) are
    /// parsed as JSON, and a parse failure is not reported: the target field is set to its
    /// type's `Default` value instead.
    fn from(val: ExtractionConfigInput) -> Self {
        let mut config = Self::default();

        if let Some(use_cache) = val.use_cache {
            config.use_cache = use_cache.into();
        }
        if let Some(enable_quality_processing) = val.enable_quality_processing {
            config.enable_quality_processing = enable_quality_processing.into();
        }
        if let Some(ocr) = val.ocr {
            config.ocr = ocr.into();
        }
        if let Some(force_ocr) = val.force_ocr {
            config.force_ocr = force_ocr.into();
        }
        if let Some(force_ocr_pages) = val.force_ocr_pages {
            config.force_ocr_pages = force_ocr_pages.into();
        }
        if let Some(disable_ocr) = val.disable_ocr {
            config.disable_ocr = disable_ocr.into();
        }
        if let Some(chunking) = val.chunking {
            config.chunking = chunking.into();
        }
        if let Some(content_filter) = val.content_filter {
            config.content_filter = content_filter.into();
        }
        if let Some(images) = val.images {
            config.images = images.into();
        }
        #[cfg(feature = "pdf")]
        if let Some(pdf_options) = val.pdf_options {
            config.pdf_options = pdf_options.into();
        }
        if let Some(token_reduction) = val.token_reduction {
            config.token_reduction = token_reduction.into();
        }
        if let Some(language_detection) = val.language_detection {
            config.language_detection = language_detection.into();
        }
        if let Some(pages) = val.pages {
            config.pages = pages.into();
        }
        #[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
        if let Some(keywords) = val.keywords {
            config.keywords = keywords.into();
        }
        if let Some(postprocessor) = val.postprocessor {
            config.postprocessor = postprocessor.into();
        }
        #[cfg(feature = "html")]
        if let Some(raw_json) = val.html_options {
            // Malformed JSON silently yields the field type's default.
            config.html_options = serde_json::from_str(&raw_json).unwrap_or_default();
        }
        #[cfg(feature = "html")]
        if let Some(html_output) = val.html_output {
            config.html_output = html_output.into();
        }
        if let Some(extraction_timeout_secs) = val.extraction_timeout_secs {
            config.extraction_timeout_secs = extraction_timeout_secs.into();
        }
        if let Some(max_concurrent_extractions) = val.max_concurrent_extractions {
            config.max_concurrent_extractions = max_concurrent_extractions.into();
        }
        if let Some(result_format) = val.result_format {
            config.result_format = result_format.into();
        }
        if let Some(security_limits) = val.security_limits {
            config.security_limits = security_limits.into();
        }
        if let Some(max_embedded_file_bytes) = val.max_embedded_file_bytes {
            config.max_embedded_file_bytes = max_embedded_file_bytes.into();
        }
        if let Some(output_format) = val.output_format {
            config.output_format = output_format.into();
        }
        #[cfg(feature = "layout-types")]
        if let Some(layout) = val.layout {
            config.layout = layout.into();
        }
        if let Some(use_layout_for_markdown) = val.use_layout_for_markdown {
            config.use_layout_for_markdown = use_layout_for_markdown.into();
        }
        if let Some(include_document_structure) = val.include_document_structure {
            config.include_document_structure = include_document_structure.into();
        }
        if let Some(acceleration) = val.acceleration {
            config.acceleration = acceleration.into();
        }
        if let Some(cache_namespace) = val.cache_namespace {
            config.cache_namespace = cache_namespace.into();
        }
        if let Some(cache_ttl_secs) = val.cache_ttl_secs {
            config.cache_ttl_secs = cache_ttl_secs.into();
        }
        if let Some(email) = val.email {
            config.email = email.into();
        }
        if let Some(raw_json) = val.concurrency {
            // Malformed JSON silently yields the field type's default.
            config.concurrency = serde_json::from_str(&raw_json).unwrap_or_default();
        }
        if let Some(max_archive_depth) = val.max_archive_depth {
            config.max_archive_depth = max_archive_depth.into();
        }
        #[cfg(feature = "tree-sitter")]
        if let Some(tree_sitter) = val.tree_sitter {
            config.tree_sitter = tree_sitter.into();
        }
        if let Some(structured_extraction) = val.structured_extraction {
            config.structured_extraction = structured_extraction.into();
        }
        if let Some(raw_json) = val.cancel_token {
            // Malformed JSON silently yields the field type's default.
            config.cancel_token = serde_json::from_str(&raw_json).unwrap_or_default();
        }

        config
    }
}
/// Extract content from a byte array.
///
/// This is the main entry point for in-memory extraction. The work is
/// delegated to `kreuzberg::extract_bytes`, which is documented to perform
/// the following steps:
/// 1. Validate MIME type
/// 2. Handle legacy format conversion if needed
/// 3. Select appropriate extractor from registry
/// 4. Extract content
/// 5. Run post-processing pipeline
///
/// # Arguments
///
/// * `content` - The byte array to extract
/// * `mime_type` - MIME type of the content
/// * `config` - Extraction configuration as a JS value. `undefined` or `null`
///   selects `ExtractionConfig::default()`; anything else is deserialized
///   with `serde_wasm_bindgen`.
///
/// # Returns
///
/// An `ExtractionResult` containing the extracted content and metadata.
///
/// # Errors
///
/// Every failure is returned to JS as a string holding the error's display
/// text. This covers a `config` value that cannot be deserialized as well as
/// errors from the core, e.g. `KreuzbergError.Validation` if the MIME type is
/// invalid or `KreuzbergError.UnsupportedFormat` if it is not supported.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "extractBytes")]
pub async fn extract_bytes(
    content: Vec<u8>,
    mime_type: String,
    config: JsValue,
) -> Result<WasmExtractionResult, JsValue> {
    // JS callers commonly pass `null` for "no options"; treat it like
    // `undefined` instead of letting the deserializer reject it.
    let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() || config.is_null() {
        kreuzberg::ExtractionConfig::default()
    } else {
        serde_wasm_bindgen::from_value::<kreuzberg::ExtractionConfig>(config)
            .map_err(|e| JsValue::from_str(&e.to_string()))?
    };
    let result = kreuzberg::extract_bytes(&content, &mime_type, &config_core)
        .await
        .map_err(|e| JsValue::from_str(&e.to_string()))?;
    Ok(WasmExtractionResult::from(result))
}
/// Extract content from a file.
///
/// This is the main entry point for file-based extraction. The work is
/// delegated to `kreuzberg::extract_file`, which is documented to perform
/// the following steps:
/// 1. Check cache for existing result (if caching enabled)
/// 2. Detect or validate MIME type
/// 3. Select appropriate extractor from registry
/// 4. Extract content
/// 5. Run post-processing pipeline
/// 6. Store result in cache (if caching enabled)
///
/// # Arguments
///
/// * `path` - Path to the file to extract
/// * `mime_type` - Optional MIME type override. If undefined, will be auto-detected
/// * `config` - Extraction configuration as a JS value. `undefined` or `null`
///   selects `ExtractionConfig::default()`; anything else is deserialized
///   with `serde_wasm_bindgen`.
///
/// # Returns
///
/// An `ExtractionResult` containing the extracted content and metadata.
///
/// # Errors
///
/// Every failure is returned to JS as a string holding the error's display
/// text. This covers a `config` value that cannot be deserialized as well as
/// errors from the core, e.g. `KreuzbergError.Io` if the file doesn't exist
/// (NotFound) or for other file I/O errors, and
/// `KreuzbergError.UnsupportedFormat` if the MIME type is not supported.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "extractFile")]
pub async fn extract_file(
    path: String,
    mime_type: Option<String>,
    config: JsValue,
) -> Result<WasmExtractionResult, JsValue> {
    // JS callers commonly pass `null` for "no options"; treat it like
    // `undefined` instead of letting the deserializer reject it.
    let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() || config.is_null() {
        kreuzberg::ExtractionConfig::default()
    } else {
        serde_wasm_bindgen::from_value::<kreuzberg::ExtractionConfig>(config)
            .map_err(|e| JsValue::from_str(&e.to_string()))?
    };
    let result = kreuzberg::extract_file(std::path::PathBuf::from(path), mime_type.as_deref(), &config_core)
        .await
        .map_err(|e| JsValue::from_str(&e.to_string()))?;
    Ok(WasmExtractionResult::from(result))
}
/// Detect MIME type from raw file bytes.
///
/// Thin wrapper over `kreuzberg::detect_mime_type_from_bytes`. The core is
/// documented to use magic byte signatures, fall back to the `infer` crate,
/// and inspect ZIP-based files to distinguish Office Open XML formats
/// (DOCX, XLSX, PPTX) from plain ZIP archives.
///
/// # Arguments
///
/// * `content` - Raw file bytes
///
/// # Returns
///
/// The detected MIME type string.
///
/// # Errors
///
/// A core error (e.g. `KreuzbergError.UnsupportedFormat` if the MIME type
/// cannot be determined) is returned to JS as a string holding its display
/// text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "detectMimeTypeFromBytes")]
pub fn detect_mime_type_from_bytes(content: Vec<u8>) -> Result<String, JsValue> {
    match kreuzberg::detect_mime_type_from_bytes(&content) {
        Ok(mime) => Ok(mime),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}
/// Get file extensions for a given MIME type.
///
/// Thin wrapper over `kreuzberg::get_extensions_for_mime`, documented to
/// return all known file extensions that map to the specified MIME type.
///
/// # Arguments
///
/// * `mime_type` - The MIME type to look up
///
/// # Returns
///
/// A vector of file extensions (without leading dot) for the MIME type.
///
/// # Errors
///
/// A core error is returned to JS as a string holding its display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "getExtensionsForMime")]
pub fn get_extensions_for_mime(mime_type: String) -> Result<Vec<String>, JsValue> {
    let lookup = kreuzberg::get_extensions_for_mime(&mime_type);
    lookup.map_err(|err| JsValue::from_str(&err.to_string()))
}
/// List the names of all registered embedding backends.
///
/// Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language
/// bindings. Forwards to `kreuzberg::list_embedding_backends`; a failure is
/// returned to JS as a string holding the error's display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listEmbeddingBackends")]
pub fn list_embedding_backends() -> Result<Vec<String>, JsValue> {
    match kreuzberg::list_embedding_backends() {
        Ok(names) => Ok(names),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}
/// List names of all registered document extractors.
///
/// Forwards to `kreuzberg::list_document_extractors`; a failure is returned
/// to JS as a string holding the error's display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listDocumentExtractors")]
pub fn list_document_extractors() -> Result<Vec<String>, JsValue> {
    kreuzberg::list_document_extractors().map_err(|err| JsValue::from_str(&err.to_string()))
}
/// List all registered OCR backends.
///
/// Forwards to `kreuzberg::list_ocr_backends`, documented to return the names
/// of all OCR backends currently registered in the global registry.
///
/// # Returns
///
/// A vector of OCR backend names.
///
/// # Errors
///
/// A core error is returned to JS as a string holding its display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listOcrBackends")]
pub fn list_ocr_backends() -> Result<Vec<String>, JsValue> {
    let listing = kreuzberg::list_ocr_backends();
    listing.map_err(|err| JsValue::from_str(&err.to_string()))
}
/// List all registered post-processor names.
///
/// Forwards to `kreuzberg::list_post_processors`, documented to return the
/// names of all post-processors currently registered in the global registry.
///
/// # Returns
///
/// - `Ok(Vec<String>)` - Vector of post-processor names
/// - `Err(...)` - the core error's display text as a JS string (the core
///   documents a poisoned registry lock as the failure case)
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listPostProcessors")]
pub fn list_post_processors() -> Result<Vec<String>, JsValue> {
    match kreuzberg::list_post_processors() {
        Ok(names) => Ok(names),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}
/// List names of all registered renderers.
///
/// Forwards to `kreuzberg::list_renderers`.
///
/// # Errors
///
/// A core error (documented as: the registry lock is poisoned) is returned
/// to JS as a string holding its display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listRenderers")]
pub fn list_renderers() -> Result<Vec<String>, JsValue> {
    kreuzberg::list_renderers().map_err(|err| JsValue::from_str(&err.to_string()))
}
/// List names of all registered validators.
///
/// Forwards to `kreuzberg::list_validators`; a failure is returned to JS as
/// a string holding the error's display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "listValidators")]
pub fn list_validators() -> Result<Vec<String>, JsValue> {
    let listing = kreuzberg::list_validators();
    listing.map_err(|err| JsValue::from_str(&err.to_string()))
}
/// Detect the MIME type of a file at the given path.
///
/// Forwards to `kreuzberg::detect_mime_type`, documented to use the file
/// extension and optionally the file content to determine the MIME type.
/// Set `check_exists` to `true` to verify the file exists before detection.
///
/// # Errors
///
/// A core error is returned to JS as a string holding its display text.
#[allow(clippy::missing_errors_doc)]
#[wasm_bindgen(js_name = "detectMimeType")]
pub fn detect_mime_type(path: String, check_exists: bool) -> Result<String, JsValue> {
    match kreuzberg::detect_mime_type(path, check_exists) {
        Ok(mime) => Ok(mime),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_ocrbackend {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `OcrBackend` trait.
///
/// Holds the JS object whose members are looked up by name and invoked
/// through `js_sys::Reflect` each time a trait method is called.
pub struct WasmOcrBackendBridge {
// The wrapped JS object; used as both lookup target and `this` for calls.
inner: wasm_bindgen::JsValue,
// Plugin name resolved once in `new` ("wasm_bridge" when the JS object
// yields no string name) and returned by `Plugin::name`.
cached_name: String,
}
/// Debug output is the bare type name; the wrapped JS value is not printed.
impl std::fmt::Debug for WasmOcrBackendBridge {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("WasmOcrBackendBridge")
    }
}
impl WasmOcrBackendBridge {
    /// Create a new bridge wrapping a JS object.
    ///
    /// Checks that the JS object has the `processImage`, `supportsLanguage`
    /// and `backendType` properties (presence only; whether they are
    /// callable is checked when each is invoked). The plugin name is
    /// obtained by calling the object's `name` member with no arguments; if
    /// that does not produce a string, `"wasm_bridge"` is used.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first missing method (reported in its
    /// Rust snake_case spelling).
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        // (JS property probed, name reported in the error message)
        const REQUIRED: [(&str, &str); 3] = [
            ("processImage", "process_image"),
            ("supportsLanguage", "supports_language"),
            ("backendType", "backend_type"),
        ];
        for &(property, reported) in REQUIRED.iter() {
            let present =
                js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str(property)).unwrap_or(false);
            if !present {
                return Err(format!("JS object missing required method: {}", reported));
            }
        }
        let resolve_name = || -> Option<String> {
            let property = wasm_bindgen::JsValue::from_str("name");
            let member = js_sys::Reflect::get(&js_obj, &property).ok()?;
            let callable = member.dyn_into::<js_sys::Function>().ok()?;
            callable.apply(&js_obj, &js_sys::Array::new()).ok()?.as_string()
        };
        let cached_name = resolve_name().unwrap_or_else(|| "wasm_bridge".to_string());
        Ok(Self {
            inner: js_obj,
            cached_name,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmOcrBackendBridge {
    /// Name resolved from the JS object when the bridge was constructed.
    fn name(&self) -> &str {
        self.cached_name.as_str()
    }

    /// Calls the JS object's `version` member with no arguments.
    ///
    /// Returns an empty string when the member is absent, is not a function,
    /// throws, or does not return a string.
    fn version(&self) -> String {
        let property = wasm_bindgen::JsValue::from_str("version");
        if !matches!(js_sys::Reflect::has(&self.inner, &property), Ok(true)) {
            return String::new();
        }
        js_sys::Reflect::get(&self.inner, &property)
            .ok()
            .and_then(|member| member.dyn_into::<js_sys::Function>().ok())
            .and_then(|callable| callable.apply(&self.inner, &js_sys::Array::new()).ok())
            .and_then(|returned| returned.as_string())
            .unwrap_or_default()
    }

    /// Calls the JS object's `initialize` member with no arguments.
    ///
    /// The value returned by JS is discarded (a returned Promise is not
    /// awaited).
    ///
    /// # Errors
    ///
    /// `KreuzbergError::Other` when the member is absent, cannot be read, is
    /// not a function, or throws.
    // NOTE(review): `initialize` is not among the properties checked by
    // `new`, so an object without it fails here — confirm this is intended.
    fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
        const METHOD: &str = "initialize";
        let property = wasm_bindgen::JsValue::from_str(METHOD);
        if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(format!(
                "Method '{}' not found on JS object",
                METHOD
            )));
        }
        let callable = match js_sys::Reflect::get(&self.inner, &property) {
            Ok(member) => match member.dyn_into::<js_sys::Function>() {
                Ok(function) => function,
                Err(_) => {
                    return Err(kreuzberg::KreuzbergError::Other(format!(
                        "Method '{}' is not a function",
                        METHOD
                    )));
                }
            },
            Err(_) => {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Failed to get method '{}'",
                    METHOD
                )));
            }
        };
        match callable.apply(&self.inner, &js_sys::Array::new()) {
            Ok(_) => Ok(()),
            Err(_) => Err(kreuzberg::KreuzbergError::Other(format!(
                "Failed to call method '{}'",
                METHOD
            ))),
        }
    }

    /// Calls the JS object's `shutdown` member with no arguments.
    ///
    /// The value returned by JS is discarded (a returned Promise is not
    /// awaited).
    ///
    /// # Errors
    ///
    /// `KreuzbergError::Other` when the member is absent, cannot be read, is
    /// not a function, or throws.
    fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
        const METHOD: &str = "shutdown";
        let property = wasm_bindgen::JsValue::from_str(METHOD);
        if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(format!(
                "Method '{}' not found on JS object",
                METHOD
            )));
        }
        let callable = match js_sys::Reflect::get(&self.inner, &property) {
            Ok(member) => match member.dyn_into::<js_sys::Function>() {
                Ok(function) => function,
                Err(_) => {
                    return Err(kreuzberg::KreuzbergError::Other(format!(
                        "Method '{}' is not a function",
                        METHOD
                    )));
                }
            },
            Err(_) => {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Failed to get method '{}'",
                    METHOD
                )));
            }
        };
        match callable.apply(&self.inner, &js_sys::Array::new()) {
            Ok(_) => Ok(()),
            Err(_) => Err(kreuzberg::KreuzbergError::Other(format!(
                "Failed to call method '{}'",
                METHOD
            ))),
        }
    }
}
#[async_trait::async_trait(?Send)]
impl kreuzberg::OcrBackend for WasmOcrBackendBridge {
/// Runs OCR by calling the JS object's `processImage(bytes, config)`.
///
/// `bytes` is a `Uint8Array` copy of `image_bytes`; `config` is the
/// `serde_wasm_bindgen` form of the OCR config, or `null` if it cannot
/// be serialized. The JS method must return a Promise that resolves to a
/// JSON string describing an `ExtractionResult`.
///
/// # Errors
///
/// `KreuzbergError::Other` when the method is absent, unreadable, not a
/// function, throws, does not return a Promise, the Promise rejects, or
/// the resolved value is not a string / not valid `ExtractionResult` JSON.
async fn process_image(
    &self,
    image_bytes: &[u8],
    config: &kreuzberg::OcrConfig,
) -> std::result::Result<kreuzberg::ExtractionResult, kreuzberg::KreuzbergError> {
    const METHOD: &str = "process_image";
    let property = wasm_bindgen::JsValue::from_str("processImage");
    if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
        return Err(kreuzberg::KreuzbergError::Other(format!(
            "Method '{}' not found on JS object",
            METHOD
        )));
    }
    let callable = js_sys::Reflect::get(&self.inner, &property)
        .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", METHOD)))?
        .dyn_into::<js_sys::Function>()
        .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", METHOD)))?;

    let call_args = js_sys::Array::new();
    let js_bytes: wasm_bindgen::JsValue = js_sys::Uint8Array::from(image_bytes).into();
    call_args.push(&js_bytes);
    let js_config = serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL);
    call_args.push(&js_config);

    let pending = callable
        .apply(&self.inner, &call_args)
        .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", METHOD)))?
        .dyn_into::<js_sys::Promise>()
        .map_err(|_| {
            kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", METHOD))
        })?;
    let resolved = wasm_bindgen_futures::JsFuture::from(pending)
        .await
        .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;

    // The resolved value is expected to be a JSON string, not a JS object.
    match resolved.as_string() {
        Some(json) => serde_json::from_str::<kreuzberg::ExtractionResult>(&json)
            .map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))),
        None => Err(kreuzberg::KreuzbergError::Other(
            "Failed to convert result".to_string(),
        )),
    }
}
/// Asks the JS object's `supportsLanguage(lang)` whether `lang` is supported.
///
/// Returns `false` when the member is absent, is not a function, throws,
/// or returns something other than a JS boolean.
fn supports_language(&self, lang: &str) -> bool {
    let property = wasm_bindgen::JsValue::from_str("supportsLanguage");
    if !matches!(js_sys::Reflect::has(&self.inner, &property), Ok(true)) {
        return false;
    }
    let callable = match js_sys::Reflect::get(&self.inner, &property)
        .ok()
        .and_then(|member| member.dyn_into::<js_sys::Function>().ok())
    {
        Some(function) => function,
        None => return false,
    };
    let call_args = js_sys::Array::new();
    call_args.push(&wasm_bindgen::JsValue::from_str(lang));
    callable
        .apply(&self.inner, &call_args)
        .ok()
        .and_then(|answer| answer.as_bool())
        .unwrap_or(false)
}
fn backend_type(&self) -> kreuzberg::OcrBackendType {
let key = wasm_bindgen::JsValue::from_str("backendType");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
// Convert bare enum string (non-JSON) to kreuzberg::OcrBackendType
result
.as_string()
.and_then(|s| {
serde_json::from_str::<kreuzberg::OcrBackendType>(&format!("\"{}\"", s))
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
})
.unwrap_or_default()
}
}
/// Registers a JS object as an OCR backend.
///
/// The object must expose `processImage`, `supportsLanguage` and
/// `backendType`. It is wrapped in a `WasmOcrBackendBridge` and handed to
/// the registry returned by `get_ocr_backend_registry`.
///
/// # Errors
///
/// Returns a JS string naming the first missing method, or the display text
/// of the bridge/registry error.
#[wasm_bindgen(js_name = "registerOcrBackend")]
pub fn register_ocr_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let absent = ["processImage", "supportsLanguage", "backendType"]
        .iter()
        .copied()
        .find(|property| {
            !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(property)).unwrap_or(false)
        });
    if let Some(method_name) = absent {
        return Err(wasm_bindgen::JsValue::from_str(&format!(
            "Backend missing required method: {}",
            method_name
        )));
    }
    let bridge = match WasmOcrBackendBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let plugin: std::sync::Arc<dyn kreuzberg::OcrBackend> = std::sync::Arc::new(bridge);
    let shared_registry = kreuzberg::plugins::registry::get_ocr_backend_registry();
    let mut guard = shared_registry.write();
    guard
        .register(plugin)
        .map_err(|err| wasm_bindgen::JsValue::from_str(&err.to_string()))
}
/// Removes the OCR backend registered under `name`.
///
/// Forwards to `kreuzberg::plugins::ocr_backend::unregister_ocr_backend`; a
/// failure is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "unregisterOcrBackend")]
pub fn unregister_ocr_backend(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::ocr_backend::unregister_ocr_backend(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// Removes all registered OCR backends.
///
/// Forwards to `kreuzberg::plugins::ocr_backend::clear_ocr_backends`; a
/// failure is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "clearOcrBackends")]
pub fn clear_ocr_backends() -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::ocr_backend::clear_ocr_backends() {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_ocrbackend::*;
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_postprocessor {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `PostProcessor` trait.
///
/// Holds the JS object whose members are looked up by name and invoked
/// through `js_sys::Reflect` each time a trait method is called.
pub struct WasmPostProcessorBridge {
// The wrapped JS object; used as both lookup target and `this` for calls.
inner: wasm_bindgen::JsValue,
// Plugin name resolved once in `new` ("wasm_bridge" when the JS object
// yields no string name) and returned by `Plugin::name`.
cached_name: String,
}
/// Debug output is the bare type name; the wrapped JS value is not printed.
impl std::fmt::Debug for WasmPostProcessorBridge {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("WasmPostProcessorBridge")
    }
}
impl WasmPostProcessorBridge {
    /// Create a new bridge wrapping a JS object.
    ///
    /// Checks that the JS object has the `process` and `processingStage`
    /// properties (presence only; whether they are callable is checked when
    /// each is invoked). The plugin name is obtained by calling the object's
    /// `name` member with no arguments; if that does not produce a string,
    /// `"wasm_bridge"` is used.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first missing method (reported in its
    /// Rust snake_case spelling).
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        // (JS property probed, name reported in the error message)
        const REQUIRED: [(&str, &str); 2] = [("process", "process"), ("processingStage", "processing_stage")];
        for &(property, reported) in REQUIRED.iter() {
            let present =
                js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str(property)).unwrap_or(false);
            if !present {
                return Err(format!("JS object missing required method: {}", reported));
            }
        }
        let resolve_name = || -> Option<String> {
            let property = wasm_bindgen::JsValue::from_str("name");
            let member = js_sys::Reflect::get(&js_obj, &property).ok()?;
            let callable = member.dyn_into::<js_sys::Function>().ok()?;
            callable.apply(&js_obj, &js_sys::Array::new()).ok()?.as_string()
        };
        let cached_name = resolve_name().unwrap_or_else(|| "wasm_bridge".to_string());
        Ok(Self {
            inner: js_obj,
            cached_name,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmPostProcessorBridge {
fn name(&self) -> &str {
&self.cached_name
}
fn version(&self) -> String {
let key = wasm_bindgen::JsValue::from_str("version");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
result.as_string().unwrap_or_default()
}
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("initialize");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"initialize"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
})?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
// Convert result
Ok(())
}
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("shutdown");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"shutdown"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
// Convert result
Ok(())
}
}
#[async_trait::async_trait(?Send)]
impl kreuzberg::PostProcessor for WasmPostProcessorBridge {
async fn process(
&self,
result: &mut kreuzberg::ExtractionResult,
config: &kreuzberg::ExtractionConfig,
) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("process");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"process"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "process")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "process")))?;
let args = js_sys::Array::new();
args.push(&serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL));
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
let promise_val = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "process")))?;
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "process"))
})?;
let result = wasm_bindgen_futures::JsFuture::from(promise)
.await
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
// Convert result
Ok(())
}
fn processing_stage(&self) -> kreuzberg::ProcessingStage {
let key = wasm_bindgen::JsValue::from_str("processingStage");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
// Convert bare enum string (non-JSON) to kreuzberg::ProcessingStage
result
.as_string()
.and_then(|s| {
serde_json::from_str::<kreuzberg::ProcessingStage>(&format!("\"{}\"", s))
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
})
.unwrap_or_default()
}
}
/// Registers a JS object as a post-processor.
///
/// The object must expose `process` and `processingStage`. It is wrapped in
/// a `WasmPostProcessorBridge` and handed to the registry returned by
/// `get_post_processor_registry`.
///
/// # Errors
///
/// Returns a JS string naming the first missing method, or the display text
/// of the bridge/registry error.
#[wasm_bindgen(js_name = "registerPostProcessor")]
pub fn register_post_processor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let absent = ["process", "processingStage"].iter().copied().find(|property| {
        !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(property)).unwrap_or(false)
    });
    if let Some(method_name) = absent {
        return Err(wasm_bindgen::JsValue::from_str(&format!(
            "Backend missing required method: {}",
            method_name
        )));
    }
    let bridge = match WasmPostProcessorBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let plugin: std::sync::Arc<dyn kreuzberg::PostProcessor> = std::sync::Arc::new(bridge);
    let shared_registry = kreuzberg::plugins::registry::get_post_processor_registry();
    let mut guard = shared_registry.write();
    guard
        .register(plugin)
        .map_err(|err| wasm_bindgen::JsValue::from_str(&err.to_string()))
}
/// Removes the post-processor registered under `name`.
///
/// Forwards to `kreuzberg::plugins::post_processor::unregister_post_processor`;
/// a failure is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "unregisterPostProcessor")]
pub fn unregister_post_processor(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::post_processor::unregister_post_processor(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// Removes all registered post-processors.
///
/// Forwards to `kreuzberg::plugins::post_processor::clear_post_processors`;
/// a failure is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "clearPostProcessors")]
pub fn clear_post_processors() -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::post_processor::clear_post_processors() {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_postprocessor::*;
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_validator {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `Validator` trait.
///
/// Holds the JS object whose members are looked up by name and invoked
/// through `js_sys::Reflect` each time a trait method is called.
pub struct WasmValidatorBridge {
// The wrapped JS object; used as both lookup target and `this` for calls.
inner: wasm_bindgen::JsValue,
// Plugin name resolved once in `new` ("wasm_bridge" when the JS object
// yields no string name) and returned by `Plugin::name`.
cached_name: String,
}
/// Debug output is the bare type name; the wrapped JS value is not printed.
impl std::fmt::Debug for WasmValidatorBridge {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("WasmValidatorBridge")
    }
}
impl WasmValidatorBridge {
    /// Create a new bridge wrapping a JS object.
    ///
    /// Checks that the JS object has a `validate` property (presence only;
    /// whether it is callable is checked when it is invoked). The plugin
    /// name is obtained by calling the object's `name` member with no
    /// arguments; if that does not produce a string, `"wasm_bridge"` is used.
    ///
    /// # Errors
    ///
    /// Returns a message when `validate` is missing.
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        let validate_key = wasm_bindgen::JsValue::from_str("validate");
        if !matches!(js_sys::Reflect::has(&js_obj, &validate_key), Ok(true)) {
            return Err(format!("JS object missing required method: {}", "validate"));
        }
        let resolve_name = || -> Option<String> {
            let property = wasm_bindgen::JsValue::from_str("name");
            let member = js_sys::Reflect::get(&js_obj, &property).ok()?;
            let callable = member.dyn_into::<js_sys::Function>().ok()?;
            callable.apply(&js_obj, &js_sys::Array::new()).ok()?.as_string()
        };
        let cached_name = resolve_name().unwrap_or_else(|| "wasm_bridge".to_string());
        Ok(Self {
            inner: js_obj,
            cached_name,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmValidatorBridge {
    /// Name resolved from the JS object when the bridge was constructed.
    fn name(&self) -> &str {
        self.cached_name.as_str()
    }

    /// Calls the JS object's `version` member with no arguments.
    ///
    /// Returns an empty string when the member is absent, is not a function,
    /// throws, or does not return a string.
    fn version(&self) -> String {
        let property = wasm_bindgen::JsValue::from_str("version");
        if !matches!(js_sys::Reflect::has(&self.inner, &property), Ok(true)) {
            return String::new();
        }
        js_sys::Reflect::get(&self.inner, &property)
            .ok()
            .and_then(|member| member.dyn_into::<js_sys::Function>().ok())
            .and_then(|callable| callable.apply(&self.inner, &js_sys::Array::new()).ok())
            .and_then(|returned| returned.as_string())
            .unwrap_or_default()
    }

    /// Calls the JS object's `initialize` member with no arguments.
    ///
    /// The value returned by JS is discarded (a returned Promise is not
    /// awaited).
    ///
    /// # Errors
    ///
    /// `KreuzbergError::Other` when the member is absent, cannot be read, is
    /// not a function, or throws.
    // NOTE(review): `initialize` is not among the properties checked by
    // `new`, so an object without it fails here — confirm this is intended.
    fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
        const METHOD: &str = "initialize";
        let property = wasm_bindgen::JsValue::from_str(METHOD);
        if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(format!(
                "Method '{}' not found on JS object",
                METHOD
            )));
        }
        let callable = match js_sys::Reflect::get(&self.inner, &property) {
            Ok(member) => match member.dyn_into::<js_sys::Function>() {
                Ok(function) => function,
                Err(_) => {
                    return Err(kreuzberg::KreuzbergError::Other(format!(
                        "Method '{}' is not a function",
                        METHOD
                    )));
                }
            },
            Err(_) => {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Failed to get method '{}'",
                    METHOD
                )));
            }
        };
        match callable.apply(&self.inner, &js_sys::Array::new()) {
            Ok(_) => Ok(()),
            Err(_) => Err(kreuzberg::KreuzbergError::Other(format!(
                "Failed to call method '{}'",
                METHOD
            ))),
        }
    }

    /// Calls the JS object's `shutdown` member with no arguments.
    ///
    /// The value returned by JS is discarded (a returned Promise is not
    /// awaited).
    ///
    /// # Errors
    ///
    /// `KreuzbergError::Other` when the member is absent, cannot be read, is
    /// not a function, or throws.
    fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
        const METHOD: &str = "shutdown";
        let property = wasm_bindgen::JsValue::from_str(METHOD);
        if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(format!(
                "Method '{}' not found on JS object",
                METHOD
            )));
        }
        let callable = match js_sys::Reflect::get(&self.inner, &property) {
            Ok(member) => match member.dyn_into::<js_sys::Function>() {
                Ok(function) => function,
                Err(_) => {
                    return Err(kreuzberg::KreuzbergError::Other(format!(
                        "Method '{}' is not a function",
                        METHOD
                    )));
                }
            },
            Err(_) => {
                return Err(kreuzberg::KreuzbergError::Other(format!(
                    "Failed to get method '{}'",
                    METHOD
                )));
            }
        };
        match callable.apply(&self.inner, &js_sys::Array::new()) {
            Ok(_) => Ok(()),
            Err(_) => Err(kreuzberg::KreuzbergError::Other(format!(
                "Failed to call method '{}'",
                METHOD
            ))),
        }
    }
}
#[async_trait::async_trait(?Send)]
impl kreuzberg::Validator for WasmValidatorBridge {
    /// Runs the JS validator by calling the object's
    /// `validate(result, config)` and awaiting the returned Promise.
    ///
    /// Both arguments are passed in their `serde_wasm_bindgen` form (`null`
    /// if serialization fails). The value the Promise resolves to is
    /// ignored; only rejection signals failure.
    ///
    /// # Errors
    ///
    /// `KreuzbergError::Other` when the method is absent, unreadable, not a
    /// function, throws, does not return a Promise, or the Promise rejects.
    async fn validate(
        &self,
        result: &kreuzberg::ExtractionResult,
        config: &kreuzberg::ExtractionConfig,
    ) -> std::result::Result<(), kreuzberg::KreuzbergError> {
        const METHOD: &str = "validate";
        let property = wasm_bindgen::JsValue::from_str(METHOD);
        if !js_sys::Reflect::has(&self.inner, &property).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(format!(
                "Method '{}' not found on JS object",
                METHOD
            )));
        }
        let callable = js_sys::Reflect::get(&self.inner, &property)
            .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", METHOD)))?
            .dyn_into::<js_sys::Function>()
            .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", METHOD)))?;

        let call_args = js_sys::Array::new();
        let js_result = serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL);
        call_args.push(&js_result);
        let js_config = serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL);
        call_args.push(&js_config);

        let pending = callable
            .apply(&self.inner, &call_args)
            .map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", METHOD)))?
            .dyn_into::<js_sys::Promise>()
            .map_err(|_| {
                kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", METHOD))
            })?;
        match wasm_bindgen_futures::JsFuture::from(pending).await {
            Ok(_) => Ok(()),
            Err(e) => Err(kreuzberg::KreuzbergError::Other(format!(
                "Promise rejected: {:?}",
                e
            ))),
        }
    }
}
/// Registers a JS object as a validator.
///
/// The object must expose `validate`. It is wrapped in a
/// `WasmValidatorBridge` and handed to the registry returned by
/// `get_validator_registry`.
///
/// # Errors
///
/// Returns a JS string naming the missing method, or the display text of the
/// bridge/registry error.
#[wasm_bindgen(js_name = "registerValidator")]
pub fn register_validator(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let absent = ["validate"].iter().copied().find(|property| {
        !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(property)).unwrap_or(false)
    });
    if let Some(method_name) = absent {
        return Err(wasm_bindgen::JsValue::from_str(&format!(
            "Backend missing required method: {}",
            method_name
        )));
    }
    let bridge = match WasmValidatorBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let plugin: std::sync::Arc<dyn kreuzberg::Validator> = std::sync::Arc::new(bridge);
    let shared_registry = kreuzberg::plugins::registry::get_validator_registry();
    let mut guard = shared_registry.write();
    guard
        .register(plugin)
        .map_err(|err| wasm_bindgen::JsValue::from_str(&err.to_string()))
}
/// Removes the validator registered under `name`.
///
/// Forwards to `kreuzberg::plugins::validator::unregister_validator`; a
/// failure is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "unregisterValidator")]
pub fn unregister_validator(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::validator::unregister_validator(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// Removes all registered validators.
///
/// Forwards to `kreuzberg::plugins::validator::clear_validators`; a failure
/// is returned to JS as a string holding the error's display text.
#[wasm_bindgen(js_name = "clearValidators")]
pub fn clear_validators() -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::validator::clear_validators() {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_validator::*;
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_embeddingbackend {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `EmbeddingBackend` trait.
///
/// Holds the JS object whose members are looked up by name and invoked
/// through `js_sys::Reflect` when trait methods are called.
pub struct WasmEmbeddingBackendBridge {
// The wrapped JS object; used as both lookup target and `this` for calls.
inner: wasm_bindgen::JsValue,
// Plugin name resolved once in `new` ("wasm_bridge" when the JS object
// yields no string name) and returned by `Plugin::name`.
cached_name: String,
}
/// Debug output is the bare type name; the wrapped JS value is not printed.
impl std::fmt::Debug for WasmEmbeddingBackendBridge {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("WasmEmbeddingBackendBridge")
    }
}
impl WasmEmbeddingBackendBridge {
    /// Create a new bridge wrapping a JS object.
    ///
    /// Checks that the JS object has the `dimensions` and `embed` properties
    /// (presence only; this constructor does not check that they are
    /// callable). The plugin name is obtained by calling the object's `name`
    /// member with no arguments; if that does not produce a string,
    /// `"wasm_bridge"` is used.
    ///
    /// # Errors
    ///
    /// Returns a message naming the first missing method.
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        const REQUIRED: [&str; 2] = ["dimensions", "embed"];
        for &method in REQUIRED.iter() {
            let present =
                js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str(method)).unwrap_or(false);
            if !present {
                return Err(format!("JS object missing required method: {}", method));
            }
        }
        let resolve_name = || -> Option<String> {
            let property = wasm_bindgen::JsValue::from_str("name");
            let member = js_sys::Reflect::get(&js_obj, &property).ok()?;
            let callable = member.dyn_into::<js_sys::Function>().ok()?;
            callable.apply(&js_obj, &js_sys::Array::new()).ok()?.as_string()
        };
        let cached_name = resolve_name().unwrap_or_else(|| "wasm_bridge".to_string());
        Ok(Self {
            inner: js_obj,
            cached_name,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmEmbeddingBackendBridge {
fn name(&self) -> &str {
&self.cached_name
}
fn version(&self) -> String {
let key = wasm_bindgen::JsValue::from_str("version");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
result.as_string().unwrap_or_default()
}
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("initialize");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"initialize"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
})?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
// Convert result
Ok(())
}
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("shutdown");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"shutdown"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
// Convert result
Ok(())
}
}
#[async_trait::async_trait(?Send)]
impl kreuzberg::EmbeddingBackend for WasmEmbeddingBackendBridge {
fn dimensions(&self) -> usize {
let key = wasm_bindgen::JsValue::from_str("dimensions");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
// Convert JS result to usize
result
.as_string()
.and_then(|s| {
serde_json::from_str::<usize>(&s)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
})
.unwrap_or_default()
}
async fn embed(&self, texts: Vec<String>) -> std::result::Result<Vec<Vec<f32>>, kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("embed");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"embed"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "embed")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "embed")))?;
let args = js_sys::Array::new();
args.push(&serde_wasm_bindgen::to_value(&texts).unwrap_or(wasm_bindgen::JsValue::NULL));
let promise_val = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "embed")))?;
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "embed"))
})?;
let result = wasm_bindgen_futures::JsFuture::from(promise)
.await
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
// Convert result
result
.as_string()
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string()))
.and_then(|s| {
serde_json::from_str::<Vec<Vec<f32>>>(&s)
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
})
}
}
/// JS entry point `registerEmbeddingBackend`.
///
/// Checks that `backend` exposes `dimensions` and `embed`, wraps it in a
/// `WasmEmbeddingBackendBridge`, and registers it with the registry returned by
/// `kreuzberg::plugins::registry::get_embedding_backend_registry()`.
///
/// # Errors
/// Returns a string `JsValue` naming the first missing method, or carrying the bridge
/// construction / registry error text.
#[wasm_bindgen(js_name = "registerEmbeddingBackend")]
pub fn register_embedding_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let first_missing = ["dimensions", "embed"].iter().copied().find(|method_name| {
        !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false)
    });
    if let Some(method_name) = first_missing {
        let message = format!("Backend missing required method: {}", method_name);
        return Err(wasm_bindgen::JsValue::from_str(&message));
    }

    let bridge = match WasmEmbeddingBackendBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let shared: std::sync::Arc<dyn kreuzberg::EmbeddingBackend> = std::sync::Arc::new(bridge);

    let registry_handle = kreuzberg::plugins::registry::get_embedding_backend_registry();
    let mut guard = registry_handle.write();
    match guard.register(shared) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `unregisterEmbeddingBackend`.
///
/// Forwards `name` to `kreuzberg::plugins::embedding_backend::unregister_embedding_backend`;
/// a failure is converted to its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "unregisterEmbeddingBackend")]
pub fn unregister_embedding_backend(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::embedding_backend::unregister_embedding_backend(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `clearEmbeddingBackends`.
///
/// Forwards to `kreuzberg::plugins::embedding_backend::clear_embedding_backends`; a failure
/// is converted to its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "clearEmbeddingBackends")]
pub fn clear_embedding_backends() -> Result<(), wasm_bindgen::JsValue> {
    let outcome = kreuzberg::plugins::embedding_backend::clear_embedding_backends();
    outcome.map_err(|err| {
        let message = err.to_string();
        wasm_bindgen::JsValue::from_str(&message)
    })
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_embeddingbackend::*;
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_documentextractor {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `DocumentExtractor` trait.
///
/// Stores the JS object together with the plugin name resolved once in `new`.
pub struct WasmDocumentExtractorBridge {
// The JS object whose methods are invoked by the trait implementations.
inner: wasm_bindgen::JsValue,
// Name captured at construction so `Plugin::name` can return a borrowed `&str`.
cached_name: String,
}
/// `Debug` output is just the type name; the wrapped JS value is not inspected.
impl std::fmt::Debug for WasmDocumentExtractorBridge {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("WasmDocumentExtractorBridge")
    }
}
impl WasmDocumentExtractorBridge {
    /// Wraps a JS object so it can be used as a `DocumentExtractor`.
    ///
    /// The plugin name is obtained by calling the object's `name()` function once; if that
    /// property is missing, is not a function, throws, or returns a non-string, the name
    /// `"wasm_bridge"` is used instead.
    ///
    /// # Errors
    /// Returns a message naming the first of `extractBytes` / `supportedMimeTypes` that
    /// `Reflect::has` does not report on `js_obj`. The message uses the JS property name
    /// that was actually looked up, matching what `register_document_extractor` reports.
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        for required in ["extractBytes", "supportedMimeTypes"].iter() {
            let present =
                js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str(required)).unwrap_or(false);
            if !present {
                return Err(format!("JS object missing required method: {}", required));
            }
        }

        let name_key = wasm_bindgen::JsValue::from_str("name");
        let cached_name = js_sys::Reflect::get(&js_obj, &name_key)
            .ok()
            .and_then(|v| v.dyn_into::<js_sys::Function>().ok())
            .and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
            .and_then(|v| v.as_string())
            .unwrap_or_else(|| "wasm_bridge".to_string());

        Ok(Self {
            inner: js_obj,
            cached_name,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmDocumentExtractorBridge {
fn name(&self) -> &str {
&self.cached_name
}
fn version(&self) -> String {
let key = wasm_bindgen::JsValue::from_str("version");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
result.as_string().unwrap_or_default()
}
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("initialize");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"initialize"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
})?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
// Convert result
Ok(())
}
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("shutdown");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"shutdown"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
// Convert result
Ok(())
}
}
#[async_trait::async_trait(?Send)]
impl kreuzberg::DocumentExtractor for WasmDocumentExtractorBridge {
async fn extract_bytes(
&self,
content: &[u8],
mime_type: &str,
config: &kreuzberg::ExtractionConfig,
) -> std::result::Result<kreuzberg::InternalDocument, kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("extractBytes");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"extract_bytes"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "extract_bytes")))?;
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "extract_bytes"))
})?;
let args = js_sys::Array::new();
args.push(&js_sys::Uint8Array::from(content).into());
args.push(&wasm_bindgen::JsValue::from_str(mime_type));
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
let promise_val = func.apply(&self.inner, &args).map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "extract_bytes"))
})?;
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "extract_bytes"))
})?;
let result = wasm_bindgen_futures::JsFuture::from(promise)
.await
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
// Convert result
result
.as_string()
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string()))
.and_then(|s| {
serde_json::from_str::<kreuzberg::InternalDocument>(&s)
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
})
}
fn supported_mime_types(&self) -> &[&str] {
let __types: Vec<String> = {
let key = wasm_bindgen::JsValue::from_str("supportedMimeTypes");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
// Convert JS result to Vec<String>
result
.as_string()
.and_then(|s| {
serde_json::from_str::<Vec<String>>(&s).map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))
})
})
.unwrap_or_default()
};
let __strs: Vec<&'static str> = __types
.into_iter()
.map(|s| -> &'static str { Box::leak(s.into_boxed_str()) })
.collect();
Box::leak(__strs.into_boxed_slice())
}
}
/// JS entry point `registerDocumentExtractor`.
///
/// Checks that `backend` exposes `extractBytes` and `supportedMimeTypes`, wraps it in a
/// `WasmDocumentExtractorBridge`, and registers it with the registry returned by
/// `kreuzberg::plugins::registry::get_document_extractor_registry()`.
///
/// # Errors
/// Returns a string `JsValue` naming the first missing method, or carrying the bridge
/// construction / registry error text.
#[wasm_bindgen(js_name = "registerDocumentExtractor")]
pub fn register_document_extractor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let first_missing = ["extractBytes", "supportedMimeTypes"]
        .iter()
        .copied()
        .find(|method_name| {
            !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false)
        });
    if let Some(method_name) = first_missing {
        let message = format!("Backend missing required method: {}", method_name);
        return Err(wasm_bindgen::JsValue::from_str(&message));
    }

    let bridge = match WasmDocumentExtractorBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let shared: std::sync::Arc<dyn kreuzberg::DocumentExtractor> = std::sync::Arc::new(bridge);

    let registry_handle = kreuzberg::plugins::registry::get_document_extractor_registry();
    let mut guard = registry_handle.write();
    match guard.register(shared) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `unregisterDocumentExtractor`.
///
/// Forwards `name` to `kreuzberg::plugins::document_extractor::unregister_document_extractor`;
/// a failure is converted to its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "unregisterDocumentExtractor")]
pub fn unregister_document_extractor(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::document_extractor::unregister_document_extractor(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `clearDocumentExtractors`.
///
/// Forwards to `kreuzberg::plugins::document_extractor::clear_document_extractors`; a
/// failure is converted to its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "clearDocumentExtractors")]
pub fn clear_document_extractors() -> Result<(), wasm_bindgen::JsValue> {
    let outcome = kreuzberg::plugins::document_extractor::clear_document_extractors();
    outcome.map_err(|err| {
        let message = err.to_string();
        wasm_bindgen::JsValue::from_str(&message)
    })
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_documentextractor::*;
#[cfg(target_arch = "wasm32")]
mod __alef_wasm_bridge_renderer {
use super::*;
/// Wrapper that bridges a foreign Wasm object to the `Renderer` trait.
///
/// Stores the JS object together with the plugin name resolved once in `new`.
pub struct WasmRendererBridge {
// The JS object whose methods are invoked by the trait implementations.
inner: wasm_bindgen::JsValue,
// Name captured at construction so `Plugin::name` can return a borrowed `&str`.
cached_name: String,
}
/// `Debug` output is just the type name; the wrapped JS value is not inspected.
impl std::fmt::Debug for WasmRendererBridge {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("WasmRendererBridge")
    }
}
impl WasmRendererBridge {
    /// Wraps a JS object so it can be used as a `Renderer`.
    ///
    /// The plugin name is obtained by calling the object's `name()` function once; if that
    /// property is missing, is not a function, throws, or returns a non-string, the name
    /// `"wasm_bridge"` is used instead.
    ///
    /// # Errors
    /// Returns a message when `Reflect::has` does not report a `render` property on
    /// `js_obj`.
    pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
        let render_key = wasm_bindgen::JsValue::from_str("render");
        let has_render = js_sys::Reflect::has(&js_obj, &render_key).unwrap_or(false);
        if !has_render {
            return Err("JS object missing required method: render".to_string());
        }

        let name_key = wasm_bindgen::JsValue::from_str("name");
        let reported_name = match js_sys::Reflect::get(&js_obj, &name_key) {
            Ok(candidate) => candidate
                .dyn_into::<js_sys::Function>()
                .ok()
                .and_then(|name_fn| name_fn.apply(&js_obj, &js_sys::Array::new()).ok())
                .and_then(|returned| returned.as_string()),
            Err(_) => None,
        };

        Ok(Self {
            cached_name: reported_name.unwrap_or_else(|| "wasm_bridge".to_string()),
            inner: js_obj,
        })
    }
}
impl kreuzberg::plugins::Plugin for WasmRendererBridge {
fn name(&self) -> &str {
&self.cached_name
}
fn version(&self) -> String {
let key = wasm_bindgen::JsValue::from_str("version");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Default::default();
}
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
Ok(f) => f,
Err(_) => return Default::default(),
};
let func: js_sys::Function = match func_val.dyn_into() {
Ok(f) => f,
Err(_) => return Default::default(),
};
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = match func.apply(&self.inner, &args) {
Ok(r) => r,
Err(_) => return Default::default(),
};
// Convert result
result.as_string().unwrap_or_default()
}
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("initialize");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"initialize"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
})?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
// Convert result
Ok(())
}
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
let key = wasm_bindgen::JsValue::from_str("shutdown");
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
if !has_method {
return Err(kreuzberg::KreuzbergError::Other(format!(
"Method '{}' not found on JS object",
"shutdown"
)));
}
let func_val = js_sys::Reflect::get(&self.inner, &key)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
let func: js_sys::Function = func_val
.dyn_into()
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
// Build args array
let args = js_sys::Array::new();
// Call the function
let result = func
.apply(&self.inner, &args)
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
// Convert result
Ok(())
}
}
impl kreuzberg::Renderer for WasmRendererBridge {
    /// Calls the JS object's `render(doc)` synchronously and returns its string result.
    ///
    /// `doc` is handed to JS via `serde_wasm_bindgen`; if it cannot be serialized, `null`
    /// is passed instead.
    ///
    /// # Errors
    /// Returns `KreuzbergError::Other` when `render` is missing, cannot be read, is not a
    /// function, throws, or returns a non-string value.
    fn render(&self, doc: &kreuzberg::InternalDocument) -> std::result::Result<String, kreuzberg::KreuzbergError> {
        let method_key = wasm_bindgen::JsValue::from_str("render");
        if !js_sys::Reflect::has(&self.inner, &method_key).unwrap_or(false) {
            return Err(kreuzberg::KreuzbergError::Other(
                "Method 'render' not found on JS object".to_string(),
            ));
        }

        let render_fn = js_sys::Reflect::get(&self.inner, &method_key)
            .map_err(|_| kreuzberg::KreuzbergError::Other("Failed to get method 'render'".to_string()))?
            .dyn_into::<js_sys::Function>()
            .map_err(|_| kreuzberg::KreuzbergError::Other("Method 'render' is not a function".to_string()))?;

        let call_args = js_sys::Array::new();
        let js_doc = serde_wasm_bindgen::to_value(doc).unwrap_or(wasm_bindgen::JsValue::NULL);
        call_args.push(&js_doc);

        let rendered = render_fn
            .apply(&self.inner, &call_args)
            .map_err(|_| kreuzberg::KreuzbergError::Other("Failed to call method 'render'".to_string()))?;

        match rendered.as_string() {
            Some(text) => Ok(text),
            None => Err(kreuzberg::KreuzbergError::Other("Expected string return".to_string())),
        }
    }
}
/// JS entry point `registerRenderer`.
///
/// Checks that `backend` exposes `render`, wraps it in a `WasmRendererBridge`, and
/// registers it with the registry returned by
/// `kreuzberg::plugins::registry::get_renderer_registry()`.
///
/// # Errors
/// Returns a string `JsValue` naming the missing method, or carrying the bridge
/// construction / registry error text.
#[wasm_bindgen(js_name = "registerRenderer")]
pub fn register_renderer(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
    let first_missing = ["render"].iter().copied().find(|method_name| {
        !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false)
    });
    if let Some(method_name) = first_missing {
        let message = format!("Backend missing required method: {}", method_name);
        return Err(wasm_bindgen::JsValue::from_str(&message));
    }

    let bridge = match WasmRendererBridge::new(backend) {
        Ok(bridge) => bridge,
        Err(reason) => return Err(wasm_bindgen::JsValue::from_str(&reason)),
    };
    let shared: std::sync::Arc<dyn kreuzberg::Renderer> = std::sync::Arc::new(bridge);

    let registry_handle = kreuzberg::plugins::registry::get_renderer_registry();
    let mut guard = registry_handle.write();
    match guard.register(shared) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `unregisterRenderer`.
///
/// Forwards `name` to `kreuzberg::plugins::renderer::unregister_renderer`; a failure is
/// converted to its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "unregisterRenderer")]
pub fn unregister_renderer(name: String) -> Result<(), wasm_bindgen::JsValue> {
    match kreuzberg::plugins::renderer::unregister_renderer(&name) {
        Ok(()) => Ok(()),
        Err(err) => Err(wasm_bindgen::JsValue::from_str(&err.to_string())),
    }
}
/// JS entry point `clearRenderers`.
///
/// Forwards to `kreuzberg::plugins::renderer::clear_renderers`; a failure is converted to
/// its `to_string()` text and returned as a `JsValue`.
#[wasm_bindgen(js_name = "clearRenderers")]
pub fn clear_renderers() -> Result<(), wasm_bindgen::JsValue> {
    let outcome = kreuzberg::plugins::renderer::clear_renderers();
    outcome.map_err(|err| {
        let message = err.to_string();
        wasm_bindgen::JsValue::from_str(&message)
    })
}
}
#[cfg(target_arch = "wasm32")]
pub use __alef_wasm_bridge_renderer::*;
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CacheStats> for WasmCacheStats {
fn from(val: kreuzberg::CacheStats) -> Self {
Self {
total_files: val.total_files,
total_size_mb: val.total_size_mb,
available_space_mb: val.available_space_mb,
oldest_file_age_days: val.oldest_file_age_days,
newest_file_age_days: val.newest_file_age_days,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmAccelerationConfig> for kreuzberg::AccelerationConfig {
fn from(val: WasmAccelerationConfig) -> Self {
Self {
provider: val.provider.into(),
device_id: val.device_id,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::AccelerationConfig> for WasmAccelerationConfig {
fn from(val: kreuzberg::AccelerationConfig) -> Self {
Self {
provider: val.provider.into(),
device_id: val.device_id,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmContentFilterConfig> for kreuzberg::ContentFilterConfig {
fn from(val: WasmContentFilterConfig) -> Self {
Self {
include_headers: val.include_headers,
include_footers: val.include_footers,
strip_repeating_text: val.strip_repeating_text,
include_watermarks: val.include_watermarks,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ContentFilterConfig> for WasmContentFilterConfig {
fn from(val: kreuzberg::ContentFilterConfig) -> Self {
Self {
include_headers: val.include_headers,
include_footers: val.include_footers,
strip_repeating_text: val.strip_repeating_text,
include_watermarks: val.include_watermarks,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmEmailConfig> for kreuzberg::EmailConfig {
fn from(val: WasmEmailConfig) -> Self {
Self {
msg_fallback_codepage: val.msg_fallback_codepage,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EmailConfig> for WasmEmailConfig {
fn from(val: kreuzberg::EmailConfig) -> Self {
Self {
msg_fallback_codepage: val.msg_fallback_codepage,
}
}
}
/// Builds a core `ExtractionConfig` from the wasm-facing configuration.
///
/// `html_options`, `concurrency` and `cancel_token` are not taken from the wasm value;
/// they are set to their defaults, as is any field not listed here.
#[allow(clippy::needless_update, clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExtractionConfig> for kreuzberg::ExtractionConfig {
    fn from(wasm: WasmExtractionConfig) -> Self {
        kreuzberg::ExtractionConfig {
            use_cache: wasm.use_cache,
            enable_quality_processing: wasm.enable_quality_processing,
            ocr: wasm.ocr.map(Into::into),
            force_ocr: wasm.force_ocr,
            force_ocr_pages: wasm.force_ocr_pages.map(|pages| pages.into_iter().collect()),
            disable_ocr: wasm.disable_ocr,
            chunking: wasm.chunking.map(Into::into),
            content_filter: wasm.content_filter.map(Into::into),
            images: wasm.images.map(Into::into),
            token_reduction: wasm.token_reduction.map(Into::into),
            language_detection: wasm.language_detection.map(Into::into),
            pages: wasm.pages.map(Into::into),
            postprocessor: wasm.postprocessor.map(Into::into),
            html_options: Default::default(),
            extraction_timeout_secs: wasm.extraction_timeout_secs,
            max_concurrent_extractions: wasm.max_concurrent_extractions,
            result_format: wasm.result_format.into(),
            security_limits: wasm.security_limits.map(Into::into),
            max_embedded_file_bytes: wasm.max_embedded_file_bytes,
            output_format: wasm.output_format.into(),
            use_layout_for_markdown: wasm.use_layout_for_markdown,
            include_document_structure: wasm.include_document_structure,
            acceleration: wasm.acceleration.map(Into::into),
            cache_namespace: wasm.cache_namespace,
            cache_ttl_secs: wasm.cache_ttl_secs,
            email: wasm.email.map(Into::into),
            concurrency: Default::default(),
            max_archive_depth: wasm.max_archive_depth,
            structured_extraction: wasm.structured_extraction.map(Into::into),
            cancel_token: Default::default(),
            ..Default::default()
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExtractionConfig> for WasmExtractionConfig {
fn from(val: kreuzberg::ExtractionConfig) -> Self {
Self {
use_cache: val.use_cache,
enable_quality_processing: val.enable_quality_processing,
ocr: val.ocr.map(Into::into),
force_ocr: val.force_ocr,
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
disable_ocr: val.disable_ocr,
chunking: val.chunking.map(Into::into),
content_filter: val.content_filter.map(Into::into),
images: val.images.map(Into::into),
token_reduction: val.token_reduction.map(Into::into),
language_detection: val.language_detection.map(Into::into),
pages: val.pages.map(Into::into),
postprocessor: val.postprocessor.map(Into::into),
html_options: val.html_options.as_ref().map(|v| format!("{v:?}")),
extraction_timeout_secs: val.extraction_timeout_secs,
max_concurrent_extractions: val.max_concurrent_extractions,
result_format: val.result_format.into(),
security_limits: val.security_limits.map(Into::into),
max_embedded_file_bytes: val.max_embedded_file_bytes,
output_format: val.output_format.into(),
use_layout_for_markdown: val.use_layout_for_markdown,
include_document_structure: val.include_document_structure,
acceleration: val.acceleration.map(Into::into),
cache_namespace: val.cache_namespace,
cache_ttl_secs: val.cache_ttl_secs,
email: val.email.map(Into::into),
concurrency: val.concurrency.as_ref().map(|v| format!("{v:?}")),
max_archive_depth: val.max_archive_depth,
structured_extraction: val.structured_extraction.map(Into::into),
cancel_token: val.cancel_token.as_ref().map(|v| format!("{v:?}")),
}
}
}
/// Builds a core `FileExtractionConfig` from the wasm-facing per-file configuration.
///
/// `html_options` is not taken from the wasm value; it is set to its default, as is any
/// field not listed here.
#[allow(clippy::needless_update, clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmFileExtractionConfig> for kreuzberg::FileExtractionConfig {
    fn from(wasm: WasmFileExtractionConfig) -> Self {
        kreuzberg::FileExtractionConfig {
            enable_quality_processing: wasm.enable_quality_processing,
            ocr: wasm.ocr.map(Into::into),
            force_ocr: wasm.force_ocr,
            force_ocr_pages: wasm.force_ocr_pages.map(|pages| pages.into_iter().collect()),
            disable_ocr: wasm.disable_ocr,
            chunking: wasm.chunking.map(Into::into),
            content_filter: wasm.content_filter.map(Into::into),
            images: wasm.images.map(Into::into),
            token_reduction: wasm.token_reduction.map(Into::into),
            language_detection: wasm.language_detection.map(Into::into),
            pages: wasm.pages.map(Into::into),
            postprocessor: wasm.postprocessor.map(Into::into),
            html_options: Default::default(),
            result_format: wasm.result_format.map(Into::into),
            output_format: wasm.output_format.map(Into::into),
            include_document_structure: wasm.include_document_structure,
            timeout_secs: wasm.timeout_secs,
            structured_extraction: wasm.structured_extraction.map(Into::into),
            ..Default::default()
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::FileExtractionConfig> for WasmFileExtractionConfig {
fn from(val: kreuzberg::FileExtractionConfig) -> Self {
Self {
enable_quality_processing: val.enable_quality_processing,
ocr: val.ocr.map(Into::into),
force_ocr: val.force_ocr,
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
disable_ocr: val.disable_ocr,
chunking: val.chunking.map(Into::into),
content_filter: val.content_filter.map(Into::into),
images: val.images.map(Into::into),
token_reduction: val.token_reduction.map(Into::into),
language_detection: val.language_detection.map(Into::into),
pages: val.pages.map(Into::into),
postprocessor: val.postprocessor.map(Into::into),
html_options: val.html_options.as_ref().map(|v| format!("{v:?}")),
result_format: val.result_format.map(Into::into),
output_format: val.output_format.map(Into::into),
include_document_structure: val.include_document_structure,
timeout_secs: val.timeout_secs,
structured_extraction: val.structured_extraction.map(Into::into),
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmBatchBytesItem> for kreuzberg::BatchBytesItem {
fn from(val: WasmBatchBytesItem) -> Self {
Self {
content: val.content.to_vec().into(),
mime_type: val.mime_type,
config: val.config.map(Into::into),
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::BatchBytesItem> for WasmBatchBytesItem {
fn from(val: kreuzberg::BatchBytesItem) -> Self {
Self {
content: val.content.to_vec().into(),
mime_type: val.mime_type,
config: val.config.map(Into::into),
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmBatchFileItem> for kreuzberg::BatchFileItem {
fn from(val: WasmBatchFileItem) -> Self {
Self {
path: val.path.into(),
config: val.config.map(Into::into),
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::BatchFileItem> for WasmBatchFileItem {
fn from(val: kreuzberg::BatchFileItem) -> Self {
Self {
path: val.path.to_string_lossy().to_string(),
config: val.config.map(Into::into),
}
}
}
/// Moves every listed image-extraction setting from the wasm-facing type into the core
/// type without transformation.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmImageExtractionConfig> for kreuzberg::ImageExtractionConfig {
    fn from(wasm: WasmImageExtractionConfig) -> Self {
        kreuzberg::ImageExtractionConfig {
            extract_images: wasm.extract_images,
            target_dpi: wasm.target_dpi,
            max_image_dimension: wasm.max_image_dimension,
            inject_placeholders: wasm.inject_placeholders,
            auto_adjust_dpi: wasm.auto_adjust_dpi,
            min_dpi: wasm.min_dpi,
            max_dpi: wasm.max_dpi,
            max_images_per_page: wasm.max_images_per_page,
            classify: wasm.classify,
            include_page_rasters: wasm.include_page_rasters,
            run_ocr_on_images: wasm.run_ocr_on_images,
            ocr_text_only: wasm.ocr_text_only,
            append_ocr_text: wasm.append_ocr_text,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ImageExtractionConfig> for WasmImageExtractionConfig {
fn from(val: kreuzberg::ImageExtractionConfig) -> Self {
Self {
extract_images: val.extract_images,
target_dpi: val.target_dpi,
max_image_dimension: val.max_image_dimension,
inject_placeholders: val.inject_placeholders,
auto_adjust_dpi: val.auto_adjust_dpi,
min_dpi: val.min_dpi,
max_dpi: val.max_dpi,
max_images_per_page: val.max_images_per_page,
classify: val.classify,
include_page_rasters: val.include_page_rasters,
run_ocr_on_images: val.run_ocr_on_images,
ocr_text_only: val.ocr_text_only,
append_ocr_text: val.append_ocr_text,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTokenReductionOptions> for kreuzberg::TokenReductionOptions {
fn from(val: WasmTokenReductionOptions) -> Self {
Self {
mode: val.mode,
preserve_important_words: val.preserve_important_words,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TokenReductionOptions> for WasmTokenReductionOptions {
fn from(val: kreuzberg::TokenReductionOptions) -> Self {
Self {
mode: val.mode,
preserve_important_words: val.preserve_important_words,
}
}
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLanguageDetectionConfig> for kreuzberg::LanguageDetectionConfig {
fn from(val: WasmLanguageDetectionConfig) -> Self {
Self {
enabled: val.enabled,
min_confidence: val.min_confidence,
detect_multiple: val.detect_multiple,
}
}
}
// Core -> wasm: the three language-detection settings are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LanguageDetectionConfig> for WasmLanguageDetectionConfig {
    fn from(val: kreuzberg::LanguageDetectionConfig) -> Self {
        let kreuzberg::LanguageDetectionConfig {
            enabled,
            min_confidence,
            detect_multiple,
            ..
        } = val;
        Self {
            enabled,
            min_confidence,
            detect_multiple,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLlmConfig> for kreuzberg::LlmConfig {
fn from(val: WasmLlmConfig) -> Self {
Self {
model: val.model,
api_key: val.api_key,
base_url: val.base_url,
timeout_secs: val.timeout_secs,
max_retries: val.max_retries,
temperature: val.temperature,
max_tokens: val.max_tokens,
}
}
}
// Core -> wasm: all LLM connection/sampling settings are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LlmConfig> for WasmLlmConfig {
    fn from(val: kreuzberg::LlmConfig) -> Self {
        let kreuzberg::LlmConfig {
            model,
            api_key,
            base_url,
            timeout_secs,
            max_retries,
            temperature,
            max_tokens,
            ..
        } = val;
        Self {
            model,
            api_key,
            base_url,
            timeout_secs,
            max_retries,
            temperature,
            max_tokens,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmStructuredExtractionConfig> for kreuzberg::StructuredExtractionConfig {
fn from(val: WasmStructuredExtractionConfig) -> Self {
Self {
schema: serde_wasm_bindgen::from_value(val.schema.clone()).unwrap_or_default(),
schema_name: val.schema_name,
schema_description: val.schema_description,
strict: val.strict,
prompt: val.prompt,
llm: val.llm.into(),
}
}
}
// Core -> wasm: `schema` is serialized to a JS value (JS `null` if serialization fails), `llm` is
// converted through `Into`, and the remaining fields are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::StructuredExtractionConfig> for WasmStructuredExtractionConfig {
    fn from(val: kreuzberg::StructuredExtractionConfig) -> Self {
        let kreuzberg::StructuredExtractionConfig {
            schema,
            schema_name,
            schema_description,
            strict,
            prompt,
            llm,
            ..
        } = val;
        let schema_js = serde_wasm_bindgen::to_value(&schema).unwrap_or(JsValue::NULL);
        Self {
            schema: schema_js,
            schema_name,
            schema_description,
            strict,
            prompt,
            llm: llm.into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrQualityThresholds> for kreuzberg::OcrQualityThresholds {
fn from(val: WasmOcrQualityThresholds) -> Self {
Self {
min_total_non_whitespace: val.min_total_non_whitespace,
min_non_whitespace_per_page: val.min_non_whitespace_per_page,
min_meaningful_word_len: val.min_meaningful_word_len,
min_meaningful_words: val.min_meaningful_words,
min_alnum_ratio: val.min_alnum_ratio,
min_garbage_chars: val.min_garbage_chars,
max_fragmented_word_ratio: val.max_fragmented_word_ratio,
critical_fragmented_word_ratio: val.critical_fragmented_word_ratio,
min_avg_word_length: val.min_avg_word_length,
min_words_for_avg_length_check: val.min_words_for_avg_length_check,
min_consecutive_repeat_ratio: val.min_consecutive_repeat_ratio,
min_words_for_repeat_check: val.min_words_for_repeat_check,
substantive_min_chars: val.substantive_min_chars,
non_text_min_chars: val.non_text_min_chars,
alnum_ws_ratio_threshold: val.alnum_ws_ratio_threshold,
pipeline_min_quality: val.pipeline_min_quality,
}
}
}
// Core -> wasm: every OCR quality threshold is moved across by name without transformation.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrQualityThresholds> for WasmOcrQualityThresholds {
    fn from(val: kreuzberg::OcrQualityThresholds) -> Self {
        let kreuzberg::OcrQualityThresholds {
            min_total_non_whitespace,
            min_non_whitespace_per_page,
            min_meaningful_word_len,
            min_meaningful_words,
            min_alnum_ratio,
            min_garbage_chars,
            max_fragmented_word_ratio,
            critical_fragmented_word_ratio,
            min_avg_word_length,
            min_words_for_avg_length_check,
            min_consecutive_repeat_ratio,
            min_words_for_repeat_check,
            substantive_min_chars,
            non_text_min_chars,
            alnum_ws_ratio_threshold,
            pipeline_min_quality,
            ..
        } = val;
        Self {
            min_total_non_whitespace,
            min_non_whitespace_per_page,
            min_meaningful_word_len,
            min_meaningful_words,
            min_alnum_ratio,
            min_garbage_chars,
            max_fragmented_word_ratio,
            critical_fragmented_word_ratio,
            min_avg_word_length,
            min_words_for_avg_length_check,
            min_consecutive_repeat_ratio,
            min_words_for_repeat_check,
            substantive_min_chars,
            non_text_min_chars,
            alnum_ws_ratio_threshold,
            pipeline_min_quality,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrPipelineStage> for kreuzberg::OcrPipelineStage {
fn from(val: WasmOcrPipelineStage) -> Self {
Self {
backend: val.backend,
priority: val.priority,
language: val.language,
tesseract_config: val.tesseract_config.map(Into::into),
paddle_ocr_config: val
.paddle_ocr_config
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
vlm_config: val.vlm_config.map(Into::into),
backend_options: val
.backend_options
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
}
}
}
// Core -> wasm: plain fields are moved across, nested configs go through `Into`, and the optional
// `paddle_ocr_config` / `backend_options` are serialized to JS values (`None` if that fails).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrPipelineStage> for WasmOcrPipelineStage {
    fn from(val: kreuzberg::OcrPipelineStage) -> Self {
        let kreuzberg::OcrPipelineStage {
            backend,
            priority,
            language,
            tesseract_config,
            paddle_ocr_config,
            vlm_config,
            backend_options,
            ..
        } = val;
        Self {
            backend,
            priority,
            language,
            tesseract_config: tesseract_config.map(Into::into),
            paddle_ocr_config: paddle_ocr_config
                .as_ref()
                .and_then(|cfg| serde_wasm_bindgen::to_value(cfg).ok()),
            vlm_config: vlm_config.map(Into::into),
            backend_options: backend_options
                .as_ref()
                .and_then(|opts| serde_wasm_bindgen::to_value(opts).ok()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrPipelineConfig> for kreuzberg::OcrPipelineConfig {
fn from(val: WasmOcrPipelineConfig) -> Self {
Self {
stages: val.stages.into_iter().map(Into::into).collect(),
quality_thresholds: val.quality_thresholds.into(),
}
}
}
// Core -> wasm: each stage and the quality thresholds are converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrPipelineConfig> for WasmOcrPipelineConfig {
    fn from(val: kreuzberg::OcrPipelineConfig) -> Self {
        let kreuzberg::OcrPipelineConfig {
            stages,
            quality_thresholds,
            ..
        } = val;
        Self {
            stages: stages.into_iter().map(Into::into).collect(),
            quality_thresholds: quality_thresholds.into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrConfig> for kreuzberg::OcrConfig {
fn from(val: WasmOcrConfig) -> Self {
Self {
enabled: val.enabled,
backend: val.backend,
language: val.language,
tesseract_config: val.tesseract_config.map(Into::into),
output_format: val.output_format.map(Into::into),
paddle_ocr_config: val
.paddle_ocr_config
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
backend_options: val
.backend_options
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
element_config: val.element_config.map(Into::into),
quality_thresholds: val.quality_thresholds.map(Into::into),
pipeline: val.pipeline.map(Into::into),
auto_rotate: val.auto_rotate,
vlm_config: val.vlm_config.map(Into::into),
vlm_prompt: val.vlm_prompt,
acceleration: val.acceleration.map(Into::into),
tessdata_bytes: val
.tessdata_bytes
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
}
}
}
// Core -> wasm conversion for the OCR configuration.
//
// `paddle_ocr_config` and `backend_options` are serialized with `serde_wasm_bindgen`, whereas
// `tessdata_bytes` is round-tripped through a JSON string and `JSON.parse`; in every case a failure
// yields `None`. Other fields are moved across or converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrConfig> for WasmOcrConfig {
    fn from(val: kreuzberg::OcrConfig) -> Self {
        let kreuzberg::OcrConfig {
            enabled,
            backend,
            language,
            tesseract_config,
            output_format,
            paddle_ocr_config,
            backend_options,
            element_config,
            quality_thresholds,
            pipeline,
            auto_rotate,
            vlm_config,
            vlm_prompt,
            acceleration,
            tessdata_bytes,
            ..
        } = val;
        Self {
            enabled,
            backend,
            language,
            tesseract_config: tesseract_config.map(Into::into),
            output_format: output_format.map(Into::into),
            paddle_ocr_config: paddle_ocr_config
                .as_ref()
                .and_then(|cfg| serde_wasm_bindgen::to_value(cfg).ok()),
            backend_options: backend_options
                .as_ref()
                .and_then(|opts| serde_wasm_bindgen::to_value(opts).ok()),
            element_config: element_config.map(Into::into),
            quality_thresholds: quality_thresholds.map(Into::into),
            pipeline: pipeline.map(Into::into),
            auto_rotate,
            vlm_config: vlm_config.map(Into::into),
            vlm_prompt,
            acceleration: acceleration.map(Into::into),
            tessdata_bytes: tessdata_bytes
                .as_ref()
                .and_then(|data| serde_json::to_string(data).ok())
                .and_then(|json| js_sys::JSON::parse(&json).ok()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageConfig> for kreuzberg::PageConfig {
fn from(val: WasmPageConfig) -> Self {
Self {
extract_pages: val.extract_pages,
insert_page_markers: val.insert_page_markers,
marker_format: val.marker_format,
}
}
}
// Core -> wasm: the three page settings are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageConfig> for WasmPageConfig {
    fn from(val: kreuzberg::PageConfig) -> Self {
        let kreuzberg::PageConfig {
            extract_pages,
            insert_page_markers,
            marker_format,
            ..
        } = val;
        Self {
            extract_pages,
            insert_page_markers,
            marker_format,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPostProcessorConfig> for kreuzberg::PostProcessorConfig {
fn from(val: WasmPostProcessorConfig) -> Self {
Self {
enabled: val.enabled,
enabled_processors: val.enabled_processors.map(|v| v.into_iter().collect()),
disabled_processors: val.disabled_processors.map(|v| v.into_iter().collect()),
enabled_set: val.enabled_set.map(|v| v.into_iter().collect()),
disabled_set: val.disabled_set.map(|v| v.into_iter().collect()),
}
}
}
// Core -> wasm: `enabled` is moved across; each optional processor list is re-collected into the
// collection type the wasm struct expects.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PostProcessorConfig> for WasmPostProcessorConfig {
    fn from(val: kreuzberg::PostProcessorConfig) -> Self {
        let kreuzberg::PostProcessorConfig {
            enabled,
            enabled_processors,
            disabled_processors,
            enabled_set,
            disabled_set,
            ..
        } = val;
        Self {
            enabled,
            enabled_processors: enabled_processors.map(|items| items.into_iter().collect()),
            disabled_processors: disabled_processors.map(|items| items.into_iter().collect()),
            enabled_set: enabled_set.map(|items| items.into_iter().collect()),
            disabled_set: disabled_set.map(|items| items.into_iter().collect()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmChunkingConfig> for kreuzberg::ChunkingConfig {
fn from(val: WasmChunkingConfig) -> Self {
Self {
max_characters: val.max_characters,
overlap: val.overlap,
trim: val.trim,
chunker_type: val.chunker_type.into(),
embedding: val.embedding.map(Into::into),
preset: val.preset,
sizing: serde_wasm_bindgen::from_value(val.sizing.clone()).unwrap_or_default(),
prepend_heading_context: val.prepend_heading_context,
topic_threshold: val.topic_threshold,
}
}
}
// Core -> wasm: `chunker_type` / `embedding` go through `Into`, `sizing` is serialized to a JS value
// (JS `null` if serialization fails), and the remaining fields are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ChunkingConfig> for WasmChunkingConfig {
    fn from(val: kreuzberg::ChunkingConfig) -> Self {
        let kreuzberg::ChunkingConfig {
            max_characters,
            overlap,
            trim,
            chunker_type,
            embedding,
            preset,
            sizing,
            prepend_heading_context,
            topic_threshold,
            ..
        } = val;
        Self {
            max_characters,
            overlap,
            trim,
            chunker_type: chunker_type.into(),
            embedding: embedding.map(Into::into),
            preset,
            sizing: serde_wasm_bindgen::to_value(&sizing).unwrap_or(JsValue::NULL),
            prepend_heading_context,
            topic_threshold,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmEmbeddingConfig> for kreuzberg::EmbeddingConfig {
fn from(val: WasmEmbeddingConfig) -> Self {
Self {
model: serde_wasm_bindgen::from_value(val.model.clone()).unwrap_or_default(),
normalize: val.normalize,
batch_size: val.batch_size,
show_download_progress: val.show_download_progress,
cache_dir: val.cache_dir.map(Into::into),
acceleration: val.acceleration.map(Into::into),
max_embed_duration_secs: val.max_embed_duration_secs,
}
}
}
// Core -> wasm: `model` is serialized to a JS value (JS `null` if serialization fails) and
// `cache_dir` is rendered as a lossy UTF-8 string; the remaining fields are moved or `Into`-converted.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EmbeddingConfig> for WasmEmbeddingConfig {
    fn from(val: kreuzberg::EmbeddingConfig) -> Self {
        let kreuzberg::EmbeddingConfig {
            model,
            normalize,
            batch_size,
            show_download_progress,
            cache_dir,
            acceleration,
            max_embed_duration_secs,
            ..
        } = val;
        Self {
            model: serde_wasm_bindgen::to_value(&model).unwrap_or(JsValue::NULL),
            normalize,
            batch_size,
            show_download_progress,
            cache_dir: cache_dir.map(|dir| dir.to_string_lossy().to_string()),
            acceleration: acceleration.map(Into::into),
            max_embed_duration_secs,
        }
    }
}
// Core -> wasm: the extension and MIME type are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::SupportedFormat> for WasmSupportedFormat {
    fn from(val: kreuzberg::SupportedFormat) -> Self {
        let kreuzberg::SupportedFormat {
            extension,
            mime_type,
            ..
        } = val;
        Self { extension, mime_type }
    }
}
// Core -> wasm: `format` is stringified, `metadata` is serialized to JSON and re-parsed into a JS
// value (JS `null` if parsing fails), and `text_fields` is re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::extraction::structured::StructuredDataResult> for WasmStructuredDataResult {
    fn from(val: kreuzberg::extraction::structured::StructuredDataResult) -> Self {
        let kreuzberg::extraction::structured::StructuredDataResult {
            content,
            format,
            metadata,
            text_fields,
            ..
        } = val;
        // A serialization failure yields an empty string, which `JSON.parse` then rejects.
        let metadata_json = serde_json::to_string(&metadata).unwrap_or_default();
        Self {
            content,
            format: format.to_string(),
            metadata: js_sys::JSON::parse(&metadata_json).unwrap_or(JsValue::NULL),
            text_fields: text_fields.into_iter().collect(),
        }
    }
}
// Core -> wasm: XLSX application properties are moved across; `worksheet_names` is re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties> for WasmXlsxAppProperties {
    fn from(val: kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties) -> Self {
        let kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties {
            application,
            app_version,
            doc_security,
            scale_crop,
            links_up_to_date,
            shared_doc,
            hyperlinks_changed,
            company,
            worksheet_names,
            ..
        } = val;
        Self {
            application,
            app_version,
            doc_security,
            scale_crop,
            links_up_to_date,
            shared_doc,
            hyperlinks_changed,
            company,
            worksheet_names: worksheet_names.into_iter().collect(),
        }
    }
}
// Core -> wasm: PPTX application properties are moved across; `slide_titles` is re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties> for WasmPptxAppProperties {
    fn from(val: kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties) -> Self {
        let kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties {
            application,
            app_version,
            total_time,
            company,
            doc_security,
            scale_crop,
            links_up_to_date,
            shared_doc,
            hyperlinks_changed,
            slides,
            notes,
            hidden_slides,
            multimedia_clips,
            presentation_format,
            slide_titles,
            ..
        } = val;
        Self {
            application,
            app_version,
            total_time,
            company,
            doc_security,
            scale_crop,
            links_up_to_date,
            shared_doc,
            hyperlinks_changed,
            slides,
            notes,
            hidden_slides,
            multimedia_clips,
            presentation_format,
            slide_titles: slide_titles.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmSecurityLimits> for kreuzberg::SecurityLimits {
fn from(val: WasmSecurityLimits) -> Self {
Self {
max_archive_size: val.max_archive_size,
max_compression_ratio: val.max_compression_ratio,
max_files_in_archive: val.max_files_in_archive,
max_nesting_depth: val.max_nesting_depth,
max_entity_length: val.max_entity_length,
max_content_size: val.max_content_size,
max_iterations: val.max_iterations,
max_xml_depth: val.max_xml_depth,
max_table_cells: val.max_table_cells,
}
}
}
// Core -> wasm: every security limit is moved across by name without transformation.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::SecurityLimits> for WasmSecurityLimits {
    fn from(val: kreuzberg::SecurityLimits) -> Self {
        let kreuzberg::SecurityLimits {
            max_archive_size,
            max_compression_ratio,
            max_files_in_archive,
            max_nesting_depth,
            max_entity_length,
            max_content_size,
            max_iterations,
            max_xml_depth,
            max_table_cells,
            ..
        } = val;
        Self {
            max_archive_size,
            max_compression_ratio,
            max_files_in_archive,
            max_nesting_depth,
            max_entity_length,
            max_content_size,
            max_iterations,
            max_xml_depth,
            max_table_cells,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPdfAnnotation> for kreuzberg::PdfAnnotation {
fn from(val: WasmPdfAnnotation) -> Self {
Self {
annotation_type: val.annotation_type.into(),
content: val.content,
page_number: val.page_number,
bounding_box: val.bounding_box.map(Into::into),
}
}
}
// Core -> wasm: `annotation_type` and the optional `bounding_box` go through `Into`; the content
// and page number are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PdfAnnotation> for WasmPdfAnnotation {
    fn from(val: kreuzberg::PdfAnnotation) -> Self {
        let kreuzberg::PdfAnnotation {
            annotation_type,
            content,
            page_number,
            bounding_box,
            ..
        } = val;
        Self {
            annotation_type: annotation_type.into(),
            content,
            page_number,
            bounding_box: bounding_box.map(Into::into),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDjotContent> for kreuzberg::DjotContent {
fn from(val: WasmDjotContent) -> Self {
Self {
plain_text: val.plain_text,
blocks: val.blocks.into_iter().map(Into::into).collect(),
metadata: val.metadata.into(),
tables: val.tables.into_iter().map(Into::into).collect(),
images: val.images.into_iter().map(Into::into).collect(),
links: val.links.into_iter().map(Into::into).collect(),
footnotes: val.footnotes.into_iter().map(Into::into).collect(),
attributes: Default::default(),
}
}
}
// Core -> wasm: nested collections are converted element-by-element through `Into`; each entry of
// `attributes` is rendered with its `Debug` formatting.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DjotContent> for WasmDjotContent {
    fn from(val: kreuzberg::DjotContent) -> Self {
        let kreuzberg::DjotContent {
            plain_text,
            blocks,
            metadata,
            tables,
            images,
            links,
            footnotes,
            attributes,
            ..
        } = val;
        Self {
            plain_text,
            blocks: blocks.into_iter().map(Into::into).collect(),
            metadata: metadata.into(),
            tables: tables.into_iter().map(Into::into).collect(),
            images: images.into_iter().map(Into::into).collect(),
            links: links.into_iter().map(Into::into).collect(),
            footnotes: footnotes.into_iter().map(Into::into).collect(),
            attributes: attributes.iter().map(|i| format!("{:?}", i)).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmFormattedBlock> for kreuzberg::FormattedBlock {
fn from(val: WasmFormattedBlock) -> Self {
Self {
block_type: val.block_type.into(),
level: val.level,
inline_content: val.inline_content.into_iter().map(Into::into).collect(),
attributes: Default::default(),
language: val.language,
code: val.code,
children: val.children.into_iter().map(Into::into).collect(),
}
}
}
// Core -> wasm: inline content and child blocks are converted through `Into`; `attributes`, when
// present, is rendered with its `Debug` formatting.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::FormattedBlock> for WasmFormattedBlock {
    fn from(val: kreuzberg::FormattedBlock) -> Self {
        let kreuzberg::FormattedBlock {
            block_type,
            level,
            inline_content,
            attributes,
            language,
            code,
            children,
            ..
        } = val;
        Self {
            block_type: block_type.into(),
            level,
            inline_content: inline_content.into_iter().map(Into::into).collect(),
            attributes: attributes.as_ref().map(|v| format!("{v:?}")),
            language,
            code,
            children: children.into_iter().map(Into::into).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmInlineElement> for kreuzberg::InlineElement {
fn from(val: WasmInlineElement) -> Self {
Self {
element_type: val.element_type.into(),
content: val.content,
attributes: Default::default(),
metadata: val
.metadata
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
}
}
}
// Core -> wasm: `attributes`, when present, is rendered with its `Debug` formatting; `metadata` is
// serialized to JSON and re-parsed into a JS value (`None` if either step fails).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::InlineElement> for WasmInlineElement {
    fn from(val: kreuzberg::InlineElement) -> Self {
        let kreuzberg::InlineElement {
            element_type,
            content,
            attributes,
            metadata,
            ..
        } = val;
        Self {
            element_type: element_type.into(),
            content,
            attributes: attributes.as_ref().map(|v| format!("{v:?}")),
            metadata: metadata
                .as_ref()
                .and_then(|meta| serde_json::to_string(meta).ok())
                .and_then(|json| js_sys::JSON::parse(&json).ok()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDjotImage> for kreuzberg::DjotImage {
fn from(val: WasmDjotImage) -> Self {
Self {
src: val.src,
alt: val.alt,
title: val.title,
attributes: Default::default(),
}
}
}
// Core -> wasm: `src`, `alt` and `title` are moved across; `attributes`, when present, is rendered
// with its `Debug` formatting.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DjotImage> for WasmDjotImage {
    fn from(val: kreuzberg::DjotImage) -> Self {
        let kreuzberg::DjotImage {
            src,
            alt,
            title,
            attributes,
            ..
        } = val;
        Self {
            src,
            alt,
            title,
            attributes: attributes.as_ref().map(|v| format!("{v:?}")),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDjotLink> for kreuzberg::DjotLink {
fn from(val: WasmDjotLink) -> Self {
Self {
url: val.url,
text: val.text,
title: val.title,
attributes: Default::default(),
}
}
}
// Core -> wasm: `url`, `text` and `title` are moved across; `attributes`, when present, is rendered
// with its `Debug` formatting.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DjotLink> for WasmDjotLink {
    fn from(val: kreuzberg::DjotLink) -> Self {
        let kreuzberg::DjotLink {
            url,
            text,
            title,
            attributes,
            ..
        } = val;
        Self {
            url,
            text,
            title,
            attributes: attributes.as_ref().map(|v| format!("{v:?}")),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmFootnote> for kreuzberg::Footnote {
fn from(val: WasmFootnote) -> Self {
Self {
label: val.label,
content: val.content.into_iter().map(Into::into).collect(),
}
}
}
// Core -> wasm: the label is moved across and each content item is converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Footnote> for WasmFootnote {
    fn from(val: kreuzberg::Footnote) -> Self {
        let kreuzberg::Footnote { label, content, .. } = val;
        Self {
            label,
            content: content.into_iter().map(Into::into).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDocumentStructure> for kreuzberg::DocumentStructure {
fn from(val: WasmDocumentStructure) -> Self {
Self {
nodes: val.nodes.into_iter().map(Into::into).collect(),
source_format: val.source_format,
relationships: val.relationships.into_iter().map(Into::into).collect(),
node_types: val.node_types.into_iter().collect(),
}
}
}
// Core -> wasm: nodes and relationships are converted through `Into`; `node_types` is re-collected
// and `source_format` is moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DocumentStructure> for WasmDocumentStructure {
    fn from(val: kreuzberg::DocumentStructure) -> Self {
        let kreuzberg::DocumentStructure {
            nodes,
            source_format,
            relationships,
            node_types,
            ..
        } = val;
        Self {
            nodes: nodes.into_iter().map(Into::into).collect(),
            source_format,
            relationships: relationships.into_iter().map(Into::into).collect(),
            node_types: node_types.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDocumentRelationship> for kreuzberg::DocumentRelationship {
fn from(val: WasmDocumentRelationship) -> Self {
Self {
source: kreuzberg::NodeIndex(val.source),
target: kreuzberg::NodeIndex(val.target),
kind: val.kind.into(),
}
}
}
// Core -> wasm: the `source` / `target` node indices are unwrapped to their inner value and `kind`
// is converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DocumentRelationship> for WasmDocumentRelationship {
    fn from(val: kreuzberg::DocumentRelationship) -> Self {
        let kreuzberg::DocumentRelationship {
            source,
            target,
            kind,
            ..
        } = val;
        Self {
            source: source.0,
            target: target.0,
            kind: kind.into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDocumentNode> for kreuzberg::DocumentNode {
fn from(val: WasmDocumentNode) -> Self {
Self {
id: Default::default(),
content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(),
parent: (val.parent).map(kreuzberg::NodeIndex),
children: (val.children.into_iter().collect::<Vec<_>>())
.into_iter()
.map(kreuzberg::NodeIndex)
.collect(),
content_layer: val.content_layer.into(),
page: val.page,
page_end: val.page_end,
bbox: val.bbox.map(Into::into),
annotations: val.annotations.into_iter().map(Into::into).collect(),
attributes: val
.attributes
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
}
}
}
// Core -> wasm conversion for a document node.
//
// * `id` is rendered with its `Debug` formatting.
// * `content` is serialized to a JS value (JS `null` if serialization fails).
// * `parent` and every entry of `children` are unwrapped to the inner index value.
// * `attributes` is serialized to JSON and re-parsed into a JS value (`None` if either step fails).
// * Remaining fields are moved across or converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DocumentNode> for WasmDocumentNode {
    fn from(val: kreuzberg::DocumentNode) -> Self {
        let kreuzberg::DocumentNode {
            id,
            content,
            parent,
            children,
            content_layer,
            page,
            page_end,
            bbox,
            annotations,
            attributes,
            ..
        } = val;
        Self {
            id: format!("{:?}", id),
            content: serde_wasm_bindgen::to_value(&content).unwrap_or(JsValue::NULL),
            parent: parent.map(|v| v.0),
            // Collect straight into the target collection; the earlier intermediate `Vec` that was
            // immediately re-iterated was never needed.
            children: children.iter().map(|v| v.0).collect(),
            content_layer: content_layer.into(),
            page,
            page_end,
            bbox: bbox.map(Into::into),
            annotations: annotations.into_iter().map(Into::into).collect(),
            attributes: attributes
                .as_ref()
                .and_then(|v| serde_json::to_string(v).ok())
                .and_then(|s| js_sys::JSON::parse(&s).ok()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTableGrid> for kreuzberg::TableGrid {
fn from(val: WasmTableGrid) -> Self {
Self {
rows: val.rows,
cols: val.cols,
cells: val.cells.into_iter().map(Into::into).collect(),
}
}
}
// Core -> wasm: the grid dimensions are moved across and each cell is converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TableGrid> for WasmTableGrid {
    fn from(val: kreuzberg::TableGrid) -> Self {
        let kreuzberg::TableGrid { rows, cols, cells, .. } = val;
        Self {
            rows,
            cols,
            cells: cells.into_iter().map(Into::into).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmGridCell> for kreuzberg::GridCell {
fn from(val: WasmGridCell) -> Self {
Self {
content: val.content,
row: val.row,
col: val.col,
row_span: val.row_span,
col_span: val.col_span,
is_header: val.is_header,
bbox: val.bbox.map(Into::into),
}
}
}
// Core -> wasm: cell content, position, spans and header flag are moved across; the optional
// `bbox` is converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::GridCell> for WasmGridCell {
    fn from(val: kreuzberg::GridCell) -> Self {
        let kreuzberg::GridCell {
            content,
            row,
            col,
            row_span,
            col_span,
            is_header,
            bbox,
            ..
        } = val;
        Self {
            content,
            row,
            col,
            row_span,
            col_span,
            is_header,
            bbox: bbox.map(Into::into),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTextAnnotation> for kreuzberg::TextAnnotation {
fn from(val: WasmTextAnnotation) -> Self {
Self {
start: val.start,
end: val.end,
kind: serde_wasm_bindgen::from_value(val.kind.clone()).unwrap_or_default(),
}
}
}
// Core -> wasm: `start` / `end` are moved across and `kind` is serialized to a JS value (JS `null`
// if serialization fails).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TextAnnotation> for WasmTextAnnotation {
    fn from(val: kreuzberg::TextAnnotation) -> Self {
        let kreuzberg::TextAnnotation { start, end, kind, .. } = val;
        let kind_js = serde_wasm_bindgen::to_value(&kind).unwrap_or(JsValue::NULL);
        Self {
            start,
            end,
            kind: kind_js,
        }
    }
}
#[allow(clippy::needless_update)]
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExtractionResult> for kreuzberg::ExtractionResult {
fn from(val: WasmExtractionResult) -> Self {
Self {
content: val.content,
mime_type: val.mime_type.into(),
metadata: val.metadata.into(),
extraction_method: val.extraction_method.map(Into::into),
tables: val.tables.into_iter().map(Into::into).collect(),
detected_languages: val.detected_languages.map(|v| v.into_iter().collect()),
chunks: val.chunks.map(|v| v.into_iter().map(Into::into).collect()),
images: val.images.map(|v| v.into_iter().map(Into::into).collect()),
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
elements: val.elements.map(|v| v.into_iter().map(Into::into).collect()),
djot_content: val.djot_content.map(Into::into),
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
document: val.document.map(Into::into),
quality_score: val.quality_score,
processing_warnings: val.processing_warnings.into_iter().map(Into::into).collect(),
annotations: val.annotations.map(|v| v.into_iter().map(Into::into).collect()),
children: val.children.map(|v| v.into_iter().map(Into::into).collect()),
uris: val.uris.map(|v| v.into_iter().map(Into::into).collect()),
revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()),
structured_output: val
.structured_output
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
code_intelligence: val
.code_intelligence
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
llm_usage: val.llm_usage.map(|v| v.into_iter().map(Into::into).collect()),
formatted_content: val.formatted_content,
ocr_internal_document: Default::default(),
..Default::default()
}
}
}
// Core -> wasm conversion for a full extraction result.
//
// `mime_type` is stringified, `structured_output` / `code_intelligence` are serialized to JS values
// (`None` on failure), and `ocr_internal_document`, when present, is rendered with its `Debug`
// formatting. Other fields are moved across or converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExtractionResult> for WasmExtractionResult {
    fn from(val: kreuzberg::ExtractionResult) -> Self {
        let kreuzberg::ExtractionResult {
            content,
            mime_type,
            metadata,
            extraction_method,
            tables,
            detected_languages,
            chunks,
            images,
            pages,
            elements,
            djot_content,
            ocr_elements,
            document,
            quality_score,
            processing_warnings,
            annotations,
            children,
            uris,
            revisions,
            structured_output,
            code_intelligence,
            llm_usage,
            formatted_content,
            ocr_internal_document,
            ..
        } = val;
        Self {
            content,
            mime_type: mime_type.to_string(),
            metadata: metadata.into(),
            extraction_method: extraction_method.map(Into::into),
            tables: tables.into_iter().map(Into::into).collect(),
            detected_languages: detected_languages.map(|langs| langs.into_iter().collect()),
            chunks: chunks.map(|items| items.into_iter().map(Into::into).collect()),
            images: images.map(|items| items.into_iter().map(Into::into).collect()),
            pages: pages.map(|items| items.into_iter().map(Into::into).collect()),
            elements: elements.map(|items| items.into_iter().map(Into::into).collect()),
            djot_content: djot_content.map(Into::into),
            ocr_elements: ocr_elements.map(|items| items.into_iter().map(Into::into).collect()),
            document: document.map(Into::into),
            quality_score,
            processing_warnings: processing_warnings.into_iter().map(Into::into).collect(),
            annotations: annotations.map(|items| items.into_iter().map(Into::into).collect()),
            children: children.map(|items| items.into_iter().map(Into::into).collect()),
            uris: uris.map(|items| items.into_iter().map(Into::into).collect()),
            revisions: revisions.map(|items| items.into_iter().map(Into::into).collect()),
            structured_output: structured_output
                .as_ref()
                .and_then(|out| serde_wasm_bindgen::to_value(out).ok()),
            code_intelligence: code_intelligence
                .as_ref()
                .and_then(|info| serde_wasm_bindgen::to_value(info).ok()),
            llm_usage: llm_usage.map(|items| items.into_iter().map(Into::into).collect()),
            formatted_content,
            ocr_internal_document: ocr_internal_document.as_ref().map(|v| format!("{v:?}")),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmArchiveEntry> for kreuzberg::ArchiveEntry {
fn from(val: WasmArchiveEntry) -> Self {
Self {
path: val.path,
mime_type: val.mime_type,
result: Box::new(val.result.into()),
}
}
}
// Core -> wasm: path and MIME type are moved across; the nested result is taken out from behind
// its pointer and converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ArchiveEntry> for WasmArchiveEntry {
    fn from(val: kreuzberg::ArchiveEntry) -> Self {
        let kreuzberg::ArchiveEntry {
            path,
            mime_type,
            result,
            ..
        } = val;
        Self {
            path,
            mime_type,
            result: (*result).into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmProcessingWarning> for kreuzberg::ProcessingWarning {
fn from(val: WasmProcessingWarning) -> Self {
Self {
source: val.source.into(),
message: val.message.into(),
}
}
}
// Core -> wasm: both the source and the message are rendered with `to_string()`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ProcessingWarning> for WasmProcessingWarning {
    fn from(val: kreuzberg::ProcessingWarning) -> Self {
        let kreuzberg::ProcessingWarning { source, message, .. } = val;
        Self {
            source: source.to_string(),
            message: message.to_string(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLlmUsage> for kreuzberg::LlmUsage {
fn from(val: WasmLlmUsage) -> Self {
Self {
model: val.model,
source: val.source,
input_tokens: val.input_tokens,
output_tokens: val.output_tokens,
total_tokens: val.total_tokens,
estimated_cost: val.estimated_cost,
finish_reason: val.finish_reason,
}
}
}
// Core -> wasm: every LLM usage field is moved across by name without transformation.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LlmUsage> for WasmLlmUsage {
    fn from(val: kreuzberg::LlmUsage) -> Self {
        let kreuzberg::LlmUsage {
            model,
            source,
            input_tokens,
            output_tokens,
            total_tokens,
            estimated_cost,
            finish_reason,
            ..
        } = val;
        Self {
            model,
            source,
            input_tokens,
            output_tokens,
            total_tokens,
            estimated_cost,
            finish_reason,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmChunk> for kreuzberg::Chunk {
fn from(val: WasmChunk) -> Self {
Self {
content: val.content,
chunk_type: val.chunk_type.into(),
embedding: val.embedding.map(|v| v.into_iter().collect()),
metadata: val.metadata.into(),
}
}
}
// Core -> wasm: `chunk_type` and `metadata` go through `Into`; the optional embedding is
// re-collected into the wasm collection type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Chunk> for WasmChunk {
    fn from(val: kreuzberg::Chunk) -> Self {
        let kreuzberg::Chunk {
            content,
            chunk_type,
            embedding,
            metadata,
            ..
        } = val;
        Self {
            content,
            chunk_type: chunk_type.into(),
            embedding: embedding.map(|values| values.into_iter().collect()),
            metadata: metadata.into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHeadingContext> for kreuzberg::HeadingContext {
fn from(val: WasmHeadingContext) -> Self {
Self {
headings: val.headings.into_iter().map(Into::into).collect(),
}
}
}
// Core -> wasm: each heading is converted through `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HeadingContext> for WasmHeadingContext {
    fn from(val: kreuzberg::HeadingContext) -> Self {
        let kreuzberg::HeadingContext { headings, .. } = val;
        Self {
            headings: headings.into_iter().map(Into::into).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHeadingLevel> for kreuzberg::HeadingLevel {
fn from(val: WasmHeadingLevel) -> Self {
Self {
level: val.level,
text: val.text,
}
}
}
// Core -> wasm: the heading level and text are moved across unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HeadingLevel> for WasmHeadingLevel {
    fn from(val: kreuzberg::HeadingLevel) -> Self {
        let kreuzberg::HeadingLevel { level, text, .. } = val;
        Self { level, text }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmChunkMetadata> for kreuzberg::ChunkMetadata {
fn from(val: WasmChunkMetadata) -> Self {
Self {
byte_start: val.byte_start,
byte_end: val.byte_end,
token_count: val.token_count,
chunk_index: val.chunk_index,
total_chunks: val.total_chunks,
first_page: val.first_page,
last_page: val.last_page,
heading_context: val.heading_context.map(Into::into),
image_indices: val.image_indices.into_iter().collect(),
}
}
}
// Core -> wasm: offsets, counts and page bounds are moved across; `heading_context` goes through
// `Into` when present and `image_indices` is re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ChunkMetadata> for WasmChunkMetadata {
    fn from(val: kreuzberg::ChunkMetadata) -> Self {
        let kreuzberg::ChunkMetadata {
            byte_start,
            byte_end,
            token_count,
            chunk_index,
            total_chunks,
            first_page,
            last_page,
            heading_context,
            image_indices,
            ..
        } = val;
        Self {
            byte_start,
            byte_end,
            token_count,
            chunk_index,
            total_chunks,
            first_page,
            last_page,
            heading_context: heading_context.map(Into::into),
            image_indices: image_indices.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExtractedImage> for kreuzberg::ExtractedImage {
fn from(val: WasmExtractedImage) -> Self {
Self {
data: val.data.to_vec().into(),
format: val.format.into(),
image_index: val.image_index,
page_number: val.page_number,
width: val.width,
height: val.height,
colorspace: val.colorspace,
bits_per_component: val.bits_per_component,
is_mask: val.is_mask,
description: val.description,
ocr_result: val.ocr_result.map(Into::into).map(Box::new),
bounding_box: val.bounding_box.map(Into::into),
source_path: val.source_path,
image_kind: val.image_kind.map(Into::into),
kind_confidence: val.kind_confidence,
cluster_id: val.cluster_id,
}
}
}
/// Converts the core `kreuzberg::ExtractedImage` into its WASM binding struct.
///
/// The image bytes are copied with `to_vec()` and converted with `Into`;
/// `format` is rendered through `to_string()`; a boxed OCR result, when
/// present, is unboxed and converted.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExtractedImage> for WasmExtractedImage {
    fn from(val: kreuzberg::ExtractedImage) -> Self {
        let kreuzberg::ExtractedImage {
            data,
            format,
            image_index,
            page_number,
            width,
            height,
            colorspace,
            bits_per_component,
            is_mask,
            description,
            ocr_result,
            bounding_box,
            source_path,
            image_kind,
            kind_confidence,
            cluster_id,
            ..
        } = val;
        Self {
            data: data.to_vec().into(),
            format: format.to_string(),
            image_index,
            page_number,
            width,
            height,
            colorspace,
            bits_per_component,
            is_mask,
            description,
            ocr_result: ocr_result.map(|boxed| (*boxed).into()),
            bounding_box: bounding_box.map(Into::into),
            source_path,
            image_kind: image_kind.map(Into::into),
            kind_confidence,
            cluster_id,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmBoundingBox> for kreuzberg::BoundingBox {
fn from(val: WasmBoundingBox) -> Self {
Self {
x0: val.x0,
y0: val.y0,
x1: val.x1,
y1: val.y1,
}
}
}
/// Builds a `WasmBoundingBox` from the core `kreuzberg::BoundingBox`,
/// carrying the four coordinates `x0`, `y0`, `x1`, `y1` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::BoundingBox> for WasmBoundingBox {
    fn from(val: kreuzberg::BoundingBox) -> Self {
        let kreuzberg::BoundingBox { x0, y0, x1, y1, .. } = val;
        Self { x0, y0, x1, y1 }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmElementMetadata> for kreuzberg::ElementMetadata {
fn from(val: WasmElementMetadata) -> Self {
Self {
page_number: val.page_number,
filename: val.filename,
coordinates: val.coordinates.map(Into::into),
element_index: val.element_index,
additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(),
}
}
}
/// Converts the core `kreuzberg::ElementMetadata` into its WASM binding struct.
///
/// `additional` is serialized to a JSON string and handed to `JSON.parse`;
/// the field becomes `JsValue::NULL` when parsing fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ElementMetadata> for WasmElementMetadata {
    fn from(val: kreuzberg::ElementMetadata) -> Self {
        let kreuzberg::ElementMetadata {
            page_number,
            filename,
            coordinates,
            element_index,
            additional,
            ..
        } = val;
        Self {
            page_number,
            filename,
            coordinates: coordinates.map(Into::into),
            element_index,
            additional: {
                // A failed serialization leaves an empty string for the parser.
                let json = serde_json::to_string(&additional).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmElement> for kreuzberg::Element {
fn from(val: WasmElement) -> Self {
Self {
element_id: Default::default(),
element_type: val.element_type.into(),
text: val.text,
metadata: val.metadata.into(),
}
}
}
/// Converts the core `kreuzberg::Element` into a `WasmElement`.
///
/// `element_id` is rendered with its `Debug` formatting (`{:?}`); the
/// remaining fields are moved or converted with `Into`.
// NOTE(review): Debug output may include wrapper syntax rather than the bare id
// string — confirm this is what JS callers expect.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Element> for WasmElement {
    fn from(val: kreuzberg::Element) -> Self {
        let kreuzberg::Element {
            element_id,
            element_type,
            text,
            metadata,
            ..
        } = val;
        Self {
            element_id: format!("{:?}", element_id),
            element_type: element_type.into(),
            text,
            metadata: metadata.into(),
        }
    }
}
/// Converts the core `kreuzberg::ExcelWorkbook` into its WASM binding struct.
///
/// Sheets and (optional) revisions are converted element-wise with `Into`.
/// `metadata` is serialized to a JSON string and handed to `JSON.parse`,
/// becoming `JsValue::NULL` when parsing fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExcelWorkbook> for WasmExcelWorkbook {
    fn from(val: kreuzberg::ExcelWorkbook) -> Self {
        let kreuzberg::ExcelWorkbook {
            sheets,
            metadata,
            revisions,
            ..
        } = val;
        Self {
            sheets: sheets.into_iter().map(Into::into).collect(),
            metadata: {
                let json = serde_json::to_string(&metadata).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            revisions: revisions.map(|items| items.into_iter().map(Into::into).collect()),
        }
    }
}
/// Converts the core `kreuzberg::ExcelSheet` into its WASM binding struct.
///
/// `table_cells`, when present, is serialized with `serde_wasm_bindgen`; a
/// serialization failure turns the field into `None`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExcelSheet> for WasmExcelSheet {
    fn from(val: kreuzberg::ExcelSheet) -> Self {
        let kreuzberg::ExcelSheet {
            name,
            markdown,
            row_count,
            col_count,
            cell_count,
            table_cells,
            ..
        } = val;
        Self {
            name,
            markdown,
            row_count,
            col_count,
            cell_count,
            table_cells: table_cells
                .as_ref()
                .and_then(|cells| serde_wasm_bindgen::to_value(cells).ok()),
        }
    }
}
/// Converts the core `kreuzberg::XmlExtractionResult` into its WASM binding
/// struct; `unique_elements` is re-collected into the target collection.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::XmlExtractionResult> for WasmXmlExtractionResult {
    fn from(val: kreuzberg::XmlExtractionResult) -> Self {
        let kreuzberg::XmlExtractionResult {
            content,
            element_count,
            unique_elements,
            ..
        } = val;
        Self {
            content,
            element_count,
            unique_elements: unique_elements.into_iter().collect(),
        }
    }
}
/// Converts the core `kreuzberg::TextExtractionResult` into its WASM binding
/// struct.
///
/// `links` and `code_blocks`, when present, are flattened from pairs into
/// two-element string arrays and serialized with `serde_wasm_bindgen`; a
/// serialization failure turns the respective field into `None`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TextExtractionResult> for WasmTextExtractionResult {
    fn from(val: kreuzberg::TextExtractionResult) -> Self {
        let kreuzberg::TextExtractionResult {
            content,
            line_count,
            word_count,
            character_count,
            headers,
            links,
            code_blocks,
            ..
        } = val;
        Self {
            content,
            line_count,
            word_count,
            character_count,
            headers: headers.map(|items| items.into_iter().collect()),
            links: links.as_ref().and_then(|pairs| {
                let rows = pairs
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).ok()
            }),
            code_blocks: code_blocks.as_ref().and_then(|pairs| {
                let rows = pairs
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).ok()
            }),
        }
    }
}
/// Converts the core `kreuzberg::PptxExtractionResult` into its WASM binding
/// struct.
///
/// Nested values are converted with `Into`. Each hyperlink is rendered with
/// its `Debug` formatting (`{:?}`). `office_metadata` is serialized to a JSON
/// string and handed to `JSON.parse`, becoming `JsValue::NULL` when parsing
/// fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PptxExtractionResult> for WasmPptxExtractionResult {
    fn from(val: kreuzberg::PptxExtractionResult) -> Self {
        let kreuzberg::PptxExtractionResult {
            content,
            metadata,
            slide_count,
            image_count,
            table_count,
            images,
            page_structure,
            page_contents,
            document,
            hyperlinks,
            office_metadata,
            revisions,
            ..
        } = val;
        Self {
            content,
            metadata: metadata.into(),
            slide_count,
            image_count,
            table_count,
            images: images.into_iter().map(Into::into).collect(),
            page_structure: page_structure.map(Into::into),
            page_contents: page_contents.map(|pages| pages.into_iter().map(Into::into).collect()),
            document: document.map(Into::into),
            hyperlinks: hyperlinks.iter().map(|link| format!("{:?}", link)).collect(),
            office_metadata: {
                let json = serde_json::to_string(&office_metadata).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            revisions: revisions.map(|items| items.into_iter().map(Into::into).collect()),
        }
    }
}
/// Converts the core `kreuzberg::EmailExtractionResult` into its WASM binding
/// struct.
///
/// Address lists are re-collected, attachments are converted with `Into`, and
/// `metadata` is serialized to a JSON string and handed to `JSON.parse`
/// (becoming `JsValue::NULL` when parsing fails).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EmailExtractionResult> for WasmEmailExtractionResult {
    fn from(val: kreuzberg::EmailExtractionResult) -> Self {
        let kreuzberg::EmailExtractionResult {
            subject,
            from_email,
            to_emails,
            cc_emails,
            bcc_emails,
            date,
            message_id,
            plain_text,
            html_content,
            content,
            attachments,
            metadata,
            ..
        } = val;
        Self {
            subject,
            from_email,
            to_emails: to_emails.into_iter().collect(),
            cc_emails: cc_emails.into_iter().collect(),
            bcc_emails: bcc_emails.into_iter().collect(),
            date,
            message_id,
            plain_text,
            html_content,
            content,
            attachments: attachments.into_iter().map(Into::into).collect(),
            metadata: {
                let json = serde_json::to_string(&metadata).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
        }
    }
}
/// Converts the core `kreuzberg::EmailAttachment` into its WASM binding struct.
///
/// The attachment payload, when present, is copied with `to_vec()` and
/// converted with `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EmailAttachment> for WasmEmailAttachment {
    fn from(val: kreuzberg::EmailAttachment) -> Self {
        let kreuzberg::EmailAttachment {
            name,
            filename,
            mime_type,
            size,
            is_image,
            data,
            ..
        } = val;
        Self {
            name,
            filename,
            mime_type,
            size,
            is_image,
            data: data.map(|bytes| bytes.to_vec().into()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrExtractionResult> for kreuzberg::OcrExtractionResult {
fn from(val: WasmOcrExtractionResult) -> Self {
Self {
content: val.content,
mime_type: val.mime_type,
metadata: serde_wasm_bindgen::from_value(val.metadata.clone()).unwrap_or_default(),
tables: val.tables.into_iter().map(Into::into).collect(),
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
internal_document: Default::default(),
}
}
}
/// Converts the core `kreuzberg::OcrExtractionResult` into its WASM binding
/// struct.
///
/// `metadata` is serialized to a JSON string and handed to `JSON.parse`
/// (becoming `JsValue::NULL` when parsing fails). `internal_document`, when
/// present, is rendered with its `Debug` formatting.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrExtractionResult> for WasmOcrExtractionResult {
    fn from(val: kreuzberg::OcrExtractionResult) -> Self {
        let kreuzberg::OcrExtractionResult {
            content,
            mime_type,
            metadata,
            tables,
            ocr_elements,
            internal_document,
            ..
        } = val;
        Self {
            content,
            mime_type,
            metadata: {
                let json = serde_json::to_string(&metadata).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            tables: tables.into_iter().map(Into::into).collect(),
            ocr_elements: ocr_elements.map(|items| items.into_iter().map(Into::into).collect()),
            internal_document: internal_document.as_ref().map(|doc| format!("{doc:?}")),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrTable> for kreuzberg::OcrTable {
fn from(val: WasmOcrTable) -> Self {
Self {
cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(),
markdown: val.markdown,
page_number: val.page_number,
bounding_box: val.bounding_box.map(Into::into),
}
}
}
/// Converts the core `kreuzberg::OcrTable` into a `WasmOcrTable`.
///
/// `cells` is serialized with `serde_wasm_bindgen`; a serialization failure
/// yields `JsValue::NULL`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrTable> for WasmOcrTable {
    fn from(val: kreuzberg::OcrTable) -> Self {
        let kreuzberg::OcrTable {
            cells,
            markdown,
            page_number,
            bounding_box,
            ..
        } = val;
        Self {
            cells: serde_wasm_bindgen::to_value(&cells).unwrap_or(JsValue::NULL),
            markdown,
            page_number,
            bounding_box: bounding_box.map(Into::into),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrTableBoundingBox> for kreuzberg::OcrTableBoundingBox {
fn from(val: WasmOcrTableBoundingBox) -> Self {
Self {
left: val.left,
top: val.top,
right: val.right,
bottom: val.bottom,
}
}
}
/// Builds a `WasmOcrTableBoundingBox` from the core
/// `kreuzberg::OcrTableBoundingBox`, carrying `left`, `top`, `right` and
/// `bottom` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrTableBoundingBox> for WasmOcrTableBoundingBox {
    fn from(val: kreuzberg::OcrTableBoundingBox) -> Self {
        let kreuzberg::OcrTableBoundingBox {
            left,
            top,
            right,
            bottom,
            ..
        } = val;
        Self {
            left,
            top,
            right,
            bottom,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmImagePreprocessingConfig> for kreuzberg::ImagePreprocessingConfig {
fn from(val: WasmImagePreprocessingConfig) -> Self {
Self {
target_dpi: val.target_dpi,
auto_rotate: val.auto_rotate,
deskew: val.deskew,
denoise: val.denoise,
contrast_enhance: val.contrast_enhance,
binarization_method: val.binarization_method,
invert_colors: val.invert_colors,
}
}
}
/// Builds a `WasmImagePreprocessingConfig` from the core
/// `kreuzberg::ImagePreprocessingConfig`; every field is carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ImagePreprocessingConfig> for WasmImagePreprocessingConfig {
    fn from(val: kreuzberg::ImagePreprocessingConfig) -> Self {
        let kreuzberg::ImagePreprocessingConfig {
            target_dpi,
            auto_rotate,
            deskew,
            denoise,
            contrast_enhance,
            binarization_method,
            invert_colors,
            ..
        } = val;
        Self {
            target_dpi,
            auto_rotate,
            deskew,
            denoise,
            contrast_enhance,
            binarization_method,
            invert_colors,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTesseractConfig> for kreuzberg::TesseractConfig {
fn from(val: WasmTesseractConfig) -> Self {
Self {
language: val.language,
psm: val.psm,
output_format: val.output_format,
oem: val.oem,
min_confidence: val.min_confidence,
preprocessing: val.preprocessing.map(Into::into),
enable_table_detection: val.enable_table_detection,
table_min_confidence: val.table_min_confidence,
table_column_threshold: val.table_column_threshold,
table_row_threshold_ratio: val.table_row_threshold_ratio,
use_cache: val.use_cache,
classify_use_pre_adapted_templates: val.classify_use_pre_adapted_templates,
language_model_ngram_on: val.language_model_ngram_on,
tessedit_dont_blkrej_good_wds: val.tessedit_dont_blkrej_good_wds,
tessedit_dont_rowrej_good_wds: val.tessedit_dont_rowrej_good_wds,
tessedit_enable_dict_correction: val.tessedit_enable_dict_correction,
tessedit_char_whitelist: val.tessedit_char_whitelist,
tessedit_char_blacklist: val.tessedit_char_blacklist,
tessedit_use_primary_params_model: val.tessedit_use_primary_params_model,
textord_space_size_is_variable: val.textord_space_size_is_variable,
thresholding_method: val.thresholding_method,
}
}
}
/// Builds a `WasmTesseractConfig` from the core `kreuzberg::TesseractConfig`.
///
/// All fields are carried over unchanged except `preprocessing`, which is
/// converted with `Into` when present.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TesseractConfig> for WasmTesseractConfig {
    fn from(val: kreuzberg::TesseractConfig) -> Self {
        let kreuzberg::TesseractConfig {
            language,
            psm,
            output_format,
            oem,
            min_confidence,
            preprocessing,
            enable_table_detection,
            table_min_confidence,
            table_column_threshold,
            table_row_threshold_ratio,
            use_cache,
            classify_use_pre_adapted_templates,
            language_model_ngram_on,
            tessedit_dont_blkrej_good_wds,
            tessedit_dont_rowrej_good_wds,
            tessedit_enable_dict_correction,
            tessedit_char_whitelist,
            tessedit_char_blacklist,
            tessedit_use_primary_params_model,
            textord_space_size_is_variable,
            thresholding_method,
            ..
        } = val;
        Self {
            language,
            psm,
            output_format,
            oem,
            min_confidence,
            preprocessing: preprocessing.map(Into::into),
            enable_table_detection,
            table_min_confidence,
            table_column_threshold,
            table_row_threshold_ratio,
            use_cache,
            classify_use_pre_adapted_templates,
            language_model_ngram_on,
            tessedit_dont_blkrej_good_wds,
            tessedit_dont_rowrej_good_wds,
            tessedit_enable_dict_correction,
            tessedit_char_whitelist,
            tessedit_char_blacklist,
            tessedit_use_primary_params_model,
            textord_space_size_is_variable,
            thresholding_method,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmImagePreprocessingMetadata> for kreuzberg::ImagePreprocessingMetadata {
fn from(val: WasmImagePreprocessingMetadata) -> Self {
Self {
original_dimensions: Default::default(),
original_dpi: Default::default(),
target_dpi: val.target_dpi,
scale_factor: val.scale_factor,
auto_adjusted: val.auto_adjusted,
final_dpi: val.final_dpi,
new_dimensions: Default::default(),
resample_method: val.resample_method,
dimension_clamped: val.dimension_clamped,
calculated_dpi: val.calculated_dpi,
skipped_resize: val.skipped_resize,
resize_error: val.resize_error,
}
}
}
/// Builds a `WasmImagePreprocessingMetadata` from the core
/// `kreuzberg::ImagePreprocessingMetadata`.
///
/// The tuple-valued `original_dimensions`, `original_dpi` and (optional)
/// `new_dimensions` are flattened into two-element vectors, casting each
/// component with `as` to the WASM field's element type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ImagePreprocessingMetadata> for WasmImagePreprocessingMetadata {
    fn from(val: kreuzberg::ImagePreprocessingMetadata) -> Self {
        let kreuzberg::ImagePreprocessingMetadata {
            original_dimensions,
            original_dpi,
            target_dpi,
            scale_factor,
            auto_adjusted,
            final_dpi,
            new_dimensions,
            resample_method,
            dimension_clamped,
            calculated_dpi,
            skipped_resize,
            resize_error,
            ..
        } = val;
        Self {
            original_dimensions: vec![original_dimensions.0 as _, original_dimensions.1 as _],
            original_dpi: vec![original_dpi.0 as _, original_dpi.1 as _],
            target_dpi,
            scale_factor,
            auto_adjusted,
            final_dpi,
            new_dimensions: new_dimensions.map(|pair| {
                [pair.0, pair.1]
                    .into_iter()
                    .map(|component| component as _)
                    .collect::<Vec<_>>()
            }),
            resample_method,
            dimension_clamped,
            calculated_dpi,
            skipped_resize,
            resize_error,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmMetadata> for kreuzberg::Metadata {
fn from(val: WasmMetadata) -> Self {
Self {
title: val.title,
subject: val.subject,
authors: val.authors.map(|v| v.into_iter().collect()),
keywords: val.keywords.map(|v| v.into_iter().collect()),
language: val.language,
created_at: val.created_at,
modified_at: val.modified_at,
created_by: val.created_by,
modified_by: val.modified_by,
pages: val.pages.map(Into::into),
format: val
.format
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
image_preprocessing: val.image_preprocessing.map(Into::into),
json_schema: val
.json_schema
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
error: val.error.map(Into::into),
extraction_duration_ms: val.extraction_duration_ms,
category: val.category,
tags: val.tags.map(|v| v.into_iter().collect()),
document_version: val.document_version,
abstract_text: val.abstract_text,
output_format: val.output_format,
ocr_used: val.ocr_used,
additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(),
}
}
}
/// Converts the core `kreuzberg::Metadata` into a `WasmMetadata`.
///
/// `format` and `json_schema`, when present, are serialized with
/// `serde_wasm_bindgen` (a failure yields `None`). `additional` is serialized
/// to a JSON string and handed to `JSON.parse`, becoming `JsValue::NULL` when
/// parsing fails. Nested structs are converted with `Into`, and list fields
/// are re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Metadata> for WasmMetadata {
    fn from(val: kreuzberg::Metadata) -> Self {
        let kreuzberg::Metadata {
            title,
            subject,
            authors,
            keywords,
            language,
            created_at,
            modified_at,
            created_by,
            modified_by,
            pages,
            format,
            image_preprocessing,
            json_schema,
            error,
            extraction_duration_ms,
            category,
            tags,
            document_version,
            abstract_text,
            output_format,
            ocr_used,
            additional,
            ..
        } = val;
        Self {
            title,
            subject,
            authors: authors.map(|items| items.into_iter().collect()),
            keywords: keywords.map(|items| items.into_iter().collect()),
            language,
            created_at,
            modified_at,
            created_by,
            modified_by,
            pages: pages.map(Into::into),
            format: format
                .as_ref()
                .and_then(|inner| serde_wasm_bindgen::to_value(inner).ok()),
            image_preprocessing: image_preprocessing.map(Into::into),
            json_schema: json_schema
                .as_ref()
                .and_then(|inner| serde_wasm_bindgen::to_value(inner).ok()),
            error: error.map(Into::into),
            extraction_duration_ms,
            category,
            tags: tags.map(|items| items.into_iter().collect()),
            document_version,
            abstract_text,
            output_format,
            ocr_used,
            additional: {
                let json = serde_json::to_string(&additional).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExcelMetadata> for kreuzberg::ExcelMetadata {
fn from(val: WasmExcelMetadata) -> Self {
Self {
sheet_count: val.sheet_count,
sheet_names: val.sheet_names.map(|v| v.into_iter().collect()),
}
}
}
/// Builds a `WasmExcelMetadata` from the core `kreuzberg::ExcelMetadata`;
/// `sheet_names`, when present, is re-collected into the target collection.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExcelMetadata> for WasmExcelMetadata {
    fn from(val: kreuzberg::ExcelMetadata) -> Self {
        let kreuzberg::ExcelMetadata {
            sheet_count,
            sheet_names,
            ..
        } = val;
        Self {
            sheet_count,
            sheet_names: sheet_names.map(|names| names.into_iter().collect()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmEmailMetadata> for kreuzberg::EmailMetadata {
fn from(val: WasmEmailMetadata) -> Self {
Self {
from_email: val.from_email,
from_name: val.from_name,
to_emails: val.to_emails.into_iter().collect(),
cc_emails: val.cc_emails.into_iter().collect(),
bcc_emails: val.bcc_emails.into_iter().collect(),
message_id: val.message_id,
attachments: val.attachments.into_iter().collect(),
}
}
}
/// Builds a `WasmEmailMetadata` from the core `kreuzberg::EmailMetadata`.
///
/// Scalar fields are carried over unchanged; the address and attachment lists
/// are re-collected into the target collections.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EmailMetadata> for WasmEmailMetadata {
    fn from(val: kreuzberg::EmailMetadata) -> Self {
        let kreuzberg::EmailMetadata {
            from_email,
            from_name,
            to_emails,
            cc_emails,
            bcc_emails,
            message_id,
            attachments,
            ..
        } = val;
        Self {
            from_email,
            from_name,
            to_emails: to_emails.into_iter().collect(),
            cc_emails: cc_emails.into_iter().collect(),
            bcc_emails: bcc_emails.into_iter().collect(),
            message_id,
            attachments: attachments.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmArchiveMetadata> for kreuzberg::ArchiveMetadata {
fn from(val: WasmArchiveMetadata) -> Self {
Self {
format: val.format.into(),
file_count: val.file_count,
file_list: val.file_list.into_iter().collect(),
total_size: val.total_size,
compressed_size: val.compressed_size,
}
}
}
/// Builds a `WasmArchiveMetadata` from the core `kreuzberg::ArchiveMetadata`.
///
/// `format` is rendered through `to_string()` and `file_list` is
/// re-collected; the counters and sizes are carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ArchiveMetadata> for WasmArchiveMetadata {
    fn from(val: kreuzberg::ArchiveMetadata) -> Self {
        let kreuzberg::ArchiveMetadata {
            format,
            file_count,
            file_list,
            total_size,
            compressed_size,
            ..
        } = val;
        Self {
            format: format.to_string(),
            file_count,
            file_list: file_list.into_iter().collect(),
            total_size,
            compressed_size,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmImageMetadata> for kreuzberg::ImageMetadata {
fn from(val: WasmImageMetadata) -> Self {
Self {
width: val.width,
height: val.height,
format: val.format,
exif: serde_wasm_bindgen::from_value(val.exif.clone()).unwrap_or_default(),
}
}
}
/// Builds a `WasmImageMetadata` from the core `kreuzberg::ImageMetadata`.
///
/// `exif` is serialized to a JSON string and handed to `JSON.parse`; the
/// field becomes `JsValue::NULL` when parsing fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ImageMetadata> for WasmImageMetadata {
    fn from(val: kreuzberg::ImageMetadata) -> Self {
        let kreuzberg::ImageMetadata {
            width,
            height,
            format,
            exif,
            ..
        } = val;
        Self {
            width,
            height,
            format,
            exif: {
                let json = serde_json::to_string(&exif).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmXmlMetadata> for kreuzberg::XmlMetadata {
fn from(val: WasmXmlMetadata) -> Self {
Self {
element_count: val.element_count,
unique_elements: val.unique_elements.into_iter().collect(),
}
}
}
/// Builds a `WasmXmlMetadata` from the core `kreuzberg::XmlMetadata`;
/// `unique_elements` is re-collected into the target collection.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::XmlMetadata> for WasmXmlMetadata {
    fn from(val: kreuzberg::XmlMetadata) -> Self {
        let kreuzberg::XmlMetadata {
            element_count,
            unique_elements,
            ..
        } = val;
        Self {
            element_count,
            unique_elements: unique_elements.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTextMetadata> for kreuzberg::TextMetadata {
fn from(val: WasmTextMetadata) -> Self {
Self {
line_count: val.line_count,
word_count: val.word_count,
character_count: val.character_count,
headers: val.headers.map(|v| v.into_iter().collect()),
links: Default::default(),
code_blocks: Default::default(),
}
}
}
/// Builds a `WasmTextMetadata` from the core `kreuzberg::TextMetadata`.
///
/// `links` and `code_blocks`, when present, are flattened from pairs into
/// two-element string arrays and serialized with `serde_wasm_bindgen`; a
/// serialization failure turns the respective field into `None`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TextMetadata> for WasmTextMetadata {
    fn from(val: kreuzberg::TextMetadata) -> Self {
        let kreuzberg::TextMetadata {
            line_count,
            word_count,
            character_count,
            headers,
            links,
            code_blocks,
            ..
        } = val;
        Self {
            line_count,
            word_count,
            character_count,
            headers: headers.map(|items| items.into_iter().collect()),
            links: links.as_ref().and_then(|pairs| {
                let rows = pairs
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).ok()
            }),
            code_blocks: code_blocks.as_ref().and_then(|pairs| {
                let rows = pairs
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).ok()
            }),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHeaderMetadata> for kreuzberg::HeaderMetadata {
fn from(val: WasmHeaderMetadata) -> Self {
Self {
level: val.level,
text: val.text,
id: val.id,
depth: val.depth,
html_offset: val.html_offset,
}
}
}
/// Builds a `WasmHeaderMetadata` from the core `kreuzberg::HeaderMetadata`;
/// every field is carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HeaderMetadata> for WasmHeaderMetadata {
    fn from(val: kreuzberg::HeaderMetadata) -> Self {
        let kreuzberg::HeaderMetadata {
            level,
            text,
            id,
            depth,
            html_offset,
            ..
        } = val;
        Self {
            level,
            text,
            id,
            depth,
            html_offset,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLinkMetadata> for kreuzberg::LinkMetadata {
fn from(val: WasmLinkMetadata) -> Self {
Self {
href: val.href,
text: val.text,
title: val.title,
link_type: val.link_type.into(),
rel: val.rel.into_iter().collect(),
attributes: Default::default(),
}
}
}
/// Builds a `WasmLinkMetadata` from the core `kreuzberg::LinkMetadata`.
///
/// `attributes` is flattened from pairs into two-element string arrays and
/// serialized with `serde_wasm_bindgen`; a serialization failure yields
/// `JsValue::NULL`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LinkMetadata> for WasmLinkMetadata {
    fn from(val: kreuzberg::LinkMetadata) -> Self {
        let kreuzberg::LinkMetadata {
            href,
            text,
            title,
            link_type,
            rel,
            attributes,
            ..
        } = val;
        Self {
            href,
            text,
            title,
            link_type: link_type.into(),
            rel: rel.into_iter().collect(),
            attributes: {
                let rows = attributes
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).unwrap_or(JsValue::NULL)
            },
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmImageMetadataType> for kreuzberg::ImageMetadataType {
fn from(val: WasmImageMetadataType) -> Self {
Self {
src: val.src,
alt: val.alt,
title: val.title,
dimensions: Default::default(),
image_type: val.image_type.into(),
attributes: Default::default(),
}
}
}
/// Builds a `WasmImageMetadataType` from the core
/// `kreuzberg::ImageMetadataType`.
///
/// `dimensions`, when present, is flattened from a tuple into a two-element
/// vector with each component cast via `as`. `attributes` is flattened from
/// pairs into two-element string arrays and serialized with
/// `serde_wasm_bindgen`; a serialization failure yields `JsValue::NULL`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ImageMetadataType> for WasmImageMetadataType {
    fn from(val: kreuzberg::ImageMetadataType) -> Self {
        let kreuzberg::ImageMetadataType {
            src,
            alt,
            title,
            dimensions,
            image_type,
            attributes,
            ..
        } = val;
        Self {
            src,
            alt,
            title,
            dimensions: dimensions.map(|pair| {
                [pair.0, pair.1]
                    .into_iter()
                    .map(|component| component as _)
                    .collect::<Vec<_>>()
            }),
            image_type: image_type.into(),
            attributes: {
                let rows = attributes
                    .iter()
                    .map(|(a, b)| vec![a.to_string(), b.to_string()])
                    .collect::<Vec<Vec<String>>>();
                serde_wasm_bindgen::to_value(&rows).unwrap_or(JsValue::NULL)
            },
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmStructuredData> for kreuzberg::StructuredData {
fn from(val: WasmStructuredData) -> Self {
Self {
data_type: val.data_type.into(),
raw_json: val.raw_json,
schema_type: val.schema_type,
}
}
}
/// Builds a `WasmStructuredData` from the core `kreuzberg::StructuredData`;
/// `data_type` is converted with `Into`, the other fields are carried over.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::StructuredData> for WasmStructuredData {
    fn from(val: kreuzberg::StructuredData) -> Self {
        let kreuzberg::StructuredData {
            data_type,
            raw_json,
            schema_type,
            ..
        } = val;
        Self {
            data_type: data_type.into(),
            raw_json,
            schema_type,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHtmlMetadata> for kreuzberg::HtmlMetadata {
fn from(val: WasmHtmlMetadata) -> Self {
Self {
title: val.title,
description: val.description,
keywords: val.keywords.into_iter().collect(),
author: val.author,
canonical_url: val.canonical_url,
base_href: val.base_href,
language: val.language,
text_direction: val.text_direction.map(Into::into),
open_graph: serde_wasm_bindgen::from_value(val.open_graph.clone()).unwrap_or_default(),
twitter_card: serde_wasm_bindgen::from_value(val.twitter_card.clone()).unwrap_or_default(),
meta_tags: serde_wasm_bindgen::from_value(val.meta_tags.clone()).unwrap_or_default(),
headers: val.headers.into_iter().map(Into::into).collect(),
links: val.links.into_iter().map(Into::into).collect(),
images: val.images.into_iter().map(Into::into).collect(),
structured_data: val.structured_data.into_iter().map(Into::into).collect(),
}
}
}
/// Builds a `WasmHtmlMetadata` from the core `kreuzberg::HtmlMetadata`.
///
/// `open_graph`, `twitter_card` and `meta_tags` are each serialized to a JSON
/// string and handed to `JSON.parse`, becoming `JsValue::NULL` when parsing
/// fails. `headers`, `links`, `images` and `structured_data` are converted
/// element-wise with `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HtmlMetadata> for WasmHtmlMetadata {
    fn from(val: kreuzberg::HtmlMetadata) -> Self {
        let kreuzberg::HtmlMetadata {
            title,
            description,
            keywords,
            author,
            canonical_url,
            base_href,
            language,
            text_direction,
            open_graph,
            twitter_card,
            meta_tags,
            headers,
            links,
            images,
            structured_data,
            ..
        } = val;
        Self {
            title,
            description,
            keywords: keywords.into_iter().collect(),
            author,
            canonical_url,
            base_href,
            language,
            text_direction: text_direction.map(Into::into),
            open_graph: {
                let json = serde_json::to_string(&open_graph).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            twitter_card: {
                let json = serde_json::to_string(&twitter_card).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            meta_tags: {
                let json = serde_json::to_string(&meta_tags).unwrap_or_default();
                js_sys::JSON::parse(&json).unwrap_or(JsValue::NULL)
            },
            headers: headers.into_iter().map(Into::into).collect(),
            links: links.into_iter().map(Into::into).collect(),
            images: images.into_iter().map(Into::into).collect(),
            structured_data: structured_data.into_iter().map(Into::into).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrMetadata> for kreuzberg::OcrMetadata {
fn from(val: WasmOcrMetadata) -> Self {
Self {
language: val.language,
psm: val.psm,
output_format: val.output_format,
table_count: val.table_count,
table_rows: val.table_rows,
table_cols: val.table_cols,
}
}
}
/// Builds a `WasmOcrMetadata` from the core `kreuzberg::OcrMetadata`;
/// every field is carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrMetadata> for WasmOcrMetadata {
    fn from(val: kreuzberg::OcrMetadata) -> Self {
        let kreuzberg::OcrMetadata {
            language,
            psm,
            output_format,
            table_count,
            table_rows,
            table_cols,
            ..
        } = val;
        Self {
            language,
            psm,
            output_format,
            table_count,
            table_rows,
            table_cols,
        }
    }
}
/// Builds a core `kreuzberg::ErrorMetadata` from the WASM binding struct,
/// carrying `error_type` and `message` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmErrorMetadata> for kreuzberg::ErrorMetadata {
    fn from(val: WasmErrorMetadata) -> Self {
        let error_type = val.error_type;
        let message = val.message;
        Self { error_type, message }
    }
}
/// Builds a `WasmErrorMetadata` from the core `kreuzberg::ErrorMetadata`,
/// carrying `error_type` and `message` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ErrorMetadata> for WasmErrorMetadata {
    fn from(val: kreuzberg::ErrorMetadata) -> Self {
        let error_type = val.error_type;
        let message = val.message;
        Self { error_type, message }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPptxMetadata> for kreuzberg::PptxMetadata {
fn from(val: WasmPptxMetadata) -> Self {
Self {
slide_count: val.slide_count,
slide_names: val.slide_names.into_iter().collect(),
image_count: val.image_count,
table_count: val.table_count,
}
}
}
/// Builds a `WasmPptxMetadata` from the core `kreuzberg::PptxMetadata`;
/// `slide_names` is re-collected, the counters are carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PptxMetadata> for WasmPptxMetadata {
    fn from(val: kreuzberg::PptxMetadata) -> Self {
        let kreuzberg::PptxMetadata {
            slide_count,
            slide_names,
            image_count,
            table_count,
            ..
        } = val;
        Self {
            slide_count,
            slide_names: slide_names.into_iter().collect(),
            image_count,
            table_count,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCsvMetadata> for kreuzberg::CsvMetadata {
fn from(val: WasmCsvMetadata) -> Self {
Self {
row_count: val.row_count,
column_count: val.column_count,
delimiter: val.delimiter,
has_header: val.has_header,
column_types: val.column_types.map(|v| v.into_iter().collect()),
}
}
}
/// Builds a `WasmCsvMetadata` from the core `kreuzberg::CsvMetadata`;
/// `column_types`, when present, is re-collected into the target collection.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CsvMetadata> for WasmCsvMetadata {
    fn from(val: kreuzberg::CsvMetadata) -> Self {
        let kreuzberg::CsvMetadata {
            row_count,
            column_count,
            delimiter,
            has_header,
            column_types,
            ..
        } = val;
        Self {
            row_count,
            column_count,
            delimiter,
            has_header,
            column_types: column_types.map(|types| types.into_iter().collect()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmBibtexMetadata> for kreuzberg::BibtexMetadata {
fn from(val: WasmBibtexMetadata) -> Self {
Self {
entry_count: val.entry_count,
citation_keys: val.citation_keys.into_iter().collect(),
authors: val.authors.into_iter().collect(),
year_range: val.year_range.map(Into::into),
entry_types: val
.entry_types
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
}
}
}
/// Builds a `WasmBibtexMetadata` from the core `kreuzberg::BibtexMetadata`.
///
/// `entry_types`, when present, is serialized to a JSON string and handed to
/// `JSON.parse`; a failure in either step yields `None`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::BibtexMetadata> for WasmBibtexMetadata {
    fn from(val: kreuzberg::BibtexMetadata) -> Self {
        let kreuzberg::BibtexMetadata {
            entry_count,
            citation_keys,
            authors,
            year_range,
            entry_types,
            ..
        } = val;
        Self {
            entry_count,
            citation_keys: citation_keys.into_iter().collect(),
            authors: authors.into_iter().collect(),
            year_range: year_range.map(Into::into),
            entry_types: entry_types
                .as_ref()
                .and_then(|types| serde_json::to_string(types).ok())
                .and_then(|json| js_sys::JSON::parse(&json).ok()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCitationMetadata> for kreuzberg::CitationMetadata {
fn from(val: WasmCitationMetadata) -> Self {
Self {
citation_count: val.citation_count,
format: val.format,
authors: val.authors.into_iter().collect(),
year_range: val.year_range.map(Into::into),
dois: val.dois.into_iter().collect(),
keywords: val.keywords.into_iter().collect(),
}
}
}
/// Builds a `WasmCitationMetadata` from the core `kreuzberg::CitationMetadata`.
///
/// List fields are re-collected and `year_range` is converted with `Into`
/// when present; `citation_count` and `format` are carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CitationMetadata> for WasmCitationMetadata {
    fn from(val: kreuzberg::CitationMetadata) -> Self {
        let kreuzberg::CitationMetadata {
            citation_count,
            format,
            authors,
            year_range,
            dois,
            keywords,
            ..
        } = val;
        Self {
            citation_count,
            format,
            authors: authors.into_iter().collect(),
            year_range: year_range.map(Into::into),
            dois: dois.into_iter().collect(),
            keywords: keywords.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmYearRange> for kreuzberg::YearRange {
fn from(val: WasmYearRange) -> Self {
Self {
min: val.min,
max: val.max,
years: val.years.into_iter().collect(),
}
}
}
/// Builds a `WasmYearRange` from the core `kreuzberg::YearRange`;
/// `min` and `max` are carried over and `years` is re-collected.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::YearRange> for WasmYearRange {
    fn from(val: kreuzberg::YearRange) -> Self {
        let kreuzberg::YearRange { min, max, years, .. } = val;
        Self {
            min,
            max,
            years: years.into_iter().collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmFictionBookMetadata> for kreuzberg::FictionBookMetadata {
fn from(val: WasmFictionBookMetadata) -> Self {
Self {
genres: val.genres.into_iter().collect(),
sequences: val.sequences.into_iter().collect(),
annotation: val.annotation,
}
}
}
/// Builds a `WasmFictionBookMetadata` from the core
/// `kreuzberg::FictionBookMetadata`; `genres` and `sequences` are
/// re-collected, `annotation` is carried over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::FictionBookMetadata> for WasmFictionBookMetadata {
    fn from(val: kreuzberg::FictionBookMetadata) -> Self {
        let kreuzberg::FictionBookMetadata {
            genres,
            sequences,
            annotation,
            ..
        } = val;
        Self {
            genres: genres.into_iter().collect(),
            sequences: sequences.into_iter().collect(),
            annotation,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDbfMetadata> for kreuzberg::DbfMetadata {
fn from(val: WasmDbfMetadata) -> Self {
Self {
record_count: val.record_count,
field_count: val.field_count,
fields: val.fields.into_iter().map(Into::into).collect(),
}
}
}
/// Builds a `WasmDbfMetadata` from the core `kreuzberg::DbfMetadata`;
/// each entry of `fields` is converted with `Into`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DbfMetadata> for WasmDbfMetadata {
    fn from(val: kreuzberg::DbfMetadata) -> Self {
        let kreuzberg::DbfMetadata {
            record_count,
            field_count,
            fields,
            ..
        } = val;
        Self {
            record_count,
            field_count,
            fields: fields.into_iter().map(Into::into).collect(),
        }
    }
}
/// Builds a core `kreuzberg::DbfFieldInfo` from the WASM binding struct,
/// carrying `name` and `field_type` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDbfFieldInfo> for kreuzberg::DbfFieldInfo {
    fn from(val: WasmDbfFieldInfo) -> Self {
        let name = val.name;
        let field_type = val.field_type;
        Self { name, field_type }
    }
}
/// Builds a `WasmDbfFieldInfo` from the core `kreuzberg::DbfFieldInfo`,
/// carrying `name` and `field_type` over unchanged.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DbfFieldInfo> for WasmDbfFieldInfo {
    fn from(val: kreuzberg::DbfFieldInfo) -> Self {
        let name = val.name;
        let field_type = val.field_type;
        Self { name, field_type }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmJatsMetadata> for kreuzberg::JatsMetadata {
fn from(val: WasmJatsMetadata) -> Self {
Self {
copyright: val.copyright,
license: val.license,
history_dates: serde_wasm_bindgen::from_value(val.history_dates.clone()).unwrap_or_default(),
contributor_roles: val.contributor_roles.into_iter().map(Into::into).collect(),
}
}
}
/// Converts core JATS metadata into the wasm-facing representation.
///
/// `history_dates` is serialized to a JSON string and re-parsed on the JS
/// side; any failure along the way yields `JsValue::NULL`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::JatsMetadata> for WasmJatsMetadata {
    fn from(val: kreuzberg::JatsMetadata) -> Self {
        let kreuzberg::JatsMetadata {
            copyright,
            license,
            history_dates,
            contributor_roles,
            ..
        } = val;
        // A serialization failure produces an empty string, which then fails
        // to parse and ends up as NULL below.
        let history_json = serde_json::to_string(&history_dates).unwrap_or_default();
        Self {
            copyright,
            license,
            history_dates: js_sys::JSON::parse(&history_json).unwrap_or(JsValue::NULL),
            contributor_roles: contributor_roles.into_iter().map(|role| role.into()).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmContributorRole> for kreuzberg::ContributorRole {
fn from(val: WasmContributorRole) -> Self {
Self {
name: val.name,
role: val.role,
}
}
}
/// Converts a core contributor role into the wasm-facing type (field-for-field move).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ContributorRole> for WasmContributorRole {
    fn from(val: kreuzberg::ContributorRole) -> Self {
        let kreuzberg::ContributorRole { name, role, .. } = val;
        Self { name, role }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmEpubMetadata> for kreuzberg::EpubMetadata {
fn from(val: WasmEpubMetadata) -> Self {
Self {
coverage: val.coverage,
dc_format: val.dc_format,
relation: val.relation,
source: val.source,
dc_type: val.dc_type,
cover_image: val.cover_image,
}
}
}
/// Converts core EPUB metadata into the wasm-facing type (field-for-field move).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::EpubMetadata> for WasmEpubMetadata {
    fn from(val: kreuzberg::EpubMetadata) -> Self {
        let kreuzberg::EpubMetadata {
            coverage,
            dc_format,
            relation,
            source,
            dc_type,
            cover_image,
            ..
        } = val;
        Self {
            coverage,
            dc_format,
            relation,
            source,
            dc_type,
            cover_image,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPstMetadata> for kreuzberg::PstMetadata {
fn from(val: WasmPstMetadata) -> Self {
Self {
message_count: val.message_count,
}
}
}
/// Converts core PST metadata into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PstMetadata> for WasmPstMetadata {
    fn from(val: kreuzberg::PstMetadata) -> Self {
        let kreuzberg::PstMetadata { message_count, .. } = val;
        Self { message_count }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrConfidence> for kreuzberg::OcrConfidence {
fn from(val: WasmOcrConfidence) -> Self {
Self {
detection: val.detection,
recognition: val.recognition,
}
}
}
/// Converts core OCR confidence scores into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrConfidence> for WasmOcrConfidence {
    fn from(val: kreuzberg::OcrConfidence) -> Self {
        let kreuzberg::OcrConfidence {
            detection,
            recognition,
            ..
        } = val;
        Self { detection, recognition }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrRotation> for kreuzberg::OcrRotation {
fn from(val: WasmOcrRotation) -> Self {
Self {
angle_degrees: val.angle_degrees,
confidence: val.confidence,
}
}
}
/// Converts core OCR rotation info into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrRotation> for WasmOcrRotation {
    fn from(val: kreuzberg::OcrRotation) -> Self {
        let kreuzberg::OcrRotation {
            angle_degrees,
            confidence,
            ..
        } = val;
        Self {
            angle_degrees,
            confidence,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrElement> for kreuzberg::OcrElement {
fn from(val: WasmOcrElement) -> Self {
Self {
text: val.text,
geometry: serde_wasm_bindgen::from_value(val.geometry.clone()).unwrap_or_default(),
confidence: val.confidence.into(),
level: val.level.into(),
rotation: val.rotation.map(Into::into),
page_number: val.page_number,
parent_id: val.parent_id,
backend_metadata: serde_wasm_bindgen::from_value(val.backend_metadata.clone()).unwrap_or_default(),
}
}
}
/// Converts a core OCR element into the wasm-facing representation.
///
/// `geometry` is serialized straight to a `JsValue`; `backend_metadata` goes
/// through a JSON string that is re-parsed on the JS side. Either conversion
/// yields `JsValue::NULL` on failure.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrElement> for WasmOcrElement {
    fn from(val: kreuzberg::OcrElement) -> Self {
        let kreuzberg::OcrElement {
            text,
            geometry,
            confidence,
            level,
            rotation,
            page_number,
            parent_id,
            backend_metadata,
            ..
        } = val;
        // An empty string (serialization failure) fails to parse and becomes NULL.
        let metadata_json = serde_json::to_string(&backend_metadata).unwrap_or_default();
        Self {
            text,
            geometry: serde_wasm_bindgen::to_value(&geometry).unwrap_or(JsValue::NULL),
            confidence: confidence.into(),
            level: level.into(),
            rotation: rotation.map(|r| r.into()),
            page_number,
            parent_id,
            backend_metadata: js_sys::JSON::parse(&metadata_json).unwrap_or(JsValue::NULL),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmOcrElementConfig> for kreuzberg::OcrElementConfig {
fn from(val: WasmOcrElementConfig) -> Self {
Self {
include_elements: val.include_elements,
min_level: val.min_level.into(),
min_confidence: val.min_confidence,
build_hierarchy: val.build_hierarchy,
}
}
}
/// Converts the core OCR element configuration into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::OcrElementConfig> for WasmOcrElementConfig {
    fn from(val: kreuzberg::OcrElementConfig) -> Self {
        let kreuzberg::OcrElementConfig {
            include_elements,
            min_level,
            min_confidence,
            build_hierarchy,
            ..
        } = val;
        Self {
            include_elements,
            min_level: min_level.into(),
            min_confidence,
            build_hierarchy,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageStructure> for kreuzberg::PageStructure {
fn from(val: WasmPageStructure) -> Self {
Self {
total_count: val.total_count,
unit_type: val.unit_type.into(),
boundaries: val.boundaries.map(|v| v.into_iter().map(Into::into).collect()),
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
}
}
}
/// Converts the core page structure into the wasm-facing type, converting the
/// optional boundary and page lists element-wise.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageStructure> for WasmPageStructure {
    fn from(val: kreuzberg::PageStructure) -> Self {
        let kreuzberg::PageStructure {
            total_count,
            unit_type,
            boundaries,
            pages,
            ..
        } = val;
        Self {
            total_count,
            unit_type: unit_type.into(),
            boundaries: boundaries.map(|list| list.into_iter().map(|b| b.into()).collect()),
            pages: pages.map(|list| list.into_iter().map(|p| p.into()).collect()),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageBoundary> for kreuzberg::PageBoundary {
fn from(val: WasmPageBoundary) -> Self {
Self {
byte_start: val.byte_start,
byte_end: val.byte_end,
page_number: val.page_number,
}
}
}
/// Converts a core page boundary into the wasm-facing type (field-for-field).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageBoundary> for WasmPageBoundary {
    fn from(val: kreuzberg::PageBoundary) -> Self {
        let kreuzberg::PageBoundary {
            byte_start,
            byte_end,
            page_number,
            ..
        } = val;
        Self {
            byte_start,
            byte_end,
            page_number,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageInfo> for kreuzberg::PageInfo {
fn from(val: WasmPageInfo) -> Self {
Self {
number: val.number,
title: val.title,
dimensions: Default::default(),
image_count: val.image_count,
table_count: val.table_count,
hidden: val.hidden,
is_blank: val.is_blank,
has_vector_graphics: val.has_vector_graphics,
}
}
}
/// Converts core page info into the wasm-facing representation.
///
/// The `dimensions` tuple is flattened into a two-element list (`.0`, `.1`),
/// with each component cast to the list's element type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageInfo> for WasmPageInfo {
    fn from(val: kreuzberg::PageInfo) -> Self {
        let kreuzberg::PageInfo {
            number,
            title,
            dimensions,
            image_count,
            table_count,
            hidden,
            is_blank,
            has_vector_graphics,
            ..
        } = val;
        Self {
            number,
            title,
            dimensions: dimensions.map(|dims| {
                [dims.0, dims.1]
                    .into_iter()
                    .map(|component| component as _)
                    .collect::<Vec<_>>()
            }),
            image_count,
            table_count,
            hidden,
            is_blank,
            has_vector_graphics,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageContent> for kreuzberg::PageContent {
fn from(val: WasmPageContent) -> Self {
Self {
page_number: val.page_number,
content: val.content,
tables: val.tables.into_iter().map(|v| std::sync::Arc::new(v.into())).collect(),
image_indices: val.image_indices.into_iter().collect(),
hierarchy: val.hierarchy.map(Into::into),
is_blank: val.is_blank,
layout_regions: val.layout_regions.map(|v| v.into_iter().map(Into::into).collect()),
speaker_notes: val.speaker_notes,
section_name: val.section_name,
sheet_name: val.sheet_name,
}
}
}
/// Converts core page content into the wasm-facing representation.
///
/// Tables are held behind shared pointers on the core side, so each one is
/// cloned out of its pointer before being converted.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageContent> for WasmPageContent {
    fn from(val: kreuzberg::PageContent) -> Self {
        let kreuzberg::PageContent {
            page_number,
            content,
            tables,
            image_indices,
            hierarchy,
            is_blank,
            layout_regions,
            speaker_notes,
            section_name,
            sheet_name,
            ..
        } = val;
        Self {
            page_number,
            content,
            tables: tables
                .into_iter()
                .map(|shared| (*shared).clone().into())
                .collect(),
            image_indices: image_indices.into_iter().collect(),
            hierarchy: hierarchy.map(|h| h.into()),
            is_blank,
            layout_regions: layout_regions.map(|regions| regions.into_iter().map(|r| r.into()).collect()),
            speaker_notes,
            section_name,
            sheet_name,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmLayoutRegion> for kreuzberg::LayoutRegion {
fn from(val: WasmLayoutRegion) -> Self {
Self {
class_name: val.class_name,
confidence: val.confidence,
bounding_box: val.bounding_box.into(),
area_fraction: val.area_fraction,
}
}
}
/// Converts a core layout region into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::LayoutRegion> for WasmLayoutRegion {
    fn from(val: kreuzberg::LayoutRegion) -> Self {
        let kreuzberg::LayoutRegion {
            class_name,
            confidence,
            bounding_box,
            area_fraction,
            ..
        } = val;
        Self {
            class_name,
            confidence,
            bounding_box: bounding_box.into(),
            area_fraction,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmPageHierarchy> for kreuzberg::PageHierarchy {
fn from(val: WasmPageHierarchy) -> Self {
Self {
block_count: val.block_count,
blocks: val.blocks.into_iter().map(Into::into).collect(),
}
}
}
/// Converts a core page hierarchy into the wasm-facing type, converting each block.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::PageHierarchy> for WasmPageHierarchy {
    fn from(val: kreuzberg::PageHierarchy) -> Self {
        let kreuzberg::PageHierarchy {
            block_count, blocks, ..
        } = val;
        Self {
            block_count,
            blocks: blocks.into_iter().map(|block| block.into()).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmHierarchicalBlock> for kreuzberg::HierarchicalBlock {
fn from(val: WasmHierarchicalBlock) -> Self {
Self {
text: val.text,
font_size: val.font_size,
level: val.level,
bbox: Default::default(),
}
}
}
/// Converts a core hierarchical block into the wasm-facing type.
///
/// Only the first two components of the `bbox` tuple (`.0`, `.1`) are
/// exported, each cast to the list's element type.
///
/// NOTE(review): if the core `bbox` tuple has more than two components the
/// remaining ones are dropped here — verify against the core definition.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::HierarchicalBlock> for WasmHierarchicalBlock {
    fn from(val: kreuzberg::HierarchicalBlock) -> Self {
        let kreuzberg::HierarchicalBlock {
            text,
            font_size,
            level,
            bbox,
            ..
        } = val;
        Self {
            text,
            font_size,
            level,
            bbox: bbox.map(|corners| {
                [corners.0, corners.1]
                    .into_iter()
                    .map(|component| component as _)
                    .collect::<Vec<_>>()
            }),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmCellChange> for kreuzberg::CellChange {
fn from(val: WasmCellChange) -> Self {
Self {
row: val.row,
col: val.col,
from: val.from,
to: val.to,
}
}
}
/// Converts a core cell change into the wasm-facing type (field-for-field move).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::CellChange> for WasmCellChange {
    fn from(val: kreuzberg::CellChange) -> Self {
        let kreuzberg::CellChange {
            row,
            col,
            from: before,
            to: after,
            ..
        } = val;
        Self {
            row,
            col,
            from: before,
            to: after,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmDocumentRevision> for kreuzberg::DocumentRevision {
fn from(val: WasmDocumentRevision) -> Self {
Self {
revision_id: val.revision_id,
author: val.author,
timestamp: val.timestamp,
kind: val.kind.into(),
anchor: val
.anchor
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
delta: val.delta.into(),
}
}
}
/// Converts a core document revision into the wasm-facing representation.
///
/// The optional `anchor` is serialized to a `JsValue`; a serialization
/// failure results in `None`.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::DocumentRevision> for WasmDocumentRevision {
    fn from(val: kreuzberg::DocumentRevision) -> Self {
        let kreuzberg::DocumentRevision {
            revision_id,
            author,
            timestamp,
            kind,
            anchor,
            delta,
            ..
        } = val;
        Self {
            revision_id,
            author,
            timestamp,
            kind: kind.into(),
            anchor: anchor
                .as_ref()
                .and_then(|anchor_ref| serde_wasm_bindgen::to_value(anchor_ref).ok()),
            delta: delta.into(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmRevisionDelta> for kreuzberg::RevisionDelta {
fn from(val: WasmRevisionDelta) -> Self {
Self {
content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(),
table_changes: val.table_changes.into_iter().map(Into::into).collect(),
}
}
}
/// Converts a core revision delta into the wasm-facing representation.
///
/// `content` is serialized to a `JsValue`, falling back to `JsValue::NULL`
/// when serialization fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::RevisionDelta> for WasmRevisionDelta {
    fn from(val: kreuzberg::RevisionDelta) -> Self {
        let kreuzberg::RevisionDelta {
            content,
            table_changes,
            ..
        } = val;
        Self {
            content: serde_wasm_bindgen::to_value(&content).unwrap_or(JsValue::NULL),
            table_changes: table_changes.into_iter().map(|change| change.into()).collect(),
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmTable> for kreuzberg::Table {
fn from(val: WasmTable) -> Self {
Self {
cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(),
markdown: val.markdown,
page_number: val.page_number,
bounding_box: val.bounding_box.map(Into::into),
}
}
}
/// Converts a core table into the wasm-facing representation.
///
/// `cells` is serialized to a `JsValue`, falling back to `JsValue::NULL`
/// when serialization fails.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::Table> for WasmTable {
    fn from(val: kreuzberg::Table) -> Self {
        let kreuzberg::Table {
            cells,
            markdown,
            page_number,
            bounding_box,
            ..
        } = val;
        Self {
            cells: serde_wasm_bindgen::to_value(&cells).unwrap_or(JsValue::NULL),
            markdown,
            page_number,
            bounding_box: bounding_box.map(|bbox| bbox.into()),
        }
    }
}
/// Converts a core table cell into the wasm-facing type (field-for-field move).
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::TableCell> for WasmTableCell {
    fn from(val: kreuzberg::TableCell) -> Self {
        let kreuzberg::TableCell {
            content,
            row_span,
            col_span,
            is_header,
            ..
        } = val;
        Self {
            content,
            row_span,
            col_span,
            is_header,
        }
    }
}
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<WasmExtractedUri> for kreuzberg::ExtractedUri {
fn from(val: WasmExtractedUri) -> Self {
Self {
url: val.url,
label: val.label,
page: val.page,
kind: val.kind.into(),
}
}
}
/// Converts a core extracted URI into the wasm-facing type.
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
impl From<kreuzberg::ExtractedUri> for WasmExtractedUri {
    fn from(val: kreuzberg::ExtractedUri) -> Self {
        let kreuzberg::ExtractedUri {
            url,
            label,
            page,
            kind,
            ..
        } = val;
        Self {
            url,
            label,
            page,
            kind: kind.into(),
        }
    }
}
/// Maps each wasm-facing execution provider variant onto the core variant of the same name.
impl From<WasmExecutionProviderType> for kreuzberg::ExecutionProviderType {
    fn from(val: WasmExecutionProviderType) -> Self {
        type Binding = WasmExecutionProviderType;
        match val {
            Binding::Auto => Self::Auto,
            Binding::Cpu => Self::Cpu,
            Binding::CoreMl => Self::CoreMl,
            Binding::Cuda => Self::Cuda,
            Binding::TensorRt => Self::TensorRt,
        }
    }
}
/// Maps each core execution provider variant onto the wasm-facing variant of the same name.
impl From<kreuzberg::ExecutionProviderType> for WasmExecutionProviderType {
    fn from(val: kreuzberg::ExecutionProviderType) -> Self {
        type Core = kreuzberg::ExecutionProviderType;
        match val {
            Core::Auto => Self::Auto,
            Core::Cpu => Self::Cpu,
            Core::CoreMl => Self::CoreMl,
            Core::Cuda => Self::Cuda,
            Core::TensorRt => Self::TensorRt,
        }
    }
}
/// Maps the wasm-facing output format onto the core enum.
///
/// The wasm `Custom` variant carries no payload, so the core `Custom`
/// variant is built with a default payload.
impl From<WasmOutputFormat> for kreuzberg::OutputFormat {
    fn from(val: WasmOutputFormat) -> Self {
        type Binding = WasmOutputFormat;
        match val {
            Binding::Plain => Self::Plain,
            Binding::Markdown => Self::Markdown,
            Binding::Djot => Self::Djot,
            Binding::Html => Self::Html,
            Binding::Json => Self::Json,
            Binding::Structured => Self::Structured,
            Binding::Custom => Self::Custom(Default::default()),
        }
    }
}
/// Maps the core output format onto the wasm-facing enum.
///
/// The payload of the core `Custom` variant is discarded because the wasm
/// variant has no field to hold it.
impl From<kreuzberg::OutputFormat> for WasmOutputFormat {
    fn from(val: kreuzberg::OutputFormat) -> Self {
        type Core = kreuzberg::OutputFormat;
        match val {
            Core::Plain => Self::Plain,
            Core::Markdown => Self::Markdown,
            Core::Djot => Self::Djot,
            Core::Html => Self::Html,
            Core::Json => Self::Json,
            Core::Structured => Self::Structured,
            Core::Custom(..) => Self::Custom,
        }
    }
}
/// Maps each wasm-facing chunker type onto the core variant of the same name.
impl From<WasmChunkerType> for kreuzberg::ChunkerType {
    fn from(val: WasmChunkerType) -> Self {
        type Binding = WasmChunkerType;
        match val {
            Binding::Text => Self::Text,
            Binding::Markdown => Self::Markdown,
            Binding::Yaml => Self::Yaml,
            Binding::Semantic => Self::Semantic,
        }
    }
}
/// Maps each core chunker type onto the wasm-facing variant of the same name.
impl From<kreuzberg::ChunkerType> for WasmChunkerType {
    fn from(val: kreuzberg::ChunkerType) -> Self {
        type Core = kreuzberg::ChunkerType;
        match val {
            Core::Text => Self::Text,
            Core::Markdown => Self::Markdown,
            Core::Yaml => Self::Yaml,
            Core::Semantic => Self::Semantic,
        }
    }
}
/// Builds the core chunk-sizing enum from the flattened wasm-facing struct.
///
/// The `type` string selects the variant. A missing `model` becomes the
/// default value, and any unrecognised `type` silently falls back to
/// `Characters` because `From` cannot report an error.
impl From<WasmChunkSizing> for kreuzberg::ChunkSizing {
    fn from(val: WasmChunkSizing) -> Self {
        match val.r#type.as_str() {
            "characters" => Self::Characters,
            "tokenizer" => Self::Tokenizer {
                // `val` is owned, so the optional fields are moved out
                // rather than cloned.
                model: val.model.unwrap_or_default(),
                cache_dir: val.cache_dir.map(Into::into),
            },
            _ => Self::Characters,
        }
    }
}
/// Flattens the core chunk-sizing enum into the wasm-facing struct, using the
/// `type` string as the variant tag and leaving unused fields as `None`.
impl From<kreuzberg::ChunkSizing> for WasmChunkSizing {
    fn from(val: kreuzberg::ChunkSizing) -> Self {
        type Core = kreuzberg::ChunkSizing;
        let (tag, cache_dir, model) = match val {
            Core::Characters => ("characters", None, None),
            Core::Tokenizer { model, cache_dir } => (
                "tokenizer",
                // Non-UTF-8 path components are replaced lossily.
                cache_dir.map(|path| path.to_string_lossy().into_owned()),
                Some(model),
            ),
        };
        Self {
            r#type: String::from(tag),
            cache_dir,
            model,
        }
    }
}
/// Builds the core embedding-model enum from the flattened wasm-facing struct.
///
/// The `type` string selects the variant. Missing payload fields become their
/// default values, and any unrecognised `type` silently falls back to a
/// `Preset` with a default name because `From` cannot report an error.
impl From<WasmEmbeddingModelType> for kreuzberg::EmbeddingModelType {
    fn from(val: WasmEmbeddingModelType) -> Self {
        // `val` is owned, so each arm moves the fields it needs instead of
        // cloning them.
        match val.r#type.as_str() {
            "preset" => Self::Preset {
                name: val.name.unwrap_or_default(),
            },
            "custom" => Self::Custom {
                model_id: val.model_id.unwrap_or_default(),
                dimensions: val.dimensions.unwrap_or_default(),
            },
            "llm" => Self::Llm {
                llm: val.llm.map(Into::into).unwrap_or_default(),
            },
            "plugin" => Self::Plugin {
                name: val.name.unwrap_or_default(),
            },
            _ => Self::Preset {
                name: Default::default(),
            },
        }
    }
}
/// Flattens the core embedding-model enum into the wasm-facing struct, using
/// the `type` string as the variant tag and leaving unused fields as `None`.
impl From<kreuzberg::EmbeddingModelType> for WasmEmbeddingModelType {
    fn from(val: kreuzberg::EmbeddingModelType) -> Self {
        type Core = kreuzberg::EmbeddingModelType;
        // Template with only the tag set; each arm overrides its own fields.
        let blank = |tag: &str| Self {
            r#type: tag.to_string(),
            dimensions: None,
            llm: None,
            model_id: None,
            name: None,
        };
        match val {
            Core::Preset { name } => Self {
                name: Some(name),
                ..blank("preset")
            },
            Core::Custom { model_id, dimensions } => Self {
                dimensions: Some(dimensions),
                model_id: Some(model_id),
                ..blank("custom")
            },
            Core::Llm { llm } => Self {
                llm: Some(llm.into()),
                ..blank("llm")
            },
            Core::Plugin { name } => Self {
                name: Some(name),
                ..blank("plugin")
            },
        }
    }
}
/// Maps each core list type onto the wasm-facing variant of the same name.
impl From<kreuzberg::extraction::transform::ListType> for WasmListType {
    fn from(val: kreuzberg::extraction::transform::ListType) -> Self {
        type Core = kreuzberg::extraction::transform::ListType;
        match val {
            Core::Bullet => Self::Bullet,
            Core::Numbered => Self::Numbered,
            Core::Lettered => Self::Lettered,
            Core::Indented => Self::Indented,
        }
    }
}
/// Maps each core OCR backend type onto the wasm-facing variant of the same name.
impl From<kreuzberg::OcrBackendType> for WasmOcrBackendType {
    fn from(val: kreuzberg::OcrBackendType) -> Self {
        type Core = kreuzberg::OcrBackendType;
        match val {
            Core::Tesseract => Self::Tesseract,
            Core::EasyOCR => Self::EasyOCR,
            Core::PaddleOCR => Self::PaddleOCR,
            Core::Custom => Self::Custom,
        }
    }
}
/// Maps each core processing stage onto the wasm-facing variant of the same name.
impl From<kreuzberg::ProcessingStage> for WasmProcessingStage {
    fn from(val: kreuzberg::ProcessingStage) -> Self {
        type Core = kreuzberg::ProcessingStage;
        match val {
            Core::Early => Self::Early,
            Core::Middle => Self::Middle,
            Core::Late => Self::Late,
        }
    }
}
/// Maps each wasm-facing PDF annotation type onto the core variant of the same name.
impl From<WasmPdfAnnotationType> for kreuzberg::PdfAnnotationType {
    fn from(val: WasmPdfAnnotationType) -> Self {
        type Binding = WasmPdfAnnotationType;
        match val {
            Binding::Text => Self::Text,
            Binding::Highlight => Self::Highlight,
            Binding::Link => Self::Link,
            Binding::Stamp => Self::Stamp,
            Binding::Underline => Self::Underline,
            Binding::StrikeOut => Self::StrikeOut,
            Binding::Other => Self::Other,
        }
    }
}
/// Maps each core PDF annotation type onto the wasm-facing variant of the same name.
impl From<kreuzberg::PdfAnnotationType> for WasmPdfAnnotationType {
    fn from(val: kreuzberg::PdfAnnotationType) -> Self {
        type Core = kreuzberg::PdfAnnotationType;
        match val {
            Core::Text => Self::Text,
            Core::Highlight => Self::Highlight,
            Core::Link => Self::Link,
            Core::Stamp => Self::Stamp,
            Core::Underline => Self::Underline,
            Core::StrikeOut => Self::StrikeOut,
            Core::Other => Self::Other,
        }
    }
}
/// Maps each wasm-facing block type onto the core variant of the same name.
impl From<WasmBlockType> for kreuzberg::BlockType {
    fn from(val: WasmBlockType) -> Self {
        type Binding = WasmBlockType;
        match val {
            Binding::Paragraph => Self::Paragraph,
            Binding::Heading => Self::Heading,
            Binding::Blockquote => Self::Blockquote,
            Binding::CodeBlock => Self::CodeBlock,
            Binding::ListItem => Self::ListItem,
            Binding::OrderedList => Self::OrderedList,
            Binding::BulletList => Self::BulletList,
            Binding::TaskList => Self::TaskList,
            Binding::DefinitionList => Self::DefinitionList,
            Binding::DefinitionTerm => Self::DefinitionTerm,
            Binding::DefinitionDescription => Self::DefinitionDescription,
            Binding::Div => Self::Div,
            Binding::Section => Self::Section,
            Binding::ThematicBreak => Self::ThematicBreak,
            Binding::RawBlock => Self::RawBlock,
            Binding::MathDisplay => Self::MathDisplay,
        }
    }
}
/// Maps each core block type onto the wasm-facing variant of the same name.
impl From<kreuzberg::BlockType> for WasmBlockType {
    fn from(val: kreuzberg::BlockType) -> Self {
        type Core = kreuzberg::BlockType;
        match val {
            Core::Paragraph => Self::Paragraph,
            Core::Heading => Self::Heading,
            Core::Blockquote => Self::Blockquote,
            Core::CodeBlock => Self::CodeBlock,
            Core::ListItem => Self::ListItem,
            Core::OrderedList => Self::OrderedList,
            Core::BulletList => Self::BulletList,
            Core::TaskList => Self::TaskList,
            Core::DefinitionList => Self::DefinitionList,
            Core::DefinitionTerm => Self::DefinitionTerm,
            Core::DefinitionDescription => Self::DefinitionDescription,
            Core::Div => Self::Div,
            Core::Section => Self::Section,
            Core::ThematicBreak => Self::ThematicBreak,
            Core::RawBlock => Self::RawBlock,
            Core::MathDisplay => Self::MathDisplay,
        }
    }
}
/// Maps each wasm-facing inline type onto the core variant of the same name.
impl From<WasmInlineType> for kreuzberg::InlineType {
    fn from(val: WasmInlineType) -> Self {
        type Binding = WasmInlineType;
        match val {
            Binding::Text => Self::Text,
            Binding::Strong => Self::Strong,
            Binding::Emphasis => Self::Emphasis,
            Binding::Highlight => Self::Highlight,
            Binding::Subscript => Self::Subscript,
            Binding::Superscript => Self::Superscript,
            Binding::Insert => Self::Insert,
            Binding::Delete => Self::Delete,
            Binding::Code => Self::Code,
            Binding::Link => Self::Link,
            Binding::Image => Self::Image,
            Binding::Span => Self::Span,
            Binding::Math => Self::Math,
            Binding::RawInline => Self::RawInline,
            Binding::FootnoteRef => Self::FootnoteRef,
            Binding::Symbol => Self::Symbol,
        }
    }
}
/// Maps each core inline type onto the wasm-facing variant of the same name.
impl From<kreuzberg::InlineType> for WasmInlineType {
    fn from(val: kreuzberg::InlineType) -> Self {
        type Core = kreuzberg::InlineType;
        match val {
            Core::Text => Self::Text,
            Core::Strong => Self::Strong,
            Core::Emphasis => Self::Emphasis,
            Core::Highlight => Self::Highlight,
            Core::Subscript => Self::Subscript,
            Core::Superscript => Self::Superscript,
            Core::Insert => Self::Insert,
            Core::Delete => Self::Delete,
            Core::Code => Self::Code,
            Core::Link => Self::Link,
            Core::Image => Self::Image,
            Core::Span => Self::Span,
            Core::Math => Self::Math,
            Core::RawInline => Self::RawInline,
            Core::FootnoteRef => Self::FootnoteRef,
            Core::Symbol => Self::Symbol,
        }
    }
}
/// Maps each wasm-facing relationship kind onto the core variant of the same name.
impl From<WasmRelationshipKind> for kreuzberg::RelationshipKind {
    fn from(val: WasmRelationshipKind) -> Self {
        type Binding = WasmRelationshipKind;
        match val {
            Binding::FootnoteReference => Self::FootnoteReference,
            Binding::CitationReference => Self::CitationReference,
            Binding::InternalLink => Self::InternalLink,
            Binding::Caption => Self::Caption,
            Binding::Label => Self::Label,
            Binding::TocEntry => Self::TocEntry,
            Binding::CrossReference => Self::CrossReference,
        }
    }
}
/// Maps each core relationship kind onto the wasm-facing variant of the same name.
impl From<kreuzberg::RelationshipKind> for WasmRelationshipKind {
    fn from(val: kreuzberg::RelationshipKind) -> Self {
        type Core = kreuzberg::RelationshipKind;
        match val {
            Core::FootnoteReference => Self::FootnoteReference,
            Core::CitationReference => Self::CitationReference,
            Core::InternalLink => Self::InternalLink,
            Core::Caption => Self::Caption,
            Core::Label => Self::Label,
            Core::TocEntry => Self::TocEntry,
            Core::CrossReference => Self::CrossReference,
        }
    }
}
/// Maps each wasm-facing content layer onto the core variant of the same name.
impl From<WasmContentLayer> for kreuzberg::ContentLayer {
    fn from(val: WasmContentLayer) -> Self {
        type Binding = WasmContentLayer;
        match val {
            Binding::Body => Self::Body,
            Binding::Header => Self::Header,
            Binding::Footer => Self::Footer,
            Binding::Footnote => Self::Footnote,
        }
    }
}
/// Maps each core content layer onto the wasm-facing variant of the same name.
impl From<kreuzberg::ContentLayer> for WasmContentLayer {
    fn from(val: kreuzberg::ContentLayer) -> Self {
        type Core = kreuzberg::ContentLayer;
        match val {
            Core::Body => Self::Body,
            Core::Header => Self::Header,
            Core::Footer => Self::Footer,
            Core::Footnote => Self::Footnote,
        }
    }
}
/// Builds the core node-content enum from the flattened wasm-facing struct.
///
/// `node_type` selects the variant; each arm reads only the fields that
/// variant needs. Missing required fields become their default values, and an
/// unrecognised `node_type` silently falls back to a `Title` with default
/// text because `From` cannot report an error.
impl From<WasmNodeContent> for kreuzberg::NodeContent {
    fn from(val: WasmNodeContent) -> Self {
        // `val` is owned and each arm touches a field at most once, so fields
        // are moved out instead of cloned. `node_type` stays borrowed by the
        // match, which does not conflict with moving the other fields.
        match val.node_type.as_str() {
            "title" => Self::Title {
                text: val.text.unwrap_or_default(),
            },
            "heading" => Self::Heading {
                level: val.level.unwrap_or_default(),
                text: val.text.unwrap_or_default(),
            },
            "paragraph" => Self::Paragraph {
                text: val.text.unwrap_or_default(),
            },
            "list" => Self::List {
                ordered: val.ordered.unwrap_or_default(),
            },
            "list_item" => Self::ListItem {
                text: val.text.unwrap_or_default(),
            },
            "table" => Self::Table {
                grid: val.grid.map(Into::into).unwrap_or_default(),
            },
            "image" => Self::Image {
                description: val.description,
                image_index: val.image_index,
                src: val.src,
            },
            "code" => Self::Code {
                text: val.text.unwrap_or_default(),
                language: val.language,
            },
            "quote" => Self::Quote,
            "formula" => Self::Formula {
                text: val.text.unwrap_or_default(),
            },
            "footnote" => Self::Footnote {
                text: val.text.unwrap_or_default(),
            },
            "group" => Self::Group {
                label: val.label,
                heading_level: val.heading_level,
                heading_text: val.heading_text,
            },
            "page_break" => Self::PageBreak,
            "slide" => Self::Slide {
                number: val.number.unwrap_or_default(),
                title: val.title,
            },
            "definition_list" => Self::DefinitionList,
            "definition_item" => Self::DefinitionItem {
                term: val.term.unwrap_or_default(),
                definition: val.definition.unwrap_or_default(),
            },
            "citation" => Self::Citation {
                key: val.key.unwrap_or_default(),
                text: val.text.unwrap_or_default(),
            },
            "admonition" => Self::Admonition {
                kind: val.kind.unwrap_or_default(),
                title: val.title,
            },
            "raw_block" => Self::RawBlock {
                format: val.format.unwrap_or_default(),
                content: val.content.unwrap_or_default(),
            },
            "metadata_block" => Self::MetadataBlock {
                // Absent or undeserializable entries become an empty list.
                entries: val
                    .entries
                    .and_then(|raw| serde_wasm_bindgen::from_value::<Vec<(String, String)>>(raw).ok())
                    .unwrap_or_default(),
            },
            _ => Self::Title {
                text: Default::default(),
            },
        }
    }
}
/// Flattens the core node-content enum into the wasm-facing struct.
///
/// `node_type` carries the variant tag; only the fields belonging to that
/// variant are populated and every other field is `None`.
impl From<kreuzberg::NodeContent> for WasmNodeContent {
    fn from(val: kreuzberg::NodeContent) -> Self {
        type Core = kreuzberg::NodeContent;
        // Template with only the tag set. Each arm overrides just the fields
        // its variant carries via struct-update syntax, instead of spelling
        // out all twenty-one fields every time.
        let blank = |tag: &str| Self {
            node_type: tag.to_string(),
            content: None,
            definition: None,
            description: None,
            entries: None,
            format: None,
            grid: None,
            heading_level: None,
            heading_text: None,
            image_index: None,
            key: None,
            kind: None,
            label: None,
            language: None,
            level: None,
            number: None,
            ordered: None,
            src: None,
            term: None,
            text: None,
            title: None,
        };
        match val {
            Core::Title { text } => Self {
                text: Some(text),
                ..blank("title")
            },
            Core::Heading { level, text } => Self {
                level: Some(level),
                text: Some(text),
                ..blank("heading")
            },
            Core::Paragraph { text } => Self {
                text: Some(text),
                ..blank("paragraph")
            },
            Core::List { ordered } => Self {
                ordered: Some(ordered),
                ..blank("list")
            },
            Core::ListItem { text } => Self {
                text: Some(text),
                ..blank("list_item")
            },
            Core::Table { grid } => Self {
                grid: Some(grid.into()),
                ..blank("table")
            },
            Core::Image {
                description,
                image_index,
                src,
            } => Self {
                description,
                image_index,
                src,
                ..blank("image")
            },
            Core::Code { text, language } => Self {
                language,
                text: Some(text),
                ..blank("code")
            },
            Core::Quote => blank("quote"),
            Core::Formula { text } => Self {
                text: Some(text),
                ..blank("formula")
            },
            Core::Footnote { text } => Self {
                text: Some(text),
                ..blank("footnote")
            },
            Core::Group {
                label,
                heading_level,
                heading_text,
            } => Self {
                heading_level,
                heading_text,
                label,
                ..blank("group")
            },
            Core::PageBreak => blank("page_break"),
            Core::Slide { number, title } => Self {
                number: Some(number),
                title,
                ..blank("slide")
            },
            Core::DefinitionList => blank("definition_list"),
            Core::DefinitionItem { term, definition } => Self {
                definition: Some(definition),
                term: Some(term),
                ..blank("definition_item")
            },
            Core::Citation { key, text } => Self {
                key: Some(key),
                text: Some(text),
                ..blank("citation")
            },
            Core::Admonition { kind, title } => Self {
                kind: Some(kind),
                title,
                ..blank("admonition")
            },
            Core::RawBlock { format, content } => Self {
                content: Some(content),
                format: Some(format),
                ..blank("raw_block")
            },
            Core::MetadataBlock { entries } => Self {
                // A serialization failure leaves `entries` as `None`.
                entries: serde_wasm_bindgen::to_value(&entries).ok(),
                ..blank("metadata_block")
            },
        }
    }
}
impl From<WasmAnnotationKind> for kreuzberg::AnnotationKind {
fn from(val: WasmAnnotationKind) -> Self {
match val.annotation_type.as_str() {
"bold" => Self::Bold,
"italic" => Self::Italic,
"underline" => Self::Underline,
"strikethrough" => Self::Strikethrough,
"code" => Self::Code,
"subscript" => Self::Subscript,
"superscript" => Self::Superscript,
"link" => Self::Link {
url: val.url.clone().unwrap_or_default(),
title: val.title.clone(),
},
"highlight" => Self::Highlight,
"color" => Self::Color {
value: val.value.clone().unwrap_or_default(),
},
"font_size" => Self::FontSize {
value: val.value.clone().unwrap_or_default(),
},
"custom" => Self::Custom {
name: val.name.clone().unwrap_or_default(),
value: val.value.clone(),
},
_ => Self::Bold,
}
}
}
impl From<kreuzberg::AnnotationKind> for WasmAnnotationKind {
fn from(val: kreuzberg::AnnotationKind) -> Self {
match val {
kreuzberg::AnnotationKind::Bold => Self {
annotation_type: "bold".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Italic => Self {
annotation_type: "italic".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Underline => Self {
annotation_type: "underline".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Strikethrough => Self {
annotation_type: "strikethrough".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Code => Self {
annotation_type: "code".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Subscript => Self {
annotation_type: "subscript".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Superscript => Self {
annotation_type: "superscript".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Link { url, title } => Self {
annotation_type: "link".to_string(),
name: None,
title,
url: Some(url),
value: None,
},
kreuzberg::AnnotationKind::Highlight => Self {
annotation_type: "highlight".to_string(),
name: None,
title: None,
url: None,
value: None,
},
kreuzberg::AnnotationKind::Color { value } => Self {
annotation_type: "color".to_string(),
name: None,
title: None,
url: None,
value: Some(value),
},
kreuzberg::AnnotationKind::FontSize { value } => Self {
annotation_type: "font_size".to_string(),
name: None,
title: None,
url: None,
value: Some(value),
},
kreuzberg::AnnotationKind::Custom { name, value } => Self {
annotation_type: "custom".to_string(),
name: Some(name),
title: None,
url: None,
value,
},
}
}
}
/// Binding-to-core conversion for the extraction method; variants map one-to-one by name.
impl From<WasmExtractionMethod> for kreuzberg::ExtractionMethod {
    fn from(method: WasmExtractionMethod) -> Self {
        match method {
            WasmExtractionMethod::Native => kreuzberg::ExtractionMethod::Native,
            WasmExtractionMethod::Ocr => kreuzberg::ExtractionMethod::Ocr,
            WasmExtractionMethod::Mixed => kreuzberg::ExtractionMethod::Mixed,
        }
    }
}
impl From<kreuzberg::ExtractionMethod> for WasmExtractionMethod {
fn from(val: kreuzberg::ExtractionMethod) -> Self {
match val {
kreuzberg::ExtractionMethod::Native => Self::Native,
kreuzberg::ExtractionMethod::Ocr => Self::Ocr,
kreuzberg::ExtractionMethod::Mixed => Self::Mixed,
}
}
}
/// Binding-to-core conversion for chunk types; every variant maps to its same-named counterpart.
impl From<WasmChunkType> for kreuzberg::ChunkType {
    fn from(chunk: WasmChunkType) -> Self {
        match chunk {
            WasmChunkType::Heading => kreuzberg::ChunkType::Heading,
            WasmChunkType::PartyList => kreuzberg::ChunkType::PartyList,
            WasmChunkType::Definitions => kreuzberg::ChunkType::Definitions,
            WasmChunkType::OperativeClause => kreuzberg::ChunkType::OperativeClause,
            WasmChunkType::SignatureBlock => kreuzberg::ChunkType::SignatureBlock,
            WasmChunkType::Schedule => kreuzberg::ChunkType::Schedule,
            WasmChunkType::TableLike => kreuzberg::ChunkType::TableLike,
            WasmChunkType::Formula => kreuzberg::ChunkType::Formula,
            WasmChunkType::CodeBlock => kreuzberg::ChunkType::CodeBlock,
            WasmChunkType::Image => kreuzberg::ChunkType::Image,
            WasmChunkType::OrgChart => kreuzberg::ChunkType::OrgChart,
            WasmChunkType::Diagram => kreuzberg::ChunkType::Diagram,
            WasmChunkType::Unknown => kreuzberg::ChunkType::Unknown,
        }
    }
}
impl From<kreuzberg::ChunkType> for WasmChunkType {
fn from(val: kreuzberg::ChunkType) -> Self {
match val {
kreuzberg::ChunkType::Heading => Self::Heading,
kreuzberg::ChunkType::PartyList => Self::PartyList,
kreuzberg::ChunkType::Definitions => Self::Definitions,
kreuzberg::ChunkType::OperativeClause => Self::OperativeClause,
kreuzberg::ChunkType::SignatureBlock => Self::SignatureBlock,
kreuzberg::ChunkType::Schedule => Self::Schedule,
kreuzberg::ChunkType::TableLike => Self::TableLike,
kreuzberg::ChunkType::Formula => Self::Formula,
kreuzberg::ChunkType::CodeBlock => Self::CodeBlock,
kreuzberg::ChunkType::Image => Self::Image,
kreuzberg::ChunkType::OrgChart => Self::OrgChart,
kreuzberg::ChunkType::Diagram => Self::Diagram,
kreuzberg::ChunkType::Unknown => Self::Unknown,
}
}
}
/// Binding-to-core conversion for image kinds; variants map one-to-one by name.
impl From<WasmImageKind> for kreuzberg::ImageKind {
    fn from(image_kind: WasmImageKind) -> Self {
        match image_kind {
            WasmImageKind::Photograph => kreuzberg::ImageKind::Photograph,
            WasmImageKind::Diagram => kreuzberg::ImageKind::Diagram,
            WasmImageKind::Chart => kreuzberg::ImageKind::Chart,
            WasmImageKind::Drawing => kreuzberg::ImageKind::Drawing,
            WasmImageKind::TextBlock => kreuzberg::ImageKind::TextBlock,
            WasmImageKind::Decoration => kreuzberg::ImageKind::Decoration,
            WasmImageKind::Logo => kreuzberg::ImageKind::Logo,
            WasmImageKind::Icon => kreuzberg::ImageKind::Icon,
            WasmImageKind::TileFragment => kreuzberg::ImageKind::TileFragment,
            WasmImageKind::Mask => kreuzberg::ImageKind::Mask,
            WasmImageKind::PageRaster => kreuzberg::ImageKind::PageRaster,
            WasmImageKind::Unknown => kreuzberg::ImageKind::Unknown,
        }
    }
}
impl From<kreuzberg::ImageKind> for WasmImageKind {
fn from(val: kreuzberg::ImageKind) -> Self {
match val {
kreuzberg::ImageKind::Photograph => Self::Photograph,
kreuzberg::ImageKind::Diagram => Self::Diagram,
kreuzberg::ImageKind::Chart => Self::Chart,
kreuzberg::ImageKind::Drawing => Self::Drawing,
kreuzberg::ImageKind::TextBlock => Self::TextBlock,
kreuzberg::ImageKind::Decoration => Self::Decoration,
kreuzberg::ImageKind::Logo => Self::Logo,
kreuzberg::ImageKind::Icon => Self::Icon,
kreuzberg::ImageKind::TileFragment => Self::TileFragment,
kreuzberg::ImageKind::Mask => Self::Mask,
kreuzberg::ImageKind::PageRaster => Self::PageRaster,
kreuzberg::ImageKind::Unknown => Self::Unknown,
}
}
}
/// Binding-to-core conversion for the result format; variants map one-to-one by name.
impl From<WasmResultFormat> for kreuzberg::ResultFormat {
    fn from(format: WasmResultFormat) -> Self {
        match format {
            WasmResultFormat::Unified => kreuzberg::ResultFormat::Unified,
            WasmResultFormat::ElementBased => kreuzberg::ResultFormat::ElementBased,
        }
    }
}
impl From<kreuzberg::ResultFormat> for WasmResultFormat {
fn from(val: kreuzberg::ResultFormat) -> Self {
match val {
kreuzberg::ResultFormat::Unified => Self::Unified,
kreuzberg::ResultFormat::ElementBased => Self::ElementBased,
}
}
}
/// Binding-to-core conversion for element types; variants map one-to-one by name.
impl From<WasmElementType> for kreuzberg::ElementType {
    fn from(element: WasmElementType) -> Self {
        match element {
            WasmElementType::Title => kreuzberg::ElementType::Title,
            WasmElementType::NarrativeText => kreuzberg::ElementType::NarrativeText,
            WasmElementType::Heading => kreuzberg::ElementType::Heading,
            WasmElementType::ListItem => kreuzberg::ElementType::ListItem,
            WasmElementType::Table => kreuzberg::ElementType::Table,
            WasmElementType::Image => kreuzberg::ElementType::Image,
            WasmElementType::PageBreak => kreuzberg::ElementType::PageBreak,
            WasmElementType::CodeBlock => kreuzberg::ElementType::CodeBlock,
            WasmElementType::BlockQuote => kreuzberg::ElementType::BlockQuote,
            WasmElementType::Footer => kreuzberg::ElementType::Footer,
            WasmElementType::Header => kreuzberg::ElementType::Header,
        }
    }
}
impl From<kreuzberg::ElementType> for WasmElementType {
fn from(val: kreuzberg::ElementType) -> Self {
match val {
kreuzberg::ElementType::Title => Self::Title,
kreuzberg::ElementType::NarrativeText => Self::NarrativeText,
kreuzberg::ElementType::Heading => Self::Heading,
kreuzberg::ElementType::ListItem => Self::ListItem,
kreuzberg::ElementType::Table => Self::Table,
kreuzberg::ElementType::Image => Self::Image,
kreuzberg::ElementType::PageBreak => Self::PageBreak,
kreuzberg::ElementType::CodeBlock => Self::CodeBlock,
kreuzberg::ElementType::BlockQuote => Self::BlockQuote,
kreuzberg::ElementType::Footer => Self::Footer,
kreuzberg::ElementType::Header => Self::Header,
}
}
}
impl From<WasmFormatMetadata> for kreuzberg::FormatMetadata {
fn from(val: WasmFormatMetadata) -> Self {
match val.format_type.as_str() {
"pdf" => Self::Pdf(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PdfMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"docx" => Self::Docx(Box::new(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::DocxMetadata>(v.clone()).ok())
.unwrap_or_default(),
)),
"excel" => Self::Excel(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ExcelMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"email" => Self::Email(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::EmailMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"pptx" => Self::Pptx(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PptxMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"archive" => Self::Archive(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ArchiveMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"image" => Self::Image(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ImageMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"xml" => Self::Xml(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::XmlMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"text" => Self::Text(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::TextMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"html" => Self::Html(Box::new(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::HtmlMetadata>(v.clone()).ok())
.unwrap_or_default(),
)),
"ocr" => Self::Ocr(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::OcrMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"csv" => Self::Csv(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::CsvMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"bibtex" => Self::Bibtex(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::BibtexMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"citation" => Self::Citation(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::CitationMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"fiction_book" => Self::FictionBook(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::FictionBookMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"dbf" => Self::Dbf(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::DbfMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"jats" => Self::Jats(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::JatsMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"epub" => Self::Epub(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::EpubMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"pst" => Self::Pst(
val._0
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PstMetadata>(v.clone()).ok())
.unwrap_or_default(),
),
"code" => Self::Code(Default::default()),
_ => Self::Pdf(Default::default()),
}
}
}
impl From<kreuzberg::FormatMetadata> for WasmFormatMetadata {
fn from(val: kreuzberg::FormatMetadata) -> Self {
match val {
kreuzberg::FormatMetadata::Pdf(field0) => Self {
format_type: "pdf".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Docx(field0) => Self {
format_type: "docx".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Excel(field0) => Self {
format_type: "excel".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Email(field0) => Self {
format_type: "email".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Pptx(field0) => Self {
format_type: "pptx".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Archive(field0) => Self {
format_type: "archive".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Image(field0) => Self {
format_type: "image".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Xml(field0) => Self {
format_type: "xml".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Text(field0) => Self {
format_type: "text".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Html(field0) => Self {
format_type: "html".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Ocr(field0) => Self {
format_type: "ocr".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Csv(field0) => Self {
format_type: "csv".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Bibtex(field0) => Self {
format_type: "bibtex".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Citation(field0) => Self {
format_type: "citation".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::FictionBook(field0) => Self {
format_type: "fiction_book".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Dbf(field0) => Self {
format_type: "dbf".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Jats(field0) => Self {
format_type: "jats".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Epub(field0) => Self {
format_type: "epub".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Pst(field0) => Self {
format_type: "pst".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
kreuzberg::FormatMetadata::Code(field0) => Self {
format_type: "code".to_string(),
_0: serde_wasm_bindgen::to_value(&field0).ok(),
},
}
}
}
/// Binding-to-core conversion for text direction; variants map one-to-one by name.
impl From<WasmTextDirection> for kreuzberg::TextDirection {
    fn from(direction: WasmTextDirection) -> Self {
        match direction {
            WasmTextDirection::LeftToRight => kreuzberg::TextDirection::LeftToRight,
            WasmTextDirection::RightToLeft => kreuzberg::TextDirection::RightToLeft,
            WasmTextDirection::Auto => kreuzberg::TextDirection::Auto,
        }
    }
}
impl From<kreuzberg::TextDirection> for WasmTextDirection {
fn from(val: kreuzberg::TextDirection) -> Self {
match val {
kreuzberg::TextDirection::LeftToRight => Self::LeftToRight,
kreuzberg::TextDirection::RightToLeft => Self::RightToLeft,
kreuzberg::TextDirection::Auto => Self::Auto,
}
}
}
/// Binding-to-core conversion for link types; variants map one-to-one by name.
impl From<WasmLinkType> for kreuzberg::LinkType {
    fn from(link: WasmLinkType) -> Self {
        match link {
            WasmLinkType::Anchor => kreuzberg::LinkType::Anchor,
            WasmLinkType::Internal => kreuzberg::LinkType::Internal,
            WasmLinkType::External => kreuzberg::LinkType::External,
            WasmLinkType::Email => kreuzberg::LinkType::Email,
            WasmLinkType::Phone => kreuzberg::LinkType::Phone,
            WasmLinkType::Other => kreuzberg::LinkType::Other,
        }
    }
}
impl From<kreuzberg::LinkType> for WasmLinkType {
fn from(val: kreuzberg::LinkType) -> Self {
match val {
kreuzberg::LinkType::Anchor => Self::Anchor,
kreuzberg::LinkType::Internal => Self::Internal,
kreuzberg::LinkType::External => Self::External,
kreuzberg::LinkType::Email => Self::Email,
kreuzberg::LinkType::Phone => Self::Phone,
kreuzberg::LinkType::Other => Self::Other,
}
}
}
/// Binding-to-core conversion for image types; variants map one-to-one by name.
impl From<WasmImageType> for kreuzberg::ImageType {
    fn from(image_type: WasmImageType) -> Self {
        match image_type {
            WasmImageType::DataUri => kreuzberg::ImageType::DataUri,
            WasmImageType::InlineSvg => kreuzberg::ImageType::InlineSvg,
            WasmImageType::External => kreuzberg::ImageType::External,
            WasmImageType::Relative => kreuzberg::ImageType::Relative,
        }
    }
}
impl From<kreuzberg::ImageType> for WasmImageType {
fn from(val: kreuzberg::ImageType) -> Self {
match val {
kreuzberg::ImageType::DataUri => Self::DataUri,
kreuzberg::ImageType::InlineSvg => Self::InlineSvg,
kreuzberg::ImageType::External => Self::External,
kreuzberg::ImageType::Relative => Self::Relative,
}
}
}
/// Binding-to-core conversion for structured-data types; variants map one-to-one by name.
impl From<WasmStructuredDataType> for kreuzberg::StructuredDataType {
    fn from(data_type: WasmStructuredDataType) -> Self {
        match data_type {
            WasmStructuredDataType::JsonLd => kreuzberg::StructuredDataType::JsonLd,
            WasmStructuredDataType::Microdata => kreuzberg::StructuredDataType::Microdata,
            WasmStructuredDataType::RDFa => kreuzberg::StructuredDataType::RDFa,
        }
    }
}
impl From<kreuzberg::StructuredDataType> for WasmStructuredDataType {
fn from(val: kreuzberg::StructuredDataType) -> Self {
match val {
kreuzberg::StructuredDataType::JsonLd => Self::JsonLd,
kreuzberg::StructuredDataType::Microdata => Self::Microdata,
kreuzberg::StructuredDataType::RDFa => Self::RDFa,
}
}
}
impl From<WasmOcrBoundingGeometry> for kreuzberg::OcrBoundingGeometry {
fn from(val: WasmOcrBoundingGeometry) -> Self {
match val.r#type.as_str() {
"rectangle" => Self::Rectangle {
left: val.left.clone().unwrap_or_default(),
top: val.top.clone().unwrap_or_default(),
width: val.width.clone().unwrap_or_default(),
height: val.height.clone().unwrap_or_default(),
},
"quadrilateral" => Self::Quadrilateral {
points: val
.points
.as_ref()
.and_then(|v| serde_wasm_bindgen::from_value::<[(u32, u32); 4]>(v.clone()).ok())
.unwrap_or_default(),
},
_ => Self::Rectangle {
left: Default::default(),
top: Default::default(),
width: Default::default(),
height: Default::default(),
},
}
}
}
impl From<kreuzberg::OcrBoundingGeometry> for WasmOcrBoundingGeometry {
fn from(val: kreuzberg::OcrBoundingGeometry) -> Self {
match val {
kreuzberg::OcrBoundingGeometry::Rectangle {
left,
top,
width,
height,
} => Self {
r#type: "rectangle".to_string(),
height: Some(height),
left: Some(left),
points: None,
top: Some(top),
width: Some(width),
},
kreuzberg::OcrBoundingGeometry::Quadrilateral { points } => Self {
r#type: "quadrilateral".to_string(),
height: None,
left: None,
points: serde_wasm_bindgen::to_value(&points).ok(),
top: None,
width: None,
},
}
}
}
/// Binding-to-core conversion for OCR element levels; variants map one-to-one by name.
impl From<WasmOcrElementLevel> for kreuzberg::OcrElementLevel {
    fn from(level: WasmOcrElementLevel) -> Self {
        match level {
            WasmOcrElementLevel::Word => kreuzberg::OcrElementLevel::Word,
            WasmOcrElementLevel::Line => kreuzberg::OcrElementLevel::Line,
            WasmOcrElementLevel::Block => kreuzberg::OcrElementLevel::Block,
            WasmOcrElementLevel::Page => kreuzberg::OcrElementLevel::Page,
        }
    }
}
impl From<kreuzberg::OcrElementLevel> for WasmOcrElementLevel {
fn from(val: kreuzberg::OcrElementLevel) -> Self {
match val {
kreuzberg::OcrElementLevel::Word => Self::Word,
kreuzberg::OcrElementLevel::Line => Self::Line,
kreuzberg::OcrElementLevel::Block => Self::Block,
kreuzberg::OcrElementLevel::Page => Self::Page,
}
}
}
/// Binding-to-core conversion for page unit types; variants map one-to-one by name.
impl From<WasmPageUnitType> for kreuzberg::PageUnitType {
    fn from(unit: WasmPageUnitType) -> Self {
        match unit {
            WasmPageUnitType::Page => kreuzberg::PageUnitType::Page,
            WasmPageUnitType::Slide => kreuzberg::PageUnitType::Slide,
            WasmPageUnitType::Sheet => kreuzberg::PageUnitType::Sheet,
        }
    }
}
impl From<kreuzberg::PageUnitType> for WasmPageUnitType {
fn from(val: kreuzberg::PageUnitType) -> Self {
match val {
kreuzberg::PageUnitType::Page => Self::Page,
kreuzberg::PageUnitType::Slide => Self::Slide,
kreuzberg::PageUnitType::Sheet => Self::Sheet,
}
}
}
/// Rebuilds a core diff line from the tagged binding struct.
///
/// `kind` selects the variant and `_0` supplies its payload (defaulted when
/// absent). An unrecognised `kind` yields `Context` with a default payload.
impl From<WasmDiffLine> for kreuzberg::DiffLine {
    fn from(line: WasmDiffLine) -> Self {
        // Evaluated lazily so the fallback arm does not clone the payload.
        let payload = || line._0.clone().unwrap_or_default();
        match line.kind.as_str() {
            "added" => kreuzberg::DiffLine::Added(payload()),
            "removed" => kreuzberg::DiffLine::Removed(payload()),
            "context" => kreuzberg::DiffLine::Context(payload()),
            _ => kreuzberg::DiffLine::Context(Default::default()),
        }
    }
}
impl From<kreuzberg::DiffLine> for WasmDiffLine {
fn from(val: kreuzberg::DiffLine) -> Self {
match val {
kreuzberg::DiffLine::Context(field0) => Self {
kind: "context".to_string(),
_0: Some(field0),
},
kreuzberg::DiffLine::Added(field0) => Self {
kind: "added".to_string(),
_0: Some(field0),
},
kreuzberg::DiffLine::Removed(field0) => Self {
kind: "removed".to_string(),
_0: Some(field0),
},
}
}
}
/// Binding-to-core conversion for revision kinds; variants map one-to-one by name.
impl From<WasmRevisionKind> for kreuzberg::RevisionKind {
    fn from(revision: WasmRevisionKind) -> Self {
        match revision {
            WasmRevisionKind::Insertion => kreuzberg::RevisionKind::Insertion,
            WasmRevisionKind::Deletion => kreuzberg::RevisionKind::Deletion,
            WasmRevisionKind::FormatChange => kreuzberg::RevisionKind::FormatChange,
            WasmRevisionKind::Comment => kreuzberg::RevisionKind::Comment,
        }
    }
}
impl From<kreuzberg::RevisionKind> for WasmRevisionKind {
fn from(val: kreuzberg::RevisionKind) -> Self {
match val {
kreuzberg::RevisionKind::Insertion => Self::Insertion,
kreuzberg::RevisionKind::Deletion => Self::Deletion,
kreuzberg::RevisionKind::FormatChange => Self::FormatChange,
kreuzberg::RevisionKind::Comment => Self::Comment,
}
}
}
impl From<WasmRevisionAnchor> for kreuzberg::RevisionAnchor {
fn from(val: WasmRevisionAnchor) -> Self {
match val.r#type.as_str() {
"paragraph" => Self::Paragraph {
index: val.index.clone().unwrap_or_default(),
},
"table_cell" => Self::TableCell {
row: val.row.clone().unwrap_or_default(),
col: val.col.clone().unwrap_or_default(),
table_index: val.table_index.clone().unwrap_or_default(),
},
"page" => Self::Page {
index: val.index.clone().unwrap_or_default(),
},
"slide" => Self::Slide {
index: val.index.clone().unwrap_or_default(),
},
"sheet" => Self::Sheet {
index: val.index.clone().unwrap_or_default(),
name: val.name.clone(),
},
_ => Self::Paragraph {
index: Default::default(),
},
}
}
}
impl From<kreuzberg::RevisionAnchor> for WasmRevisionAnchor {
fn from(val: kreuzberg::RevisionAnchor) -> Self {
match val {
kreuzberg::RevisionAnchor::Paragraph { index } => Self {
r#type: "paragraph".to_string(),
col: None,
index: Some(index),
name: None,
row: None,
table_index: None,
},
kreuzberg::RevisionAnchor::TableCell { row, col, table_index } => Self {
r#type: "table_cell".to_string(),
col: Some(col),
index: None,
name: None,
row: Some(row),
table_index: Some(table_index),
},
kreuzberg::RevisionAnchor::Page { index } => Self {
r#type: "page".to_string(),
col: None,
index: Some(index),
name: None,
row: None,
table_index: None,
},
kreuzberg::RevisionAnchor::Slide { index } => Self {
r#type: "slide".to_string(),
col: None,
index: Some(index),
name: None,
row: None,
table_index: None,
},
kreuzberg::RevisionAnchor::Sheet { index, name } => Self {
r#type: "sheet".to_string(),
col: None,
index: Some(index),
name,
row: None,
table_index: None,
},
}
}
}
/// Binding-to-core conversion for URI kinds; variants map one-to-one by name.
impl From<WasmUriKind> for kreuzberg::UriKind {
    fn from(uri_kind: WasmUriKind) -> Self {
        match uri_kind {
            WasmUriKind::Hyperlink => kreuzberg::UriKind::Hyperlink,
            WasmUriKind::Image => kreuzberg::UriKind::Image,
            WasmUriKind::Anchor => kreuzberg::UriKind::Anchor,
            WasmUriKind::Citation => kreuzberg::UriKind::Citation,
            WasmUriKind::Reference => kreuzberg::UriKind::Reference,
            WasmUriKind::Email => kreuzberg::UriKind::Email,
        }
    }
}
impl From<kreuzberg::UriKind> for WasmUriKind {
fn from(val: kreuzberg::UriKind) -> Self {
match val {
kreuzberg::UriKind::Hyperlink => Self::Hyperlink,
kreuzberg::UriKind::Image => Self::Image,
kreuzberg::UriKind::Anchor => Self::Anchor,
kreuzberg::UriKind::Citation => Self::Citation,
kreuzberg::UriKind::Reference => Self::Reference,
kreuzberg::UriKind::Email => Self::Email,
}
}
}
/// Return the error code string for a `kreuzberg::error::KreuzbergError` variant.
#[allow(dead_code)]
fn kreuzberg_error_error_code(e: &kreuzberg::error::KreuzbergError) -> &'static str {
#[allow(unreachable_patterns)]
match e {
kreuzberg::error::KreuzbergError::Io(..) => "io",
kreuzberg::error::KreuzbergError::Parsing { .. } => "parsing",
kreuzberg::error::KreuzbergError::Ocr { .. } => "ocr",
kreuzberg::error::KreuzbergError::Validation { .. } => "validation",
kreuzberg::error::KreuzbergError::Cache { .. } => "cache",
kreuzberg::error::KreuzbergError::ImageProcessing { .. } => "image_processing",
kreuzberg::error::KreuzbergError::Serialization { .. } => "serialization",
kreuzberg::error::KreuzbergError::MissingDependency(..) => "missing_dependency",
kreuzberg::error::KreuzbergError::Plugin { .. } => "plugin",
kreuzberg::error::KreuzbergError::LockPoisoned(..) => "lock_poisoned",
kreuzberg::error::KreuzbergError::UnsupportedFormat(..) => "unsupported_format",
kreuzberg::error::KreuzbergError::Embedding { .. } => "embedding",
kreuzberg::error::KreuzbergError::Timeout { .. } => "timeout",
kreuzberg::error::KreuzbergError::Cancelled => "cancelled",
kreuzberg::error::KreuzbergError::Security { .. } => "security",
kreuzberg::error::KreuzbergError::Other(..) => "other",
_ => "kreuzberg_error",
}
}
/// Build a plain JS object describing a `kreuzberg::error::KreuzbergError`.
///
/// The returned object carries two string properties: `code` (as produced by
/// `kreuzberg_error_error_code`) and `message` (the error's `Display` text).
/// A failure to set either property is ignored.
#[allow(dead_code)]
fn kreuzberg_error_to_js_value(e: kreuzberg::error::KreuzbergError) -> wasm_bindgen::JsValue {
    let fields = [
        ("code", wasm_bindgen::JsValue::from_str(kreuzberg_error_error_code(&e))),
        ("message", wasm_bindgen::JsValue::from_str(&e.to_string())),
    ];
    let target = js_sys::Object::new();
    for (key, value) in fields.iter() {
        // Best-effort: the result of `Reflect::set` is deliberately discarded.
        let _ = js_sys::Reflect::set(&target, &wasm_bindgen::JsValue::from_str(key), value);
    }
    wasm_bindgen::JsValue::from(target)
}