19849 lines
611 KiB
Rust
Generated
19849 lines
611 KiB
Rust
Generated
// This file is auto-generated by alef. DO NOT EDIT.
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
|
// Re-generate with: alef generate
|
|
#![allow(dead_code, unused_imports, unused_variables)]
|
|
#![allow(
|
|
clippy::too_many_arguments,
|
|
clippy::let_unit_value,
|
|
clippy::needless_borrow,
|
|
clippy::map_identity,
|
|
clippy::just_underscores_and_digits,
|
|
clippy::unused_unit,
|
|
clippy::unnecessary_cast,
|
|
clippy::unwrap_or_default,
|
|
clippy::derivable_impls,
|
|
clippy::needless_borrows_for_generic_args,
|
|
clippy::unnecessary_fallible_conversions,
|
|
clippy::useless_conversion,
|
|
clippy::arc_with_non_send_sync,
|
|
clippy::collapsible_if,
|
|
clippy::clone_on_copy,
|
|
clippy::should_implement_trait,
|
|
clippy::await_holding_refcell_ref
|
|
)]
|
|
|
|
use std::sync::Arc;
|
|
use std::sync::Mutex;
|
|
use wasm_bindgen::prelude::*;
|
|
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmCacheStats {
|
|
total_files: usize,
|
|
total_size_mb: f64,
|
|
available_space_mb: f64,
|
|
oldest_file_age_days: f64,
|
|
newest_file_age_days: f64,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmCacheStats {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
totalFiles: usize,
|
|
totalSizeMb: f64,
|
|
availableSpaceMb: f64,
|
|
oldestFileAgeDays: f64,
|
|
newestFileAgeDays: f64,
|
|
) -> WasmCacheStats {
|
|
WasmCacheStats {
|
|
total_files: totalFiles,
|
|
total_size_mb: totalSizeMb,
|
|
available_space_mb: availableSpaceMb,
|
|
oldest_file_age_days: oldestFileAgeDays,
|
|
newest_file_age_days: newestFileAgeDays,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmCacheStats {
|
|
<WasmCacheStats as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalFiles")]
|
|
pub fn total_files(&self) -> usize {
|
|
self.total_files
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalFiles")]
|
|
pub fn set_total_files(&mut self, value: usize) {
|
|
self.total_files = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalSizeMb")]
|
|
pub fn total_size_mb(&self) -> f64 {
|
|
self.total_size_mb
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalSizeMb")]
|
|
pub fn set_total_size_mb(&mut self, value: f64) {
|
|
self.total_size_mb = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "availableSpaceMb")]
|
|
pub fn available_space_mb(&self) -> f64 {
|
|
self.available_space_mb
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "availableSpaceMb")]
|
|
pub fn set_available_space_mb(&mut self, value: f64) {
|
|
self.available_space_mb = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "oldestFileAgeDays")]
|
|
pub fn oldest_file_age_days(&self) -> f64 {
|
|
self.oldest_file_age_days
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "oldestFileAgeDays")]
|
|
pub fn set_oldest_file_age_days(&mut self, value: f64) {
|
|
self.oldest_file_age_days = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "newestFileAgeDays")]
|
|
pub fn newest_file_age_days(&self) -> f64 {
|
|
self.newest_file_age_days
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "newestFileAgeDays")]
|
|
pub fn set_newest_file_age_days(&mut self, value: f64) {
|
|
self.newest_file_age_days = value;
|
|
}
|
|
}
|
|
|
|
/// Hardware acceleration configuration for ONNX Runtime models.
|
|
///
|
|
/// Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used
|
|
/// for inference in layout detection and embedding generation.
|
|
///
|
|
/// # Example
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmAccelerationConfig {
|
|
provider: WasmExecutionProviderType,
|
|
device_id: u32,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmAccelerationConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(provider: Option<WasmExecutionProviderType>, deviceId: Option<u32>) -> WasmAccelerationConfig {
|
|
WasmAccelerationConfig {
|
|
provider: provider.unwrap_or_default(),
|
|
device_id: deviceId.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmAccelerationConfig {
|
|
<WasmAccelerationConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn provider(&self) -> String {
|
|
self.provider.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_provider(&mut self, value: WasmExecutionProviderType) {
|
|
self.provider = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "deviceId")]
|
|
pub fn device_id(&self) -> u32 {
|
|
self.device_id
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "deviceId")]
|
|
pub fn set_device_id(&mut self, value: u32) {
|
|
self.device_id = value;
|
|
}
|
|
}
|
|
|
|
/// Cross-extractor content filtering configuration.
|
|
///
|
|
/// Controls whether "furniture" content (headers, footers, page numbers,
|
|
/// watermarks, repeating text) is included in or stripped from extraction
|
|
/// results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.)
|
|
/// with format-specific implementation.
|
|
///
|
|
/// When `None` on `ExtractionConfig`, each extractor uses its current
|
|
/// default behavior unchanged.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmContentFilterConfig {
|
|
include_headers: bool,
|
|
include_footers: bool,
|
|
strip_repeating_text: bool,
|
|
include_watermarks: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmContentFilterConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
includeHeaders: Option<bool>,
|
|
includeFooters: Option<bool>,
|
|
stripRepeatingText: Option<bool>,
|
|
includeWatermarks: Option<bool>,
|
|
) -> WasmContentFilterConfig {
|
|
WasmContentFilterConfig {
|
|
include_headers: includeHeaders.unwrap_or(false),
|
|
include_footers: includeFooters.unwrap_or(false),
|
|
strip_repeating_text: stripRepeatingText.unwrap_or(true),
|
|
include_watermarks: includeWatermarks.unwrap_or(false),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeHeaders")]
|
|
pub fn include_headers(&self) -> bool {
|
|
self.include_headers
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeHeaders")]
|
|
pub fn set_include_headers(&mut self, value: bool) {
|
|
self.include_headers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeFooters")]
|
|
pub fn include_footers(&self) -> bool {
|
|
self.include_footers
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeFooters")]
|
|
pub fn set_include_footers(&mut self, value: bool) {
|
|
self.include_footers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "stripRepeatingText")]
|
|
pub fn strip_repeating_text(&self) -> bool {
|
|
self.strip_repeating_text
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "stripRepeatingText")]
|
|
pub fn set_strip_repeating_text(&mut self, value: bool) {
|
|
self.strip_repeating_text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeWatermarks")]
|
|
pub fn include_watermarks(&self) -> bool {
|
|
self.include_watermarks
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeWatermarks")]
|
|
pub fn set_include_watermarks(&mut self, value: bool) {
|
|
self.include_watermarks = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmContentFilterConfig {
|
|
kreuzberg::ContentFilterConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Configuration for email extraction.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEmailConfig {
|
|
msg_fallback_codepage: Option<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmailConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(msgFallbackCodepage: Option<u32>) -> WasmEmailConfig {
|
|
WasmEmailConfig {
|
|
msg_fallback_codepage: msgFallbackCodepage,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEmailConfig {
|
|
<WasmEmailConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "msgFallbackCodepage")]
|
|
pub fn msg_fallback_codepage(&self) -> Option<u32> {
|
|
self.msg_fallback_codepage
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "msgFallbackCodepage")]
|
|
pub fn set_msg_fallback_codepage(&mut self, value: Option<u32>) {
|
|
self.msg_fallback_codepage = value;
|
|
}
|
|
}
|
|
|
|
/// Main extraction configuration.
|
|
///
|
|
/// This struct contains all configuration options for the extraction process.
|
|
/// It can be loaded from TOML, YAML, or JSON files, or created programmatically.
|
|
///
|
|
/// # Example
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExtractionConfig {
|
|
use_cache: bool,
|
|
enable_quality_processing: bool,
|
|
ocr: Option<WasmOcrConfig>,
|
|
force_ocr: bool,
|
|
force_ocr_pages: Option<Vec<u32>>,
|
|
disable_ocr: bool,
|
|
chunking: Option<WasmChunkingConfig>,
|
|
content_filter: Option<WasmContentFilterConfig>,
|
|
images: Option<WasmImageExtractionConfig>,
|
|
token_reduction: Option<WasmTokenReductionOptions>,
|
|
language_detection: Option<WasmLanguageDetectionConfig>,
|
|
pages: Option<WasmPageConfig>,
|
|
postprocessor: Option<WasmPostProcessorConfig>,
|
|
html_options: Option<String>,
|
|
extraction_timeout_secs: Option<u64>,
|
|
max_concurrent_extractions: Option<usize>,
|
|
result_format: WasmResultFormat,
|
|
security_limits: Option<WasmSecurityLimits>,
|
|
max_embedded_file_bytes: Option<u64>,
|
|
output_format: WasmOutputFormat,
|
|
use_layout_for_markdown: bool,
|
|
include_document_structure: bool,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
cache_namespace: Option<String>,
|
|
cache_ttl_secs: Option<u64>,
|
|
email: Option<WasmEmailConfig>,
|
|
concurrency: Option<String>,
|
|
max_archive_depth: usize,
|
|
structured_extraction: Option<WasmStructuredExtractionConfig>,
|
|
cancel_token: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExtractionConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
useCache: Option<bool>,
|
|
enableQualityProcessing: Option<bool>,
|
|
forceOcr: Option<bool>,
|
|
disableOcr: Option<bool>,
|
|
resultFormat: Option<WasmResultFormat>,
|
|
outputFormat: Option<WasmOutputFormat>,
|
|
useLayoutForMarkdown: Option<bool>,
|
|
includeDocumentStructure: Option<bool>,
|
|
maxArchiveDepth: Option<usize>,
|
|
ocr: Option<WasmOcrConfig>,
|
|
forceOcrPages: Option<Vec<u32>>,
|
|
chunking: Option<WasmChunkingConfig>,
|
|
contentFilter: Option<WasmContentFilterConfig>,
|
|
images: Option<WasmImageExtractionConfig>,
|
|
tokenReduction: Option<WasmTokenReductionOptions>,
|
|
languageDetection: Option<WasmLanguageDetectionConfig>,
|
|
pages: Option<WasmPageConfig>,
|
|
postprocessor: Option<WasmPostProcessorConfig>,
|
|
extractionTimeoutSecs: Option<u64>,
|
|
maxConcurrentExtractions: Option<usize>,
|
|
securityLimits: Option<WasmSecurityLimits>,
|
|
maxEmbeddedFileBytes: Option<u64>,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
cacheNamespace: Option<String>,
|
|
cacheTtlSecs: Option<u64>,
|
|
email: Option<WasmEmailConfig>,
|
|
concurrency: Option<String>,
|
|
structuredExtraction: Option<WasmStructuredExtractionConfig>,
|
|
cancelToken: Option<String>,
|
|
) -> WasmExtractionConfig {
|
|
WasmExtractionConfig {
|
|
use_cache: useCache.unwrap_or(true),
|
|
enable_quality_processing: enableQualityProcessing.unwrap_or(true),
|
|
ocr,
|
|
force_ocr: forceOcr.unwrap_or(false),
|
|
force_ocr_pages: forceOcrPages,
|
|
disable_ocr: disableOcr.unwrap_or(false),
|
|
chunking,
|
|
content_filter: contentFilter,
|
|
images,
|
|
token_reduction: tokenReduction,
|
|
language_detection: languageDetection,
|
|
pages,
|
|
postprocessor,
|
|
html_options: Default::default(),
|
|
extraction_timeout_secs: extractionTimeoutSecs,
|
|
max_concurrent_extractions: maxConcurrentExtractions,
|
|
result_format: resultFormat.unwrap_or_default(),
|
|
security_limits: securityLimits,
|
|
max_embedded_file_bytes: maxEmbeddedFileBytes,
|
|
output_format: outputFormat.unwrap_or_default(),
|
|
use_layout_for_markdown: useLayoutForMarkdown.unwrap_or(false),
|
|
include_document_structure: includeDocumentStructure.unwrap_or(false),
|
|
acceleration,
|
|
cache_namespace: cacheNamespace,
|
|
cache_ttl_secs: cacheTtlSecs,
|
|
email,
|
|
concurrency,
|
|
max_archive_depth: maxArchiveDepth.unwrap_or_default(),
|
|
structured_extraction: structuredExtraction,
|
|
cancel_token: cancelToken,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "useCache")]
|
|
pub fn use_cache(&self) -> bool {
|
|
self.use_cache
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "useCache")]
|
|
pub fn set_use_cache(&mut self, value: bool) {
|
|
self.use_cache = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
|
|
pub fn enable_quality_processing(&self) -> bool {
|
|
self.enable_quality_processing
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
|
|
pub fn set_enable_quality_processing(&mut self, value: bool) {
|
|
self.enable_quality_processing = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn ocr(&self) -> Option<WasmOcrConfig> {
|
|
self.ocr.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_ocr(&mut self, value: Option<WasmOcrConfig>) {
|
|
self.ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "forceOcr")]
|
|
pub fn force_ocr(&self) -> bool {
|
|
self.force_ocr
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "forceOcr")]
|
|
pub fn set_force_ocr(&mut self, value: bool) {
|
|
self.force_ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "forceOcrPages")]
|
|
pub fn force_ocr_pages(&self) -> Option<Vec<u32>> {
|
|
self.force_ocr_pages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "forceOcrPages")]
|
|
pub fn set_force_ocr_pages(&mut self, value: Option<Vec<u32>>) {
|
|
self.force_ocr_pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "disableOcr")]
|
|
pub fn disable_ocr(&self) -> bool {
|
|
self.disable_ocr
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "disableOcr")]
|
|
pub fn set_disable_ocr(&mut self, value: bool) {
|
|
self.disable_ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn chunking(&self) -> Option<WasmChunkingConfig> {
|
|
self.chunking.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_chunking(&mut self, value: Option<WasmChunkingConfig>) {
|
|
self.chunking = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "contentFilter")]
|
|
pub fn content_filter(&self) -> Option<WasmContentFilterConfig> {
|
|
self.content_filter.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "contentFilter")]
|
|
pub fn set_content_filter(&mut self, value: Option<WasmContentFilterConfig>) {
|
|
self.content_filter = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Option<WasmImageExtractionConfig> {
|
|
self.images.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Option<WasmImageExtractionConfig>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tokenReduction")]
|
|
pub fn token_reduction(&self) -> Option<WasmTokenReductionOptions> {
|
|
self.token_reduction.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tokenReduction")]
|
|
pub fn set_token_reduction(&mut self, value: Option<WasmTokenReductionOptions>) {
|
|
self.token_reduction = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "languageDetection")]
|
|
pub fn language_detection(&self) -> Option<WasmLanguageDetectionConfig> {
|
|
self.language_detection.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "languageDetection")]
|
|
pub fn set_language_detection(&mut self, value: Option<WasmLanguageDetectionConfig>) {
|
|
self.language_detection = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pages(&self) -> Option<WasmPageConfig> {
|
|
self.pages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pages(&mut self, value: Option<WasmPageConfig>) {
|
|
self.pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn postprocessor(&self) -> Option<WasmPostProcessorConfig> {
|
|
self.postprocessor.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_postprocessor(&mut self, value: Option<WasmPostProcessorConfig>) {
|
|
self.postprocessor = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "htmlOptions")]
|
|
pub fn html_options(&self) -> Option<String> {
|
|
self.html_options.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "htmlOptions")]
|
|
pub fn set_html_options(&mut self, value: Option<String>) {
|
|
self.html_options = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "extractionTimeoutSecs")]
|
|
pub fn extraction_timeout_secs(&self) -> Option<u64> {
|
|
self.extraction_timeout_secs
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "extractionTimeoutSecs")]
|
|
pub fn set_extraction_timeout_secs(&mut self, value: Option<u64>) {
|
|
self.extraction_timeout_secs = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxConcurrentExtractions")]
|
|
pub fn max_concurrent_extractions(&self) -> Option<usize> {
|
|
self.max_concurrent_extractions
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxConcurrentExtractions")]
|
|
pub fn set_max_concurrent_extractions(&mut self, value: Option<usize>) {
|
|
self.max_concurrent_extractions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "resultFormat")]
|
|
pub fn result_format(&self) -> String {
|
|
self.result_format.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "resultFormat")]
|
|
pub fn set_result_format(&mut self, value: WasmResultFormat) {
|
|
self.result_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "securityLimits")]
|
|
pub fn security_limits(&self) -> Option<WasmSecurityLimits> {
|
|
self.security_limits.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "securityLimits")]
|
|
pub fn set_security_limits(&mut self, value: Option<WasmSecurityLimits>) {
|
|
self.security_limits = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxEmbeddedFileBytes")]
|
|
pub fn max_embedded_file_bytes(&self) -> Option<u64> {
|
|
self.max_embedded_file_bytes
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxEmbeddedFileBytes")]
|
|
pub fn set_max_embedded_file_bytes(&mut self, value: Option<u64>) {
|
|
self.max_embedded_file_bytes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> String {
|
|
self.output_format.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: WasmOutputFormat) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "useLayoutForMarkdown")]
|
|
pub fn use_layout_for_markdown(&self) -> bool {
|
|
self.use_layout_for_markdown
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "useLayoutForMarkdown")]
|
|
pub fn set_use_layout_for_markdown(&mut self, value: bool) {
|
|
self.use_layout_for_markdown = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
|
|
pub fn include_document_structure(&self) -> bool {
|
|
self.include_document_structure
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
|
|
pub fn set_include_document_structure(&mut self, value: bool) {
|
|
self.include_document_structure = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
|
|
self.acceleration.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
|
|
self.acceleration = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cacheNamespace")]
|
|
pub fn cache_namespace(&self) -> Option<String> {
|
|
self.cache_namespace.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "cacheNamespace")]
|
|
pub fn set_cache_namespace(&mut self, value: Option<String>) {
|
|
self.cache_namespace = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cacheTtlSecs")]
|
|
pub fn cache_ttl_secs(&self) -> Option<u64> {
|
|
self.cache_ttl_secs
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "cacheTtlSecs")]
|
|
pub fn set_cache_ttl_secs(&mut self, value: Option<u64>) {
|
|
self.cache_ttl_secs = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn email(&self) -> Option<WasmEmailConfig> {
|
|
self.email.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_email(&mut self, value: Option<WasmEmailConfig>) {
|
|
self.email = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn concurrency(&self) -> Option<String> {
|
|
self.concurrency.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_concurrency(&mut self, value: Option<String>) {
|
|
self.concurrency = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxArchiveDepth")]
|
|
pub fn max_archive_depth(&self) -> usize {
|
|
self.max_archive_depth
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxArchiveDepth")]
|
|
pub fn set_max_archive_depth(&mut self, value: usize) {
|
|
self.max_archive_depth = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "structuredExtraction")]
|
|
pub fn structured_extraction(&self) -> Option<WasmStructuredExtractionConfig> {
|
|
self.structured_extraction.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "structuredExtraction")]
|
|
pub fn set_structured_extraction(&mut self, value: Option<WasmStructuredExtractionConfig>) {
|
|
self.structured_extraction = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cancelToken")]
|
|
pub fn cancel_token(&self) -> Option<String> {
|
|
self.cancel_token.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "cancelToken")]
|
|
pub fn set_cancel_token(&mut self, value: Option<String>) {
|
|
self.cancel_token = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmExtractionConfig {
|
|
kreuzberg::ExtractionConfig::default().into()
|
|
}
|
|
|
|
/// Check if image processing is needed by examining OCR and image extraction settings.
|
|
///
|
|
/// Returns `true` if either OCR is enabled or image extraction is configured,
|
|
/// indicating that image decompression and processing should occur.
|
|
/// Returns `false` if both are disabled, allowing optimization to skip unnecessary
|
|
/// image decompression for text-only extraction workflows.
|
|
///
|
|
/// # Optimization Impact
|
|
/// For text-only extractions (no OCR, no image extraction), skipping image
|
|
/// decompression can improve CPU utilization by 5-10% by avoiding wasteful
|
|
/// image I/O and processing when results won't be used.
|
|
#[wasm_bindgen(js_name = "needsImageProcessing")]
|
|
pub fn needs_image_processing(&self) -> bool {
|
|
kreuzberg::ExtractionConfig::from(self.clone()).needs_image_processing()
|
|
}
|
|
}
|
|
|
|
/// Per-file extraction configuration overrides for batch processing.
|
|
///
|
|
/// All fields are `Option<T>` — `None` means "use the batch-level default."
|
|
/// This type is used with `batch_extract_files` and
|
|
/// `batch_extract_bytes` to allow heterogeneous
|
|
/// extraction settings within a single batch.
|
|
///
|
|
/// # Excluded Fields
|
|
///
|
|
/// The following `ExtractionConfig` fields are batch-level only and
|
|
/// cannot be overridden per file:
|
|
/// - `max_concurrent_extractions` — controls batch parallelism
|
|
/// - `use_cache` — global caching policy
|
|
/// - `acceleration` — shared ONNX execution provider
|
|
/// - `security_limits` — global archive security policy
|
|
///
|
|
/// # Example
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmFileExtractionConfig {
|
|
enable_quality_processing: Option<bool>,
|
|
ocr: Option<WasmOcrConfig>,
|
|
force_ocr: Option<bool>,
|
|
force_ocr_pages: Option<Vec<u32>>,
|
|
disable_ocr: Option<bool>,
|
|
chunking: Option<WasmChunkingConfig>,
|
|
content_filter: Option<WasmContentFilterConfig>,
|
|
images: Option<WasmImageExtractionConfig>,
|
|
token_reduction: Option<WasmTokenReductionOptions>,
|
|
language_detection: Option<WasmLanguageDetectionConfig>,
|
|
pages: Option<WasmPageConfig>,
|
|
postprocessor: Option<WasmPostProcessorConfig>,
|
|
html_options: Option<String>,
|
|
result_format: Option<WasmResultFormat>,
|
|
output_format: Option<WasmOutputFormat>,
|
|
include_document_structure: Option<bool>,
|
|
timeout_secs: Option<u64>,
|
|
structured_extraction: Option<WasmStructuredExtractionConfig>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmFileExtractionConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
enableQualityProcessing: Option<bool>,
|
|
ocr: Option<WasmOcrConfig>,
|
|
forceOcr: Option<bool>,
|
|
forceOcrPages: Option<Vec<u32>>,
|
|
disableOcr: Option<bool>,
|
|
chunking: Option<WasmChunkingConfig>,
|
|
contentFilter: Option<WasmContentFilterConfig>,
|
|
images: Option<WasmImageExtractionConfig>,
|
|
tokenReduction: Option<WasmTokenReductionOptions>,
|
|
languageDetection: Option<WasmLanguageDetectionConfig>,
|
|
pages: Option<WasmPageConfig>,
|
|
postprocessor: Option<WasmPostProcessorConfig>,
|
|
resultFormat: Option<WasmResultFormat>,
|
|
outputFormat: Option<WasmOutputFormat>,
|
|
includeDocumentStructure: Option<bool>,
|
|
timeoutSecs: Option<u64>,
|
|
structuredExtraction: Option<WasmStructuredExtractionConfig>,
|
|
) -> WasmFileExtractionConfig {
|
|
WasmFileExtractionConfig {
|
|
enable_quality_processing: enableQualityProcessing,
|
|
ocr,
|
|
force_ocr: forceOcr,
|
|
force_ocr_pages: forceOcrPages,
|
|
disable_ocr: disableOcr,
|
|
chunking,
|
|
content_filter: contentFilter,
|
|
images,
|
|
token_reduction: tokenReduction,
|
|
language_detection: languageDetection,
|
|
pages,
|
|
postprocessor,
|
|
html_options: Default::default(),
|
|
result_format: resultFormat,
|
|
output_format: outputFormat,
|
|
include_document_structure: includeDocumentStructure,
|
|
timeout_secs: timeoutSecs,
|
|
structured_extraction: structuredExtraction,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmFileExtractionConfig {
|
|
<WasmFileExtractionConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "enableQualityProcessing")]
|
|
pub fn enable_quality_processing(&self) -> Option<bool> {
|
|
self.enable_quality_processing
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "enableQualityProcessing")]
|
|
pub fn set_enable_quality_processing(&mut self, value: Option<bool>) {
|
|
self.enable_quality_processing = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn ocr(&self) -> Option<WasmOcrConfig> {
|
|
self.ocr.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_ocr(&mut self, value: Option<WasmOcrConfig>) {
|
|
self.ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "forceOcr")]
|
|
pub fn force_ocr(&self) -> Option<bool> {
|
|
self.force_ocr
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "forceOcr")]
|
|
pub fn set_force_ocr(&mut self, value: Option<bool>) {
|
|
self.force_ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "forceOcrPages")]
|
|
pub fn force_ocr_pages(&self) -> Option<Vec<u32>> {
|
|
self.force_ocr_pages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "forceOcrPages")]
|
|
pub fn set_force_ocr_pages(&mut self, value: Option<Vec<u32>>) {
|
|
self.force_ocr_pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "disableOcr")]
|
|
pub fn disable_ocr(&self) -> Option<bool> {
|
|
self.disable_ocr
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "disableOcr")]
|
|
pub fn set_disable_ocr(&mut self, value: Option<bool>) {
|
|
self.disable_ocr = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn chunking(&self) -> Option<WasmChunkingConfig> {
|
|
self.chunking.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_chunking(&mut self, value: Option<WasmChunkingConfig>) {
|
|
self.chunking = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "contentFilter")]
|
|
pub fn content_filter(&self) -> Option<WasmContentFilterConfig> {
|
|
self.content_filter.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "contentFilter")]
|
|
pub fn set_content_filter(&mut self, value: Option<WasmContentFilterConfig>) {
|
|
self.content_filter = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Option<WasmImageExtractionConfig> {
|
|
self.images.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Option<WasmImageExtractionConfig>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tokenReduction")]
|
|
pub fn token_reduction(&self) -> Option<WasmTokenReductionOptions> {
|
|
self.token_reduction.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tokenReduction")]
|
|
pub fn set_token_reduction(&mut self, value: Option<WasmTokenReductionOptions>) {
|
|
self.token_reduction = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "languageDetection")]
|
|
pub fn language_detection(&self) -> Option<WasmLanguageDetectionConfig> {
|
|
self.language_detection.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "languageDetection")]
|
|
pub fn set_language_detection(&mut self, value: Option<WasmLanguageDetectionConfig>) {
|
|
self.language_detection = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pages(&self) -> Option<WasmPageConfig> {
|
|
self.pages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pages(&mut self, value: Option<WasmPageConfig>) {
|
|
self.pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn postprocessor(&self) -> Option<WasmPostProcessorConfig> {
|
|
self.postprocessor.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_postprocessor(&mut self, value: Option<WasmPostProcessorConfig>) {
|
|
self.postprocessor = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "htmlOptions")]
|
|
pub fn html_options(&self) -> Option<String> {
|
|
self.html_options.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "htmlOptions")]
|
|
pub fn set_html_options(&mut self, value: Option<String>) {
|
|
self.html_options = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "resultFormat")]
|
|
pub fn result_format(&self) -> Option<String> {
|
|
self.result_format.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "resultFormat")]
|
|
pub fn set_result_format(&mut self, value: Option<WasmResultFormat>) {
|
|
self.result_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> Option<String> {
|
|
self.output_format.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: Option<WasmOutputFormat>) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeDocumentStructure")]
|
|
pub fn include_document_structure(&self) -> Option<bool> {
|
|
self.include_document_structure
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeDocumentStructure")]
|
|
pub fn set_include_document_structure(&mut self, value: Option<bool>) {
|
|
self.include_document_structure = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "timeoutSecs")]
|
|
pub fn timeout_secs(&self) -> Option<u64> {
|
|
self.timeout_secs
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "timeoutSecs")]
|
|
pub fn set_timeout_secs(&mut self, value: Option<u64>) {
|
|
self.timeout_secs = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "structuredExtraction")]
|
|
pub fn structured_extraction(&self) -> Option<WasmStructuredExtractionConfig> {
|
|
self.structured_extraction.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "structuredExtraction")]
|
|
pub fn set_structured_extraction(&mut self, value: Option<WasmStructuredExtractionConfig>) {
|
|
self.structured_extraction = value;
|
|
}
|
|
}
|
|
|
|
/// Batch item for byte array extraction.
|
|
///
|
|
/// Used with `batch_extract_bytes` and `batch_extract_bytes_sync`
|
|
/// to represent a single item in a batch extraction job.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmBatchBytesItem {
|
|
content: Vec<u8>,
|
|
mime_type: String,
|
|
config: Option<WasmFileExtractionConfig>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmBatchBytesItem {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(content: Vec<u8>, mimeType: String, config: Option<WasmFileExtractionConfig>) -> WasmBatchBytesItem {
|
|
WasmBatchBytesItem {
|
|
content,
|
|
mime_type: mimeType,
|
|
config,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmBatchBytesItem {
|
|
<WasmBatchBytesItem as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> Vec<u8> {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: Vec<u8>) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> String {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: String) {
|
|
self.mime_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn config(&self) -> Option<WasmFileExtractionConfig> {
|
|
self.config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_config(&mut self, value: Option<WasmFileExtractionConfig>) {
|
|
self.config = value;
|
|
}
|
|
}
|
|
|
|
/// Batch item for file extraction.
|
|
///
|
|
/// Used with `batch_extract_files` and `batch_extract_files_sync`
|
|
/// to represent a single file in a batch extraction job.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmBatchFileItem {
|
|
path: String,
|
|
config: Option<WasmFileExtractionConfig>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmBatchFileItem {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(path: String, config: Option<WasmFileExtractionConfig>) -> WasmBatchFileItem {
|
|
WasmBatchFileItem { path, config }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmBatchFileItem {
|
|
<WasmBatchFileItem as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn path(&self) -> String {
|
|
self.path.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_path(&mut self, value: String) {
|
|
self.path = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn config(&self) -> Option<WasmFileExtractionConfig> {
|
|
self.config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_config(&mut self, value: Option<WasmFileExtractionConfig>) {
|
|
self.config = value;
|
|
}
|
|
}
|
|
|
|
/// Image extraction configuration.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmImageExtractionConfig {
|
|
extract_images: bool,
|
|
target_dpi: i32,
|
|
max_image_dimension: i32,
|
|
inject_placeholders: bool,
|
|
auto_adjust_dpi: bool,
|
|
min_dpi: i32,
|
|
max_dpi: i32,
|
|
max_images_per_page: Option<u32>,
|
|
classify: bool,
|
|
include_page_rasters: bool,
|
|
run_ocr_on_images: bool,
|
|
ocr_text_only: bool,
|
|
append_ocr_text: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmImageExtractionConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
extractImages: Option<bool>,
|
|
targetDpi: Option<i32>,
|
|
maxImageDimension: Option<i32>,
|
|
injectPlaceholders: Option<bool>,
|
|
autoAdjustDpi: Option<bool>,
|
|
minDpi: Option<i32>,
|
|
maxDpi: Option<i32>,
|
|
classify: Option<bool>,
|
|
includePageRasters: Option<bool>,
|
|
runOcrOnImages: Option<bool>,
|
|
ocrTextOnly: Option<bool>,
|
|
appendOcrText: Option<bool>,
|
|
maxImagesPerPage: Option<u32>,
|
|
) -> WasmImageExtractionConfig {
|
|
WasmImageExtractionConfig {
|
|
extract_images: extractImages.unwrap_or(true),
|
|
target_dpi: targetDpi.unwrap_or(300),
|
|
max_image_dimension: maxImageDimension.unwrap_or(4096),
|
|
inject_placeholders: injectPlaceholders.unwrap_or(true),
|
|
auto_adjust_dpi: autoAdjustDpi.unwrap_or(true),
|
|
min_dpi: minDpi.unwrap_or(72),
|
|
max_dpi: maxDpi.unwrap_or(600),
|
|
max_images_per_page: maxImagesPerPage,
|
|
classify: classify.unwrap_or(true),
|
|
include_page_rasters: includePageRasters.unwrap_or(false),
|
|
run_ocr_on_images: runOcrOnImages.unwrap_or(true),
|
|
ocr_text_only: ocrTextOnly.unwrap_or(false),
|
|
append_ocr_text: appendOcrText.unwrap_or(false),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "extractImages")]
|
|
pub fn extract_images(&self) -> bool {
|
|
self.extract_images
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "extractImages")]
|
|
pub fn set_extract_images(&mut self, value: bool) {
|
|
self.extract_images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "targetDpi")]
|
|
pub fn target_dpi(&self) -> i32 {
|
|
self.target_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "targetDpi")]
|
|
pub fn set_target_dpi(&mut self, value: i32) {
|
|
self.target_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxImageDimension")]
|
|
pub fn max_image_dimension(&self) -> i32 {
|
|
self.max_image_dimension
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxImageDimension")]
|
|
pub fn set_max_image_dimension(&mut self, value: i32) {
|
|
self.max_image_dimension = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "injectPlaceholders")]
|
|
pub fn inject_placeholders(&self) -> bool {
|
|
self.inject_placeholders
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "injectPlaceholders")]
|
|
pub fn set_inject_placeholders(&mut self, value: bool) {
|
|
self.inject_placeholders = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "autoAdjustDpi")]
|
|
pub fn auto_adjust_dpi(&self) -> bool {
|
|
self.auto_adjust_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "autoAdjustDpi")]
|
|
pub fn set_auto_adjust_dpi(&mut self, value: bool) {
|
|
self.auto_adjust_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minDpi")]
|
|
pub fn min_dpi(&self) -> i32 {
|
|
self.min_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minDpi")]
|
|
pub fn set_min_dpi(&mut self, value: i32) {
|
|
self.min_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxDpi")]
|
|
pub fn max_dpi(&self) -> i32 {
|
|
self.max_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxDpi")]
|
|
pub fn set_max_dpi(&mut self, value: i32) {
|
|
self.max_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxImagesPerPage")]
|
|
pub fn max_images_per_page(&self) -> Option<u32> {
|
|
self.max_images_per_page
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxImagesPerPage")]
|
|
pub fn set_max_images_per_page(&mut self, value: Option<u32>) {
|
|
self.max_images_per_page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn classify(&self) -> bool {
|
|
self.classify
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_classify(&mut self, value: bool) {
|
|
self.classify = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includePageRasters")]
|
|
pub fn include_page_rasters(&self) -> bool {
|
|
self.include_page_rasters
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includePageRasters")]
|
|
pub fn set_include_page_rasters(&mut self, value: bool) {
|
|
self.include_page_rasters = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "runOcrOnImages")]
|
|
pub fn run_ocr_on_images(&self) -> bool {
|
|
self.run_ocr_on_images
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "runOcrOnImages")]
|
|
pub fn set_run_ocr_on_images(&mut self, value: bool) {
|
|
self.run_ocr_on_images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrTextOnly")]
|
|
pub fn ocr_text_only(&self) -> bool {
|
|
self.ocr_text_only
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrTextOnly")]
|
|
pub fn set_ocr_text_only(&mut self, value: bool) {
|
|
self.ocr_text_only = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "appendOcrText")]
|
|
pub fn append_ocr_text(&self) -> bool {
|
|
self.append_ocr_text
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "appendOcrText")]
|
|
pub fn set_append_ocr_text(&mut self, value: bool) {
|
|
self.append_ocr_text = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmImageExtractionConfig {
|
|
kreuzberg::ImageExtractionConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Token reduction configuration.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTokenReductionOptions {
|
|
mode: String,
|
|
preserve_important_words: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTokenReductionOptions {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(mode: Option<String>, preserveImportantWords: Option<bool>) -> WasmTokenReductionOptions {
|
|
WasmTokenReductionOptions {
|
|
mode: mode.unwrap_or_default(),
|
|
preserve_important_words: preserveImportantWords.unwrap_or(true),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn mode(&self) -> String {
|
|
self.mode.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_mode(&mut self, value: String) {
|
|
self.mode = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "preserveImportantWords")]
|
|
pub fn preserve_important_words(&self) -> bool {
|
|
self.preserve_important_words
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "preserveImportantWords")]
|
|
pub fn set_preserve_important_words(&mut self, value: bool) {
|
|
self.preserve_important_words = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmTokenReductionOptions {
|
|
kreuzberg::TokenReductionOptions::default().into()
|
|
}
|
|
}
|
|
|
|
/// Language detection configuration.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmLanguageDetectionConfig {
|
|
enabled: bool,
|
|
min_confidence: f64,
|
|
detect_multiple: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmLanguageDetectionConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
enabled: Option<bool>,
|
|
minConfidence: Option<f64>,
|
|
detectMultiple: Option<bool>,
|
|
) -> WasmLanguageDetectionConfig {
|
|
WasmLanguageDetectionConfig {
|
|
enabled: enabled.unwrap_or(true),
|
|
min_confidence: minConfidence.unwrap_or(0.8),
|
|
detect_multiple: detectMultiple.unwrap_or(false),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn enabled(&self) -> bool {
|
|
self.enabled
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_enabled(&mut self, value: bool) {
|
|
self.enabled = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minConfidence")]
|
|
pub fn min_confidence(&self) -> f64 {
|
|
self.min_confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minConfidence")]
|
|
pub fn set_min_confidence(&mut self, value: f64) {
|
|
self.min_confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "detectMultiple")]
|
|
pub fn detect_multiple(&self) -> bool {
|
|
self.detect_multiple
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "detectMultiple")]
|
|
pub fn set_detect_multiple(&mut self, value: bool) {
|
|
self.detect_multiple = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmLanguageDetectionConfig {
|
|
kreuzberg::LanguageDetectionConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Configuration for an LLM provider/model via liter-llm.
|
|
///
|
|
/// Each feature (VLM OCR, VLM embeddings, structured extraction) carries
|
|
/// its own `LlmConfig`, allowing different providers per feature.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```toml
|
|
/// [structured_extraction.llm]
|
|
/// model = "openai/gpt-4o"
|
|
/// api_key = "sk-..." # or use KREUZBERG_LLM_API_KEY env var
|
|
/// ```
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmLlmConfig {
|
|
model: String,
|
|
api_key: Option<String>,
|
|
base_url: Option<String>,
|
|
timeout_secs: Option<u64>,
|
|
max_retries: Option<u32>,
|
|
temperature: Option<f64>,
|
|
max_tokens: Option<u64>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmLlmConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
model: Option<String>,
|
|
apiKey: Option<String>,
|
|
baseUrl: Option<String>,
|
|
timeoutSecs: Option<u64>,
|
|
maxRetries: Option<u32>,
|
|
temperature: Option<f64>,
|
|
maxTokens: Option<u64>,
|
|
) -> WasmLlmConfig {
|
|
WasmLlmConfig {
|
|
model: model.unwrap_or_default(),
|
|
api_key: apiKey,
|
|
base_url: baseUrl,
|
|
timeout_secs: timeoutSecs,
|
|
max_retries: maxRetries,
|
|
temperature,
|
|
max_tokens: maxTokens,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmLlmConfig {
|
|
<WasmLlmConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn model(&self) -> String {
|
|
self.model.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_model(&mut self, value: String) {
|
|
self.model = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "apiKey")]
|
|
pub fn api_key(&self) -> Option<String> {
|
|
self.api_key.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "apiKey")]
|
|
pub fn set_api_key(&mut self, value: Option<String>) {
|
|
self.api_key = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "baseUrl")]
|
|
pub fn base_url(&self) -> Option<String> {
|
|
self.base_url.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "baseUrl")]
|
|
pub fn set_base_url(&mut self, value: Option<String>) {
|
|
self.base_url = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "timeoutSecs")]
|
|
pub fn timeout_secs(&self) -> Option<u64> {
|
|
self.timeout_secs
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "timeoutSecs")]
|
|
pub fn set_timeout_secs(&mut self, value: Option<u64>) {
|
|
self.timeout_secs = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxRetries")]
|
|
pub fn max_retries(&self) -> Option<u32> {
|
|
self.max_retries
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxRetries")]
|
|
pub fn set_max_retries(&mut self, value: Option<u32>) {
|
|
self.max_retries = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn temperature(&self) -> Option<f64> {
|
|
self.temperature
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_temperature(&mut self, value: Option<f64>) {
|
|
self.temperature = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxTokens")]
|
|
pub fn max_tokens(&self) -> Option<u64> {
|
|
self.max_tokens
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxTokens")]
|
|
pub fn set_max_tokens(&mut self, value: Option<u64>) {
|
|
self.max_tokens = value;
|
|
}
|
|
}
|
|
|
|
/// Configuration for LLM-based structured data extraction.
|
|
///
|
|
/// Sends extracted document content to a VLM with a JSON schema,
|
|
/// returning structured data that conforms to the schema.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```toml
|
|
/// [structured_extraction]
|
|
/// schema_name = "invoice_data"
|
|
/// strict = true
|
|
///
|
|
/// [structured_extraction.schema]
|
|
/// type = "object"
|
|
/// properties.vendor = { type = "string" }
|
|
/// properties.total = { type = "number" }
|
|
/// required = ["vendor", "total"]
|
|
///
|
|
/// [structured_extraction.llm]
|
|
/// model = "openai/gpt-4o"
|
|
/// ```
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmStructuredExtractionConfig {
|
|
schema: JsValue,
|
|
schema_name: String,
|
|
schema_description: Option<String>,
|
|
strict: bool,
|
|
prompt: Option<String>,
|
|
llm: WasmLlmConfig,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmStructuredExtractionConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
schema: JsValue,
|
|
schemaName: String,
|
|
strict: bool,
|
|
llm: WasmLlmConfig,
|
|
schemaDescription: Option<String>,
|
|
prompt: Option<String>,
|
|
) -> WasmStructuredExtractionConfig {
|
|
WasmStructuredExtractionConfig {
|
|
schema,
|
|
schema_name: schemaName,
|
|
schema_description: schemaDescription,
|
|
strict,
|
|
prompt,
|
|
llm,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmStructuredExtractionConfig {
|
|
<WasmStructuredExtractionConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn schema(&self) -> JsValue {
|
|
self.schema.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_schema(&mut self, value: JsValue) {
|
|
self.schema = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "schemaName")]
|
|
pub fn schema_name(&self) -> String {
|
|
self.schema_name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "schemaName")]
|
|
pub fn set_schema_name(&mut self, value: String) {
|
|
self.schema_name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "schemaDescription")]
|
|
pub fn schema_description(&self) -> Option<String> {
|
|
self.schema_description.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "schemaDescription")]
|
|
pub fn set_schema_description(&mut self, value: Option<String>) {
|
|
self.schema_description = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn strict(&self) -> bool {
|
|
self.strict
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_strict(&mut self, value: bool) {
|
|
self.strict = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn prompt(&self) -> Option<String> {
|
|
self.prompt.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_prompt(&mut self, value: Option<String>) {
|
|
self.prompt = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn llm(&self) -> WasmLlmConfig {
|
|
self.llm.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_llm(&mut self, value: WasmLlmConfig) {
|
|
self.llm = value;
|
|
}
|
|
}
|
|
|
|
/// Quality thresholds for OCR fallback decisions and pipeline quality gating.
|
|
///
|
|
/// All fields default to the values that match the previous hardcoded behavior,
|
|
/// so `OcrQualityThresholds.default()` preserves existing semantics exactly.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrQualityThresholds {
|
|
min_total_non_whitespace: usize,
|
|
min_non_whitespace_per_page: f64,
|
|
min_meaningful_word_len: usize,
|
|
min_meaningful_words: usize,
|
|
min_alnum_ratio: f64,
|
|
min_garbage_chars: usize,
|
|
max_fragmented_word_ratio: f64,
|
|
critical_fragmented_word_ratio: f64,
|
|
min_avg_word_length: f64,
|
|
min_words_for_avg_length_check: usize,
|
|
min_consecutive_repeat_ratio: f64,
|
|
min_words_for_repeat_check: usize,
|
|
substantive_min_chars: usize,
|
|
non_text_min_chars: usize,
|
|
alnum_ws_ratio_threshold: f64,
|
|
pipeline_min_quality: f64,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrQualityThresholds {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
minTotalNonWhitespace: Option<usize>,
|
|
minNonWhitespacePerPage: Option<f64>,
|
|
minMeaningfulWordLen: Option<usize>,
|
|
minMeaningfulWords: Option<usize>,
|
|
minAlnumRatio: Option<f64>,
|
|
minGarbageChars: Option<usize>,
|
|
maxFragmentedWordRatio: Option<f64>,
|
|
criticalFragmentedWordRatio: Option<f64>,
|
|
minAvgWordLength: Option<f64>,
|
|
minWordsForAvgLengthCheck: Option<usize>,
|
|
minConsecutiveRepeatRatio: Option<f64>,
|
|
minWordsForRepeatCheck: Option<usize>,
|
|
substantiveMinChars: Option<usize>,
|
|
nonTextMinChars: Option<usize>,
|
|
alnumWsRatioThreshold: Option<f64>,
|
|
pipelineMinQuality: Option<f64>,
|
|
) -> WasmOcrQualityThresholds {
|
|
WasmOcrQualityThresholds {
|
|
min_total_non_whitespace: minTotalNonWhitespace.unwrap_or(64),
|
|
min_non_whitespace_per_page: minNonWhitespacePerPage.unwrap_or(32.0),
|
|
min_meaningful_word_len: minMeaningfulWordLen.unwrap_or(4),
|
|
min_meaningful_words: minMeaningfulWords.unwrap_or(3),
|
|
min_alnum_ratio: minAlnumRatio.unwrap_or(0.3),
|
|
min_garbage_chars: minGarbageChars.unwrap_or(5),
|
|
max_fragmented_word_ratio: maxFragmentedWordRatio.unwrap_or(0.6),
|
|
critical_fragmented_word_ratio: criticalFragmentedWordRatio.unwrap_or(0.8),
|
|
min_avg_word_length: minAvgWordLength.unwrap_or(2.0),
|
|
min_words_for_avg_length_check: minWordsForAvgLengthCheck.unwrap_or(50),
|
|
min_consecutive_repeat_ratio: minConsecutiveRepeatRatio.unwrap_or(0.08),
|
|
min_words_for_repeat_check: minWordsForRepeatCheck.unwrap_or(50),
|
|
substantive_min_chars: substantiveMinChars.unwrap_or(100),
|
|
non_text_min_chars: nonTextMinChars.unwrap_or(20),
|
|
alnum_ws_ratio_threshold: alnumWsRatioThreshold.unwrap_or(0.4),
|
|
pipeline_min_quality: pipelineMinQuality.unwrap_or(0.5),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minTotalNonWhitespace")]
|
|
pub fn min_total_non_whitespace(&self) -> usize {
|
|
self.min_total_non_whitespace
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minTotalNonWhitespace")]
|
|
pub fn set_min_total_non_whitespace(&mut self, value: usize) {
|
|
self.min_total_non_whitespace = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minNonWhitespacePerPage")]
|
|
pub fn min_non_whitespace_per_page(&self) -> f64 {
|
|
self.min_non_whitespace_per_page
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minNonWhitespacePerPage")]
|
|
pub fn set_min_non_whitespace_per_page(&mut self, value: f64) {
|
|
self.min_non_whitespace_per_page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minMeaningfulWordLen")]
|
|
pub fn min_meaningful_word_len(&self) -> usize {
|
|
self.min_meaningful_word_len
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minMeaningfulWordLen")]
|
|
pub fn set_min_meaningful_word_len(&mut self, value: usize) {
|
|
self.min_meaningful_word_len = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minMeaningfulWords")]
|
|
pub fn min_meaningful_words(&self) -> usize {
|
|
self.min_meaningful_words
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minMeaningfulWords")]
|
|
pub fn set_min_meaningful_words(&mut self, value: usize) {
|
|
self.min_meaningful_words = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minAlnumRatio")]
|
|
pub fn min_alnum_ratio(&self) -> f64 {
|
|
self.min_alnum_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minAlnumRatio")]
|
|
pub fn set_min_alnum_ratio(&mut self, value: f64) {
|
|
self.min_alnum_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minGarbageChars")]
|
|
pub fn min_garbage_chars(&self) -> usize {
|
|
self.min_garbage_chars
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minGarbageChars")]
|
|
pub fn set_min_garbage_chars(&mut self, value: usize) {
|
|
self.min_garbage_chars = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxFragmentedWordRatio")]
|
|
pub fn max_fragmented_word_ratio(&self) -> f64 {
|
|
self.max_fragmented_word_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxFragmentedWordRatio")]
|
|
pub fn set_max_fragmented_word_ratio(&mut self, value: f64) {
|
|
self.max_fragmented_word_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "criticalFragmentedWordRatio")]
|
|
pub fn critical_fragmented_word_ratio(&self) -> f64 {
|
|
self.critical_fragmented_word_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "criticalFragmentedWordRatio")]
|
|
pub fn set_critical_fragmented_word_ratio(&mut self, value: f64) {
|
|
self.critical_fragmented_word_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minAvgWordLength")]
|
|
pub fn min_avg_word_length(&self) -> f64 {
|
|
self.min_avg_word_length
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minAvgWordLength")]
|
|
pub fn set_min_avg_word_length(&mut self, value: f64) {
|
|
self.min_avg_word_length = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minWordsForAvgLengthCheck")]
|
|
pub fn min_words_for_avg_length_check(&self) -> usize {
|
|
self.min_words_for_avg_length_check
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minWordsForAvgLengthCheck")]
|
|
pub fn set_min_words_for_avg_length_check(&mut self, value: usize) {
|
|
self.min_words_for_avg_length_check = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minConsecutiveRepeatRatio")]
|
|
pub fn min_consecutive_repeat_ratio(&self) -> f64 {
|
|
self.min_consecutive_repeat_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minConsecutiveRepeatRatio")]
|
|
pub fn set_min_consecutive_repeat_ratio(&mut self, value: f64) {
|
|
self.min_consecutive_repeat_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minWordsForRepeatCheck")]
|
|
pub fn min_words_for_repeat_check(&self) -> usize {
|
|
self.min_words_for_repeat_check
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minWordsForRepeatCheck")]
|
|
pub fn set_min_words_for_repeat_check(&mut self, value: usize) {
|
|
self.min_words_for_repeat_check = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "substantiveMinChars")]
|
|
pub fn substantive_min_chars(&self) -> usize {
|
|
self.substantive_min_chars
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "substantiveMinChars")]
|
|
pub fn set_substantive_min_chars(&mut self, value: usize) {
|
|
self.substantive_min_chars = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "nonTextMinChars")]
|
|
pub fn non_text_min_chars(&self) -> usize {
|
|
self.non_text_min_chars
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "nonTextMinChars")]
|
|
pub fn set_non_text_min_chars(&mut self, value: usize) {
|
|
self.non_text_min_chars = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "alnumWsRatioThreshold")]
|
|
pub fn alnum_ws_ratio_threshold(&self) -> f64 {
|
|
self.alnum_ws_ratio_threshold
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "alnumWsRatioThreshold")]
|
|
pub fn set_alnum_ws_ratio_threshold(&mut self, value: f64) {
|
|
self.alnum_ws_ratio_threshold = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pipelineMinQuality")]
|
|
pub fn pipeline_min_quality(&self) -> f64 {
|
|
self.pipeline_min_quality
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pipelineMinQuality")]
|
|
pub fn set_pipeline_min_quality(&mut self, value: f64) {
|
|
self.pipeline_min_quality = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmOcrQualityThresholds {
|
|
kreuzberg::OcrQualityThresholds::default().into()
|
|
}
|
|
}
|
|
|
|
/// A single backend stage in the OCR pipeline.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrPipelineStage {
|
|
backend: String,
|
|
priority: u32,
|
|
language: Option<String>,
|
|
tesseract_config: Option<WasmTesseractConfig>,
|
|
paddle_ocr_config: Option<JsValue>,
|
|
vlm_config: Option<WasmLlmConfig>,
|
|
backend_options: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrPipelineStage {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
backend: String,
|
|
priority: u32,
|
|
language: Option<String>,
|
|
tesseractConfig: Option<WasmTesseractConfig>,
|
|
paddleOcrConfig: Option<JsValue>,
|
|
vlmConfig: Option<WasmLlmConfig>,
|
|
backendOptions: Option<JsValue>,
|
|
) -> WasmOcrPipelineStage {
|
|
WasmOcrPipelineStage {
|
|
backend,
|
|
priority,
|
|
language,
|
|
tesseract_config: tesseractConfig,
|
|
paddle_ocr_config: paddleOcrConfig,
|
|
vlm_config: vlmConfig,
|
|
backend_options: backendOptions,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrPipelineStage {
|
|
<WasmOcrPipelineStage as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn backend(&self) -> String {
|
|
self.backend.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_backend(&mut self, value: String) {
|
|
self.backend = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn priority(&self) -> u32 {
|
|
self.priority
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_priority(&mut self, value: u32) {
|
|
self.priority = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> Option<String> {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: Option<String>) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseractConfig")]
|
|
pub fn tesseract_config(&self) -> Option<WasmTesseractConfig> {
|
|
self.tesseract_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseractConfig")]
|
|
pub fn set_tesseract_config(&mut self, value: Option<WasmTesseractConfig>) {
|
|
self.tesseract_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "paddleOcrConfig")]
|
|
pub fn paddle_ocr_config(&self) -> Option<JsValue> {
|
|
self.paddle_ocr_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "paddleOcrConfig")]
|
|
pub fn set_paddle_ocr_config(&mut self, value: Option<JsValue>) {
|
|
self.paddle_ocr_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "vlmConfig")]
|
|
pub fn vlm_config(&self) -> Option<WasmLlmConfig> {
|
|
self.vlm_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "vlmConfig")]
|
|
pub fn set_vlm_config(&mut self, value: Option<WasmLlmConfig>) {
|
|
self.vlm_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "backendOptions")]
|
|
pub fn backend_options(&self) -> Option<JsValue> {
|
|
self.backend_options.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "backendOptions")]
|
|
pub fn set_backend_options(&mut self, value: Option<JsValue>) {
|
|
self.backend_options = value;
|
|
}
|
|
}
|
|
|
|
/// Multi-backend OCR pipeline with quality-based fallback.
|
|
///
|
|
/// Backends are tried in priority order (highest first). After each backend
|
|
/// produces output, quality is evaluated. If it meets `quality_thresholds.pipeline_min_quality`,
|
|
/// the result is accepted. Otherwise the next backend is tried.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrPipelineConfig {
|
|
stages: Vec<WasmOcrPipelineStage>,
|
|
quality_thresholds: WasmOcrQualityThresholds,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrPipelineConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
stages: Vec<WasmOcrPipelineStage>,
|
|
qualityThresholds: WasmOcrQualityThresholds,
|
|
) -> WasmOcrPipelineConfig {
|
|
WasmOcrPipelineConfig {
|
|
stages,
|
|
quality_thresholds: qualityThresholds,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrPipelineConfig {
|
|
<WasmOcrPipelineConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn stages(&self) -> Vec<WasmOcrPipelineStage> {
|
|
self.stages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_stages(&mut self, value: Vec<WasmOcrPipelineStage>) {
|
|
self.stages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "qualityThresholds")]
|
|
pub fn quality_thresholds(&self) -> WasmOcrQualityThresholds {
|
|
self.quality_thresholds.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "qualityThresholds")]
|
|
pub fn set_quality_thresholds(&mut self, value: WasmOcrQualityThresholds) {
|
|
self.quality_thresholds = value;
|
|
}
|
|
}
|
|
|
|
/// OCR configuration.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrConfig {
|
|
enabled: bool,
|
|
backend: String,
|
|
language: String,
|
|
tesseract_config: Option<WasmTesseractConfig>,
|
|
output_format: Option<WasmOutputFormat>,
|
|
paddle_ocr_config: Option<JsValue>,
|
|
backend_options: Option<JsValue>,
|
|
element_config: Option<WasmOcrElementConfig>,
|
|
quality_thresholds: Option<WasmOcrQualityThresholds>,
|
|
pipeline: Option<WasmOcrPipelineConfig>,
|
|
auto_rotate: bool,
|
|
vlm_config: Option<WasmLlmConfig>,
|
|
vlm_prompt: Option<String>,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
tessdata_bytes: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
enabled: Option<bool>,
|
|
backend: Option<String>,
|
|
language: Option<String>,
|
|
autoRotate: Option<bool>,
|
|
tesseractConfig: Option<WasmTesseractConfig>,
|
|
outputFormat: Option<WasmOutputFormat>,
|
|
paddleOcrConfig: Option<JsValue>,
|
|
backendOptions: Option<JsValue>,
|
|
elementConfig: Option<WasmOcrElementConfig>,
|
|
qualityThresholds: Option<WasmOcrQualityThresholds>,
|
|
pipeline: Option<WasmOcrPipelineConfig>,
|
|
vlmConfig: Option<WasmLlmConfig>,
|
|
vlmPrompt: Option<String>,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
tessdataBytes: Option<JsValue>,
|
|
) -> WasmOcrConfig {
|
|
WasmOcrConfig {
|
|
enabled: enabled.unwrap_or(true),
|
|
backend: backend.unwrap_or_default(),
|
|
language: language.unwrap_or_default(),
|
|
tesseract_config: tesseractConfig,
|
|
output_format: outputFormat,
|
|
paddle_ocr_config: paddleOcrConfig,
|
|
backend_options: backendOptions,
|
|
element_config: elementConfig,
|
|
quality_thresholds: qualityThresholds,
|
|
pipeline,
|
|
auto_rotate: autoRotate.unwrap_or(false),
|
|
vlm_config: vlmConfig,
|
|
vlm_prompt: vlmPrompt,
|
|
acceleration,
|
|
tessdata_bytes: tessdataBytes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn enabled(&self) -> bool {
|
|
self.enabled
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_enabled(&mut self, value: bool) {
|
|
self.enabled = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn backend(&self) -> String {
|
|
self.backend.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_backend(&mut self, value: String) {
|
|
self.backend = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> String {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: String) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseractConfig")]
|
|
pub fn tesseract_config(&self) -> Option<WasmTesseractConfig> {
|
|
self.tesseract_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseractConfig")]
|
|
pub fn set_tesseract_config(&mut self, value: Option<WasmTesseractConfig>) {
|
|
self.tesseract_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> Option<String> {
|
|
self.output_format.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: Option<WasmOutputFormat>) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "paddleOcrConfig")]
|
|
pub fn paddle_ocr_config(&self) -> Option<JsValue> {
|
|
self.paddle_ocr_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "paddleOcrConfig")]
|
|
pub fn set_paddle_ocr_config(&mut self, value: Option<JsValue>) {
|
|
self.paddle_ocr_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "backendOptions")]
|
|
pub fn backend_options(&self) -> Option<JsValue> {
|
|
self.backend_options.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "backendOptions")]
|
|
pub fn set_backend_options(&mut self, value: Option<JsValue>) {
|
|
self.backend_options = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementConfig")]
|
|
pub fn element_config(&self) -> Option<WasmOcrElementConfig> {
|
|
self.element_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementConfig")]
|
|
pub fn set_element_config(&mut self, value: Option<WasmOcrElementConfig>) {
|
|
self.element_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "qualityThresholds")]
|
|
pub fn quality_thresholds(&self) -> Option<WasmOcrQualityThresholds> {
|
|
self.quality_thresholds.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "qualityThresholds")]
|
|
pub fn set_quality_thresholds(&mut self, value: Option<WasmOcrQualityThresholds>) {
|
|
self.quality_thresholds = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pipeline(&self) -> Option<WasmOcrPipelineConfig> {
|
|
self.pipeline.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pipeline(&mut self, value: Option<WasmOcrPipelineConfig>) {
|
|
self.pipeline = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "autoRotate")]
|
|
pub fn auto_rotate(&self) -> bool {
|
|
self.auto_rotate
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "autoRotate")]
|
|
pub fn set_auto_rotate(&mut self, value: bool) {
|
|
self.auto_rotate = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "vlmConfig")]
|
|
pub fn vlm_config(&self) -> Option<WasmLlmConfig> {
|
|
self.vlm_config.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "vlmConfig")]
|
|
pub fn set_vlm_config(&mut self, value: Option<WasmLlmConfig>) {
|
|
self.vlm_config = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "vlmPrompt")]
|
|
pub fn vlm_prompt(&self) -> Option<String> {
|
|
self.vlm_prompt.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "vlmPrompt")]
|
|
pub fn set_vlm_prompt(&mut self, value: Option<String>) {
|
|
self.vlm_prompt = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
|
|
self.acceleration.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
|
|
self.acceleration = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tessdataBytes")]
|
|
pub fn tessdata_bytes(&self) -> Option<JsValue> {
|
|
self.tessdata_bytes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tessdataBytes")]
|
|
pub fn set_tessdata_bytes(&mut self, value: Option<JsValue>) {
|
|
self.tessdata_bytes = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmOcrConfig {
|
|
kreuzberg::OcrConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Page extraction and tracking configuration.
|
|
///
|
|
/// Controls how pages are extracted, tracked, and represented in the extraction results.
|
|
/// When `None`, page tracking is disabled.
|
|
///
|
|
/// Page range tracking in chunk metadata (first_page/last_page) is automatically enabled
|
|
/// when page boundaries are available and chunking is configured.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageConfig {
|
|
extract_pages: bool,
|
|
insert_page_markers: bool,
|
|
marker_format: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
extractPages: Option<bool>,
|
|
insertPageMarkers: Option<bool>,
|
|
markerFormat: Option<String>,
|
|
) -> WasmPageConfig {
|
|
WasmPageConfig {
|
|
extract_pages: extractPages.unwrap_or(false),
|
|
insert_page_markers: insertPageMarkers.unwrap_or(false),
|
|
marker_format: markerFormat.unwrap_or_else(|| "\n\n<!-- PAGE {page_num} -->\n\n".to_string()),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "extractPages")]
|
|
pub fn extract_pages(&self) -> bool {
|
|
self.extract_pages
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "extractPages")]
|
|
pub fn set_extract_pages(&mut self, value: bool) {
|
|
self.extract_pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "insertPageMarkers")]
|
|
pub fn insert_page_markers(&self) -> bool {
|
|
self.insert_page_markers
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "insertPageMarkers")]
|
|
pub fn set_insert_page_markers(&mut self, value: bool) {
|
|
self.insert_page_markers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "markerFormat")]
|
|
pub fn marker_format(&self) -> String {
|
|
self.marker_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "markerFormat")]
|
|
pub fn set_marker_format(&mut self, value: String) {
|
|
self.marker_format = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmPageConfig {
|
|
kreuzberg::PageConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Post-processor configuration.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPostProcessorConfig {
|
|
enabled: bool,
|
|
enabled_processors: Option<Vec<String>>,
|
|
disabled_processors: Option<Vec<String>>,
|
|
enabled_set: Option<Vec<String>>,
|
|
disabled_set: Option<Vec<String>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPostProcessorConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
enabled: Option<bool>,
|
|
enabledProcessors: Option<Vec<String>>,
|
|
disabledProcessors: Option<Vec<String>>,
|
|
enabledSet: Option<Vec<String>>,
|
|
disabledSet: Option<Vec<String>>,
|
|
) -> WasmPostProcessorConfig {
|
|
WasmPostProcessorConfig {
|
|
enabled: enabled.unwrap_or(true),
|
|
enabled_processors: enabledProcessors,
|
|
disabled_processors: disabledProcessors,
|
|
enabled_set: enabledSet,
|
|
disabled_set: disabledSet,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn enabled(&self) -> bool {
|
|
self.enabled
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_enabled(&mut self, value: bool) {
|
|
self.enabled = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "enabledProcessors")]
|
|
pub fn enabled_processors(&self) -> Option<Vec<String>> {
|
|
self.enabled_processors.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "enabledProcessors")]
|
|
pub fn set_enabled_processors(&mut self, value: Option<Vec<String>>) {
|
|
self.enabled_processors = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "disabledProcessors")]
|
|
pub fn disabled_processors(&self) -> Option<Vec<String>> {
|
|
self.disabled_processors.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "disabledProcessors")]
|
|
pub fn set_disabled_processors(&mut self, value: Option<Vec<String>>) {
|
|
self.disabled_processors = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "enabledSet")]
|
|
pub fn enabled_set(&self) -> Option<Vec<String>> {
|
|
self.enabled_set.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "enabledSet")]
|
|
pub fn set_enabled_set(&mut self, value: Option<Vec<String>>) {
|
|
self.enabled_set = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "disabledSet")]
|
|
pub fn disabled_set(&self) -> Option<Vec<String>> {
|
|
self.disabled_set.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "disabledSet")]
|
|
pub fn set_disabled_set(&mut self, value: Option<Vec<String>>) {
|
|
self.disabled_set = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmPostProcessorConfig {
|
|
kreuzberg::PostProcessorConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Chunking configuration.
|
|
///
|
|
/// Configures text chunking for document content, including chunk size,
|
|
/// overlap, trimming behavior, and optional embeddings.
|
|
///
|
|
/// Use `..Default.default()` when constructing to allow for future field additions:
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmChunkingConfig {
|
|
max_characters: usize,
|
|
overlap: usize,
|
|
trim: bool,
|
|
chunker_type: WasmChunkerType,
|
|
embedding: Option<WasmEmbeddingConfig>,
|
|
preset: Option<String>,
|
|
sizing: JsValue,
|
|
prepend_heading_context: bool,
|
|
topic_threshold: Option<f32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmChunkingConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
maxCharacters: Option<usize>,
|
|
overlap: Option<usize>,
|
|
trim: Option<bool>,
|
|
chunkerType: Option<WasmChunkerType>,
|
|
sizing: Option<JsValue>,
|
|
prependHeadingContext: Option<bool>,
|
|
embedding: Option<WasmEmbeddingConfig>,
|
|
preset: Option<String>,
|
|
topicThreshold: Option<f32>,
|
|
) -> WasmChunkingConfig {
|
|
WasmChunkingConfig {
|
|
max_characters: maxCharacters.unwrap_or(1000),
|
|
overlap: overlap.unwrap_or(200),
|
|
trim: trim.unwrap_or(true),
|
|
chunker_type: chunkerType.unwrap_or_default(),
|
|
embedding,
|
|
preset,
|
|
sizing: sizing.unwrap_or_default(),
|
|
prepend_heading_context: prependHeadingContext.unwrap_or(false),
|
|
topic_threshold: topicThreshold,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxCharacters")]
|
|
pub fn max_characters(&self) -> usize {
|
|
self.max_characters
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxCharacters")]
|
|
pub fn set_max_characters(&mut self, value: usize) {
|
|
self.max_characters = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn overlap(&self) -> usize {
|
|
self.overlap
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_overlap(&mut self, value: usize) {
|
|
self.overlap = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn trim(&self) -> bool {
|
|
self.trim
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_trim(&mut self, value: bool) {
|
|
self.trim = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "chunkerType")]
|
|
pub fn chunker_type(&self) -> String {
|
|
self.chunker_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "chunkerType")]
|
|
pub fn set_chunker_type(&mut self, value: WasmChunkerType) {
|
|
self.chunker_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn embedding(&self) -> Option<WasmEmbeddingConfig> {
|
|
self.embedding.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_embedding(&mut self, value: Option<WasmEmbeddingConfig>) {
|
|
self.embedding = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn preset(&self) -> Option<String> {
|
|
self.preset.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_preset(&mut self, value: Option<String>) {
|
|
self.preset = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn sizing(&self) -> JsValue {
|
|
self.sizing.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_sizing(&mut self, value: JsValue) {
|
|
self.sizing = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "prependHeadingContext")]
|
|
pub fn prepend_heading_context(&self) -> bool {
|
|
self.prepend_heading_context
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "prependHeadingContext")]
|
|
pub fn set_prepend_heading_context(&mut self, value: bool) {
|
|
self.prepend_heading_context = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "topicThreshold")]
|
|
pub fn topic_threshold(&self) -> Option<f32> {
|
|
self.topic_threshold
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "topicThreshold")]
|
|
pub fn set_topic_threshold(&mut self, value: Option<f32>) {
|
|
self.topic_threshold = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmChunkingConfig {
|
|
kreuzberg::ChunkingConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Embedding configuration for text chunks.
|
|
///
|
|
/// Configures embedding generation using ONNX models via the vendored embedding engine.
|
|
/// Requires the `embeddings` feature to be enabled.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEmbeddingConfig {
|
|
model: JsValue,
|
|
normalize: bool,
|
|
batch_size: usize,
|
|
show_download_progress: bool,
|
|
cache_dir: Option<String>,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
max_embed_duration_secs: Option<u64>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmbeddingConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
model: Option<JsValue>,
|
|
normalize: Option<bool>,
|
|
batchSize: Option<usize>,
|
|
showDownloadProgress: Option<bool>,
|
|
cacheDir: Option<String>,
|
|
acceleration: Option<WasmAccelerationConfig>,
|
|
maxEmbedDurationSecs: Option<u64>,
|
|
) -> WasmEmbeddingConfig {
|
|
WasmEmbeddingConfig {
|
|
model: model.unwrap_or_default(),
|
|
normalize: normalize.unwrap_or(true),
|
|
batch_size: batchSize.unwrap_or(32),
|
|
show_download_progress: showDownloadProgress.unwrap_or(false),
|
|
cache_dir: cacheDir,
|
|
acceleration,
|
|
max_embed_duration_secs: maxEmbedDurationSecs,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn model(&self) -> JsValue {
|
|
self.model.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_model(&mut self, value: JsValue) {
|
|
self.model = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn normalize(&self) -> bool {
|
|
self.normalize
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_normalize(&mut self, value: bool) {
|
|
self.normalize = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "batchSize")]
|
|
pub fn batch_size(&self) -> usize {
|
|
self.batch_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "batchSize")]
|
|
pub fn set_batch_size(&mut self, value: usize) {
|
|
self.batch_size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "showDownloadProgress")]
|
|
pub fn show_download_progress(&self) -> bool {
|
|
self.show_download_progress
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "showDownloadProgress")]
|
|
pub fn set_show_download_progress(&mut self, value: bool) {
|
|
self.show_download_progress = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cacheDir")]
|
|
pub fn cache_dir(&self) -> Option<String> {
|
|
self.cache_dir.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "cacheDir")]
|
|
pub fn set_cache_dir(&mut self, value: Option<String>) {
|
|
self.cache_dir = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn acceleration(&self) -> Option<WasmAccelerationConfig> {
|
|
self.acceleration.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_acceleration(&mut self, value: Option<WasmAccelerationConfig>) {
|
|
self.acceleration = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxEmbedDurationSecs")]
|
|
pub fn max_embed_duration_secs(&self) -> Option<u64> {
|
|
self.max_embed_duration_secs
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxEmbedDurationSecs")]
|
|
pub fn set_max_embed_duration_secs(&mut self, value: Option<u64>) {
|
|
self.max_embed_duration_secs = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmEmbeddingConfig {
|
|
kreuzberg::EmbeddingConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// A supported document format entry.
|
|
///
|
|
/// Represents a file extension and its corresponding MIME type that Kreuzberg can process.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmSupportedFormat {
|
|
extension: String,
|
|
mime_type: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmSupportedFormat {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(extension: String, mimeType: String) -> WasmSupportedFormat {
|
|
WasmSupportedFormat {
|
|
extension,
|
|
mime_type: mimeType,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmSupportedFormat {
|
|
<WasmSupportedFormat as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn extension(&self) -> String {
|
|
self.extension.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_extension(&mut self, value: String) {
|
|
self.extension = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> String {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: String) {
|
|
self.mime_type = value;
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmStructuredDataResult {
|
|
content: String,
|
|
format: String,
|
|
metadata: JsValue,
|
|
text_fields: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmStructuredDataResult {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
format: String,
|
|
metadata: JsValue,
|
|
textFields: Vec<String>,
|
|
) -> WasmStructuredDataResult {
|
|
WasmStructuredDataResult {
|
|
content,
|
|
format,
|
|
metadata,
|
|
text_fields: textFields,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmStructuredDataResult {
|
|
<WasmStructuredDataResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> String {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: String) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> JsValue {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: JsValue) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "textFields")]
|
|
pub fn text_fields(&self) -> Vec<String> {
|
|
self.text_fields.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "textFields")]
|
|
pub fn set_text_fields(&mut self, value: Vec<String>) {
|
|
self.text_fields = value;
|
|
}
|
|
}
|
|
|
|
/// Application properties from docProps/app.xml for XLSX
|
|
///
|
|
/// Contains Excel-specific document metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmXlsxAppProperties {
|
|
application: Option<String>,
|
|
app_version: Option<String>,
|
|
doc_security: Option<i32>,
|
|
scale_crop: Option<bool>,
|
|
links_up_to_date: Option<bool>,
|
|
shared_doc: Option<bool>,
|
|
hyperlinks_changed: Option<bool>,
|
|
company: Option<String>,
|
|
worksheet_names: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmXlsxAppProperties {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
worksheetNames: Option<Vec<String>>,
|
|
application: Option<String>,
|
|
appVersion: Option<String>,
|
|
docSecurity: Option<i32>,
|
|
scaleCrop: Option<bool>,
|
|
linksUpToDate: Option<bool>,
|
|
sharedDoc: Option<bool>,
|
|
hyperlinksChanged: Option<bool>,
|
|
company: Option<String>,
|
|
) -> WasmXlsxAppProperties {
|
|
WasmXlsxAppProperties {
|
|
application,
|
|
app_version: appVersion,
|
|
doc_security: docSecurity,
|
|
scale_crop: scaleCrop,
|
|
links_up_to_date: linksUpToDate,
|
|
shared_doc: sharedDoc,
|
|
hyperlinks_changed: hyperlinksChanged,
|
|
company,
|
|
worksheet_names: worksheetNames.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmXlsxAppProperties {
|
|
<WasmXlsxAppProperties as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn application(&self) -> Option<String> {
|
|
self.application.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_application(&mut self, value: Option<String>) {
|
|
self.application = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "appVersion")]
|
|
pub fn app_version(&self) -> Option<String> {
|
|
self.app_version.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "appVersion")]
|
|
pub fn set_app_version(&mut self, value: Option<String>) {
|
|
self.app_version = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "docSecurity")]
|
|
pub fn doc_security(&self) -> Option<i32> {
|
|
self.doc_security
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "docSecurity")]
|
|
pub fn set_doc_security(&mut self, value: Option<i32>) {
|
|
self.doc_security = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "scaleCrop")]
|
|
pub fn scale_crop(&self) -> Option<bool> {
|
|
self.scale_crop
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "scaleCrop")]
|
|
pub fn set_scale_crop(&mut self, value: Option<bool>) {
|
|
self.scale_crop = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "linksUpToDate")]
|
|
pub fn links_up_to_date(&self) -> Option<bool> {
|
|
self.links_up_to_date
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "linksUpToDate")]
|
|
pub fn set_links_up_to_date(&mut self, value: Option<bool>) {
|
|
self.links_up_to_date = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sharedDoc")]
|
|
pub fn shared_doc(&self) -> Option<bool> {
|
|
self.shared_doc
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sharedDoc")]
|
|
pub fn set_shared_doc(&mut self, value: Option<bool>) {
|
|
self.shared_doc = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
|
|
pub fn hyperlinks_changed(&self) -> Option<bool> {
|
|
self.hyperlinks_changed
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
|
|
pub fn set_hyperlinks_changed(&mut self, value: Option<bool>) {
|
|
self.hyperlinks_changed = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn company(&self) -> Option<String> {
|
|
self.company.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_company(&mut self, value: Option<String>) {
|
|
self.company = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "worksheetNames")]
|
|
pub fn worksheet_names(&self) -> Vec<String> {
|
|
self.worksheet_names.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "worksheetNames")]
|
|
pub fn set_worksheet_names(&mut self, value: Vec<String>) {
|
|
self.worksheet_names = value;
|
|
}
|
|
}
|
|
|
|
/// Application properties from docProps/app.xml for PPTX
|
|
///
|
|
/// Contains PowerPoint-specific document metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPptxAppProperties {
|
|
application: Option<String>,
|
|
app_version: Option<String>,
|
|
total_time: Option<i32>,
|
|
company: Option<String>,
|
|
doc_security: Option<i32>,
|
|
scale_crop: Option<bool>,
|
|
links_up_to_date: Option<bool>,
|
|
shared_doc: Option<bool>,
|
|
hyperlinks_changed: Option<bool>,
|
|
slides: Option<i32>,
|
|
notes: Option<i32>,
|
|
hidden_slides: Option<i32>,
|
|
multimedia_clips: Option<i32>,
|
|
presentation_format: Option<String>,
|
|
slide_titles: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPptxAppProperties {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
slideTitles: Option<Vec<String>>,
|
|
application: Option<String>,
|
|
appVersion: Option<String>,
|
|
totalTime: Option<i32>,
|
|
company: Option<String>,
|
|
docSecurity: Option<i32>,
|
|
scaleCrop: Option<bool>,
|
|
linksUpToDate: Option<bool>,
|
|
sharedDoc: Option<bool>,
|
|
hyperlinksChanged: Option<bool>,
|
|
slides: Option<i32>,
|
|
notes: Option<i32>,
|
|
hiddenSlides: Option<i32>,
|
|
multimediaClips: Option<i32>,
|
|
presentationFormat: Option<String>,
|
|
) -> WasmPptxAppProperties {
|
|
WasmPptxAppProperties {
|
|
application,
|
|
app_version: appVersion,
|
|
total_time: totalTime,
|
|
company,
|
|
doc_security: docSecurity,
|
|
scale_crop: scaleCrop,
|
|
links_up_to_date: linksUpToDate,
|
|
shared_doc: sharedDoc,
|
|
hyperlinks_changed: hyperlinksChanged,
|
|
slides,
|
|
notes,
|
|
hidden_slides: hiddenSlides,
|
|
multimedia_clips: multimediaClips,
|
|
presentation_format: presentationFormat,
|
|
slide_titles: slideTitles.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPptxAppProperties {
|
|
<WasmPptxAppProperties as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn application(&self) -> Option<String> {
|
|
self.application.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_application(&mut self, value: Option<String>) {
|
|
self.application = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "appVersion")]
|
|
pub fn app_version(&self) -> Option<String> {
|
|
self.app_version.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "appVersion")]
|
|
pub fn set_app_version(&mut self, value: Option<String>) {
|
|
self.app_version = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalTime")]
|
|
pub fn total_time(&self) -> Option<i32> {
|
|
self.total_time
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalTime")]
|
|
pub fn set_total_time(&mut self, value: Option<i32>) {
|
|
self.total_time = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn company(&self) -> Option<String> {
|
|
self.company.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_company(&mut self, value: Option<String>) {
|
|
self.company = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "docSecurity")]
|
|
pub fn doc_security(&self) -> Option<i32> {
|
|
self.doc_security
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "docSecurity")]
|
|
pub fn set_doc_security(&mut self, value: Option<i32>) {
|
|
self.doc_security = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "scaleCrop")]
|
|
pub fn scale_crop(&self) -> Option<bool> {
|
|
self.scale_crop
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "scaleCrop")]
|
|
pub fn set_scale_crop(&mut self, value: Option<bool>) {
|
|
self.scale_crop = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "linksUpToDate")]
|
|
pub fn links_up_to_date(&self) -> Option<bool> {
|
|
self.links_up_to_date
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "linksUpToDate")]
|
|
pub fn set_links_up_to_date(&mut self, value: Option<bool>) {
|
|
self.links_up_to_date = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sharedDoc")]
|
|
pub fn shared_doc(&self) -> Option<bool> {
|
|
self.shared_doc
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sharedDoc")]
|
|
pub fn set_shared_doc(&mut self, value: Option<bool>) {
|
|
self.shared_doc = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "hyperlinksChanged")]
|
|
pub fn hyperlinks_changed(&self) -> Option<bool> {
|
|
self.hyperlinks_changed
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "hyperlinksChanged")]
|
|
pub fn set_hyperlinks_changed(&mut self, value: Option<bool>) {
|
|
self.hyperlinks_changed = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn slides(&self) -> Option<i32> {
|
|
self.slides
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_slides(&mut self, value: Option<i32>) {
|
|
self.slides = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn notes(&self) -> Option<i32> {
|
|
self.notes
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_notes(&mut self, value: Option<i32>) {
|
|
self.notes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "hiddenSlides")]
|
|
pub fn hidden_slides(&self) -> Option<i32> {
|
|
self.hidden_slides
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "hiddenSlides")]
|
|
pub fn set_hidden_slides(&mut self, value: Option<i32>) {
|
|
self.hidden_slides = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "multimediaClips")]
|
|
pub fn multimedia_clips(&self) -> Option<i32> {
|
|
self.multimedia_clips
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "multimediaClips")]
|
|
pub fn set_multimedia_clips(&mut self, value: Option<i32>) {
|
|
self.multimedia_clips = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "presentationFormat")]
|
|
pub fn presentation_format(&self) -> Option<String> {
|
|
self.presentation_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "presentationFormat")]
|
|
pub fn set_presentation_format(&mut self, value: Option<String>) {
|
|
self.presentation_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "slideTitles")]
|
|
pub fn slide_titles(&self) -> Vec<String> {
|
|
self.slide_titles.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "slideTitles")]
|
|
pub fn set_slide_titles(&mut self, value: Vec<String>) {
|
|
self.slide_titles = value;
|
|
}
|
|
}
|
|
|
|
/// Configuration for security limits across extractors.
|
|
///
|
|
/// All limits are intentionally conservative to prevent DoS attacks
|
|
/// while still supporting legitimate documents.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmSecurityLimits {
|
|
max_archive_size: usize,
|
|
max_compression_ratio: usize,
|
|
max_files_in_archive: usize,
|
|
max_nesting_depth: usize,
|
|
max_entity_length: usize,
|
|
max_content_size: usize,
|
|
max_iterations: usize,
|
|
max_xml_depth: usize,
|
|
max_table_cells: usize,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmSecurityLimits {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
maxArchiveSize: Option<usize>,
|
|
maxCompressionRatio: Option<usize>,
|
|
maxFilesInArchive: Option<usize>,
|
|
maxNestingDepth: Option<usize>,
|
|
maxEntityLength: Option<usize>,
|
|
maxContentSize: Option<usize>,
|
|
maxIterations: Option<usize>,
|
|
maxXmlDepth: Option<usize>,
|
|
maxTableCells: Option<usize>,
|
|
) -> WasmSecurityLimits {
|
|
WasmSecurityLimits {
|
|
max_archive_size: maxArchiveSize.unwrap_or(524288000),
|
|
max_compression_ratio: maxCompressionRatio.unwrap_or(100),
|
|
max_files_in_archive: maxFilesInArchive.unwrap_or(10000),
|
|
max_nesting_depth: maxNestingDepth.unwrap_or(1024),
|
|
max_entity_length: maxEntityLength.unwrap_or(1048576),
|
|
max_content_size: maxContentSize.unwrap_or(104857600),
|
|
max_iterations: maxIterations.unwrap_or(10000000),
|
|
max_xml_depth: maxXmlDepth.unwrap_or(1024),
|
|
max_table_cells: maxTableCells.unwrap_or(100000),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxArchiveSize")]
|
|
pub fn max_archive_size(&self) -> usize {
|
|
self.max_archive_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxArchiveSize")]
|
|
pub fn set_max_archive_size(&mut self, value: usize) {
|
|
self.max_archive_size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxCompressionRatio")]
|
|
pub fn max_compression_ratio(&self) -> usize {
|
|
self.max_compression_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxCompressionRatio")]
|
|
pub fn set_max_compression_ratio(&mut self, value: usize) {
|
|
self.max_compression_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxFilesInArchive")]
|
|
pub fn max_files_in_archive(&self) -> usize {
|
|
self.max_files_in_archive
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxFilesInArchive")]
|
|
pub fn set_max_files_in_archive(&mut self, value: usize) {
|
|
self.max_files_in_archive = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxNestingDepth")]
|
|
pub fn max_nesting_depth(&self) -> usize {
|
|
self.max_nesting_depth
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxNestingDepth")]
|
|
pub fn set_max_nesting_depth(&mut self, value: usize) {
|
|
self.max_nesting_depth = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxEntityLength")]
|
|
pub fn max_entity_length(&self) -> usize {
|
|
self.max_entity_length
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxEntityLength")]
|
|
pub fn set_max_entity_length(&mut self, value: usize) {
|
|
self.max_entity_length = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxContentSize")]
|
|
pub fn max_content_size(&self) -> usize {
|
|
self.max_content_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxContentSize")]
|
|
pub fn set_max_content_size(&mut self, value: usize) {
|
|
self.max_content_size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxIterations")]
|
|
pub fn max_iterations(&self) -> usize {
|
|
self.max_iterations
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxIterations")]
|
|
pub fn set_max_iterations(&mut self, value: usize) {
|
|
self.max_iterations = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxXmlDepth")]
|
|
pub fn max_xml_depth(&self) -> usize {
|
|
self.max_xml_depth
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxXmlDepth")]
|
|
pub fn set_max_xml_depth(&mut self, value: usize) {
|
|
self.max_xml_depth = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "maxTableCells")]
|
|
pub fn max_table_cells(&self) -> usize {
|
|
self.max_table_cells
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "maxTableCells")]
|
|
pub fn set_max_table_cells(&mut self, value: usize) {
|
|
self.max_table_cells = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmSecurityLimits {
|
|
kreuzberg::SecurityLimits::default().into()
|
|
}
|
|
}
|
|
|
|
/// A PDF annotation extracted from a document page.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPdfAnnotation {
|
|
annotation_type: WasmPdfAnnotationType,
|
|
content: Option<String>,
|
|
page_number: u32,
|
|
bounding_box: Option<WasmBoundingBox>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPdfAnnotation {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
annotationType: WasmPdfAnnotationType,
|
|
pageNumber: u32,
|
|
content: Option<String>,
|
|
boundingBox: Option<WasmBoundingBox>,
|
|
) -> WasmPdfAnnotation {
|
|
WasmPdfAnnotation {
|
|
annotation_type: annotationType,
|
|
content,
|
|
page_number: pageNumber,
|
|
bounding_box: boundingBox,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPdfAnnotation {
|
|
<WasmPdfAnnotation as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "annotationType")]
|
|
pub fn annotation_type(&self) -> String {
|
|
self.annotation_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "annotationType")]
|
|
pub fn set_annotation_type(&mut self, value: WasmPdfAnnotationType) {
|
|
self.annotation_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> Option<String> {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: Option<String>) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "boundingBox")]
|
|
pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
|
|
self.bounding_box.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "boundingBox")]
|
|
pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.bounding_box = value;
|
|
}
|
|
}
|
|
|
|
/// Comprehensive Djot document structure with semantic preservation.
|
|
///
|
|
/// This type captures the full richness of Djot markup, including:
|
|
/// - Block-level structures (headings, lists, blockquotes, code blocks, etc.)
|
|
/// - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.)
|
|
/// - Attributes (classes, IDs, key-value pairs)
|
|
/// - Links, images, footnotes
|
|
/// - Math expressions (inline and display)
|
|
/// - Tables with full structure
|
|
///
|
|
/// Available when the `djot` feature is enabled.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDjotContent {
|
|
plain_text: String,
|
|
blocks: Vec<WasmFormattedBlock>,
|
|
metadata: WasmMetadata,
|
|
tables: Vec<WasmTable>,
|
|
images: Vec<WasmDjotImage>,
|
|
links: Vec<WasmDjotLink>,
|
|
footnotes: Vec<WasmFootnote>,
|
|
attributes: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDjotContent {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
plainText: String,
|
|
blocks: Vec<WasmFormattedBlock>,
|
|
metadata: WasmMetadata,
|
|
tables: Vec<WasmTable>,
|
|
images: Vec<WasmDjotImage>,
|
|
links: Vec<WasmDjotLink>,
|
|
footnotes: Vec<WasmFootnote>,
|
|
attributes: Vec<String>,
|
|
) -> WasmDjotContent {
|
|
WasmDjotContent {
|
|
plain_text: plainText,
|
|
blocks,
|
|
metadata,
|
|
tables,
|
|
images,
|
|
links,
|
|
footnotes,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDjotContent {
|
|
<WasmDjotContent as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "plainText")]
|
|
pub fn plain_text(&self) -> String {
|
|
self.plain_text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "plainText")]
|
|
pub fn set_plain_text(&mut self, value: String) {
|
|
self.plain_text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn blocks(&self) -> Vec<WasmFormattedBlock> {
|
|
self.blocks.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_blocks(&mut self, value: Vec<WasmFormattedBlock>) {
|
|
self.blocks = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> WasmMetadata {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: WasmMetadata) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn tables(&self) -> Vec<WasmTable> {
|
|
self.tables.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_tables(&mut self, value: Vec<WasmTable>) {
|
|
self.tables = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Vec<WasmDjotImage> {
|
|
self.images.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Vec<WasmDjotImage>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn links(&self) -> Vec<WasmDjotLink> {
|
|
self.links.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_links(&mut self, value: Vec<WasmDjotLink>) {
|
|
self.links = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn footnotes(&self) -> Vec<WasmFootnote> {
|
|
self.footnotes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_footnotes(&mut self, value: Vec<WasmFootnote>) {
|
|
self.footnotes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Vec<String> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Vec<String>) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Block-level element in a Djot document.
|
|
///
|
|
/// Represents structural elements like headings, paragraphs, lists, code blocks, etc.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmFormattedBlock {
|
|
block_type: WasmBlockType,
|
|
level: Option<usize>,
|
|
inline_content: Vec<WasmInlineElement>,
|
|
attributes: Option<String>,
|
|
language: Option<String>,
|
|
code: Option<String>,
|
|
children: Vec<WasmFormattedBlock>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmFormattedBlock {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
blockType: WasmBlockType,
|
|
inlineContent: Vec<WasmInlineElement>,
|
|
children: Vec<WasmFormattedBlock>,
|
|
level: Option<usize>,
|
|
attributes: Option<String>,
|
|
language: Option<String>,
|
|
code: Option<String>,
|
|
) -> WasmFormattedBlock {
|
|
WasmFormattedBlock {
|
|
block_type: blockType,
|
|
level,
|
|
inline_content: inlineContent,
|
|
attributes,
|
|
language,
|
|
code,
|
|
children,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmFormattedBlock {
|
|
<WasmFormattedBlock as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "blockType")]
|
|
pub fn block_type(&self) -> String {
|
|
self.block_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "blockType")]
|
|
pub fn set_block_type(&mut self, value: WasmBlockType) {
|
|
self.block_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn level(&self) -> Option<usize> {
|
|
self.level
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_level(&mut self, value: Option<usize>) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "inlineContent")]
|
|
pub fn inline_content(&self) -> Vec<WasmInlineElement> {
|
|
self.inline_content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "inlineContent")]
|
|
pub fn set_inline_content(&mut self, value: Vec<WasmInlineElement>) {
|
|
self.inline_content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Option<String> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Option<String>) {
|
|
self.attributes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> Option<String> {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: Option<String>) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn code(&self) -> Option<String> {
|
|
self.code.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_code(&mut self, value: Option<String>) {
|
|
self.code = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn children(&self) -> Vec<WasmFormattedBlock> {
|
|
self.children.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_children(&mut self, value: Vec<WasmFormattedBlock>) {
|
|
self.children = value;
|
|
}
|
|
}
|
|
|
|
/// Inline element within a block.
|
|
///
|
|
/// Represents text with formatting, links, images, etc.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmInlineElement {
|
|
element_type: WasmInlineType,
|
|
content: String,
|
|
attributes: Option<String>,
|
|
metadata: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmInlineElement {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
elementType: WasmInlineType,
|
|
content: String,
|
|
attributes: Option<String>,
|
|
metadata: Option<JsValue>,
|
|
) -> WasmInlineElement {
|
|
WasmInlineElement {
|
|
element_type: elementType,
|
|
content,
|
|
attributes,
|
|
metadata,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmInlineElement {
|
|
<WasmInlineElement as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementType")]
|
|
pub fn element_type(&self) -> String {
|
|
self.element_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementType")]
|
|
pub fn set_element_type(&mut self, value: WasmInlineType) {
|
|
self.element_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Option<String> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Option<String>) {
|
|
self.attributes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> Option<JsValue> {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: Option<JsValue>) {
|
|
self.metadata = value;
|
|
}
|
|
}
|
|
|
|
/// Image element in Djot.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDjotImage {
|
|
src: String,
|
|
alt: String,
|
|
title: Option<String>,
|
|
attributes: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDjotImage {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(src: String, alt: String, title: Option<String>, attributes: Option<String>) -> WasmDjotImage {
|
|
WasmDjotImage {
|
|
src,
|
|
alt,
|
|
title,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDjotImage {
|
|
<WasmDjotImage as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn src(&self) -> String {
|
|
self.src.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_src(&mut self, value: String) {
|
|
self.src = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn alt(&self) -> String {
|
|
self.alt.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_alt(&mut self, value: String) {
|
|
self.alt = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Option<String> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Option<String>) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Link element in Djot.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDjotLink {
|
|
url: String,
|
|
text: String,
|
|
title: Option<String>,
|
|
attributes: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDjotLink {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(url: String, text: String, title: Option<String>, attributes: Option<String>) -> WasmDjotLink {
|
|
WasmDjotLink {
|
|
url,
|
|
text,
|
|
title,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDjotLink {
|
|
<WasmDjotLink as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn url(&self) -> String {
|
|
self.url.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_url(&mut self, value: String) {
|
|
self.url = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Option<String> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Option<String>) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Footnote in Djot.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmFootnote {
|
|
label: String,
|
|
content: Vec<WasmFormattedBlock>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmFootnote {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(label: String, content: Vec<WasmFormattedBlock>) -> WasmFootnote {
|
|
WasmFootnote { label, content }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmFootnote {
|
|
<WasmFootnote as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn label(&self) -> String {
|
|
self.label.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_label(&mut self, value: String) {
|
|
self.label = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> Vec<WasmFormattedBlock> {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: Vec<WasmFormattedBlock>) {
|
|
self.content = value;
|
|
}
|
|
}
|
|
|
|
/// Top-level structured document representation.
|
|
///
|
|
/// A flat array of nodes with index-based parent/child references forming a tree.
|
|
/// Root-level nodes have `parent: None`. Use `body_roots()` and `furniture_roots()`
|
|
/// to iterate over top-level content by layer.
|
|
///
|
|
/// # Validation
|
|
///
|
|
/// Call `validate()` after construction to verify all node indices are in bounds
|
|
/// and parent-child relationships are bidirectionally consistent.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDocumentStructure {
|
|
nodes: Vec<WasmDocumentNode>,
|
|
source_format: Option<String>,
|
|
relationships: Vec<WasmDocumentRelationship>,
|
|
node_types: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDocumentStructure {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
nodes: Option<Vec<WasmDocumentNode>>,
|
|
relationships: Option<Vec<WasmDocumentRelationship>>,
|
|
nodeTypes: Option<Vec<String>>,
|
|
sourceFormat: Option<String>,
|
|
) -> WasmDocumentStructure {
|
|
WasmDocumentStructure {
|
|
nodes: nodes.unwrap_or_default(),
|
|
source_format: sourceFormat,
|
|
relationships: relationships.unwrap_or_default(),
|
|
node_types: nodeTypes.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn nodes(&self) -> Vec<WasmDocumentNode> {
|
|
self.nodes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_nodes(&mut self, value: Vec<WasmDocumentNode>) {
|
|
self.nodes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sourceFormat")]
|
|
pub fn source_format(&self) -> Option<String> {
|
|
self.source_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sourceFormat")]
|
|
pub fn set_source_format(&mut self, value: Option<String>) {
|
|
self.source_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn relationships(&self) -> Vec<WasmDocumentRelationship> {
|
|
self.relationships.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_relationships(&mut self, value: Vec<WasmDocumentRelationship>) {
|
|
self.relationships = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "nodeTypes")]
|
|
pub fn node_types(&self) -> Vec<String> {
|
|
self.node_types.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "nodeTypes")]
|
|
pub fn set_node_types(&mut self, value: Vec<String>) {
|
|
self.node_types = value;
|
|
}
|
|
|
|
/// Compute and populate the `node_types` field from the current `nodes`.
|
|
///
|
|
/// Call this after all nodes have been added to the structure. Internal
|
|
/// construction paths (builder, derivation) call this automatically.
|
|
///
|
|
/// # Examples
|
|
#[wasm_bindgen(js_name = "finalizeNodeTypes")]
|
|
pub fn finalize_node_types(&self) -> () {
|
|
kreuzberg::DocumentStructure::from(self.clone()).finalize_node_types()
|
|
}
|
|
|
|
/// Check if the document structure is empty.
|
|
#[wasm_bindgen(js_name = "isEmpty")]
|
|
pub fn is_empty(&self) -> bool {
|
|
kreuzberg::DocumentStructure::from(self.clone()).is_empty()
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmDocumentStructure {
|
|
kreuzberg::DocumentStructure::default().into()
|
|
}
|
|
}
|
|
|
|
/// A resolved relationship between two nodes in the document tree.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDocumentRelationship {
|
|
source: u32,
|
|
target: u32,
|
|
kind: WasmRelationshipKind,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDocumentRelationship {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(source: u32, target: u32, kind: WasmRelationshipKind) -> WasmDocumentRelationship {
|
|
WasmDocumentRelationship { source, target, kind }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDocumentRelationship {
|
|
<WasmDocumentRelationship as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn source(&self) -> u32 {
|
|
self.source
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_source(&mut self, value: u32) {
|
|
self.source = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn target(&self) -> u32 {
|
|
self.target
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_target(&mut self, value: u32) {
|
|
self.target = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn kind(&self) -> String {
|
|
self.kind.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_kind(&mut self, value: WasmRelationshipKind) {
|
|
self.kind = value;
|
|
}
|
|
}
|
|
|
|
/// A single node in the document tree.
|
|
///
|
|
/// Each node has deterministic `id`, typed `content`, optional `parent`/`children`
|
|
/// for tree structure, and metadata like page number, bounding box, and content layer.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDocumentNode {
|
|
id: String,
|
|
content: JsValue,
|
|
parent: Option<u32>,
|
|
children: Vec<u32>,
|
|
content_layer: WasmContentLayer,
|
|
page: Option<u32>,
|
|
page_end: Option<u32>,
|
|
bbox: Option<WasmBoundingBox>,
|
|
annotations: Vec<WasmTextAnnotation>,
|
|
attributes: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDocumentNode {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
id: String,
|
|
content: JsValue,
|
|
children: Vec<u32>,
|
|
contentLayer: WasmContentLayer,
|
|
annotations: Vec<WasmTextAnnotation>,
|
|
parent: Option<u32>,
|
|
page: Option<u32>,
|
|
pageEnd: Option<u32>,
|
|
bbox: Option<WasmBoundingBox>,
|
|
attributes: Option<JsValue>,
|
|
) -> WasmDocumentNode {
|
|
WasmDocumentNode {
|
|
id,
|
|
content,
|
|
parent,
|
|
children,
|
|
content_layer: contentLayer,
|
|
page,
|
|
page_end: pageEnd,
|
|
bbox,
|
|
annotations,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDocumentNode {
|
|
<WasmDocumentNode as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn id(&self) -> String {
|
|
self.id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_id(&mut self, value: String) {
|
|
self.id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> JsValue {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: JsValue) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn parent(&self) -> Option<u32> {
|
|
self.parent
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_parent(&mut self, value: Option<u32>) {
|
|
self.parent = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn children(&self) -> Vec<u32> {
|
|
self.children.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_children(&mut self, value: Vec<u32>) {
|
|
self.children = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "contentLayer")]
|
|
pub fn content_layer(&self) -> String {
|
|
self.content_layer.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "contentLayer")]
|
|
pub fn set_content_layer(&mut self, value: WasmContentLayer) {
|
|
self.content_layer = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn page(&self) -> Option<u32> {
|
|
self.page
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_page(&mut self, value: Option<u32>) {
|
|
self.page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageEnd")]
|
|
pub fn page_end(&self) -> Option<u32> {
|
|
self.page_end
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageEnd")]
|
|
pub fn set_page_end(&mut self, value: Option<u32>) {
|
|
self.page_end = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn bbox(&self) -> Option<WasmBoundingBox> {
|
|
self.bbox.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_bbox(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.bbox = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn annotations(&self) -> Vec<WasmTextAnnotation> {
|
|
self.annotations.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_annotations(&mut self, value: Vec<WasmTextAnnotation>) {
|
|
self.annotations = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> Option<JsValue> {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: Option<JsValue>) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Structured table grid with cell-level metadata.
|
|
///
|
|
/// Stores row/column dimensions and a flat list of cells with position info.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTableGrid {
|
|
rows: u32,
|
|
cols: u32,
|
|
cells: Vec<WasmGridCell>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTableGrid {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(rows: Option<u32>, cols: Option<u32>, cells: Option<Vec<WasmGridCell>>) -> WasmTableGrid {
|
|
WasmTableGrid {
|
|
rows: rows.unwrap_or_default(),
|
|
cols: cols.unwrap_or_default(),
|
|
cells: cells.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTableGrid {
|
|
<WasmTableGrid as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn rows(&self) -> u32 {
|
|
self.rows
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_rows(&mut self, value: u32) {
|
|
self.rows = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn cols(&self) -> u32 {
|
|
self.cols
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_cols(&mut self, value: u32) {
|
|
self.cols = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn cells(&self) -> Vec<WasmGridCell> {
|
|
self.cells.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_cells(&mut self, value: Vec<WasmGridCell>) {
|
|
self.cells = value;
|
|
}
|
|
}
|
|
|
|
/// Individual grid cell with position and span metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmGridCell {
|
|
content: String,
|
|
row: u32,
|
|
col: u32,
|
|
row_span: u32,
|
|
col_span: u32,
|
|
is_header: bool,
|
|
bbox: Option<WasmBoundingBox>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmGridCell {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
row: u32,
|
|
col: u32,
|
|
rowSpan: u32,
|
|
colSpan: u32,
|
|
isHeader: bool,
|
|
bbox: Option<WasmBoundingBox>,
|
|
) -> WasmGridCell {
|
|
WasmGridCell {
|
|
content,
|
|
row,
|
|
col,
|
|
row_span: rowSpan,
|
|
col_span: colSpan,
|
|
is_header: isHeader,
|
|
bbox,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmGridCell {
|
|
<WasmGridCell as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn row(&self) -> u32 {
|
|
self.row
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_row(&mut self, value: u32) {
|
|
self.row = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn col(&self) -> u32 {
|
|
self.col
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_col(&mut self, value: u32) {
|
|
self.col = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "rowSpan")]
|
|
pub fn row_span(&self) -> u32 {
|
|
self.row_span
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "rowSpan")]
|
|
pub fn set_row_span(&mut self, value: u32) {
|
|
self.row_span = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "colSpan")]
|
|
pub fn col_span(&self) -> u32 {
|
|
self.col_span
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "colSpan")]
|
|
pub fn set_col_span(&mut self, value: u32) {
|
|
self.col_span = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isHeader")]
|
|
pub fn is_header(&self) -> bool {
|
|
self.is_header
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isHeader")]
|
|
pub fn set_is_header(&mut self, value: bool) {
|
|
self.is_header = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn bbox(&self) -> Option<WasmBoundingBox> {
|
|
self.bbox.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_bbox(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.bbox = value;
|
|
}
|
|
}
|
|
|
|
/// Inline text annotation — byte-range based formatting and links.
|
|
///
|
|
/// Annotations reference byte offsets into the node's text content,
|
|
/// enabling precise identification of formatted regions.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTextAnnotation {
|
|
start: u32,
|
|
end: u32,
|
|
kind: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTextAnnotation {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(start: u32, end: u32, kind: JsValue) -> WasmTextAnnotation {
|
|
WasmTextAnnotation { start, end, kind }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTextAnnotation {
|
|
<WasmTextAnnotation as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn start(&self) -> u32 {
|
|
self.start
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_start(&mut self, value: u32) {
|
|
self.start = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn end(&self) -> u32 {
|
|
self.end
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_end(&mut self, value: u32) {
|
|
self.end = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn kind(&self) -> JsValue {
|
|
self.kind.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_kind(&mut self, value: JsValue) {
|
|
self.kind = value;
|
|
}
|
|
}
|
|
|
|
/// General extraction result used by the core extraction API.
|
|
///
|
|
/// This is the main result type returned by all extraction functions.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExtractionResult {
|
|
content: String,
|
|
mime_type: String,
|
|
metadata: WasmMetadata,
|
|
extraction_method: Option<WasmExtractionMethod>,
|
|
tables: Vec<WasmTable>,
|
|
detected_languages: Option<Vec<String>>,
|
|
chunks: Option<Vec<WasmChunk>>,
|
|
images: Option<Vec<WasmExtractedImage>>,
|
|
pages: Option<Vec<WasmPageContent>>,
|
|
elements: Option<Vec<WasmElement>>,
|
|
djot_content: Option<WasmDjotContent>,
|
|
ocr_elements: Option<Vec<WasmOcrElement>>,
|
|
document: Option<WasmDocumentStructure>,
|
|
quality_score: Option<f64>,
|
|
processing_warnings: Vec<WasmProcessingWarning>,
|
|
annotations: Option<Vec<WasmPdfAnnotation>>,
|
|
children: Option<Vec<WasmArchiveEntry>>,
|
|
uris: Option<Vec<WasmExtractedUri>>,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
structured_output: Option<JsValue>,
|
|
code_intelligence: Option<JsValue>,
|
|
llm_usage: Option<Vec<WasmLlmUsage>>,
|
|
formatted_content: Option<String>,
|
|
ocr_internal_document: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExtractionResult {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: Option<String>,
|
|
mimeType: Option<String>,
|
|
metadata: Option<WasmMetadata>,
|
|
tables: Option<Vec<WasmTable>>,
|
|
processingWarnings: Option<Vec<WasmProcessingWarning>>,
|
|
extractionMethod: Option<WasmExtractionMethod>,
|
|
detectedLanguages: Option<Vec<String>>,
|
|
chunks: Option<Vec<WasmChunk>>,
|
|
images: Option<Vec<WasmExtractedImage>>,
|
|
pages: Option<Vec<WasmPageContent>>,
|
|
elements: Option<Vec<WasmElement>>,
|
|
djotContent: Option<WasmDjotContent>,
|
|
ocrElements: Option<Vec<WasmOcrElement>>,
|
|
document: Option<WasmDocumentStructure>,
|
|
qualityScore: Option<f64>,
|
|
annotations: Option<Vec<WasmPdfAnnotation>>,
|
|
children: Option<Vec<WasmArchiveEntry>>,
|
|
uris: Option<Vec<WasmExtractedUri>>,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
structuredOutput: Option<JsValue>,
|
|
llmUsage: Option<Vec<WasmLlmUsage>>,
|
|
formattedContent: Option<String>,
|
|
ocrInternalDocument: Option<String>,
|
|
) -> WasmExtractionResult {
|
|
WasmExtractionResult {
|
|
content: content.unwrap_or_default(),
|
|
mime_type: mimeType.unwrap_or_default(),
|
|
metadata: metadata.unwrap_or_default(),
|
|
extraction_method: extractionMethod,
|
|
tables: tables.unwrap_or_default(),
|
|
detected_languages: detectedLanguages,
|
|
chunks,
|
|
images,
|
|
pages,
|
|
elements,
|
|
djot_content: djotContent,
|
|
ocr_elements: ocrElements,
|
|
document,
|
|
quality_score: qualityScore,
|
|
processing_warnings: processingWarnings.unwrap_or_default(),
|
|
annotations,
|
|
children,
|
|
uris,
|
|
revisions,
|
|
structured_output: structuredOutput,
|
|
code_intelligence: Default::default(),
|
|
llm_usage: llmUsage,
|
|
formatted_content: formattedContent,
|
|
ocr_internal_document: ocrInternalDocument,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExtractionResult {
|
|
<WasmExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> String {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: String) {
|
|
self.mime_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> WasmMetadata {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: WasmMetadata) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "extractionMethod")]
|
|
pub fn extraction_method(&self) -> Option<String> {
|
|
self.extraction_method.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "extractionMethod")]
|
|
pub fn set_extraction_method(&mut self, value: Option<WasmExtractionMethod>) {
|
|
self.extraction_method = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn tables(&self) -> Vec<WasmTable> {
|
|
self.tables.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_tables(&mut self, value: Vec<WasmTable>) {
|
|
self.tables = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "detectedLanguages")]
|
|
pub fn detected_languages(&self) -> Option<Vec<String>> {
|
|
self.detected_languages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "detectedLanguages")]
|
|
pub fn set_detected_languages(&mut self, value: Option<Vec<String>>) {
|
|
self.detected_languages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn chunks(&self) -> Option<js_sys::Array> {
|
|
self.chunks.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_chunks(&mut self, value: Option<Vec<WasmChunk>>) {
|
|
self.chunks = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Option<js_sys::Array> {
|
|
self.images.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Option<Vec<WasmExtractedImage>>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pages(&self) -> Option<js_sys::Array> {
|
|
self.pages.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pages(&mut self, value: Option<Vec<WasmPageContent>>) {
|
|
self.pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn elements(&self) -> Option<js_sys::Array> {
|
|
self.elements.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_elements(&mut self, value: Option<Vec<WasmElement>>) {
|
|
self.elements = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "djotContent")]
|
|
pub fn djot_content(&self) -> Option<WasmDjotContent> {
|
|
self.djot_content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "djotContent")]
|
|
pub fn set_djot_content(&mut self, value: Option<WasmDjotContent>) {
|
|
self.djot_content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrElements")]
|
|
pub fn ocr_elements(&self) -> Option<js_sys::Array> {
|
|
self.ocr_elements.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrElements")]
|
|
pub fn set_ocr_elements(&mut self, value: Option<Vec<WasmOcrElement>>) {
|
|
self.ocr_elements = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn document(&self) -> Option<WasmDocumentStructure> {
|
|
self.document.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_document(&mut self, value: Option<WasmDocumentStructure>) {
|
|
self.document = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "qualityScore")]
|
|
pub fn quality_score(&self) -> Option<f64> {
|
|
self.quality_score
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "qualityScore")]
|
|
pub fn set_quality_score(&mut self, value: Option<f64>) {
|
|
self.quality_score = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "processingWarnings")]
|
|
pub fn processing_warnings(&self) -> Vec<WasmProcessingWarning> {
|
|
self.processing_warnings.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "processingWarnings")]
|
|
pub fn set_processing_warnings(&mut self, value: Vec<WasmProcessingWarning>) {
|
|
self.processing_warnings = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn annotations(&self) -> Option<js_sys::Array> {
|
|
self.annotations.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_annotations(&mut self, value: Option<Vec<WasmPdfAnnotation>>) {
|
|
self.annotations = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn children(&self) -> Option<js_sys::Array> {
|
|
self.children.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_children(&mut self, value: Option<Vec<WasmArchiveEntry>>) {
|
|
self.children = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn uris(&self) -> Option<js_sys::Array> {
|
|
self.uris.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_uris(&mut self, value: Option<Vec<WasmExtractedUri>>) {
|
|
self.uris = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn revisions(&self) -> Option<js_sys::Array> {
|
|
self.revisions.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
|
|
self.revisions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "structuredOutput")]
|
|
pub fn structured_output(&self) -> Option<JsValue> {
|
|
self.structured_output.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "structuredOutput")]
|
|
pub fn set_structured_output(&mut self, value: Option<JsValue>) {
|
|
self.structured_output = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "codeIntelligence")]
|
|
pub fn code_intelligence(&self) -> Option<JsValue> {
|
|
self.code_intelligence.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "codeIntelligence")]
|
|
pub fn set_code_intelligence(&mut self, value: Option<JsValue>) {
|
|
self.code_intelligence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "llmUsage")]
|
|
pub fn llm_usage(&self) -> Option<js_sys::Array> {
|
|
self.llm_usage.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "llmUsage")]
|
|
pub fn set_llm_usage(&mut self, value: Option<Vec<WasmLlmUsage>>) {
|
|
self.llm_usage = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "formattedContent")]
|
|
pub fn formatted_content(&self) -> Option<String> {
|
|
self.formatted_content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "formattedContent")]
|
|
pub fn set_formatted_content(&mut self, value: Option<String>) {
|
|
self.formatted_content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrInternalDocument")]
|
|
pub fn ocr_internal_document(&self) -> Option<String> {
|
|
self.ocr_internal_document.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrInternalDocument")]
|
|
pub fn set_ocr_internal_document(&mut self, value: Option<String>) {
|
|
self.ocr_internal_document = value;
|
|
}
|
|
|
|
/// Convert from an OCR result.
|
|
#[wasm_bindgen(js_name = "fromOcr")]
|
|
pub fn from_ocr(ocr: WasmOcrExtractionResult) -> WasmExtractionResult {
|
|
let ocr_core: kreuzberg::OcrExtractionResult = ocr.into();
|
|
|
|
kreuzberg::ExtractionResult::from_ocr(ocr_core).into()
|
|
}
|
|
}
|
|
|
|
/// A single file extracted from an archive.
|
|
///
|
|
/// When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction
|
|
/// enabled, each processable file produces its own full `ExtractionResult`.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmArchiveEntry {
|
|
path: String,
|
|
mime_type: String,
|
|
result: WasmExtractionResult,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmArchiveEntry {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(path: String, mimeType: String, result: WasmExtractionResult) -> WasmArchiveEntry {
|
|
WasmArchiveEntry {
|
|
path,
|
|
mime_type: mimeType,
|
|
result,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmArchiveEntry {
|
|
<WasmArchiveEntry as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn path(&self) -> String {
|
|
self.path.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_path(&mut self, value: String) {
|
|
self.path = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> String {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: String) {
|
|
self.mime_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn result(&self) -> WasmExtractionResult {
|
|
self.result.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_result(&mut self, value: WasmExtractionResult) {
|
|
self.result = value;
|
|
}
|
|
}
|
|
|
|
/// A non-fatal warning from a processing pipeline stage.
|
|
///
|
|
/// Captures errors from optional features that don't prevent extraction
|
|
/// but may indicate degraded results.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmProcessingWarning {
|
|
source: String,
|
|
message: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmProcessingWarning {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(source: String, message: String) -> WasmProcessingWarning {
|
|
WasmProcessingWarning { source, message }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmProcessingWarning {
|
|
<WasmProcessingWarning as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn source(&self) -> String {
|
|
self.source.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_source(&mut self, value: String) {
|
|
self.source = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn message(&self) -> String {
|
|
self.message.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_message(&mut self, value: String) {
|
|
self.message = value;
|
|
}
|
|
}
|
|
|
|
/// Token usage and cost data for a single LLM call made during extraction.
|
|
///
|
|
/// Populated when VLM OCR, structured extraction, or LLM-based embeddings
|
|
/// are used. Multiple entries may be present when multiple LLM calls occur
|
|
/// within one extraction (e.g. VLM OCR + structured extraction).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmLlmUsage {
|
|
model: String,
|
|
source: String,
|
|
input_tokens: Option<u64>,
|
|
output_tokens: Option<u64>,
|
|
total_tokens: Option<u64>,
|
|
estimated_cost: Option<f64>,
|
|
finish_reason: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmLlmUsage {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
model: Option<String>,
|
|
source: Option<String>,
|
|
inputTokens: Option<u64>,
|
|
outputTokens: Option<u64>,
|
|
totalTokens: Option<u64>,
|
|
estimatedCost: Option<f64>,
|
|
finishReason: Option<String>,
|
|
) -> WasmLlmUsage {
|
|
WasmLlmUsage {
|
|
model: model.unwrap_or_default(),
|
|
source: source.unwrap_or_default(),
|
|
input_tokens: inputTokens,
|
|
output_tokens: outputTokens,
|
|
total_tokens: totalTokens,
|
|
estimated_cost: estimatedCost,
|
|
finish_reason: finishReason,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmLlmUsage {
|
|
<WasmLlmUsage as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn model(&self) -> String {
|
|
self.model.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_model(&mut self, value: String) {
|
|
self.model = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn source(&self) -> String {
|
|
self.source.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_source(&mut self, value: String) {
|
|
self.source = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "inputTokens")]
|
|
pub fn input_tokens(&self) -> Option<u64> {
|
|
self.input_tokens
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "inputTokens")]
|
|
pub fn set_input_tokens(&mut self, value: Option<u64>) {
|
|
self.input_tokens = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputTokens")]
|
|
pub fn output_tokens(&self) -> Option<u64> {
|
|
self.output_tokens
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputTokens")]
|
|
pub fn set_output_tokens(&mut self, value: Option<u64>) {
|
|
self.output_tokens = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalTokens")]
|
|
pub fn total_tokens(&self) -> Option<u64> {
|
|
self.total_tokens
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalTokens")]
|
|
pub fn set_total_tokens(&mut self, value: Option<u64>) {
|
|
self.total_tokens = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "estimatedCost")]
|
|
pub fn estimated_cost(&self) -> Option<f64> {
|
|
self.estimated_cost
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "estimatedCost")]
|
|
pub fn set_estimated_cost(&mut self, value: Option<f64>) {
|
|
self.estimated_cost = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "finishReason")]
|
|
pub fn finish_reason(&self) -> Option<String> {
|
|
self.finish_reason.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "finishReason")]
|
|
pub fn set_finish_reason(&mut self, value: Option<String>) {
|
|
self.finish_reason = value;
|
|
}
|
|
}
|
|
|
|
/// A text chunk with optional embedding and metadata.
|
|
///
|
|
/// Chunks are created when chunking is enabled in `ExtractionConfig`. Each chunk
|
|
/// contains the text content, optional embedding vector (if embedding generation
|
|
/// is configured), and metadata about its position in the document.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmChunk {
|
|
content: String,
|
|
chunk_type: WasmChunkType,
|
|
embedding: Option<Vec<f32>>,
|
|
metadata: WasmChunkMetadata,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmChunk {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
chunkType: WasmChunkType,
|
|
metadata: WasmChunkMetadata,
|
|
embedding: Option<Vec<f32>>,
|
|
) -> WasmChunk {
|
|
WasmChunk {
|
|
content,
|
|
chunk_type: chunkType,
|
|
embedding,
|
|
metadata,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmChunk {
|
|
<WasmChunk as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "chunkType")]
|
|
pub fn chunk_type(&self) -> String {
|
|
self.chunk_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "chunkType")]
|
|
pub fn set_chunk_type(&mut self, value: WasmChunkType) {
|
|
self.chunk_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn embedding(&self) -> Option<Vec<f32>> {
|
|
self.embedding.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_embedding(&mut self, value: Option<Vec<f32>>) {
|
|
self.embedding = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> WasmChunkMetadata {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: WasmChunkMetadata) {
|
|
self.metadata = value;
|
|
}
|
|
}
|
|
|
|
/// Heading context for a chunk within a Markdown document.
|
|
///
|
|
/// Contains the heading hierarchy from document root to this chunk's section.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmHeadingContext {
|
|
headings: Vec<WasmHeadingLevel>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmHeadingContext {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(headings: Vec<WasmHeadingLevel>) -> WasmHeadingContext {
|
|
WasmHeadingContext { headings }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmHeadingContext {
|
|
<WasmHeadingContext as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn headings(&self) -> Vec<WasmHeadingLevel> {
|
|
self.headings.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_headings(&mut self, value: Vec<WasmHeadingLevel>) {
|
|
self.headings = value;
|
|
}
|
|
}
|
|
|
|
/// A single heading in the hierarchy.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmHeadingLevel {
|
|
level: u8,
|
|
text: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmHeadingLevel {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(level: u8, text: String) -> WasmHeadingLevel {
|
|
WasmHeadingLevel { level, text }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmHeadingLevel {
|
|
<WasmHeadingLevel as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn level(&self) -> u8 {
|
|
self.level
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_level(&mut self, value: u8) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
}
|
|
|
|
/// Metadata about a chunk's position in the original document.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmChunkMetadata {
|
|
byte_start: usize,
|
|
byte_end: usize,
|
|
token_count: Option<usize>,
|
|
chunk_index: usize,
|
|
total_chunks: usize,
|
|
first_page: Option<u32>,
|
|
last_page: Option<u32>,
|
|
heading_context: Option<WasmHeadingContext>,
|
|
image_indices: Vec<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmChunkMetadata {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
byteStart: usize,
|
|
byteEnd: usize,
|
|
chunkIndex: usize,
|
|
totalChunks: usize,
|
|
imageIndices: Vec<u32>,
|
|
tokenCount: Option<usize>,
|
|
firstPage: Option<u32>,
|
|
lastPage: Option<u32>,
|
|
headingContext: Option<WasmHeadingContext>,
|
|
) -> WasmChunkMetadata {
|
|
WasmChunkMetadata {
|
|
byte_start: byteStart,
|
|
byte_end: byteEnd,
|
|
token_count: tokenCount,
|
|
chunk_index: chunkIndex,
|
|
total_chunks: totalChunks,
|
|
first_page: firstPage,
|
|
last_page: lastPage,
|
|
heading_context: headingContext,
|
|
image_indices: imageIndices,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmChunkMetadata {
|
|
<WasmChunkMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "byteStart")]
|
|
pub fn byte_start(&self) -> usize {
|
|
self.byte_start
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "byteStart")]
|
|
pub fn set_byte_start(&mut self, value: usize) {
|
|
self.byte_start = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "byteEnd")]
|
|
pub fn byte_end(&self) -> usize {
|
|
self.byte_end
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "byteEnd")]
|
|
pub fn set_byte_end(&mut self, value: usize) {
|
|
self.byte_end = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tokenCount")]
|
|
pub fn token_count(&self) -> Option<usize> {
|
|
self.token_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tokenCount")]
|
|
pub fn set_token_count(&mut self, value: Option<usize>) {
|
|
self.token_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "chunkIndex")]
|
|
pub fn chunk_index(&self) -> usize {
|
|
self.chunk_index
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "chunkIndex")]
|
|
pub fn set_chunk_index(&mut self, value: usize) {
|
|
self.chunk_index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalChunks")]
|
|
pub fn total_chunks(&self) -> usize {
|
|
self.total_chunks
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalChunks")]
|
|
pub fn set_total_chunks(&mut self, value: usize) {
|
|
self.total_chunks = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "firstPage")]
|
|
pub fn first_page(&self) -> Option<u32> {
|
|
self.first_page
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "firstPage")]
|
|
pub fn set_first_page(&mut self, value: Option<u32>) {
|
|
self.first_page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "lastPage")]
|
|
pub fn last_page(&self) -> Option<u32> {
|
|
self.last_page
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "lastPage")]
|
|
pub fn set_last_page(&mut self, value: Option<u32>) {
|
|
self.last_page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "headingContext")]
|
|
pub fn heading_context(&self) -> Option<WasmHeadingContext> {
|
|
self.heading_context.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "headingContext")]
|
|
pub fn set_heading_context(&mut self, value: Option<WasmHeadingContext>) {
|
|
self.heading_context = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageIndices")]
|
|
pub fn image_indices(&self) -> Vec<u32> {
|
|
self.image_indices.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageIndices")]
|
|
pub fn set_image_indices(&mut self, value: Vec<u32>) {
|
|
self.image_indices = value;
|
|
}
|
|
}
|
|
|
|
/// Extracted image from a document.
|
|
///
|
|
/// Contains raw image data, metadata, and optional nested OCR results.
|
|
/// Raw bytes allow cross-language compatibility - users can convert to
|
|
/// PIL.Image (Python), Sharp (Node.js), or other formats as needed.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExtractedImage {
|
|
data: Vec<u8>,
|
|
format: String,
|
|
image_index: u32,
|
|
page_number: Option<u32>,
|
|
width: Option<u32>,
|
|
height: Option<u32>,
|
|
colorspace: Option<String>,
|
|
bits_per_component: Option<u32>,
|
|
is_mask: bool,
|
|
description: Option<String>,
|
|
ocr_result: Option<WasmExtractionResult>,
|
|
bounding_box: Option<WasmBoundingBox>,
|
|
source_path: Option<String>,
|
|
image_kind: Option<WasmImageKind>,
|
|
kind_confidence: Option<f32>,
|
|
cluster_id: Option<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExtractedImage {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
data: Vec<u8>,
|
|
format: String,
|
|
imageIndex: u32,
|
|
isMask: bool,
|
|
pageNumber: Option<u32>,
|
|
width: Option<u32>,
|
|
height: Option<u32>,
|
|
colorspace: Option<String>,
|
|
bitsPerComponent: Option<u32>,
|
|
description: Option<String>,
|
|
ocrResult: Option<WasmExtractionResult>,
|
|
boundingBox: Option<WasmBoundingBox>,
|
|
sourcePath: Option<String>,
|
|
imageKind: Option<WasmImageKind>,
|
|
kindConfidence: Option<f32>,
|
|
clusterId: Option<u32>,
|
|
) -> WasmExtractedImage {
|
|
WasmExtractedImage {
|
|
data,
|
|
format,
|
|
image_index: imageIndex,
|
|
page_number: pageNumber,
|
|
width,
|
|
height,
|
|
colorspace,
|
|
bits_per_component: bitsPerComponent,
|
|
is_mask: isMask,
|
|
description,
|
|
ocr_result: ocrResult,
|
|
bounding_box: boundingBox,
|
|
source_path: sourcePath,
|
|
image_kind: imageKind,
|
|
kind_confidence: kindConfidence,
|
|
cluster_id: clusterId,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExtractedImage {
|
|
<WasmExtractedImage as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn data(&self) -> Vec<u8> {
|
|
self.data.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_data(&mut self, value: Vec<u8>) {
|
|
self.data = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> String {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: String) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageIndex")]
|
|
pub fn image_index(&self) -> u32 {
|
|
self.image_index
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageIndex")]
|
|
pub fn set_image_index(&mut self, value: u32) {
|
|
self.image_index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> Option<u32> {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: Option<u32>) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn width(&self) -> Option<u32> {
|
|
self.width
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_width(&mut self, value: Option<u32>) {
|
|
self.width = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn height(&self) -> Option<u32> {
|
|
self.height
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_height(&mut self, value: Option<u32>) {
|
|
self.height = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn colorspace(&self) -> Option<String> {
|
|
self.colorspace.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_colorspace(&mut self, value: Option<String>) {
|
|
self.colorspace = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "bitsPerComponent")]
|
|
pub fn bits_per_component(&self) -> Option<u32> {
|
|
self.bits_per_component
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "bitsPerComponent")]
|
|
pub fn set_bits_per_component(&mut self, value: Option<u32>) {
|
|
self.bits_per_component = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isMask")]
|
|
pub fn is_mask(&self) -> bool {
|
|
self.is_mask
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isMask")]
|
|
pub fn set_is_mask(&mut self, value: bool) {
|
|
self.is_mask = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn description(&self) -> Option<String> {
|
|
self.description.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_description(&mut self, value: Option<String>) {
|
|
self.description = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrResult")]
|
|
pub fn ocr_result(&self) -> Option<WasmExtractionResult> {
|
|
self.ocr_result.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrResult")]
|
|
pub fn set_ocr_result(&mut self, value: Option<WasmExtractionResult>) {
|
|
self.ocr_result = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "boundingBox")]
|
|
pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
|
|
self.bounding_box.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "boundingBox")]
|
|
pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.bounding_box = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sourcePath")]
|
|
pub fn source_path(&self) -> Option<String> {
|
|
self.source_path.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sourcePath")]
|
|
pub fn set_source_path(&mut self, value: Option<String>) {
|
|
self.source_path = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageKind")]
|
|
pub fn image_kind(&self) -> Option<String> {
|
|
self.image_kind.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageKind")]
|
|
pub fn set_image_kind(&mut self, value: Option<WasmImageKind>) {
|
|
self.image_kind = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "kindConfidence")]
|
|
pub fn kind_confidence(&self) -> Option<f32> {
|
|
self.kind_confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "kindConfidence")]
|
|
pub fn set_kind_confidence(&mut self, value: Option<f32>) {
|
|
self.kind_confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "clusterId")]
|
|
pub fn cluster_id(&self) -> Option<u32> {
|
|
self.cluster_id
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "clusterId")]
|
|
pub fn set_cluster_id(&mut self, value: Option<u32>) {
|
|
self.cluster_id = value;
|
|
}
|
|
}
|
|
|
|
/// Bounding box coordinates for element positioning.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmBoundingBox {
|
|
x0: f64,
|
|
y0: f64,
|
|
x1: f64,
|
|
y1: f64,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmBoundingBox {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(x0: Option<f64>, y0: Option<f64>, x1: Option<f64>, y1: Option<f64>) -> WasmBoundingBox {
|
|
WasmBoundingBox {
|
|
x0: x0.unwrap_or_default(),
|
|
y0: y0.unwrap_or_default(),
|
|
x1: x1.unwrap_or_default(),
|
|
y1: y1.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmBoundingBox {
|
|
<WasmBoundingBox as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn x0(&self) -> f64 {
|
|
self.x0
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_x0(&mut self, value: f64) {
|
|
self.x0 = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn y0(&self) -> f64 {
|
|
self.y0
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_y0(&mut self, value: f64) {
|
|
self.y0 = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn x1(&self) -> f64 {
|
|
self.x1
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_x1(&mut self, value: f64) {
|
|
self.x1 = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn y1(&self) -> f64 {
|
|
self.y1
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_y1(&mut self, value: f64) {
|
|
self.y1 = value;
|
|
}
|
|
}
|
|
|
|
/// Metadata for a semantic element.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmElementMetadata {
|
|
page_number: Option<u32>,
|
|
filename: Option<String>,
|
|
coordinates: Option<WasmBoundingBox>,
|
|
element_index: Option<usize>,
|
|
additional: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmElementMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
additional: JsValue,
|
|
pageNumber: Option<u32>,
|
|
filename: Option<String>,
|
|
coordinates: Option<WasmBoundingBox>,
|
|
elementIndex: Option<usize>,
|
|
) -> WasmElementMetadata {
|
|
WasmElementMetadata {
|
|
page_number: pageNumber,
|
|
filename,
|
|
coordinates,
|
|
element_index: elementIndex,
|
|
additional,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmElementMetadata {
|
|
<WasmElementMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> Option<u32> {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: Option<u32>) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn filename(&self) -> Option<String> {
|
|
self.filename.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_filename(&mut self, value: Option<String>) {
|
|
self.filename = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn coordinates(&self) -> Option<WasmBoundingBox> {
|
|
self.coordinates.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_coordinates(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.coordinates = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementIndex")]
|
|
pub fn element_index(&self) -> Option<usize> {
|
|
self.element_index
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementIndex")]
|
|
pub fn set_element_index(&mut self, value: Option<usize>) {
|
|
self.element_index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn additional(&self) -> JsValue {
|
|
self.additional.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_additional(&mut self, value: JsValue) {
|
|
self.additional = value;
|
|
}
|
|
}
|
|
|
|
/// Semantic element extracted from document.
|
|
///
|
|
/// Represents a logical unit of content with semantic classification,
|
|
/// unique identifier, and metadata for tracking origin and position.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmElement {
|
|
element_id: String,
|
|
element_type: WasmElementType,
|
|
text: String,
|
|
metadata: WasmElementMetadata,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmElement {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
elementId: String,
|
|
elementType: WasmElementType,
|
|
text: String,
|
|
metadata: WasmElementMetadata,
|
|
) -> WasmElement {
|
|
WasmElement {
|
|
element_id: elementId,
|
|
element_type: elementType,
|
|
text,
|
|
metadata,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmElement {
|
|
<WasmElement as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementId")]
|
|
pub fn element_id(&self) -> String {
|
|
self.element_id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementId")]
|
|
pub fn set_element_id(&mut self, value: String) {
|
|
self.element_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementType")]
|
|
pub fn element_type(&self) -> String {
|
|
self.element_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementType")]
|
|
pub fn set_element_type(&mut self, value: WasmElementType) {
|
|
self.element_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> WasmElementMetadata {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: WasmElementMetadata) {
|
|
self.metadata = value;
|
|
}
|
|
}
|
|
|
|
/// Excel workbook representation.
|
|
///
|
|
/// Contains all sheets from an Excel file (.xlsx, .xls, etc.) with
|
|
/// extracted content and metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExcelWorkbook {
|
|
sheets: Vec<WasmExcelSheet>,
|
|
metadata: JsValue,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExcelWorkbook {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
sheets: Vec<WasmExcelSheet>,
|
|
metadata: JsValue,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
) -> WasmExcelWorkbook {
|
|
WasmExcelWorkbook {
|
|
sheets,
|
|
metadata,
|
|
revisions,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExcelWorkbook {
|
|
<WasmExcelWorkbook as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn sheets(&self) -> Vec<WasmExcelSheet> {
|
|
self.sheets.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_sheets(&mut self, value: Vec<WasmExcelSheet>) {
|
|
self.sheets = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> JsValue {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: JsValue) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn revisions(&self) -> Option<js_sys::Array> {
|
|
self.revisions.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
|
|
self.revisions = value;
|
|
}
|
|
}
|
|
|
|
/// Single Excel worksheet.
|
|
///
|
|
/// Represents one sheet from an Excel workbook with its content
|
|
/// converted to Markdown format and dimensional statistics.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExcelSheet {
|
|
name: String,
|
|
markdown: String,
|
|
row_count: usize,
|
|
col_count: usize,
|
|
cell_count: usize,
|
|
table_cells: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExcelSheet {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
name: String,
|
|
markdown: String,
|
|
rowCount: usize,
|
|
colCount: usize,
|
|
cellCount: usize,
|
|
tableCells: Option<JsValue>,
|
|
) -> WasmExcelSheet {
|
|
WasmExcelSheet {
|
|
name,
|
|
markdown,
|
|
row_count: rowCount,
|
|
col_count: colCount,
|
|
cell_count: cellCount,
|
|
table_cells: tableCells,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExcelSheet {
|
|
<WasmExcelSheet as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn name(&self) -> String {
|
|
self.name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_name(&mut self, value: String) {
|
|
self.name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn markdown(&self) -> String {
|
|
self.markdown.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_markdown(&mut self, value: String) {
|
|
self.markdown = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "rowCount")]
|
|
pub fn row_count(&self) -> usize {
|
|
self.row_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "rowCount")]
|
|
pub fn set_row_count(&mut self, value: usize) {
|
|
self.row_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "colCount")]
|
|
pub fn col_count(&self) -> usize {
|
|
self.col_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "colCount")]
|
|
pub fn set_col_count(&mut self, value: usize) {
|
|
self.col_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cellCount")]
|
|
pub fn cell_count(&self) -> usize {
|
|
self.cell_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "cellCount")]
|
|
pub fn set_cell_count(&mut self, value: usize) {
|
|
self.cell_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCells")]
|
|
pub fn table_cells(&self) -> Option<JsValue> {
|
|
self.table_cells.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCells")]
|
|
pub fn set_table_cells(&mut self, value: Option<JsValue>) {
|
|
self.table_cells = value;
|
|
}
|
|
}
|
|
|
|
/// XML extraction result.
|
|
///
|
|
/// Contains extracted text content from XML files along with
|
|
/// structural statistics about the XML document.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmXmlExtractionResult {
|
|
content: String,
|
|
element_count: usize,
|
|
unique_elements: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmXmlExtractionResult {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(content: String, elementCount: usize, uniqueElements: Vec<String>) -> WasmXmlExtractionResult {
|
|
WasmXmlExtractionResult {
|
|
content,
|
|
element_count: elementCount,
|
|
unique_elements: uniqueElements,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmXmlExtractionResult {
|
|
<WasmXmlExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementCount")]
|
|
pub fn element_count(&self) -> usize {
|
|
self.element_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementCount")]
|
|
pub fn set_element_count(&mut self, value: usize) {
|
|
self.element_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "uniqueElements")]
|
|
pub fn unique_elements(&self) -> Vec<String> {
|
|
self.unique_elements.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "uniqueElements")]
|
|
pub fn set_unique_elements(&mut self, value: Vec<String>) {
|
|
self.unique_elements = value;
|
|
}
|
|
}
|
|
|
|
/// Plain text and Markdown extraction result.
|
|
///
|
|
/// Contains the extracted text along with statistics and,
|
|
/// for Markdown files, structural elements like headers and links.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTextExtractionResult {
|
|
content: String,
|
|
line_count: usize,
|
|
word_count: usize,
|
|
character_count: usize,
|
|
headers: Option<Vec<String>>,
|
|
links: Option<JsValue>,
|
|
code_blocks: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTextExtractionResult {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
lineCount: usize,
|
|
wordCount: usize,
|
|
characterCount: usize,
|
|
headers: Option<Vec<String>>,
|
|
links: Option<JsValue>,
|
|
codeBlocks: Option<JsValue>,
|
|
) -> WasmTextExtractionResult {
|
|
WasmTextExtractionResult {
|
|
content,
|
|
line_count: lineCount,
|
|
word_count: wordCount,
|
|
character_count: characterCount,
|
|
headers,
|
|
links,
|
|
code_blocks: codeBlocks,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTextExtractionResult {
|
|
<WasmTextExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "lineCount")]
|
|
pub fn line_count(&self) -> usize {
|
|
self.line_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "lineCount")]
|
|
pub fn set_line_count(&mut self, value: usize) {
|
|
self.line_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "wordCount")]
|
|
pub fn word_count(&self) -> usize {
|
|
self.word_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "wordCount")]
|
|
pub fn set_word_count(&mut self, value: usize) {
|
|
self.word_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "characterCount")]
|
|
pub fn character_count(&self) -> usize {
|
|
self.character_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "characterCount")]
|
|
pub fn set_character_count(&mut self, value: usize) {
|
|
self.character_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn headers(&self) -> Option<Vec<String>> {
|
|
self.headers.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_headers(&mut self, value: Option<Vec<String>>) {
|
|
self.headers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn links(&self) -> Option<JsValue> {
|
|
self.links.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_links(&mut self, value: Option<JsValue>) {
|
|
self.links = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "codeBlocks")]
|
|
pub fn code_blocks(&self) -> Option<JsValue> {
|
|
self.code_blocks.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "codeBlocks")]
|
|
pub fn set_code_blocks(&mut self, value: Option<JsValue>) {
|
|
self.code_blocks = value;
|
|
}
|
|
}
|
|
|
|
/// PowerPoint (PPTX) extraction result.
|
|
///
|
|
/// Contains extracted slide content, metadata, and embedded images/tables.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPptxExtractionResult {
|
|
content: String,
|
|
metadata: WasmPptxMetadata,
|
|
slide_count: usize,
|
|
image_count: usize,
|
|
table_count: usize,
|
|
images: Vec<WasmExtractedImage>,
|
|
page_structure: Option<WasmPageStructure>,
|
|
page_contents: Option<Vec<WasmPageContent>>,
|
|
document: Option<WasmDocumentStructure>,
|
|
hyperlinks: Vec<String>,
|
|
office_metadata: JsValue,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPptxExtractionResult {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
metadata: WasmPptxMetadata,
|
|
slideCount: usize,
|
|
imageCount: usize,
|
|
tableCount: usize,
|
|
images: Vec<WasmExtractedImage>,
|
|
hyperlinks: Vec<String>,
|
|
officeMetadata: JsValue,
|
|
pageStructure: Option<WasmPageStructure>,
|
|
pageContents: Option<Vec<WasmPageContent>>,
|
|
document: Option<WasmDocumentStructure>,
|
|
revisions: Option<Vec<WasmDocumentRevision>>,
|
|
) -> WasmPptxExtractionResult {
|
|
WasmPptxExtractionResult {
|
|
content,
|
|
metadata,
|
|
slide_count: slideCount,
|
|
image_count: imageCount,
|
|
table_count: tableCount,
|
|
images,
|
|
page_structure: pageStructure,
|
|
page_contents: pageContents,
|
|
document,
|
|
hyperlinks,
|
|
office_metadata: officeMetadata,
|
|
revisions,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPptxExtractionResult {
|
|
<WasmPptxExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> WasmPptxMetadata {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: WasmPptxMetadata) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "slideCount")]
|
|
pub fn slide_count(&self) -> usize {
|
|
self.slide_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "slideCount")]
|
|
pub fn set_slide_count(&mut self, value: usize) {
|
|
self.slide_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageCount")]
|
|
pub fn image_count(&self) -> usize {
|
|
self.image_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageCount")]
|
|
pub fn set_image_count(&mut self, value: usize) {
|
|
self.image_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCount")]
|
|
pub fn table_count(&self) -> usize {
|
|
self.table_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCount")]
|
|
pub fn set_table_count(&mut self, value: usize) {
|
|
self.table_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Vec<WasmExtractedImage> {
|
|
self.images.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Vec<WasmExtractedImage>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageStructure")]
|
|
pub fn page_structure(&self) -> Option<WasmPageStructure> {
|
|
self.page_structure.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageStructure")]
|
|
pub fn set_page_structure(&mut self, value: Option<WasmPageStructure>) {
|
|
self.page_structure = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageContents")]
|
|
pub fn page_contents(&self) -> Option<js_sys::Array> {
|
|
self.page_contents.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageContents")]
|
|
pub fn set_page_contents(&mut self, value: Option<Vec<WasmPageContent>>) {
|
|
self.page_contents = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn document(&self) -> Option<WasmDocumentStructure> {
|
|
self.document.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_document(&mut self, value: Option<WasmDocumentStructure>) {
|
|
self.document = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn hyperlinks(&self) -> Vec<String> {
|
|
self.hyperlinks.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_hyperlinks(&mut self, value: Vec<String>) {
|
|
self.hyperlinks = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "officeMetadata")]
|
|
pub fn office_metadata(&self) -> JsValue {
|
|
self.office_metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "officeMetadata")]
|
|
pub fn set_office_metadata(&mut self, value: JsValue) {
|
|
self.office_metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn revisions(&self) -> Option<js_sys::Array> {
|
|
self.revisions.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_revisions(&mut self, value: Option<Vec<WasmDocumentRevision>>) {
|
|
self.revisions = value;
|
|
}
|
|
}
|
|
|
|
/// Email extraction result.
|
|
///
|
|
/// Complete representation of an extracted email message (.eml or .msg)
|
|
/// including headers, body content, and attachments.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEmailExtractionResult {
|
|
subject: Option<String>,
|
|
from_email: Option<String>,
|
|
to_emails: Vec<String>,
|
|
cc_emails: Vec<String>,
|
|
bcc_emails: Vec<String>,
|
|
date: Option<String>,
|
|
message_id: Option<String>,
|
|
plain_text: Option<String>,
|
|
html_content: Option<String>,
|
|
content: String,
|
|
attachments: Vec<WasmEmailAttachment>,
|
|
metadata: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmailExtractionResult {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
toEmails: Vec<String>,
|
|
ccEmails: Vec<String>,
|
|
bccEmails: Vec<String>,
|
|
content: String,
|
|
attachments: Vec<WasmEmailAttachment>,
|
|
metadata: JsValue,
|
|
subject: Option<String>,
|
|
fromEmail: Option<String>,
|
|
date: Option<String>,
|
|
messageId: Option<String>,
|
|
plainText: Option<String>,
|
|
htmlContent: Option<String>,
|
|
) -> WasmEmailExtractionResult {
|
|
WasmEmailExtractionResult {
|
|
subject,
|
|
from_email: fromEmail,
|
|
to_emails: toEmails,
|
|
cc_emails: ccEmails,
|
|
bcc_emails: bccEmails,
|
|
date,
|
|
message_id: messageId,
|
|
plain_text: plainText,
|
|
html_content: htmlContent,
|
|
content,
|
|
attachments,
|
|
metadata,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEmailExtractionResult {
|
|
<WasmEmailExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn subject(&self) -> Option<String> {
|
|
self.subject.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_subject(&mut self, value: Option<String>) {
|
|
self.subject = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fromEmail")]
|
|
pub fn from_email(&self) -> Option<String> {
|
|
self.from_email.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fromEmail")]
|
|
pub fn set_from_email(&mut self, value: Option<String>) {
|
|
self.from_email = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "toEmails")]
|
|
pub fn to_emails(&self) -> Vec<String> {
|
|
self.to_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "toEmails")]
|
|
pub fn set_to_emails(&mut self, value: Vec<String>) {
|
|
self.to_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ccEmails")]
|
|
pub fn cc_emails(&self) -> Vec<String> {
|
|
self.cc_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ccEmails")]
|
|
pub fn set_cc_emails(&mut self, value: Vec<String>) {
|
|
self.cc_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "bccEmails")]
|
|
pub fn bcc_emails(&self) -> Vec<String> {
|
|
self.bcc_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "bccEmails")]
|
|
pub fn set_bcc_emails(&mut self, value: Vec<String>) {
|
|
self.bcc_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn date(&self) -> Option<String> {
|
|
self.date.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_date(&mut self, value: Option<String>) {
|
|
self.date = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "messageId")]
|
|
pub fn message_id(&self) -> Option<String> {
|
|
self.message_id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "messageId")]
|
|
pub fn set_message_id(&mut self, value: Option<String>) {
|
|
self.message_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "plainText")]
|
|
pub fn plain_text(&self) -> Option<String> {
|
|
self.plain_text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "plainText")]
|
|
pub fn set_plain_text(&mut self, value: Option<String>) {
|
|
self.plain_text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "htmlContent")]
|
|
pub fn html_content(&self) -> Option<String> {
|
|
self.html_content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "htmlContent")]
|
|
pub fn set_html_content(&mut self, value: Option<String>) {
|
|
self.html_content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attachments(&self) -> Vec<WasmEmailAttachment> {
|
|
self.attachments.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attachments(&mut self, value: Vec<WasmEmailAttachment>) {
|
|
self.attachments = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> JsValue {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: JsValue) {
|
|
self.metadata = value;
|
|
}
|
|
}
|
|
|
|
/// Email attachment representation.
|
|
///
|
|
/// Contains metadata and optionally the content of an email attachment.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEmailAttachment {
|
|
name: Option<String>,
|
|
filename: Option<String>,
|
|
mime_type: Option<String>,
|
|
size: Option<usize>,
|
|
is_image: bool,
|
|
data: Option<Vec<u8>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmailAttachment {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
isImage: bool,
|
|
name: Option<String>,
|
|
filename: Option<String>,
|
|
mimeType: Option<String>,
|
|
size: Option<usize>,
|
|
data: Option<Vec<u8>>,
|
|
) -> WasmEmailAttachment {
|
|
WasmEmailAttachment {
|
|
name,
|
|
filename,
|
|
mime_type: mimeType,
|
|
size,
|
|
is_image: isImage,
|
|
data,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEmailAttachment {
|
|
<WasmEmailAttachment as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn name(&self) -> Option<String> {
|
|
self.name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_name(&mut self, value: Option<String>) {
|
|
self.name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn filename(&self) -> Option<String> {
|
|
self.filename.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_filename(&mut self, value: Option<String>) {
|
|
self.filename = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> Option<String> {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: Option<String>) {
|
|
self.mime_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn size(&self) -> Option<usize> {
|
|
self.size
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_size(&mut self, value: Option<usize>) {
|
|
self.size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isImage")]
|
|
pub fn is_image(&self) -> bool {
|
|
self.is_image
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isImage")]
|
|
pub fn set_is_image(&mut self, value: bool) {
|
|
self.is_image = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn data(&self) -> Option<Vec<u8>> {
|
|
self.data.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_data(&mut self, value: Option<Vec<u8>>) {
|
|
self.data = value;
|
|
}
|
|
}
|
|
|
|
/// OCR extraction result.
|
|
///
|
|
/// Result of performing OCR on an image or scanned document,
|
|
/// including recognized text and detected tables.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrExtractionResult {
|
|
content: String,
|
|
mime_type: String,
|
|
metadata: JsValue,
|
|
tables: Vec<WasmOcrTable>,
|
|
ocr_elements: Option<Vec<WasmOcrElement>>,
|
|
internal_document: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrExtractionResult {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: String,
|
|
mimeType: String,
|
|
metadata: JsValue,
|
|
tables: Vec<WasmOcrTable>,
|
|
ocrElements: Option<Vec<WasmOcrElement>>,
|
|
internalDocument: Option<String>,
|
|
) -> WasmOcrExtractionResult {
|
|
WasmOcrExtractionResult {
|
|
content,
|
|
mime_type: mimeType,
|
|
metadata,
|
|
tables,
|
|
ocr_elements: ocrElements,
|
|
internal_document: internalDocument,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrExtractionResult {
|
|
<WasmOcrExtractionResult as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "mimeType")]
|
|
pub fn mime_type(&self) -> String {
|
|
self.mime_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "mimeType")]
|
|
pub fn set_mime_type(&mut self, value: String) {
|
|
self.mime_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn metadata(&self) -> JsValue {
|
|
self.metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_metadata(&mut self, value: JsValue) {
|
|
self.metadata = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn tables(&self) -> Vec<WasmOcrTable> {
|
|
self.tables.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_tables(&mut self, value: Vec<WasmOcrTable>) {
|
|
self.tables = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrElements")]
|
|
pub fn ocr_elements(&self) -> Option<js_sys::Array> {
|
|
self.ocr_elements.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrElements")]
|
|
pub fn set_ocr_elements(&mut self, value: Option<Vec<WasmOcrElement>>) {
|
|
self.ocr_elements = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "internalDocument")]
|
|
pub fn internal_document(&self) -> Option<String> {
|
|
self.internal_document.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "internalDocument")]
|
|
pub fn set_internal_document(&mut self, value: Option<String>) {
|
|
self.internal_document = value;
|
|
}
|
|
}
|
|
|
|
/// Table detected via OCR.
|
|
///
|
|
/// Represents a table structure recognized during OCR processing.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrTable {
|
|
cells: JsValue,
|
|
markdown: String,
|
|
page_number: u32,
|
|
bounding_box: Option<WasmOcrTableBoundingBox>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrTable {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
cells: JsValue,
|
|
markdown: String,
|
|
pageNumber: u32,
|
|
boundingBox: Option<WasmOcrTableBoundingBox>,
|
|
) -> WasmOcrTable {
|
|
WasmOcrTable {
|
|
cells,
|
|
markdown,
|
|
page_number: pageNumber,
|
|
bounding_box: boundingBox,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrTable {
|
|
<WasmOcrTable as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn cells(&self) -> JsValue {
|
|
self.cells.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_cells(&mut self, value: JsValue) {
|
|
self.cells = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn markdown(&self) -> String {
|
|
self.markdown.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_markdown(&mut self, value: String) {
|
|
self.markdown = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "boundingBox")]
|
|
pub fn bounding_box(&self) -> Option<WasmOcrTableBoundingBox> {
|
|
self.bounding_box.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "boundingBox")]
|
|
pub fn set_bounding_box(&mut self, value: Option<WasmOcrTableBoundingBox>) {
|
|
self.bounding_box = value;
|
|
}
|
|
}
|
|
|
|
/// Bounding box for an OCR-detected table in pixel coordinates.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrTableBoundingBox {
|
|
left: u32,
|
|
top: u32,
|
|
right: u32,
|
|
bottom: u32,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrTableBoundingBox {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(left: u32, top: u32, right: u32, bottom: u32) -> WasmOcrTableBoundingBox {
|
|
WasmOcrTableBoundingBox {
|
|
left,
|
|
top,
|
|
right,
|
|
bottom,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrTableBoundingBox {
|
|
<WasmOcrTableBoundingBox as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn left(&self) -> u32 {
|
|
self.left
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_left(&mut self, value: u32) {
|
|
self.left = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn top(&self) -> u32 {
|
|
self.top
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_top(&mut self, value: u32) {
|
|
self.top = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn right(&self) -> u32 {
|
|
self.right
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_right(&mut self, value: u32) {
|
|
self.right = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn bottom(&self) -> u32 {
|
|
self.bottom
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_bottom(&mut self, value: u32) {
|
|
self.bottom = value;
|
|
}
|
|
}
|
|
|
|
/// Image preprocessing configuration for OCR.
|
|
///
|
|
/// These settings control how images are preprocessed before OCR to improve
|
|
/// text recognition quality. Different preprocessing strategies work better
|
|
/// for different document types.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmImagePreprocessingConfig {
|
|
target_dpi: i32,
|
|
auto_rotate: bool,
|
|
deskew: bool,
|
|
denoise: bool,
|
|
contrast_enhance: bool,
|
|
binarization_method: String,
|
|
invert_colors: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmImagePreprocessingConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
targetDpi: Option<i32>,
|
|
autoRotate: Option<bool>,
|
|
deskew: Option<bool>,
|
|
denoise: Option<bool>,
|
|
contrastEnhance: Option<bool>,
|
|
binarizationMethod: Option<String>,
|
|
invertColors: Option<bool>,
|
|
) -> WasmImagePreprocessingConfig {
|
|
WasmImagePreprocessingConfig {
|
|
target_dpi: targetDpi.unwrap_or(300),
|
|
auto_rotate: autoRotate.unwrap_or(true),
|
|
deskew: deskew.unwrap_or(true),
|
|
denoise: denoise.unwrap_or(false),
|
|
contrast_enhance: contrastEnhance.unwrap_or(false),
|
|
binarization_method: binarizationMethod.unwrap_or_else(|| "otsu".to_string()),
|
|
invert_colors: invertColors.unwrap_or(false),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "targetDpi")]
|
|
pub fn target_dpi(&self) -> i32 {
|
|
self.target_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "targetDpi")]
|
|
pub fn set_target_dpi(&mut self, value: i32) {
|
|
self.target_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "autoRotate")]
|
|
pub fn auto_rotate(&self) -> bool {
|
|
self.auto_rotate
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "autoRotate")]
|
|
pub fn set_auto_rotate(&mut self, value: bool) {
|
|
self.auto_rotate = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn deskew(&self) -> bool {
|
|
self.deskew
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_deskew(&mut self, value: bool) {
|
|
self.deskew = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn denoise(&self) -> bool {
|
|
self.denoise
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_denoise(&mut self, value: bool) {
|
|
self.denoise = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "contrastEnhance")]
|
|
pub fn contrast_enhance(&self) -> bool {
|
|
self.contrast_enhance
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "contrastEnhance")]
|
|
pub fn set_contrast_enhance(&mut self, value: bool) {
|
|
self.contrast_enhance = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "binarizationMethod")]
|
|
pub fn binarization_method(&self) -> String {
|
|
self.binarization_method.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "binarizationMethod")]
|
|
pub fn set_binarization_method(&mut self, value: String) {
|
|
self.binarization_method = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "invertColors")]
|
|
pub fn invert_colors(&self) -> bool {
|
|
self.invert_colors
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "invertColors")]
|
|
pub fn set_invert_colors(&mut self, value: bool) {
|
|
self.invert_colors = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmImagePreprocessingConfig {
|
|
kreuzberg::ImagePreprocessingConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Tesseract OCR configuration.
|
|
///
|
|
/// Provides fine-grained control over Tesseract OCR engine parameters.
|
|
/// Most users can use the defaults, but these settings allow optimization
|
|
/// for specific document types (invoices, handwriting, etc.).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTesseractConfig {
|
|
language: String,
|
|
psm: i32,
|
|
output_format: String,
|
|
oem: i32,
|
|
min_confidence: f64,
|
|
preprocessing: Option<WasmImagePreprocessingConfig>,
|
|
enable_table_detection: bool,
|
|
table_min_confidence: f64,
|
|
table_column_threshold: i32,
|
|
table_row_threshold_ratio: f64,
|
|
use_cache: bool,
|
|
classify_use_pre_adapted_templates: bool,
|
|
language_model_ngram_on: bool,
|
|
tessedit_dont_blkrej_good_wds: bool,
|
|
tessedit_dont_rowrej_good_wds: bool,
|
|
tessedit_enable_dict_correction: bool,
|
|
tessedit_char_whitelist: String,
|
|
tessedit_char_blacklist: String,
|
|
tessedit_use_primary_params_model: bool,
|
|
textord_space_size_is_variable: bool,
|
|
thresholding_method: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTesseractConfig {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
language: Option<String>,
|
|
psm: Option<i32>,
|
|
outputFormat: Option<String>,
|
|
oem: Option<i32>,
|
|
minConfidence: Option<f64>,
|
|
enableTableDetection: Option<bool>,
|
|
tableMinConfidence: Option<f64>,
|
|
tableColumnThreshold: Option<i32>,
|
|
tableRowThresholdRatio: Option<f64>,
|
|
useCache: Option<bool>,
|
|
classifyUsePreAdaptedTemplates: Option<bool>,
|
|
languageModelNgramOn: Option<bool>,
|
|
tesseditDontBlkrejGoodWds: Option<bool>,
|
|
tesseditDontRowrejGoodWds: Option<bool>,
|
|
tesseditEnableDictCorrection: Option<bool>,
|
|
tesseditCharWhitelist: Option<String>,
|
|
tesseditCharBlacklist: Option<String>,
|
|
tesseditUsePrimaryParamsModel: Option<bool>,
|
|
textordSpaceSizeIsVariable: Option<bool>,
|
|
thresholdingMethod: Option<bool>,
|
|
preprocessing: Option<WasmImagePreprocessingConfig>,
|
|
) -> WasmTesseractConfig {
|
|
WasmTesseractConfig {
|
|
language: language.unwrap_or_else(|| "eng".to_string()),
|
|
psm: psm.unwrap_or(3),
|
|
output_format: outputFormat.unwrap_or_else(|| "markdown".to_string()),
|
|
oem: oem.unwrap_or(3),
|
|
min_confidence: minConfidence.unwrap_or(0.0),
|
|
preprocessing,
|
|
enable_table_detection: enableTableDetection.unwrap_or(true),
|
|
table_min_confidence: tableMinConfidence.unwrap_or(0.0),
|
|
table_column_threshold: tableColumnThreshold.unwrap_or(50),
|
|
table_row_threshold_ratio: tableRowThresholdRatio.unwrap_or(0.5),
|
|
use_cache: useCache.unwrap_or(true),
|
|
classify_use_pre_adapted_templates: classifyUsePreAdaptedTemplates.unwrap_or(true),
|
|
language_model_ngram_on: languageModelNgramOn.unwrap_or(false),
|
|
tessedit_dont_blkrej_good_wds: tesseditDontBlkrejGoodWds.unwrap_or(true),
|
|
tessedit_dont_rowrej_good_wds: tesseditDontRowrejGoodWds.unwrap_or(true),
|
|
tessedit_enable_dict_correction: tesseditEnableDictCorrection.unwrap_or(true),
|
|
tessedit_char_whitelist: tesseditCharWhitelist.unwrap_or_else(|| "".to_string()),
|
|
tessedit_char_blacklist: tesseditCharBlacklist.unwrap_or_else(|| "".to_string()),
|
|
tessedit_use_primary_params_model: tesseditUsePrimaryParamsModel.unwrap_or(true),
|
|
textord_space_size_is_variable: textordSpaceSizeIsVariable.unwrap_or(true),
|
|
thresholding_method: thresholdingMethod.unwrap_or(false),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> String {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: String) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn psm(&self) -> i32 {
|
|
self.psm
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_psm(&mut self, value: i32) {
|
|
self.psm = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> String {
|
|
self.output_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: String) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn oem(&self) -> i32 {
|
|
self.oem
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_oem(&mut self, value: i32) {
|
|
self.oem = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minConfidence")]
|
|
pub fn min_confidence(&self) -> f64 {
|
|
self.min_confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minConfidence")]
|
|
pub fn set_min_confidence(&mut self, value: f64) {
|
|
self.min_confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn preprocessing(&self) -> Option<WasmImagePreprocessingConfig> {
|
|
self.preprocessing.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_preprocessing(&mut self, value: Option<WasmImagePreprocessingConfig>) {
|
|
self.preprocessing = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "enableTableDetection")]
|
|
pub fn enable_table_detection(&self) -> bool {
|
|
self.enable_table_detection
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "enableTableDetection")]
|
|
pub fn set_enable_table_detection(&mut self, value: bool) {
|
|
self.enable_table_detection = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableMinConfidence")]
|
|
pub fn table_min_confidence(&self) -> f64 {
|
|
self.table_min_confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableMinConfidence")]
|
|
pub fn set_table_min_confidence(&mut self, value: f64) {
|
|
self.table_min_confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableColumnThreshold")]
|
|
pub fn table_column_threshold(&self) -> i32 {
|
|
self.table_column_threshold
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableColumnThreshold")]
|
|
pub fn set_table_column_threshold(&mut self, value: i32) {
|
|
self.table_column_threshold = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableRowThresholdRatio")]
|
|
pub fn table_row_threshold_ratio(&self) -> f64 {
|
|
self.table_row_threshold_ratio
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableRowThresholdRatio")]
|
|
pub fn set_table_row_threshold_ratio(&mut self, value: f64) {
|
|
self.table_row_threshold_ratio = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "useCache")]
|
|
pub fn use_cache(&self) -> bool {
|
|
self.use_cache
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "useCache")]
|
|
pub fn set_use_cache(&mut self, value: bool) {
|
|
self.use_cache = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "classifyUsePreAdaptedTemplates")]
|
|
pub fn classify_use_pre_adapted_templates(&self) -> bool {
|
|
self.classify_use_pre_adapted_templates
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "classifyUsePreAdaptedTemplates")]
|
|
pub fn set_classify_use_pre_adapted_templates(&mut self, value: bool) {
|
|
self.classify_use_pre_adapted_templates = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "languageModelNgramOn")]
|
|
pub fn language_model_ngram_on(&self) -> bool {
|
|
self.language_model_ngram_on
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "languageModelNgramOn")]
|
|
pub fn set_language_model_ngram_on(&mut self, value: bool) {
|
|
self.language_model_ngram_on = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditDontBlkrejGoodWds")]
|
|
pub fn tessedit_dont_blkrej_good_wds(&self) -> bool {
|
|
self.tessedit_dont_blkrej_good_wds
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditDontBlkrejGoodWds")]
|
|
pub fn set_tessedit_dont_blkrej_good_wds(&mut self, value: bool) {
|
|
self.tessedit_dont_blkrej_good_wds = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditDontRowrejGoodWds")]
|
|
pub fn tessedit_dont_rowrej_good_wds(&self) -> bool {
|
|
self.tessedit_dont_rowrej_good_wds
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditDontRowrejGoodWds")]
|
|
pub fn set_tessedit_dont_rowrej_good_wds(&mut self, value: bool) {
|
|
self.tessedit_dont_rowrej_good_wds = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditEnableDictCorrection")]
|
|
pub fn tessedit_enable_dict_correction(&self) -> bool {
|
|
self.tessedit_enable_dict_correction
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditEnableDictCorrection")]
|
|
pub fn set_tessedit_enable_dict_correction(&mut self, value: bool) {
|
|
self.tessedit_enable_dict_correction = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditCharWhitelist")]
|
|
pub fn tessedit_char_whitelist(&self) -> String {
|
|
self.tessedit_char_whitelist.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditCharWhitelist")]
|
|
pub fn set_tessedit_char_whitelist(&mut self, value: String) {
|
|
self.tessedit_char_whitelist = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditCharBlacklist")]
|
|
pub fn tessedit_char_blacklist(&self) -> String {
|
|
self.tessedit_char_blacklist.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditCharBlacklist")]
|
|
pub fn set_tessedit_char_blacklist(&mut self, value: String) {
|
|
self.tessedit_char_blacklist = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tesseditUsePrimaryParamsModel")]
|
|
pub fn tessedit_use_primary_params_model(&self) -> bool {
|
|
self.tessedit_use_primary_params_model
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tesseditUsePrimaryParamsModel")]
|
|
pub fn set_tessedit_use_primary_params_model(&mut self, value: bool) {
|
|
self.tessedit_use_primary_params_model = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "textordSpaceSizeIsVariable")]
|
|
pub fn textord_space_size_is_variable(&self) -> bool {
|
|
self.textord_space_size_is_variable
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "textordSpaceSizeIsVariable")]
|
|
pub fn set_textord_space_size_is_variable(&mut self, value: bool) {
|
|
self.textord_space_size_is_variable = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "thresholdingMethod")]
|
|
pub fn thresholding_method(&self) -> bool {
|
|
self.thresholding_method
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "thresholdingMethod")]
|
|
pub fn set_thresholding_method(&mut self, value: bool) {
|
|
self.thresholding_method = value;
|
|
}
|
|
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[wasm_bindgen]
|
|
pub fn default() -> WasmTesseractConfig {
|
|
kreuzberg::TesseractConfig::default().into()
|
|
}
|
|
}
|
|
|
|
/// Image preprocessing metadata.
|
|
///
|
|
/// Tracks the transformations applied to an image during OCR preprocessing,
|
|
/// including DPI normalization, resizing, and resampling.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmImagePreprocessingMetadata {
|
|
original_dimensions: Vec<usize>,
|
|
original_dpi: Vec<f64>,
|
|
target_dpi: i32,
|
|
scale_factor: f64,
|
|
auto_adjusted: bool,
|
|
final_dpi: i32,
|
|
new_dimensions: Option<Vec<usize>>,
|
|
resample_method: String,
|
|
dimension_clamped: bool,
|
|
calculated_dpi: Option<i32>,
|
|
skipped_resize: bool,
|
|
resize_error: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmImagePreprocessingMetadata {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
originalDimensions: Vec<usize>,
|
|
originalDpi: Vec<f64>,
|
|
targetDpi: i32,
|
|
scaleFactor: f64,
|
|
autoAdjusted: bool,
|
|
finalDpi: i32,
|
|
resampleMethod: String,
|
|
dimensionClamped: bool,
|
|
skippedResize: bool,
|
|
newDimensions: Option<Vec<usize>>,
|
|
calculatedDpi: Option<i32>,
|
|
resizeError: Option<String>,
|
|
) -> WasmImagePreprocessingMetadata {
|
|
WasmImagePreprocessingMetadata {
|
|
original_dimensions: originalDimensions,
|
|
original_dpi: originalDpi,
|
|
target_dpi: targetDpi,
|
|
scale_factor: scaleFactor,
|
|
auto_adjusted: autoAdjusted,
|
|
final_dpi: finalDpi,
|
|
new_dimensions: newDimensions,
|
|
resample_method: resampleMethod,
|
|
dimension_clamped: dimensionClamped,
|
|
calculated_dpi: calculatedDpi,
|
|
skipped_resize: skippedResize,
|
|
resize_error: resizeError,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmImagePreprocessingMetadata {
|
|
<WasmImagePreprocessingMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "originalDimensions")]
|
|
pub fn original_dimensions(&self) -> Vec<usize> {
|
|
self.original_dimensions.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "originalDimensions")]
|
|
pub fn set_original_dimensions(&mut self, value: Vec<usize>) {
|
|
self.original_dimensions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "originalDpi")]
|
|
pub fn original_dpi(&self) -> Vec<f64> {
|
|
self.original_dpi.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "originalDpi")]
|
|
pub fn set_original_dpi(&mut self, value: Vec<f64>) {
|
|
self.original_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "targetDpi")]
|
|
pub fn target_dpi(&self) -> i32 {
|
|
self.target_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "targetDpi")]
|
|
pub fn set_target_dpi(&mut self, value: i32) {
|
|
self.target_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "scaleFactor")]
|
|
pub fn scale_factor(&self) -> f64 {
|
|
self.scale_factor
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "scaleFactor")]
|
|
pub fn set_scale_factor(&mut self, value: f64) {
|
|
self.scale_factor = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "autoAdjusted")]
|
|
pub fn auto_adjusted(&self) -> bool {
|
|
self.auto_adjusted
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "autoAdjusted")]
|
|
pub fn set_auto_adjusted(&mut self, value: bool) {
|
|
self.auto_adjusted = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "finalDpi")]
|
|
pub fn final_dpi(&self) -> i32 {
|
|
self.final_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "finalDpi")]
|
|
pub fn set_final_dpi(&mut self, value: i32) {
|
|
self.final_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "newDimensions")]
|
|
pub fn new_dimensions(&self) -> Option<Vec<usize>> {
|
|
self.new_dimensions.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "newDimensions")]
|
|
pub fn set_new_dimensions(&mut self, value: Option<Vec<usize>>) {
|
|
self.new_dimensions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "resampleMethod")]
|
|
pub fn resample_method(&self) -> String {
|
|
self.resample_method.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "resampleMethod")]
|
|
pub fn set_resample_method(&mut self, value: String) {
|
|
self.resample_method = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "dimensionClamped")]
|
|
pub fn dimension_clamped(&self) -> bool {
|
|
self.dimension_clamped
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "dimensionClamped")]
|
|
pub fn set_dimension_clamped(&mut self, value: bool) {
|
|
self.dimension_clamped = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "calculatedDpi")]
|
|
pub fn calculated_dpi(&self) -> Option<i32> {
|
|
self.calculated_dpi
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "calculatedDpi")]
|
|
pub fn set_calculated_dpi(&mut self, value: Option<i32>) {
|
|
self.calculated_dpi = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "skippedResize")]
|
|
pub fn skipped_resize(&self) -> bool {
|
|
self.skipped_resize
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "skippedResize")]
|
|
pub fn set_skipped_resize(&mut self, value: bool) {
|
|
self.skipped_resize = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "resizeError")]
|
|
pub fn resize_error(&self) -> Option<String> {
|
|
self.resize_error.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "resizeError")]
|
|
pub fn set_resize_error(&mut self, value: Option<String>) {
|
|
self.resize_error = value;
|
|
}
|
|
}
|
|
|
|
/// Extraction result metadata.
|
|
///
|
|
/// Contains common fields applicable to all formats, format-specific metadata
|
|
/// via a discriminated union, and additional custom fields from postprocessors.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmMetadata {
|
|
title: Option<String>,
|
|
subject: Option<String>,
|
|
authors: Option<Vec<String>>,
|
|
keywords: Option<Vec<String>>,
|
|
language: Option<String>,
|
|
created_at: Option<String>,
|
|
modified_at: Option<String>,
|
|
created_by: Option<String>,
|
|
modified_by: Option<String>,
|
|
pages: Option<WasmPageStructure>,
|
|
format: Option<JsValue>,
|
|
image_preprocessing: Option<WasmImagePreprocessingMetadata>,
|
|
json_schema: Option<JsValue>,
|
|
error: Option<WasmErrorMetadata>,
|
|
extraction_duration_ms: Option<u64>,
|
|
category: Option<String>,
|
|
tags: Option<Vec<String>>,
|
|
document_version: Option<String>,
|
|
abstract_text: Option<String>,
|
|
output_format: Option<String>,
|
|
ocr_used: bool,
|
|
additional: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmMetadata {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
ocrUsed: Option<bool>,
|
|
additional: Option<JsValue>,
|
|
title: Option<String>,
|
|
subject: Option<String>,
|
|
authors: Option<Vec<String>>,
|
|
keywords: Option<Vec<String>>,
|
|
language: Option<String>,
|
|
createdAt: Option<String>,
|
|
modifiedAt: Option<String>,
|
|
createdBy: Option<String>,
|
|
modifiedBy: Option<String>,
|
|
pages: Option<WasmPageStructure>,
|
|
format: Option<JsValue>,
|
|
imagePreprocessing: Option<WasmImagePreprocessingMetadata>,
|
|
jsonSchema: Option<JsValue>,
|
|
error: Option<WasmErrorMetadata>,
|
|
extractionDurationMs: Option<u64>,
|
|
category: Option<String>,
|
|
tags: Option<Vec<String>>,
|
|
documentVersion: Option<String>,
|
|
abstractText: Option<String>,
|
|
outputFormat: Option<String>,
|
|
) -> WasmMetadata {
|
|
WasmMetadata {
|
|
title,
|
|
subject,
|
|
authors,
|
|
keywords,
|
|
language,
|
|
created_at: createdAt,
|
|
modified_at: modifiedAt,
|
|
created_by: createdBy,
|
|
modified_by: modifiedBy,
|
|
pages,
|
|
format,
|
|
image_preprocessing: imagePreprocessing,
|
|
json_schema: jsonSchema,
|
|
error,
|
|
extraction_duration_ms: extractionDurationMs,
|
|
category,
|
|
tags,
|
|
document_version: documentVersion,
|
|
abstract_text: abstractText,
|
|
output_format: outputFormat,
|
|
ocr_used: ocrUsed.unwrap_or_default(),
|
|
additional: additional.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmMetadata {
|
|
<WasmMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn subject(&self) -> Option<String> {
|
|
self.subject.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_subject(&mut self, value: Option<String>) {
|
|
self.subject = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn authors(&self) -> Option<Vec<String>> {
|
|
self.authors.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_authors(&mut self, value: Option<Vec<String>>) {
|
|
self.authors = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn keywords(&self) -> Option<Vec<String>> {
|
|
self.keywords.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_keywords(&mut self, value: Option<Vec<String>>) {
|
|
self.keywords = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> Option<String> {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: Option<String>) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "createdAt")]
|
|
pub fn created_at(&self) -> Option<String> {
|
|
self.created_at.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "createdAt")]
|
|
pub fn set_created_at(&mut self, value: Option<String>) {
|
|
self.created_at = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "modifiedAt")]
|
|
pub fn modified_at(&self) -> Option<String> {
|
|
self.modified_at.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "modifiedAt")]
|
|
pub fn set_modified_at(&mut self, value: Option<String>) {
|
|
self.modified_at = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "createdBy")]
|
|
pub fn created_by(&self) -> Option<String> {
|
|
self.created_by.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "createdBy")]
|
|
pub fn set_created_by(&mut self, value: Option<String>) {
|
|
self.created_by = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "modifiedBy")]
|
|
pub fn modified_by(&self) -> Option<String> {
|
|
self.modified_by.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "modifiedBy")]
|
|
pub fn set_modified_by(&mut self, value: Option<String>) {
|
|
self.modified_by = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pages(&self) -> Option<WasmPageStructure> {
|
|
self.pages.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pages(&mut self, value: Option<WasmPageStructure>) {
|
|
self.pages = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> Option<JsValue> {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: Option<JsValue>) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imagePreprocessing")]
|
|
pub fn image_preprocessing(&self) -> Option<WasmImagePreprocessingMetadata> {
|
|
self.image_preprocessing.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imagePreprocessing")]
|
|
pub fn set_image_preprocessing(&mut self, value: Option<WasmImagePreprocessingMetadata>) {
|
|
self.image_preprocessing = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "jsonSchema")]
|
|
pub fn json_schema(&self) -> Option<JsValue> {
|
|
self.json_schema.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "jsonSchema")]
|
|
pub fn set_json_schema(&mut self, value: Option<JsValue>) {
|
|
self.json_schema = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn error(&self) -> Option<WasmErrorMetadata> {
|
|
self.error.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_error(&mut self, value: Option<WasmErrorMetadata>) {
|
|
self.error = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "extractionDurationMs")]
|
|
pub fn extraction_duration_ms(&self) -> Option<u64> {
|
|
self.extraction_duration_ms
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "extractionDurationMs")]
|
|
pub fn set_extraction_duration_ms(&mut self, value: Option<u64>) {
|
|
self.extraction_duration_ms = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn category(&self) -> Option<String> {
|
|
self.category.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_category(&mut self, value: Option<String>) {
|
|
self.category = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn tags(&self) -> Option<Vec<String>> {
|
|
self.tags.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_tags(&mut self, value: Option<Vec<String>>) {
|
|
self.tags = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "documentVersion")]
|
|
pub fn document_version(&self) -> Option<String> {
|
|
self.document_version.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "documentVersion")]
|
|
pub fn set_document_version(&mut self, value: Option<String>) {
|
|
self.document_version = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "abstractText")]
|
|
pub fn abstract_text(&self) -> Option<String> {
|
|
self.abstract_text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "abstractText")]
|
|
pub fn set_abstract_text(&mut self, value: Option<String>) {
|
|
self.abstract_text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> Option<String> {
|
|
self.output_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: Option<String>) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ocrUsed")]
|
|
pub fn ocr_used(&self) -> bool {
|
|
self.ocr_used
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ocrUsed")]
|
|
pub fn set_ocr_used(&mut self, value: bool) {
|
|
self.ocr_used = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn additional(&self) -> JsValue {
|
|
self.additional.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_additional(&mut self, value: JsValue) {
|
|
self.additional = value;
|
|
}
|
|
|
|
/// Returns `true` when no metadata fields, format-specific metadata, or
|
|
/// additional postprocessor fields are populated.
|
|
#[wasm_bindgen(js_name = "isEmpty")]
|
|
pub fn is_empty(&self) -> bool {
|
|
kreuzberg::Metadata::from(self.clone()).is_empty()
|
|
}
|
|
}
|
|
|
|
/// Excel/spreadsheet format metadata.
|
|
///
|
|
/// Identifies the document as a spreadsheet source via the `FormatMetadata.Excel`
|
|
/// discriminant. Sheet count and sheet names are stored inside this struct.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExcelMetadata {
|
|
sheet_count: Option<u32>,
|
|
sheet_names: Option<Vec<String>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExcelMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(sheetCount: Option<u32>, sheetNames: Option<Vec<String>>) -> WasmExcelMetadata {
|
|
WasmExcelMetadata {
|
|
sheet_count: sheetCount,
|
|
sheet_names: sheetNames,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExcelMetadata {
|
|
<WasmExcelMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sheetCount")]
|
|
pub fn sheet_count(&self) -> Option<u32> {
|
|
self.sheet_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sheetCount")]
|
|
pub fn set_sheet_count(&mut self, value: Option<u32>) {
|
|
self.sheet_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sheetNames")]
|
|
pub fn sheet_names(&self) -> Option<Vec<String>> {
|
|
self.sheet_names.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sheetNames")]
|
|
pub fn set_sheet_names(&mut self, value: Option<Vec<String>>) {
|
|
self.sheet_names = value;
|
|
}
|
|
}
|
|
|
|
/// Email metadata extracted from .eml and .msg files.
|
|
///
|
|
/// Includes sender/recipient information, message ID, and attachment list.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEmailMetadata {
|
|
from_email: Option<String>,
|
|
from_name: Option<String>,
|
|
to_emails: Vec<String>,
|
|
cc_emails: Vec<String>,
|
|
bcc_emails: Vec<String>,
|
|
message_id: Option<String>,
|
|
attachments: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmailMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
toEmails: Option<Vec<String>>,
|
|
ccEmails: Option<Vec<String>>,
|
|
bccEmails: Option<Vec<String>>,
|
|
attachments: Option<Vec<String>>,
|
|
fromEmail: Option<String>,
|
|
fromName: Option<String>,
|
|
messageId: Option<String>,
|
|
) -> WasmEmailMetadata {
|
|
WasmEmailMetadata {
|
|
from_email: fromEmail,
|
|
from_name: fromName,
|
|
to_emails: toEmails.unwrap_or_default(),
|
|
cc_emails: ccEmails.unwrap_or_default(),
|
|
bcc_emails: bccEmails.unwrap_or_default(),
|
|
message_id: messageId,
|
|
attachments: attachments.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEmailMetadata {
|
|
<WasmEmailMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fromEmail")]
|
|
pub fn from_email(&self) -> Option<String> {
|
|
self.from_email.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fromEmail")]
|
|
pub fn set_from_email(&mut self, value: Option<String>) {
|
|
self.from_email = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fromName")]
|
|
pub fn from_name(&self) -> Option<String> {
|
|
self.from_name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fromName")]
|
|
pub fn set_from_name(&mut self, value: Option<String>) {
|
|
self.from_name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "toEmails")]
|
|
pub fn to_emails(&self) -> Vec<String> {
|
|
self.to_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "toEmails")]
|
|
pub fn set_to_emails(&mut self, value: Vec<String>) {
|
|
self.to_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ccEmails")]
|
|
pub fn cc_emails(&self) -> Vec<String> {
|
|
self.cc_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "ccEmails")]
|
|
pub fn set_cc_emails(&mut self, value: Vec<String>) {
|
|
self.cc_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "bccEmails")]
|
|
pub fn bcc_emails(&self) -> Vec<String> {
|
|
self.bcc_emails.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "bccEmails")]
|
|
pub fn set_bcc_emails(&mut self, value: Vec<String>) {
|
|
self.bcc_emails = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "messageId")]
|
|
pub fn message_id(&self) -> Option<String> {
|
|
self.message_id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "messageId")]
|
|
pub fn set_message_id(&mut self, value: Option<String>) {
|
|
self.message_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attachments(&self) -> Vec<String> {
|
|
self.attachments.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attachments(&mut self, value: Vec<String>) {
|
|
self.attachments = value;
|
|
}
|
|
}
|
|
|
|
/// Archive (ZIP/TAR/7Z) metadata.
|
|
///
|
|
/// Extracted from compressed archive files containing file lists and size information.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmArchiveMetadata {
|
|
format: String,
|
|
file_count: u32,
|
|
file_list: Vec<String>,
|
|
total_size: u64,
|
|
compressed_size: Option<u64>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmArchiveMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
format: Option<String>,
|
|
fileCount: Option<u32>,
|
|
fileList: Option<Vec<String>>,
|
|
totalSize: Option<u64>,
|
|
compressedSize: Option<u64>,
|
|
) -> WasmArchiveMetadata {
|
|
WasmArchiveMetadata {
|
|
format: format.unwrap_or_default(),
|
|
file_count: fileCount.unwrap_or_default(),
|
|
file_list: fileList.unwrap_or_default(),
|
|
total_size: totalSize.unwrap_or_default(),
|
|
compressed_size: compressedSize,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmArchiveMetadata {
|
|
<WasmArchiveMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> String {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: String) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fileCount")]
|
|
pub fn file_count(&self) -> u32 {
|
|
self.file_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fileCount")]
|
|
pub fn set_file_count(&mut self, value: u32) {
|
|
self.file_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fileList")]
|
|
pub fn file_list(&self) -> Vec<String> {
|
|
self.file_list.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fileList")]
|
|
pub fn set_file_list(&mut self, value: Vec<String>) {
|
|
self.file_list = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalSize")]
|
|
pub fn total_size(&self) -> u64 {
|
|
self.total_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalSize")]
|
|
pub fn set_total_size(&mut self, value: u64) {
|
|
self.total_size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "compressedSize")]
|
|
pub fn compressed_size(&self) -> Option<u64> {
|
|
self.compressed_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "compressedSize")]
|
|
pub fn set_compressed_size(&mut self, value: Option<u64>) {
|
|
self.compressed_size = value;
|
|
}
|
|
}
|
|
|
|
/// Image metadata extracted from image files.
|
|
///
|
|
/// Includes dimensions, format, and EXIF data.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmImageMetadata {
|
|
width: u32,
|
|
height: u32,
|
|
format: String,
|
|
exif: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmImageMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
width: Option<u32>,
|
|
height: Option<u32>,
|
|
format: Option<String>,
|
|
exif: Option<JsValue>,
|
|
) -> WasmImageMetadata {
|
|
WasmImageMetadata {
|
|
width: width.unwrap_or_default(),
|
|
height: height.unwrap_or_default(),
|
|
format: format.unwrap_or_default(),
|
|
exif: exif.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmImageMetadata {
|
|
<WasmImageMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn width(&self) -> u32 {
|
|
self.width
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_width(&mut self, value: u32) {
|
|
self.width = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn height(&self) -> u32 {
|
|
self.height
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_height(&mut self, value: u32) {
|
|
self.height = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> String {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: String) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn exif(&self) -> JsValue {
|
|
self.exif.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_exif(&mut self, value: JsValue) {
|
|
self.exif = value;
|
|
}
|
|
}
|
|
|
|
/// XML metadata extracted during XML parsing.
|
|
///
|
|
/// Provides statistics about XML document structure.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmXmlMetadata {
|
|
element_count: u32,
|
|
unique_elements: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmXmlMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(elementCount: Option<u32>, uniqueElements: Option<Vec<String>>) -> WasmXmlMetadata {
|
|
WasmXmlMetadata {
|
|
element_count: elementCount.unwrap_or_default(),
|
|
unique_elements: uniqueElements.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmXmlMetadata {
|
|
<WasmXmlMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "elementCount")]
|
|
pub fn element_count(&self) -> u32 {
|
|
self.element_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "elementCount")]
|
|
pub fn set_element_count(&mut self, value: u32) {
|
|
self.element_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "uniqueElements")]
|
|
pub fn unique_elements(&self) -> Vec<String> {
|
|
self.unique_elements.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "uniqueElements")]
|
|
pub fn set_unique_elements(&mut self, value: Vec<String>) {
|
|
self.unique_elements = value;
|
|
}
|
|
}
|
|
|
|
/// Text/Markdown metadata.
|
|
///
|
|
/// Extracted from plain text and Markdown files. Includes word counts and,
|
|
/// for Markdown, structural elements like headers and links.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTextMetadata {
|
|
line_count: u32,
|
|
word_count: u32,
|
|
character_count: u32,
|
|
headers: Option<Vec<String>>,
|
|
links: Option<JsValue>,
|
|
code_blocks: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTextMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
lineCount: Option<u32>,
|
|
wordCount: Option<u32>,
|
|
characterCount: Option<u32>,
|
|
headers: Option<Vec<String>>,
|
|
links: Option<JsValue>,
|
|
codeBlocks: Option<JsValue>,
|
|
) -> WasmTextMetadata {
|
|
WasmTextMetadata {
|
|
line_count: lineCount.unwrap_or_default(),
|
|
word_count: wordCount.unwrap_or_default(),
|
|
character_count: characterCount.unwrap_or_default(),
|
|
headers,
|
|
links,
|
|
code_blocks: codeBlocks,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTextMetadata {
|
|
<WasmTextMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "lineCount")]
|
|
pub fn line_count(&self) -> u32 {
|
|
self.line_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "lineCount")]
|
|
pub fn set_line_count(&mut self, value: u32) {
|
|
self.line_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "wordCount")]
|
|
pub fn word_count(&self) -> u32 {
|
|
self.word_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "wordCount")]
|
|
pub fn set_word_count(&mut self, value: u32) {
|
|
self.word_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "characterCount")]
|
|
pub fn character_count(&self) -> u32 {
|
|
self.character_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "characterCount")]
|
|
pub fn set_character_count(&mut self, value: u32) {
|
|
self.character_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn headers(&self) -> Option<Vec<String>> {
|
|
self.headers.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_headers(&mut self, value: Option<Vec<String>>) {
|
|
self.headers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn links(&self) -> Option<JsValue> {
|
|
self.links.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_links(&mut self, value: Option<JsValue>) {
|
|
self.links = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "codeBlocks")]
|
|
pub fn code_blocks(&self) -> Option<JsValue> {
|
|
self.code_blocks.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "codeBlocks")]
|
|
pub fn set_code_blocks(&mut self, value: Option<JsValue>) {
|
|
self.code_blocks = value;
|
|
}
|
|
}
|
|
|
|
/// Header/heading element metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmHeaderMetadata {
|
|
level: u8,
|
|
text: String,
|
|
id: Option<String>,
|
|
depth: u32,
|
|
html_offset: u32,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmHeaderMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(level: u8, text: String, depth: u32, htmlOffset: u32, id: Option<String>) -> WasmHeaderMetadata {
|
|
WasmHeaderMetadata {
|
|
level,
|
|
text,
|
|
id,
|
|
depth,
|
|
html_offset: htmlOffset,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmHeaderMetadata {
|
|
<WasmHeaderMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn level(&self) -> u8 {
|
|
self.level
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_level(&mut self, value: u8) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn id(&self) -> Option<String> {
|
|
self.id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_id(&mut self, value: Option<String>) {
|
|
self.id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn depth(&self) -> u32 {
|
|
self.depth
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_depth(&mut self, value: u32) {
|
|
self.depth = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "htmlOffset")]
|
|
pub fn html_offset(&self) -> u32 {
|
|
self.html_offset
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "htmlOffset")]
|
|
pub fn set_html_offset(&mut self, value: u32) {
|
|
self.html_offset = value;
|
|
}
|
|
}
|
|
|
|
/// Link element metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmLinkMetadata {
|
|
href: String,
|
|
text: String,
|
|
title: Option<String>,
|
|
link_type: WasmLinkType,
|
|
rel: Vec<String>,
|
|
attributes: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmLinkMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
href: String,
|
|
text: String,
|
|
linkType: WasmLinkType,
|
|
rel: Vec<String>,
|
|
attributes: JsValue,
|
|
title: Option<String>,
|
|
) -> WasmLinkMetadata {
|
|
WasmLinkMetadata {
|
|
href,
|
|
text,
|
|
title,
|
|
link_type: linkType,
|
|
rel,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmLinkMetadata {
|
|
<WasmLinkMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn href(&self) -> String {
|
|
self.href.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_href(&mut self, value: String) {
|
|
self.href = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "linkType")]
|
|
pub fn link_type(&self) -> String {
|
|
self.link_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "linkType")]
|
|
pub fn set_link_type(&mut self, value: WasmLinkType) {
|
|
self.link_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn rel(&self) -> Vec<String> {
|
|
self.rel.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_rel(&mut self, value: Vec<String>) {
|
|
self.rel = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> JsValue {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: JsValue) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Image element metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmImageMetadataType {
|
|
src: String,
|
|
alt: Option<String>,
|
|
title: Option<String>,
|
|
dimensions: Option<Vec<u32>>,
|
|
image_type: WasmImageType,
|
|
attributes: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmImageMetadataType {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
src: String,
|
|
imageType: WasmImageType,
|
|
attributes: JsValue,
|
|
alt: Option<String>,
|
|
title: Option<String>,
|
|
dimensions: Option<Vec<u32>>,
|
|
) -> WasmImageMetadataType {
|
|
WasmImageMetadataType {
|
|
src,
|
|
alt,
|
|
title,
|
|
dimensions,
|
|
image_type: imageType,
|
|
attributes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmImageMetadataType {
|
|
<WasmImageMetadataType as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn src(&self) -> String {
|
|
self.src.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_src(&mut self, value: String) {
|
|
self.src = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn alt(&self) -> Option<String> {
|
|
self.alt.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_alt(&mut self, value: Option<String>) {
|
|
self.alt = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn dimensions(&self) -> Option<Vec<u32>> {
|
|
self.dimensions.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_dimensions(&mut self, value: Option<Vec<u32>>) {
|
|
self.dimensions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageType")]
|
|
pub fn image_type(&self) -> String {
|
|
self.image_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageType")]
|
|
pub fn set_image_type(&mut self, value: WasmImageType) {
|
|
self.image_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn attributes(&self) -> JsValue {
|
|
self.attributes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_attributes(&mut self, value: JsValue) {
|
|
self.attributes = value;
|
|
}
|
|
}
|
|
|
|
/// Structured data (Schema.org, microdata, RDFa) block.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmStructuredData {
|
|
data_type: WasmStructuredDataType,
|
|
raw_json: String,
|
|
schema_type: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmStructuredData {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(dataType: WasmStructuredDataType, rawJson: String, schemaType: Option<String>) -> WasmStructuredData {
|
|
WasmStructuredData {
|
|
data_type: dataType,
|
|
raw_json: rawJson,
|
|
schema_type: schemaType,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmStructuredData {
|
|
<WasmStructuredData as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "dataType")]
|
|
pub fn data_type(&self) -> String {
|
|
self.data_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "dataType")]
|
|
pub fn set_data_type(&mut self, value: WasmStructuredDataType) {
|
|
self.data_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "rawJson")]
|
|
pub fn raw_json(&self) -> String {
|
|
self.raw_json.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "rawJson")]
|
|
pub fn set_raw_json(&mut self, value: String) {
|
|
self.raw_json = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "schemaType")]
|
|
pub fn schema_type(&self) -> Option<String> {
|
|
self.schema_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "schemaType")]
|
|
pub fn set_schema_type(&mut self, value: Option<String>) {
|
|
self.schema_type = value;
|
|
}
|
|
}
|
|
|
|
/// HTML metadata extracted from HTML documents.
|
|
///
|
|
/// Includes document-level metadata, Open Graph data, Twitter Card metadata,
|
|
/// and extracted structural elements (headers, links, images, structured data).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmHtmlMetadata {
|
|
title: Option<String>,
|
|
description: Option<String>,
|
|
keywords: Vec<String>,
|
|
author: Option<String>,
|
|
canonical_url: Option<String>,
|
|
base_href: Option<String>,
|
|
language: Option<String>,
|
|
text_direction: Option<WasmTextDirection>,
|
|
open_graph: JsValue,
|
|
twitter_card: JsValue,
|
|
meta_tags: JsValue,
|
|
headers: Vec<WasmHeaderMetadata>,
|
|
links: Vec<WasmLinkMetadata>,
|
|
images: Vec<WasmImageMetadataType>,
|
|
structured_data: Vec<WasmStructuredData>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmHtmlMetadata {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
keywords: Option<Vec<String>>,
|
|
openGraph: Option<JsValue>,
|
|
twitterCard: Option<JsValue>,
|
|
metaTags: Option<JsValue>,
|
|
headers: Option<Vec<WasmHeaderMetadata>>,
|
|
links: Option<Vec<WasmLinkMetadata>>,
|
|
images: Option<Vec<WasmImageMetadataType>>,
|
|
structuredData: Option<Vec<WasmStructuredData>>,
|
|
title: Option<String>,
|
|
description: Option<String>,
|
|
author: Option<String>,
|
|
canonicalUrl: Option<String>,
|
|
baseHref: Option<String>,
|
|
language: Option<String>,
|
|
textDirection: Option<WasmTextDirection>,
|
|
) -> WasmHtmlMetadata {
|
|
WasmHtmlMetadata {
|
|
title,
|
|
description,
|
|
keywords: keywords.unwrap_or_default(),
|
|
author,
|
|
canonical_url: canonicalUrl,
|
|
base_href: baseHref,
|
|
language,
|
|
text_direction: textDirection,
|
|
open_graph: openGraph.unwrap_or_default(),
|
|
twitter_card: twitterCard.unwrap_or_default(),
|
|
meta_tags: metaTags.unwrap_or_default(),
|
|
headers: headers.unwrap_or_default(),
|
|
links: links.unwrap_or_default(),
|
|
images: images.unwrap_or_default(),
|
|
structured_data: structuredData.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmHtmlMetadata {
|
|
<WasmHtmlMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn description(&self) -> Option<String> {
|
|
self.description.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_description(&mut self, value: Option<String>) {
|
|
self.description = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn keywords(&self) -> Vec<String> {
|
|
self.keywords.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_keywords(&mut self, value: Vec<String>) {
|
|
self.keywords = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn author(&self) -> Option<String> {
|
|
self.author.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_author(&mut self, value: Option<String>) {
|
|
self.author = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "canonicalUrl")]
|
|
pub fn canonical_url(&self) -> Option<String> {
|
|
self.canonical_url.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "canonicalUrl")]
|
|
pub fn set_canonical_url(&mut self, value: Option<String>) {
|
|
self.canonical_url = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "baseHref")]
|
|
pub fn base_href(&self) -> Option<String> {
|
|
self.base_href.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "baseHref")]
|
|
pub fn set_base_href(&mut self, value: Option<String>) {
|
|
self.base_href = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> Option<String> {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: Option<String>) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "textDirection")]
|
|
pub fn text_direction(&self) -> Option<String> {
|
|
self.text_direction.map(|v| v.to_api_str().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "textDirection")]
|
|
pub fn set_text_direction(&mut self, value: Option<WasmTextDirection>) {
|
|
self.text_direction = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "openGraph")]
|
|
pub fn open_graph(&self) -> JsValue {
|
|
self.open_graph.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "openGraph")]
|
|
pub fn set_open_graph(&mut self, value: JsValue) {
|
|
self.open_graph = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "twitterCard")]
|
|
pub fn twitter_card(&self) -> JsValue {
|
|
self.twitter_card.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "twitterCard")]
|
|
pub fn set_twitter_card(&mut self, value: JsValue) {
|
|
self.twitter_card = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "metaTags")]
|
|
pub fn meta_tags(&self) -> JsValue {
|
|
self.meta_tags.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "metaTags")]
|
|
pub fn set_meta_tags(&mut self, value: JsValue) {
|
|
self.meta_tags = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn headers(&self) -> Vec<WasmHeaderMetadata> {
|
|
self.headers.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_headers(&mut self, value: Vec<WasmHeaderMetadata>) {
|
|
self.headers = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn links(&self) -> Vec<WasmLinkMetadata> {
|
|
self.links.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_links(&mut self, value: Vec<WasmLinkMetadata>) {
|
|
self.links = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn images(&self) -> Vec<WasmImageMetadataType> {
|
|
self.images.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_images(&mut self, value: Vec<WasmImageMetadataType>) {
|
|
self.images = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "structuredData")]
|
|
pub fn structured_data(&self) -> Vec<WasmStructuredData> {
|
|
self.structured_data.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "structuredData")]
|
|
pub fn set_structured_data(&mut self, value: Vec<WasmStructuredData>) {
|
|
self.structured_data = value;
|
|
}
|
|
}
|
|
|
|
/// OCR processing metadata.
|
|
///
|
|
/// Captures information about OCR processing configuration and results.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrMetadata {
|
|
language: String,
|
|
psm: i32,
|
|
output_format: String,
|
|
table_count: u32,
|
|
table_rows: Option<u32>,
|
|
table_cols: Option<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
language: Option<String>,
|
|
psm: Option<i32>,
|
|
outputFormat: Option<String>,
|
|
tableCount: Option<u32>,
|
|
tableRows: Option<u32>,
|
|
tableCols: Option<u32>,
|
|
) -> WasmOcrMetadata {
|
|
WasmOcrMetadata {
|
|
language: language.unwrap_or_default(),
|
|
psm: psm.unwrap_or_default(),
|
|
output_format: outputFormat.unwrap_or_default(),
|
|
table_count: tableCount.unwrap_or_default(),
|
|
table_rows: tableRows,
|
|
table_cols: tableCols,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrMetadata {
|
|
<WasmOcrMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn language(&self) -> String {
|
|
self.language.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_language(&mut self, value: String) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn psm(&self) -> i32 {
|
|
self.psm
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_psm(&mut self, value: i32) {
|
|
self.psm = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "outputFormat")]
|
|
pub fn output_format(&self) -> String {
|
|
self.output_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "outputFormat")]
|
|
pub fn set_output_format(&mut self, value: String) {
|
|
self.output_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCount")]
|
|
pub fn table_count(&self) -> u32 {
|
|
self.table_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCount")]
|
|
pub fn set_table_count(&mut self, value: u32) {
|
|
self.table_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableRows")]
|
|
pub fn table_rows(&self) -> Option<u32> {
|
|
self.table_rows
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableRows")]
|
|
pub fn set_table_rows(&mut self, value: Option<u32>) {
|
|
self.table_rows = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCols")]
|
|
pub fn table_cols(&self) -> Option<u32> {
|
|
self.table_cols
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCols")]
|
|
pub fn set_table_cols(&mut self, value: Option<u32>) {
|
|
self.table_cols = value;
|
|
}
|
|
}
|
|
|
|
/// Error metadata (for batch operations).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmErrorMetadata {
|
|
error_type: String,
|
|
message: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmErrorMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(errorType: String, message: String) -> WasmErrorMetadata {
|
|
WasmErrorMetadata {
|
|
error_type: errorType,
|
|
message,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmErrorMetadata {
|
|
<WasmErrorMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "errorType")]
|
|
pub fn error_type(&self) -> String {
|
|
self.error_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "errorType")]
|
|
pub fn set_error_type(&mut self, value: String) {
|
|
self.error_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn message(&self) -> String {
|
|
self.message.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_message(&mut self, value: String) {
|
|
self.message = value;
|
|
}
|
|
}
|
|
|
|
/// PowerPoint presentation metadata.
|
|
///
|
|
/// Extracted from PPTX files containing slide counts and presentation details.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPptxMetadata {
|
|
slide_count: u32,
|
|
slide_names: Vec<String>,
|
|
image_count: Option<u32>,
|
|
table_count: Option<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPptxMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
slideCount: Option<u32>,
|
|
slideNames: Option<Vec<String>>,
|
|
imageCount: Option<u32>,
|
|
tableCount: Option<u32>,
|
|
) -> WasmPptxMetadata {
|
|
WasmPptxMetadata {
|
|
slide_count: slideCount.unwrap_or_default(),
|
|
slide_names: slideNames.unwrap_or_default(),
|
|
image_count: imageCount,
|
|
table_count: tableCount,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPptxMetadata {
|
|
<WasmPptxMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "slideCount")]
|
|
pub fn slide_count(&self) -> u32 {
|
|
self.slide_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "slideCount")]
|
|
pub fn set_slide_count(&mut self, value: u32) {
|
|
self.slide_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "slideNames")]
|
|
pub fn slide_names(&self) -> Vec<String> {
|
|
self.slide_names.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "slideNames")]
|
|
pub fn set_slide_names(&mut self, value: Vec<String>) {
|
|
self.slide_names = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageCount")]
|
|
pub fn image_count(&self) -> Option<u32> {
|
|
self.image_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageCount")]
|
|
pub fn set_image_count(&mut self, value: Option<u32>) {
|
|
self.image_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCount")]
|
|
pub fn table_count(&self) -> Option<u32> {
|
|
self.table_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCount")]
|
|
pub fn set_table_count(&mut self, value: Option<u32>) {
|
|
self.table_count = value;
|
|
}
|
|
}
|
|
|
|
/// CSV/TSV file metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmCsvMetadata {
|
|
row_count: u32,
|
|
column_count: u32,
|
|
delimiter: Option<String>,
|
|
has_header: bool,
|
|
column_types: Option<Vec<String>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmCsvMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
rowCount: Option<u32>,
|
|
columnCount: Option<u32>,
|
|
hasHeader: Option<bool>,
|
|
delimiter: Option<String>,
|
|
columnTypes: Option<Vec<String>>,
|
|
) -> WasmCsvMetadata {
|
|
WasmCsvMetadata {
|
|
row_count: rowCount.unwrap_or_default(),
|
|
column_count: columnCount.unwrap_or_default(),
|
|
delimiter,
|
|
has_header: hasHeader.unwrap_or_default(),
|
|
column_types: columnTypes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmCsvMetadata {
|
|
<WasmCsvMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "rowCount")]
|
|
pub fn row_count(&self) -> u32 {
|
|
self.row_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "rowCount")]
|
|
pub fn set_row_count(&mut self, value: u32) {
|
|
self.row_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "columnCount")]
|
|
pub fn column_count(&self) -> u32 {
|
|
self.column_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "columnCount")]
|
|
pub fn set_column_count(&mut self, value: u32) {
|
|
self.column_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn delimiter(&self) -> Option<String> {
|
|
self.delimiter.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_delimiter(&mut self, value: Option<String>) {
|
|
self.delimiter = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "hasHeader")]
|
|
pub fn has_header(&self) -> bool {
|
|
self.has_header
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "hasHeader")]
|
|
pub fn set_has_header(&mut self, value: bool) {
|
|
self.has_header = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "columnTypes")]
|
|
pub fn column_types(&self) -> Option<Vec<String>> {
|
|
self.column_types.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "columnTypes")]
|
|
pub fn set_column_types(&mut self, value: Option<Vec<String>>) {
|
|
self.column_types = value;
|
|
}
|
|
}
|
|
|
|
/// BibTeX bibliography metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmBibtexMetadata {
|
|
entry_count: usize,
|
|
citation_keys: Vec<String>,
|
|
authors: Vec<String>,
|
|
year_range: Option<WasmYearRange>,
|
|
entry_types: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmBibtexMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
entryCount: Option<usize>,
|
|
citationKeys: Option<Vec<String>>,
|
|
authors: Option<Vec<String>>,
|
|
yearRange: Option<WasmYearRange>,
|
|
entryTypes: Option<JsValue>,
|
|
) -> WasmBibtexMetadata {
|
|
WasmBibtexMetadata {
|
|
entry_count: entryCount.unwrap_or_default(),
|
|
citation_keys: citationKeys.unwrap_or_default(),
|
|
authors: authors.unwrap_or_default(),
|
|
year_range: yearRange,
|
|
entry_types: entryTypes,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmBibtexMetadata {
|
|
<WasmBibtexMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "entryCount")]
|
|
pub fn entry_count(&self) -> usize {
|
|
self.entry_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "entryCount")]
|
|
pub fn set_entry_count(&mut self, value: usize) {
|
|
self.entry_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "citationKeys")]
|
|
pub fn citation_keys(&self) -> Vec<String> {
|
|
self.citation_keys.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "citationKeys")]
|
|
pub fn set_citation_keys(&mut self, value: Vec<String>) {
|
|
self.citation_keys = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn authors(&self) -> Vec<String> {
|
|
self.authors.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_authors(&mut self, value: Vec<String>) {
|
|
self.authors = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "yearRange")]
|
|
pub fn year_range(&self) -> Option<WasmYearRange> {
|
|
self.year_range.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "yearRange")]
|
|
pub fn set_year_range(&mut self, value: Option<WasmYearRange>) {
|
|
self.year_range = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "entryTypes")]
|
|
pub fn entry_types(&self) -> Option<JsValue> {
|
|
self.entry_types.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "entryTypes")]
|
|
pub fn set_entry_types(&mut self, value: Option<JsValue>) {
|
|
self.entry_types = value;
|
|
}
|
|
}
|
|
|
|
/// Citation file metadata (RIS, PubMed, EndNote).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmCitationMetadata {
|
|
citation_count: usize,
|
|
format: Option<String>,
|
|
authors: Vec<String>,
|
|
year_range: Option<WasmYearRange>,
|
|
dois: Vec<String>,
|
|
keywords: Vec<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmCitationMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
citationCount: Option<usize>,
|
|
authors: Option<Vec<String>>,
|
|
dois: Option<Vec<String>>,
|
|
keywords: Option<Vec<String>>,
|
|
format: Option<String>,
|
|
yearRange: Option<WasmYearRange>,
|
|
) -> WasmCitationMetadata {
|
|
WasmCitationMetadata {
|
|
citation_count: citationCount.unwrap_or_default(),
|
|
format,
|
|
authors: authors.unwrap_or_default(),
|
|
year_range: yearRange,
|
|
dois: dois.unwrap_or_default(),
|
|
keywords: keywords.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmCitationMetadata {
|
|
<WasmCitationMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "citationCount")]
|
|
pub fn citation_count(&self) -> usize {
|
|
self.citation_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "citationCount")]
|
|
pub fn set_citation_count(&mut self, value: usize) {
|
|
self.citation_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn format(&self) -> Option<String> {
|
|
self.format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_format(&mut self, value: Option<String>) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn authors(&self) -> Vec<String> {
|
|
self.authors.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_authors(&mut self, value: Vec<String>) {
|
|
self.authors = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "yearRange")]
|
|
pub fn year_range(&self) -> Option<WasmYearRange> {
|
|
self.year_range.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "yearRange")]
|
|
pub fn set_year_range(&mut self, value: Option<WasmYearRange>) {
|
|
self.year_range = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn dois(&self) -> Vec<String> {
|
|
self.dois.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_dois(&mut self, value: Vec<String>) {
|
|
self.dois = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn keywords(&self) -> Vec<String> {
|
|
self.keywords.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_keywords(&mut self, value: Vec<String>) {
|
|
self.keywords = value;
|
|
}
|
|
}
|
|
|
|
/// Year range for bibliographic metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmYearRange {
|
|
min: Option<u32>,
|
|
max: Option<u32>,
|
|
years: Vec<u32>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmYearRange {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(years: Vec<u32>, min: Option<u32>, max: Option<u32>) -> WasmYearRange {
|
|
WasmYearRange { min, max, years }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmYearRange {
|
|
<WasmYearRange as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn min(&self) -> Option<u32> {
|
|
self.min
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_min(&mut self, value: Option<u32>) {
|
|
self.min = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn max(&self) -> Option<u32> {
|
|
self.max
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_max(&mut self, value: Option<u32>) {
|
|
self.max = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn years(&self) -> Vec<u32> {
|
|
self.years.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_years(&mut self, value: Vec<u32>) {
|
|
self.years = value;
|
|
}
|
|
}
|
|
|
|
/// FictionBook (FB2) metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmFictionBookMetadata {
|
|
genres: Vec<String>,
|
|
sequences: Vec<String>,
|
|
annotation: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmFictionBookMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
genres: Option<Vec<String>>,
|
|
sequences: Option<Vec<String>>,
|
|
annotation: Option<String>,
|
|
) -> WasmFictionBookMetadata {
|
|
WasmFictionBookMetadata {
|
|
genres: genres.unwrap_or_default(),
|
|
sequences: sequences.unwrap_or_default(),
|
|
annotation,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmFictionBookMetadata {
|
|
<WasmFictionBookMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn genres(&self) -> Vec<String> {
|
|
self.genres.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_genres(&mut self, value: Vec<String>) {
|
|
self.genres = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn sequences(&self) -> Vec<String> {
|
|
self.sequences.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_sequences(&mut self, value: Vec<String>) {
|
|
self.sequences = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn annotation(&self) -> Option<String> {
|
|
self.annotation.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_annotation(&mut self, value: Option<String>) {
|
|
self.annotation = value;
|
|
}
|
|
}
|
|
|
|
/// dBASE (DBF) file metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDbfMetadata {
|
|
record_count: usize,
|
|
field_count: usize,
|
|
fields: Vec<WasmDbfFieldInfo>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDbfMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
recordCount: Option<usize>,
|
|
fieldCount: Option<usize>,
|
|
fields: Option<Vec<WasmDbfFieldInfo>>,
|
|
) -> WasmDbfMetadata {
|
|
WasmDbfMetadata {
|
|
record_count: recordCount.unwrap_or_default(),
|
|
field_count: fieldCount.unwrap_or_default(),
|
|
fields: fields.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDbfMetadata {
|
|
<WasmDbfMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "recordCount")]
|
|
pub fn record_count(&self) -> usize {
|
|
self.record_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "recordCount")]
|
|
pub fn set_record_count(&mut self, value: usize) {
|
|
self.record_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fieldCount")]
|
|
pub fn field_count(&self) -> usize {
|
|
self.field_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fieldCount")]
|
|
pub fn set_field_count(&mut self, value: usize) {
|
|
self.field_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn fields(&self) -> Vec<WasmDbfFieldInfo> {
|
|
self.fields.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_fields(&mut self, value: Vec<WasmDbfFieldInfo>) {
|
|
self.fields = value;
|
|
}
|
|
}
|
|
|
|
/// dBASE field information.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDbfFieldInfo {
|
|
name: String,
|
|
field_type: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDbfFieldInfo {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(name: String, fieldType: String) -> WasmDbfFieldInfo {
|
|
WasmDbfFieldInfo {
|
|
name,
|
|
field_type: fieldType,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDbfFieldInfo {
|
|
<WasmDbfFieldInfo as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn name(&self) -> String {
|
|
self.name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_name(&mut self, value: String) {
|
|
self.name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fieldType")]
|
|
pub fn field_type(&self) -> String {
|
|
self.field_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fieldType")]
|
|
pub fn set_field_type(&mut self, value: String) {
|
|
self.field_type = value;
|
|
}
|
|
}
|
|
|
|
/// JATS (Journal Article Tag Suite) metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmJatsMetadata {
|
|
copyright: Option<String>,
|
|
license: Option<String>,
|
|
history_dates: JsValue,
|
|
contributor_roles: Vec<WasmContributorRole>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmJatsMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
historyDates: Option<JsValue>,
|
|
contributorRoles: Option<Vec<WasmContributorRole>>,
|
|
copyright: Option<String>,
|
|
license: Option<String>,
|
|
) -> WasmJatsMetadata {
|
|
WasmJatsMetadata {
|
|
copyright,
|
|
license,
|
|
history_dates: historyDates.unwrap_or_default(),
|
|
contributor_roles: contributorRoles.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmJatsMetadata {
|
|
<WasmJatsMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn copyright(&self) -> Option<String> {
|
|
self.copyright.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_copyright(&mut self, value: Option<String>) {
|
|
self.copyright = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn license(&self) -> Option<String> {
|
|
self.license.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_license(&mut self, value: Option<String>) {
|
|
self.license = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "historyDates")]
|
|
pub fn history_dates(&self) -> JsValue {
|
|
self.history_dates.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "historyDates")]
|
|
pub fn set_history_dates(&mut self, value: JsValue) {
|
|
self.history_dates = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "contributorRoles")]
|
|
pub fn contributor_roles(&self) -> Vec<WasmContributorRole> {
|
|
self.contributor_roles.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "contributorRoles")]
|
|
pub fn set_contributor_roles(&mut self, value: Vec<WasmContributorRole>) {
|
|
self.contributor_roles = value;
|
|
}
|
|
}
|
|
|
|
/// JATS contributor with role.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmContributorRole {
|
|
name: String,
|
|
role: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmContributorRole {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(name: String, role: Option<String>) -> WasmContributorRole {
|
|
WasmContributorRole { name, role }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmContributorRole {
|
|
<WasmContributorRole as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn name(&self) -> String {
|
|
self.name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_name(&mut self, value: String) {
|
|
self.name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn role(&self) -> Option<String> {
|
|
self.role.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_role(&mut self, value: Option<String>) {
|
|
self.role = value;
|
|
}
|
|
}
|
|
|
|
/// EPUB metadata (Dublin Core extensions).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmEpubMetadata {
|
|
coverage: Option<String>,
|
|
dc_format: Option<String>,
|
|
relation: Option<String>,
|
|
source: Option<String>,
|
|
dc_type: Option<String>,
|
|
cover_image: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEpubMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
coverage: Option<String>,
|
|
dcFormat: Option<String>,
|
|
relation: Option<String>,
|
|
source: Option<String>,
|
|
dcType: Option<String>,
|
|
coverImage: Option<String>,
|
|
) -> WasmEpubMetadata {
|
|
WasmEpubMetadata {
|
|
coverage,
|
|
dc_format: dcFormat,
|
|
relation,
|
|
source,
|
|
dc_type: dcType,
|
|
cover_image: coverImage,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEpubMetadata {
|
|
<WasmEpubMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn coverage(&self) -> Option<String> {
|
|
self.coverage.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_coverage(&mut self, value: Option<String>) {
|
|
self.coverage = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "dcFormat")]
|
|
pub fn dc_format(&self) -> Option<String> {
|
|
self.dc_format.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "dcFormat")]
|
|
pub fn set_dc_format(&mut self, value: Option<String>) {
|
|
self.dc_format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn relation(&self) -> Option<String> {
|
|
self.relation.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_relation(&mut self, value: Option<String>) {
|
|
self.relation = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn source(&self) -> Option<String> {
|
|
self.source.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_source(&mut self, value: Option<String>) {
|
|
self.source = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "dcType")]
|
|
pub fn dc_type(&self) -> Option<String> {
|
|
self.dc_type.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "dcType")]
|
|
pub fn set_dc_type(&mut self, value: Option<String>) {
|
|
self.dc_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "coverImage")]
|
|
pub fn cover_image(&self) -> Option<String> {
|
|
self.cover_image.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "coverImage")]
|
|
pub fn set_cover_image(&mut self, value: Option<String>) {
|
|
self.cover_image = value;
|
|
}
|
|
}
|
|
|
|
/// Outlook PST archive metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPstMetadata {
|
|
message_count: usize,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPstMetadata {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(messageCount: Option<usize>) -> WasmPstMetadata {
|
|
WasmPstMetadata {
|
|
message_count: messageCount.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPstMetadata {
|
|
<WasmPstMetadata as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "messageCount")]
|
|
pub fn message_count(&self) -> usize {
|
|
self.message_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "messageCount")]
|
|
pub fn set_message_count(&mut self, value: usize) {
|
|
self.message_count = value;
|
|
}
|
|
}
|
|
|
|
/// Confidence scores for an OCR element.
|
|
///
|
|
/// Separates detection confidence (how confident that text exists at this location)
|
|
/// from recognition confidence (how confident about the actual text content).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrConfidence {
|
|
detection: Option<f64>,
|
|
recognition: f64,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrConfidence {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(recognition: Option<f64>, detection: Option<f64>) -> WasmOcrConfidence {
|
|
WasmOcrConfidence {
|
|
detection,
|
|
recognition: recognition.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrConfidence {
|
|
<WasmOcrConfidence as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn detection(&self) -> Option<f64> {
|
|
self.detection
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_detection(&mut self, value: Option<f64>) {
|
|
self.detection = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn recognition(&self) -> f64 {
|
|
self.recognition
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_recognition(&mut self, value: f64) {
|
|
self.recognition = value;
|
|
}
|
|
}
|
|
|
|
/// Rotation information for an OCR element.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrRotation {
|
|
angle_degrees: f64,
|
|
confidence: Option<f64>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrRotation {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(angleDegrees: f64, confidence: Option<f64>) -> WasmOcrRotation {
|
|
WasmOcrRotation {
|
|
angle_degrees: angleDegrees,
|
|
confidence,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrRotation {
|
|
<WasmOcrRotation as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "angleDegrees")]
|
|
pub fn angle_degrees(&self) -> f64 {
|
|
self.angle_degrees
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "angleDegrees")]
|
|
pub fn set_angle_degrees(&mut self, value: f64) {
|
|
self.angle_degrees = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn confidence(&self) -> Option<f64> {
|
|
self.confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_confidence(&mut self, value: Option<f64>) {
|
|
self.confidence = value;
|
|
}
|
|
}
|
|
|
|
/// A unified OCR element representing detected text with full metadata.
|
|
///
|
|
/// This is the primary type for structured OCR output, preserving all information
|
|
/// from both Tesseract and PaddleOCR backends.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrElement {
|
|
text: String,
|
|
geometry: JsValue,
|
|
confidence: WasmOcrConfidence,
|
|
level: WasmOcrElementLevel,
|
|
rotation: Option<WasmOcrRotation>,
|
|
page_number: u32,
|
|
parent_id: Option<String>,
|
|
backend_metadata: JsValue,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrElement {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
text: Option<String>,
|
|
geometry: Option<JsValue>,
|
|
confidence: Option<WasmOcrConfidence>,
|
|
level: Option<WasmOcrElementLevel>,
|
|
pageNumber: Option<u32>,
|
|
backendMetadata: Option<JsValue>,
|
|
rotation: Option<WasmOcrRotation>,
|
|
parentId: Option<String>,
|
|
) -> WasmOcrElement {
|
|
WasmOcrElement {
|
|
text: text.unwrap_or_default(),
|
|
geometry: geometry.unwrap_or_default(),
|
|
confidence: confidence.unwrap_or_default(),
|
|
level: level.unwrap_or_default(),
|
|
rotation,
|
|
page_number: pageNumber.unwrap_or_default(),
|
|
parent_id: parentId,
|
|
backend_metadata: backendMetadata.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrElement {
|
|
<WasmOcrElement as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn geometry(&self) -> JsValue {
|
|
self.geometry.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_geometry(&mut self, value: JsValue) {
|
|
self.geometry = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn confidence(&self) -> WasmOcrConfidence {
|
|
self.confidence.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_confidence(&mut self, value: WasmOcrConfidence) {
|
|
self.confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn level(&self) -> String {
|
|
self.level.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_level(&mut self, value: WasmOcrElementLevel) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn rotation(&self) -> Option<WasmOcrRotation> {
|
|
self.rotation.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_rotation(&mut self, value: Option<WasmOcrRotation>) {
|
|
self.rotation = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "parentId")]
|
|
pub fn parent_id(&self) -> Option<String> {
|
|
self.parent_id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "parentId")]
|
|
pub fn set_parent_id(&mut self, value: Option<String>) {
|
|
self.parent_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "backendMetadata")]
|
|
pub fn backend_metadata(&self) -> JsValue {
|
|
self.backend_metadata.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "backendMetadata")]
|
|
pub fn set_backend_metadata(&mut self, value: JsValue) {
|
|
self.backend_metadata = value;
|
|
}
|
|
}
|
|
|
|
/// Configuration for OCR element extraction.
|
|
///
|
|
/// Controls how OCR elements are extracted and filtered.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmOcrElementConfig {
|
|
include_elements: bool,
|
|
min_level: WasmOcrElementLevel,
|
|
min_confidence: f64,
|
|
build_hierarchy: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrElementConfig {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
includeElements: Option<bool>,
|
|
minLevel: Option<WasmOcrElementLevel>,
|
|
minConfidence: Option<f64>,
|
|
buildHierarchy: Option<bool>,
|
|
) -> WasmOcrElementConfig {
|
|
WasmOcrElementConfig {
|
|
include_elements: includeElements.unwrap_or_default(),
|
|
min_level: minLevel.unwrap_or_default(),
|
|
min_confidence: minConfidence.unwrap_or_default(),
|
|
build_hierarchy: buildHierarchy.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrElementConfig {
|
|
<WasmOcrElementConfig as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "includeElements")]
|
|
pub fn include_elements(&self) -> bool {
|
|
self.include_elements
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "includeElements")]
|
|
pub fn set_include_elements(&mut self, value: bool) {
|
|
self.include_elements = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minLevel")]
|
|
pub fn min_level(&self) -> String {
|
|
self.min_level.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minLevel")]
|
|
pub fn set_min_level(&mut self, value: WasmOcrElementLevel) {
|
|
self.min_level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "minConfidence")]
|
|
pub fn min_confidence(&self) -> f64 {
|
|
self.min_confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "minConfidence")]
|
|
pub fn set_min_confidence(&mut self, value: f64) {
|
|
self.min_confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "buildHierarchy")]
|
|
pub fn build_hierarchy(&self) -> bool {
|
|
self.build_hierarchy
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "buildHierarchy")]
|
|
pub fn set_build_hierarchy(&mut self, value: bool) {
|
|
self.build_hierarchy = value;
|
|
}
|
|
}
|
|
|
|
/// Unified page structure for documents.
|
|
///
|
|
/// Supports different page types (PDF pages, PPTX slides, Excel sheets)
|
|
/// with character offset boundaries for chunk-to-page mapping.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageStructure {
|
|
total_count: u32,
|
|
unit_type: WasmPageUnitType,
|
|
boundaries: Option<Vec<WasmPageBoundary>>,
|
|
pages: Option<Vec<WasmPageInfo>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageStructure {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
totalCount: u32,
|
|
unitType: WasmPageUnitType,
|
|
boundaries: Option<Vec<WasmPageBoundary>>,
|
|
pages: Option<Vec<WasmPageInfo>>,
|
|
) -> WasmPageStructure {
|
|
WasmPageStructure {
|
|
total_count: totalCount,
|
|
unit_type: unitType,
|
|
boundaries,
|
|
pages,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPageStructure {
|
|
<WasmPageStructure as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "totalCount")]
|
|
pub fn total_count(&self) -> u32 {
|
|
self.total_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "totalCount")]
|
|
pub fn set_total_count(&mut self, value: u32) {
|
|
self.total_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "unitType")]
|
|
pub fn unit_type(&self) -> String {
|
|
self.unit_type.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "unitType")]
|
|
pub fn set_unit_type(&mut self, value: WasmPageUnitType) {
|
|
self.unit_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn boundaries(&self) -> Option<js_sys::Array> {
|
|
self.boundaries.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_boundaries(&mut self, value: Option<Vec<WasmPageBoundary>>) {
|
|
self.boundaries = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn pages(&self) -> Option<js_sys::Array> {
|
|
self.pages.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_pages(&mut self, value: Option<Vec<WasmPageInfo>>) {
|
|
self.pages = value;
|
|
}
|
|
}
|
|
|
|
/// Byte offset boundary for a page.
|
|
///
|
|
/// Tracks where a specific page's content starts and ends in the main content string,
|
|
/// enabling mapping from byte positions to page numbers. Offsets are guaranteed to be
|
|
/// at valid UTF-8 character boundaries when using standard String methods (push_str, push, etc.).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageBoundary {
|
|
byte_start: usize,
|
|
byte_end: usize,
|
|
page_number: u32,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageBoundary {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(byteStart: usize, byteEnd: usize, pageNumber: u32) -> WasmPageBoundary {
|
|
WasmPageBoundary {
|
|
byte_start: byteStart,
|
|
byte_end: byteEnd,
|
|
page_number: pageNumber,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPageBoundary {
|
|
<WasmPageBoundary as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "byteStart")]
|
|
pub fn byte_start(&self) -> usize {
|
|
self.byte_start
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "byteStart")]
|
|
pub fn set_byte_start(&mut self, value: usize) {
|
|
self.byte_start = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "byteEnd")]
|
|
pub fn byte_end(&self) -> usize {
|
|
self.byte_end
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "byteEnd")]
|
|
pub fn set_byte_end(&mut self, value: usize) {
|
|
self.byte_end = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
}
|
|
|
|
/// Metadata for individual page/slide/sheet.
|
|
///
|
|
/// Captures per-page information including dimensions, content counts,
|
|
/// and visibility state (for presentations).
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageInfo {
|
|
number: u32,
|
|
title: Option<String>,
|
|
dimensions: Option<Vec<f64>>,
|
|
image_count: Option<u32>,
|
|
table_count: Option<u32>,
|
|
hidden: Option<bool>,
|
|
is_blank: Option<bool>,
|
|
has_vector_graphics: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageInfo {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
number: u32,
|
|
hasVectorGraphics: bool,
|
|
title: Option<String>,
|
|
dimensions: Option<Vec<f64>>,
|
|
imageCount: Option<u32>,
|
|
tableCount: Option<u32>,
|
|
hidden: Option<bool>,
|
|
isBlank: Option<bool>,
|
|
) -> WasmPageInfo {
|
|
WasmPageInfo {
|
|
number,
|
|
title,
|
|
dimensions,
|
|
image_count: imageCount,
|
|
table_count: tableCount,
|
|
hidden,
|
|
is_blank: isBlank,
|
|
has_vector_graphics: hasVectorGraphics,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPageInfo {
|
|
<WasmPageInfo as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn number(&self) -> u32 {
|
|
self.number
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_number(&mut self, value: u32) {
|
|
self.number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn dimensions(&self) -> Option<Vec<f64>> {
|
|
self.dimensions.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_dimensions(&mut self, value: Option<Vec<f64>>) {
|
|
self.dimensions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageCount")]
|
|
pub fn image_count(&self) -> Option<u32> {
|
|
self.image_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageCount")]
|
|
pub fn set_image_count(&mut self, value: Option<u32>) {
|
|
self.image_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableCount")]
|
|
pub fn table_count(&self) -> Option<u32> {
|
|
self.table_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableCount")]
|
|
pub fn set_table_count(&mut self, value: Option<u32>) {
|
|
self.table_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn hidden(&self) -> Option<bool> {
|
|
self.hidden
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_hidden(&mut self, value: Option<bool>) {
|
|
self.hidden = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isBlank")]
|
|
pub fn is_blank(&self) -> Option<bool> {
|
|
self.is_blank
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isBlank")]
|
|
pub fn set_is_blank(&mut self, value: Option<bool>) {
|
|
self.is_blank = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "hasVectorGraphics")]
|
|
pub fn has_vector_graphics(&self) -> bool {
|
|
self.has_vector_graphics
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "hasVectorGraphics")]
|
|
pub fn set_has_vector_graphics(&mut self, value: bool) {
|
|
self.has_vector_graphics = value;
|
|
}
|
|
}
|
|
|
|
/// Content for a single page/slide.
|
|
///
|
|
/// When page extraction is enabled, documents are split into per-page content
|
|
/// with associated tables and images mapped to each page.
|
|
///
|
|
/// # Performance
|
|
///
|
|
/// Uses Arc-wrapped tables and images for memory efficiency:
|
|
/// - `Vec<Arc<Table>>` enables zero-copy sharing of table data
|
|
/// - `Vec<Arc<ExtractedImage>>` enables zero-copy sharing of image data
|
|
/// - Maintains exact JSON compatibility via custom Serialize/Deserialize
|
|
///
|
|
/// This reduces memory overhead for documents with shared tables/images
|
|
/// by avoiding redundant copies during serialization.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageContent {
|
|
page_number: u32,
|
|
content: String,
|
|
tables: Vec<WasmTable>,
|
|
image_indices: Vec<u32>,
|
|
hierarchy: Option<WasmPageHierarchy>,
|
|
is_blank: Option<bool>,
|
|
layout_regions: Option<Vec<WasmLayoutRegion>>,
|
|
speaker_notes: Option<String>,
|
|
section_name: Option<String>,
|
|
sheet_name: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageContent {
|
|
#[allow(clippy::too_many_arguments)]
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
pageNumber: u32,
|
|
content: String,
|
|
tables: Vec<WasmTable>,
|
|
imageIndices: Vec<u32>,
|
|
hierarchy: Option<WasmPageHierarchy>,
|
|
isBlank: Option<bool>,
|
|
layoutRegions: Option<Vec<WasmLayoutRegion>>,
|
|
speakerNotes: Option<String>,
|
|
sectionName: Option<String>,
|
|
sheetName: Option<String>,
|
|
) -> WasmPageContent {
|
|
WasmPageContent {
|
|
page_number: pageNumber,
|
|
content,
|
|
tables,
|
|
image_indices: imageIndices,
|
|
hierarchy,
|
|
is_blank: isBlank,
|
|
layout_regions: layoutRegions,
|
|
speaker_notes: speakerNotes,
|
|
section_name: sectionName,
|
|
sheet_name: sheetName,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPageContent {
|
|
<WasmPageContent as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn tables(&self) -> Vec<WasmTable> {
|
|
self.tables.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_tables(&mut self, value: Vec<WasmTable>) {
|
|
self.tables = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageIndices")]
|
|
pub fn image_indices(&self) -> Vec<u32> {
|
|
self.image_indices.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "imageIndices")]
|
|
pub fn set_image_indices(&mut self, value: Vec<u32>) {
|
|
self.image_indices = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn hierarchy(&self) -> Option<WasmPageHierarchy> {
|
|
self.hierarchy.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_hierarchy(&mut self, value: Option<WasmPageHierarchy>) {
|
|
self.hierarchy = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isBlank")]
|
|
pub fn is_blank(&self) -> Option<bool> {
|
|
self.is_blank
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isBlank")]
|
|
pub fn set_is_blank(&mut self, value: Option<bool>) {
|
|
self.is_blank = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "layoutRegions")]
|
|
pub fn layout_regions(&self) -> Option<js_sys::Array> {
|
|
self.layout_regions.as_ref().map(|items| {
|
|
let arr = js_sys::Array::new();
|
|
for item in items {
|
|
arr.push(&JsValue::from(item.clone()));
|
|
}
|
|
arr
|
|
})
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "layoutRegions")]
|
|
pub fn set_layout_regions(&mut self, value: Option<Vec<WasmLayoutRegion>>) {
|
|
self.layout_regions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "speakerNotes")]
|
|
pub fn speaker_notes(&self) -> Option<String> {
|
|
self.speaker_notes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "speakerNotes")]
|
|
pub fn set_speaker_notes(&mut self, value: Option<String>) {
|
|
self.speaker_notes = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sectionName")]
|
|
pub fn section_name(&self) -> Option<String> {
|
|
self.section_name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sectionName")]
|
|
pub fn set_section_name(&mut self, value: Option<String>) {
|
|
self.section_name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "sheetName")]
|
|
pub fn sheet_name(&self) -> Option<String> {
|
|
self.sheet_name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "sheetName")]
|
|
pub fn set_sheet_name(&mut self, value: Option<String>) {
|
|
self.sheet_name = value;
|
|
}
|
|
}
|
|
|
|
/// A detected layout region on a page.
|
|
///
|
|
/// When layout detection is enabled, each page may have layout regions
|
|
/// identifying different content types (text, pictures, tables, etc.)
|
|
/// with confidence scores and spatial positions.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmLayoutRegion {
|
|
class_name: String,
|
|
confidence: f64,
|
|
bounding_box: WasmBoundingBox,
|
|
area_fraction: f64,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmLayoutRegion {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
className: Option<String>,
|
|
confidence: Option<f64>,
|
|
boundingBox: Option<WasmBoundingBox>,
|
|
areaFraction: Option<f64>,
|
|
) -> WasmLayoutRegion {
|
|
WasmLayoutRegion {
|
|
class_name: className.unwrap_or_default(),
|
|
confidence: confidence.unwrap_or_default(),
|
|
bounding_box: boundingBox.unwrap_or_default(),
|
|
area_fraction: areaFraction.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmLayoutRegion {
|
|
<WasmLayoutRegion as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "className")]
|
|
pub fn class_name(&self) -> String {
|
|
self.class_name.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "className")]
|
|
pub fn set_class_name(&mut self, value: String) {
|
|
self.class_name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn confidence(&self) -> f64 {
|
|
self.confidence
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_confidence(&mut self, value: f64) {
|
|
self.confidence = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "boundingBox")]
|
|
pub fn bounding_box(&self) -> WasmBoundingBox {
|
|
self.bounding_box.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "boundingBox")]
|
|
pub fn set_bounding_box(&mut self, value: WasmBoundingBox) {
|
|
self.bounding_box = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "areaFraction")]
|
|
pub fn area_fraction(&self) -> f64 {
|
|
self.area_fraction
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "areaFraction")]
|
|
pub fn set_area_fraction(&mut self, value: f64) {
|
|
self.area_fraction = value;
|
|
}
|
|
}
|
|
|
|
/// Page hierarchy structure containing heading levels and block information.
|
|
///
|
|
/// Used when PDF text hierarchy extraction is enabled. Contains hierarchical
|
|
/// blocks with heading levels (H1-H6) for semantic document structure.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmPageHierarchy {
|
|
block_count: u32,
|
|
blocks: Vec<WasmHierarchicalBlock>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmPageHierarchy {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(blockCount: u32, blocks: Vec<WasmHierarchicalBlock>) -> WasmPageHierarchy {
|
|
WasmPageHierarchy {
|
|
block_count: blockCount,
|
|
blocks,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmPageHierarchy {
|
|
<WasmPageHierarchy as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "blockCount")]
|
|
pub fn block_count(&self) -> u32 {
|
|
self.block_count
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "blockCount")]
|
|
pub fn set_block_count(&mut self, value: u32) {
|
|
self.block_count = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn blocks(&self) -> Vec<WasmHierarchicalBlock> {
|
|
self.blocks.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_blocks(&mut self, value: Vec<WasmHierarchicalBlock>) {
|
|
self.blocks = value;
|
|
}
|
|
}
|
|
|
|
/// A text block with hierarchy level assignment.
|
|
///
|
|
/// Represents a block of text with semantic heading information extracted from
|
|
/// font size clustering and hierarchical analysis.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmHierarchicalBlock {
|
|
text: String,
|
|
font_size: f32,
|
|
level: String,
|
|
bbox: Option<Vec<f32>>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmHierarchicalBlock {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(text: String, fontSize: f32, level: String, bbox: Option<Vec<f32>>) -> WasmHierarchicalBlock {
|
|
WasmHierarchicalBlock {
|
|
text,
|
|
font_size: fontSize,
|
|
level,
|
|
bbox,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmHierarchicalBlock {
|
|
<WasmHierarchicalBlock as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn text(&self) -> String {
|
|
self.text.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_text(&mut self, value: String) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "fontSize")]
|
|
pub fn font_size(&self) -> f32 {
|
|
self.font_size
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "fontSize")]
|
|
pub fn set_font_size(&mut self, value: f32) {
|
|
self.font_size = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn level(&self) -> String {
|
|
self.level.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_level(&mut self, value: String) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn bbox(&self) -> Option<Vec<f32>> {
|
|
self.bbox.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_bbox(&mut self, value: Option<Vec<f32>>) {
|
|
self.bbox = value;
|
|
}
|
|
}
|
|
|
|
/// A single changed cell within a table.
|
|
///
|
|
/// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can
|
|
/// reference it unconditionally, without requiring the `diff` Cargo feature.
|
|
/// `crate.diff` re-exports this type verbatim.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmCellChange {
|
|
row: usize,
|
|
col: usize,
|
|
from: String,
|
|
to: String,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmCellChange {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(row: usize, col: usize, from: String, to: String) -> WasmCellChange {
|
|
WasmCellChange { row, col, from, to }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmCellChange {
|
|
<WasmCellChange as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn row(&self) -> usize {
|
|
self.row
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_row(&mut self, value: usize) {
|
|
self.row = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn col(&self) -> usize {
|
|
self.col
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_col(&mut self, value: usize) {
|
|
self.col = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn from(&self) -> String {
|
|
self.from.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_from(&mut self, value: String) {
|
|
self.from = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn to(&self) -> String {
|
|
self.to.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_to(&mut self, value: String) {
|
|
self.to = value;
|
|
}
|
|
}
|
|
|
|
/// A single tracked change embedded in a document.
|
|
///
|
|
/// Populated by per-format extractors that understand change-tracking metadata
|
|
/// (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Every
|
|
/// extractor defaults to `ExtractionResult.revisions = None` until a
|
|
/// format-specific implementation is added.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmDocumentRevision {
|
|
revision_id: String,
|
|
author: Option<String>,
|
|
timestamp: Option<String>,
|
|
kind: WasmRevisionKind,
|
|
anchor: Option<JsValue>,
|
|
delta: WasmRevisionDelta,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDocumentRevision {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
revisionId: String,
|
|
kind: WasmRevisionKind,
|
|
delta: WasmRevisionDelta,
|
|
author: Option<String>,
|
|
timestamp: Option<String>,
|
|
anchor: Option<JsValue>,
|
|
) -> WasmDocumentRevision {
|
|
WasmDocumentRevision {
|
|
revision_id: revisionId,
|
|
author,
|
|
timestamp,
|
|
kind,
|
|
anchor,
|
|
delta,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDocumentRevision {
|
|
<WasmDocumentRevision as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "revisionId")]
|
|
pub fn revision_id(&self) -> String {
|
|
self.revision_id.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "revisionId")]
|
|
pub fn set_revision_id(&mut self, value: String) {
|
|
self.revision_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn author(&self) -> Option<String> {
|
|
self.author.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_author(&mut self, value: Option<String>) {
|
|
self.author = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn timestamp(&self) -> Option<String> {
|
|
self.timestamp.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_timestamp(&mut self, value: Option<String>) {
|
|
self.timestamp = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn kind(&self) -> String {
|
|
self.kind.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_kind(&mut self, value: WasmRevisionKind) {
|
|
self.kind = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn anchor(&self) -> Option<JsValue> {
|
|
self.anchor.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_anchor(&mut self, value: Option<JsValue>) {
|
|
self.anchor = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn delta(&self) -> WasmRevisionDelta {
|
|
self.delta.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_delta(&mut self, value: WasmRevisionDelta) {
|
|
self.delta = value;
|
|
}
|
|
}
|
|
|
|
/// The content changes that make up a single revision.
|
|
///
|
|
/// For insertions and deletions the `content` field carries the added/removed
|
|
/// lines as `DiffLine.Added` / `DiffLine.Removed` entries. For format
|
|
/// changes, `content` is empty — the property diff is left as a TODO for a
|
|
/// later enrichment pass.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmRevisionDelta {
|
|
content: JsValue,
|
|
table_changes: Vec<WasmCellChange>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmRevisionDelta {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(content: Option<JsValue>, tableChanges: Option<Vec<WasmCellChange>>) -> WasmRevisionDelta {
|
|
WasmRevisionDelta {
|
|
content: content.unwrap_or_default(),
|
|
table_changes: tableChanges.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmRevisionDelta {
|
|
<WasmRevisionDelta as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> JsValue {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: JsValue) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableChanges")]
|
|
pub fn table_changes(&self) -> Vec<WasmCellChange> {
|
|
self.table_changes.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "tableChanges")]
|
|
pub fn set_table_changes(&mut self, value: Vec<WasmCellChange>) {
|
|
self.table_changes = value;
|
|
}
|
|
}
|
|
|
|
/// Extracted table structure.
|
|
///
|
|
/// Represents a table detected and extracted from a document (PDF, image, etc.).
|
|
/// Tables are converted to both structured cell data and Markdown format.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTable {
|
|
cells: JsValue,
|
|
markdown: String,
|
|
page_number: u32,
|
|
bounding_box: Option<WasmBoundingBox>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTable {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
cells: Option<JsValue>,
|
|
markdown: Option<String>,
|
|
pageNumber: Option<u32>,
|
|
boundingBox: Option<WasmBoundingBox>,
|
|
) -> WasmTable {
|
|
WasmTable {
|
|
cells: cells.unwrap_or_default(),
|
|
markdown: markdown.unwrap_or_default(),
|
|
page_number: pageNumber.unwrap_or_default(),
|
|
bounding_box: boundingBox,
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTable {
|
|
<WasmTable as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn cells(&self) -> JsValue {
|
|
self.cells.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_cells(&mut self, value: JsValue) {
|
|
self.cells = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn markdown(&self) -> String {
|
|
self.markdown.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_markdown(&mut self, value: String) {
|
|
self.markdown = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "pageNumber")]
|
|
pub fn page_number(&self) -> u32 {
|
|
self.page_number
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "pageNumber")]
|
|
pub fn set_page_number(&mut self, value: u32) {
|
|
self.page_number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "boundingBox")]
|
|
pub fn bounding_box(&self) -> Option<WasmBoundingBox> {
|
|
self.bounding_box.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "boundingBox")]
|
|
pub fn set_bounding_box(&mut self, value: Option<WasmBoundingBox>) {
|
|
self.bounding_box = value;
|
|
}
|
|
}
|
|
|
|
/// Individual table cell with content and optional styling.
|
|
///
|
|
/// Future extension point for rich table support with cell-level metadata.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmTableCell {
|
|
content: String,
|
|
row_span: u32,
|
|
col_span: u32,
|
|
is_header: bool,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmTableCell {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(
|
|
content: Option<String>,
|
|
rowSpan: Option<u32>,
|
|
colSpan: Option<u32>,
|
|
isHeader: Option<bool>,
|
|
) -> WasmTableCell {
|
|
WasmTableCell {
|
|
content: content.unwrap_or_default(),
|
|
row_span: rowSpan.unwrap_or_default(),
|
|
col_span: colSpan.unwrap_or_default(),
|
|
is_header: isHeader.unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmTableCell {
|
|
<WasmTableCell as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn content(&self) -> String {
|
|
self.content.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_content(&mut self, value: String) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "rowSpan")]
|
|
pub fn row_span(&self) -> u32 {
|
|
self.row_span
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "rowSpan")]
|
|
pub fn set_row_span(&mut self, value: u32) {
|
|
self.row_span = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "colSpan")]
|
|
pub fn col_span(&self) -> u32 {
|
|
self.col_span
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "colSpan")]
|
|
pub fn set_col_span(&mut self, value: u32) {
|
|
self.col_span = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "isHeader")]
|
|
pub fn is_header(&self) -> bool {
|
|
self.is_header
|
|
}
|
|
|
|
#[wasm_bindgen(setter, js_name = "isHeader")]
|
|
pub fn set_is_header(&mut self, value: bool) {
|
|
self.is_header = value;
|
|
}
|
|
}
|
|
|
|
/// A URI extracted from a document.
|
|
///
|
|
/// Represents any link, reference, or resource pointer found during extraction.
|
|
/// The `kind` field classifies the URI semantically, while `label` carries
|
|
/// optional human-readable display text.
|
|
#[derive(Clone, Default)]
|
|
#[wasm_bindgen]
|
|
pub struct WasmExtractedUri {
|
|
url: String,
|
|
label: Option<String>,
|
|
page: Option<u32>,
|
|
kind: WasmUriKind,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmExtractedUri {
|
|
#[allow(non_snake_case)]
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new(url: String, kind: WasmUriKind, label: Option<String>, page: Option<u32>) -> WasmExtractedUri {
|
|
WasmExtractedUri { url, label, page, kind }
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmExtractedUri {
|
|
<WasmExtractedUri as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn url(&self) -> String {
|
|
self.url.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_url(&mut self, value: String) {
|
|
self.url = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn label(&self) -> Option<String> {
|
|
self.label.clone()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_label(&mut self, value: Option<String>) {
|
|
self.label = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn page(&self) -> Option<u32> {
|
|
self.page
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_page(&mut self, value: Option<u32>) {
|
|
self.page = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter)]
|
|
pub fn kind(&self) -> String {
|
|
self.kind.to_api_str().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen(setter)]
|
|
pub fn set_kind(&mut self, value: WasmUriKind) {
|
|
self.kind = value;
|
|
}
|
|
}
|
|
|
|
/// ONNX Runtime execution provider type.
|
|
///
|
|
/// Determines which hardware backend is used for model inference.
|
|
/// `Auto` (default) selects the best available provider per platform.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmExecutionProviderType {
|
|
Auto = 0,
|
|
Cpu = 1,
|
|
CoreMl = 2,
|
|
Cuda = 3,
|
|
TensorRt = 4,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmExecutionProviderType {
|
|
fn default() -> Self {
|
|
Self::Auto
|
|
}
|
|
}
|
|
|
|
impl WasmExecutionProviderType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Auto => "auto",
|
|
Self::Cpu => "cpu",
|
|
Self::CoreMl => "coreml",
|
|
Self::Cuda => "cuda",
|
|
Self::TensorRt => "tensorrt",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"auto" => Some(Self::Auto),
|
|
"cpu" => Some(Self::Cpu),
|
|
"coreml" => Some(Self::CoreMl),
|
|
"cuda" => Some(Self::Cuda),
|
|
"tensorrt" => Some(Self::TensorRt),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Output format for extraction results.
|
|
///
|
|
/// Controls the format of the `content` field in `ExtractionResult`.
|
|
/// When set to `Markdown`, `Djot`, or `Html`, the output uses that format.
|
|
/// `Plain` returns the raw extracted text.
|
|
/// `Structured` returns JSON with full OCR element data including bounding
|
|
/// boxes and confidence scores.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmOutputFormat {
|
|
Plain = 0,
|
|
Markdown = 1,
|
|
Djot = 2,
|
|
Html = 3,
|
|
Json = 4,
|
|
Structured = 5,
|
|
Custom = 6,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmOutputFormat {
|
|
fn default() -> Self {
|
|
Self::Plain
|
|
}
|
|
}
|
|
|
|
impl WasmOutputFormat {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Plain => "plain",
|
|
Self::Markdown => "markdown",
|
|
Self::Djot => "djot",
|
|
Self::Html => "html",
|
|
Self::Json => "json",
|
|
Self::Structured => "structured",
|
|
Self::Custom => "custom",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"plain" => Some(Self::Plain),
|
|
"markdown" => Some(Self::Markdown),
|
|
"djot" => Some(Self::Djot),
|
|
"html" => Some(Self::Html),
|
|
"json" => Some(Self::Json),
|
|
"structured" => Some(Self::Structured),
|
|
"custom" => Some(Self::Custom),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Type of text chunker to use.
|
|
///
|
|
/// # Variants
|
|
///
|
|
/// * `Text` - Generic text splitter, splits on whitespace and punctuation
|
|
/// * `Markdown` - Markdown-aware splitter, preserves formatting and structure
|
|
/// * `Yaml` - YAML-aware splitter, creates one chunk per top-level key
|
|
/// * `Semantic` - Topic-aware chunker. With an `EmbeddingConfig`, splits at
|
|
/// embedding-based topic shifts tuned by `topic_threshold` (default 0.75,
|
|
/// lower = more splits). Without an embedding, falls back to a
|
|
/// structural-boundary heuristic (ALL-CAPS headers, numbered sections,
|
|
/// blank-line paragraphs) and merges groups into chunks capped at
|
|
/// `max_characters` (default 1000). `topic_threshold` has no effect in the
|
|
/// fallback path. For best results, pair with an embedding model.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmChunkerType {
|
|
Text = 0,
|
|
Markdown = 1,
|
|
Yaml = 2,
|
|
Semantic = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmChunkerType {
|
|
fn default() -> Self {
|
|
Self::Text
|
|
}
|
|
}
|
|
|
|
impl WasmChunkerType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Text => "text",
|
|
Self::Markdown => "markdown",
|
|
Self::Yaml => "yaml",
|
|
Self::Semantic => "semantic",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"text" => Some(Self::Text),
|
|
"markdown" => Some(Self::Markdown),
|
|
"yaml" => Some(Self::Yaml),
|
|
"semantic" => Some(Self::Semantic),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// How chunk size is measured.
|
|
///
|
|
/// Defaults to `Characters` (Unicode character count). When using token-based sizing,
|
|
/// chunks are sized by token count according to the specified tokenizer.
|
|
///
|
|
/// Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer
|
|
/// available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers
|
|
/// (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`).
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmChunkSizing {
|
|
pub(crate) r#type: String,
|
|
pub(crate) model: Option<String>,
|
|
pub(crate) cache_dir: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmChunkSizing {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmChunkSizing {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmChunkSizing {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "type")]
|
|
pub fn r#type(&self) -> String {
|
|
self.r#type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "type")]
|
|
pub fn set_type(&mut self, value: String) {
|
|
self.r#type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "model")]
|
|
pub fn model(&self) -> Option<String> {
|
|
self.model.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "model")]
|
|
pub fn set_model(&mut self, value: Option<String>) {
|
|
self.model = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "cacheDir")]
|
|
pub fn cache_dir(&self) -> Option<String> {
|
|
self.cache_dir.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "cacheDir")]
|
|
pub fn set_cache_dir(&mut self, value: Option<String>) {
|
|
self.cache_dir = value;
|
|
}
|
|
}
|
|
|
|
/// Embedding model types supported by Kreuzberg.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmEmbeddingModelType {
|
|
pub(crate) r#type: String,
|
|
pub(crate) name: Option<String>,
|
|
pub(crate) model_id: Option<String>,
|
|
pub(crate) dimensions: Option<usize>,
|
|
pub(crate) llm: Option<WasmLlmConfig>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmEmbeddingModelType {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmEmbeddingModelType {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmEmbeddingModelType {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "type")]
|
|
pub fn r#type(&self) -> String {
|
|
self.r#type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "type")]
|
|
pub fn set_type(&mut self, value: String) {
|
|
self.r#type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "name")]
|
|
pub fn name(&self) -> Option<String> {
|
|
self.name.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "name")]
|
|
pub fn set_name(&mut self, value: Option<String>) {
|
|
self.name = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "modelId")]
|
|
pub fn model_id(&self) -> Option<String> {
|
|
self.model_id.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "modelId")]
|
|
pub fn set_model_id(&mut self, value: Option<String>) {
|
|
self.model_id = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "dimensions")]
|
|
pub fn dimensions(&self) -> Option<usize> {
|
|
self.dimensions.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "dimensions")]
|
|
pub fn set_dimensions(&mut self, value: Option<usize>) {
|
|
self.dimensions = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "llm")]
|
|
pub fn llm(&self) -> Option<WasmLlmConfig> {
|
|
self.llm.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "llm")]
|
|
pub fn set_llm(&mut self, value: Option<WasmLlmConfig>) {
|
|
self.llm = value;
|
|
}
|
|
}
|
|
|
|
/// Type of list detection.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmListType {
|
|
Bullet = 0,
|
|
Numbered = 1,
|
|
Lettered = 2,
|
|
Indented = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmListType {
|
|
fn default() -> Self {
|
|
Self::Bullet
|
|
}
|
|
}
|
|
|
|
impl WasmListType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Bullet => "Bullet",
|
|
Self::Numbered => "Numbered",
|
|
Self::Lettered => "Lettered",
|
|
Self::Indented => "Indented",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"Bullet" => Some(Self::Bullet),
|
|
"Numbered" => Some(Self::Numbered),
|
|
"Lettered" => Some(Self::Lettered),
|
|
"Indented" => Some(Self::Indented),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// OCR backend types.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmOcrBackendType {
|
|
Tesseract = 0,
|
|
EasyOCR = 1,
|
|
PaddleOCR = 2,
|
|
Custom = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmOcrBackendType {
|
|
fn default() -> Self {
|
|
Self::Tesseract
|
|
}
|
|
}
|
|
|
|
impl WasmOcrBackendType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Tesseract => "Tesseract",
|
|
Self::EasyOCR => "EasyOCR",
|
|
Self::PaddleOCR => "PaddleOCR",
|
|
Self::Custom => "Custom",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"Tesseract" => Some(Self::Tesseract),
|
|
"EasyOCR" => Some(Self::EasyOCR),
|
|
"PaddleOCR" => Some(Self::PaddleOCR),
|
|
"Custom" => Some(Self::Custom),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Processing stages for post-processors.
|
|
///
|
|
/// Post-processors are executed in stage order (Early → Middle → Late).
|
|
/// Use stages to control the order of post-processing operations.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmProcessingStage {
|
|
Early = 0,
|
|
Middle = 1,
|
|
Late = 2,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmProcessingStage {
|
|
fn default() -> Self {
|
|
Self::Early
|
|
}
|
|
}
|
|
|
|
impl WasmProcessingStage {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Early => "Early",
|
|
Self::Middle => "Middle",
|
|
Self::Late => "Late",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"Early" => Some(Self::Early),
|
|
"Middle" => Some(Self::Middle),
|
|
"Late" => Some(Self::Late),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Type of PDF annotation.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmPdfAnnotationType {
|
|
Text = 0,
|
|
Highlight = 1,
|
|
Link = 2,
|
|
Stamp = 3,
|
|
Underline = 4,
|
|
StrikeOut = 5,
|
|
Other = 6,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmPdfAnnotationType {
|
|
fn default() -> Self {
|
|
Self::Text
|
|
}
|
|
}
|
|
|
|
impl WasmPdfAnnotationType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Text => "text",
|
|
Self::Highlight => "highlight",
|
|
Self::Link => "link",
|
|
Self::Stamp => "stamp",
|
|
Self::Underline => "underline",
|
|
Self::StrikeOut => "strike_out",
|
|
Self::Other => "other",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"text" => Some(Self::Text),
|
|
"highlight" => Some(Self::Highlight),
|
|
"link" => Some(Self::Link),
|
|
"stamp" => Some(Self::Stamp),
|
|
"underline" => Some(Self::Underline),
|
|
"strike_out" => Some(Self::StrikeOut),
|
|
"other" => Some(Self::Other),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Types of block-level elements in Djot.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmBlockType {
|
|
Paragraph = 0,
|
|
Heading = 1,
|
|
Blockquote = 2,
|
|
CodeBlock = 3,
|
|
ListItem = 4,
|
|
OrderedList = 5,
|
|
BulletList = 6,
|
|
TaskList = 7,
|
|
DefinitionList = 8,
|
|
DefinitionTerm = 9,
|
|
DefinitionDescription = 10,
|
|
Div = 11,
|
|
Section = 12,
|
|
ThematicBreak = 13,
|
|
RawBlock = 14,
|
|
MathDisplay = 15,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmBlockType {
|
|
fn default() -> Self {
|
|
Self::Paragraph
|
|
}
|
|
}
|
|
|
|
impl WasmBlockType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Paragraph => "paragraph",
|
|
Self::Heading => "heading",
|
|
Self::Blockquote => "blockquote",
|
|
Self::CodeBlock => "code_block",
|
|
Self::ListItem => "list_item",
|
|
Self::OrderedList => "ordered_list",
|
|
Self::BulletList => "bullet_list",
|
|
Self::TaskList => "task_list",
|
|
Self::DefinitionList => "definition_list",
|
|
Self::DefinitionTerm => "definition_term",
|
|
Self::DefinitionDescription => "definition_description",
|
|
Self::Div => "div",
|
|
Self::Section => "section",
|
|
Self::ThematicBreak => "thematic_break",
|
|
Self::RawBlock => "raw_block",
|
|
Self::MathDisplay => "math_display",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"paragraph" => Some(Self::Paragraph),
|
|
"heading" => Some(Self::Heading),
|
|
"blockquote" => Some(Self::Blockquote),
|
|
"code_block" => Some(Self::CodeBlock),
|
|
"list_item" => Some(Self::ListItem),
|
|
"ordered_list" => Some(Self::OrderedList),
|
|
"bullet_list" => Some(Self::BulletList),
|
|
"task_list" => Some(Self::TaskList),
|
|
"definition_list" => Some(Self::DefinitionList),
|
|
"definition_term" => Some(Self::DefinitionTerm),
|
|
"definition_description" => Some(Self::DefinitionDescription),
|
|
"div" => Some(Self::Div),
|
|
"section" => Some(Self::Section),
|
|
"thematic_break" => Some(Self::ThematicBreak),
|
|
"raw_block" => Some(Self::RawBlock),
|
|
"math_display" => Some(Self::MathDisplay),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Types of inline elements in Djot.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmInlineType {
|
|
Text = 0,
|
|
Strong = 1,
|
|
Emphasis = 2,
|
|
Highlight = 3,
|
|
Subscript = 4,
|
|
Superscript = 5,
|
|
Insert = 6,
|
|
Delete = 7,
|
|
Code = 8,
|
|
Link = 9,
|
|
Image = 10,
|
|
Span = 11,
|
|
Math = 12,
|
|
RawInline = 13,
|
|
FootnoteRef = 14,
|
|
Symbol = 15,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmInlineType {
|
|
fn default() -> Self {
|
|
Self::Text
|
|
}
|
|
}
|
|
|
|
impl WasmInlineType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Text => "text",
|
|
Self::Strong => "strong",
|
|
Self::Emphasis => "emphasis",
|
|
Self::Highlight => "highlight",
|
|
Self::Subscript => "subscript",
|
|
Self::Superscript => "superscript",
|
|
Self::Insert => "insert",
|
|
Self::Delete => "delete",
|
|
Self::Code => "code",
|
|
Self::Link => "link",
|
|
Self::Image => "image",
|
|
Self::Span => "span",
|
|
Self::Math => "math",
|
|
Self::RawInline => "raw_inline",
|
|
Self::FootnoteRef => "footnote_ref",
|
|
Self::Symbol => "symbol",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"text" => Some(Self::Text),
|
|
"strong" => Some(Self::Strong),
|
|
"emphasis" => Some(Self::Emphasis),
|
|
"highlight" => Some(Self::Highlight),
|
|
"subscript" => Some(Self::Subscript),
|
|
"superscript" => Some(Self::Superscript),
|
|
"insert" => Some(Self::Insert),
|
|
"delete" => Some(Self::Delete),
|
|
"code" => Some(Self::Code),
|
|
"link" => Some(Self::Link),
|
|
"image" => Some(Self::Image),
|
|
"span" => Some(Self::Span),
|
|
"math" => Some(Self::Math),
|
|
"raw_inline" => Some(Self::RawInline),
|
|
"footnote_ref" => Some(Self::FootnoteRef),
|
|
"symbol" => Some(Self::Symbol),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Semantic kind of a relationship between document elements.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmRelationshipKind {
|
|
FootnoteReference = 0,
|
|
CitationReference = 1,
|
|
InternalLink = 2,
|
|
Caption = 3,
|
|
Label = 4,
|
|
TocEntry = 5,
|
|
CrossReference = 6,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmRelationshipKind {
|
|
fn default() -> Self {
|
|
Self::FootnoteReference
|
|
}
|
|
}
|
|
|
|
impl WasmRelationshipKind {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::FootnoteReference => "footnote_reference",
|
|
Self::CitationReference => "citation_reference",
|
|
Self::InternalLink => "internal_link",
|
|
Self::Caption => "caption",
|
|
Self::Label => "label",
|
|
Self::TocEntry => "toc_entry",
|
|
Self::CrossReference => "cross_reference",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"footnote_reference" => Some(Self::FootnoteReference),
|
|
"citation_reference" => Some(Self::CitationReference),
|
|
"internal_link" => Some(Self::InternalLink),
|
|
"caption" => Some(Self::Caption),
|
|
"label" => Some(Self::Label),
|
|
"toc_entry" => Some(Self::TocEntry),
|
|
"cross_reference" => Some(Self::CrossReference),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Content layer classification for document nodes.
|
|
///
|
|
/// Replaces separate body/furniture arrays with per-node granularity.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmContentLayer {
|
|
Body = 0,
|
|
Header = 1,
|
|
Footer = 2,
|
|
Footnote = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmContentLayer {
|
|
fn default() -> Self {
|
|
Self::Body
|
|
}
|
|
}
|
|
|
|
impl WasmContentLayer {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Body => "body",
|
|
Self::Header => "header",
|
|
Self::Footer => "footer",
|
|
Self::Footnote => "footnote",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"body" => Some(Self::Body),
|
|
"header" => Some(Self::Header),
|
|
"footer" => Some(Self::Footer),
|
|
"footnote" => Some(Self::Footnote),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Tagged enum for node content. Each variant carries only type-specific data.
|
|
///
|
|
/// Uses `#[serde(tag = "node_type")]` to avoid "type" keyword collision in
|
|
/// Go/Java/TypeScript bindings.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmNodeContent {
|
|
pub(crate) node_type: String,
|
|
pub(crate) text: Option<String>,
|
|
pub(crate) level: Option<u8>,
|
|
pub(crate) ordered: Option<bool>,
|
|
pub(crate) grid: Option<WasmTableGrid>,
|
|
pub(crate) description: Option<String>,
|
|
pub(crate) image_index: Option<u32>,
|
|
pub(crate) src: Option<String>,
|
|
pub(crate) language: Option<String>,
|
|
pub(crate) label: Option<String>,
|
|
pub(crate) heading_level: Option<u8>,
|
|
pub(crate) heading_text: Option<String>,
|
|
pub(crate) number: Option<u32>,
|
|
pub(crate) title: Option<String>,
|
|
pub(crate) term: Option<String>,
|
|
pub(crate) definition: Option<String>,
|
|
pub(crate) key: Option<String>,
|
|
pub(crate) kind: Option<String>,
|
|
pub(crate) format: Option<String>,
|
|
pub(crate) content: Option<String>,
|
|
pub(crate) entries: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmNodeContent {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmNodeContent {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmNodeContent {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "nodeType")]
|
|
pub fn node_type(&self) -> String {
|
|
self.node_type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "nodeType")]
|
|
pub fn set_node_type(&mut self, value: String) {
|
|
self.node_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "text")]
|
|
pub fn text(&self) -> Option<String> {
|
|
self.text.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "text")]
|
|
pub fn set_text(&mut self, value: Option<String>) {
|
|
self.text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "level")]
|
|
pub fn level(&self) -> Option<u8> {
|
|
self.level.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "level")]
|
|
pub fn set_level(&mut self, value: Option<u8>) {
|
|
self.level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "ordered")]
|
|
pub fn ordered(&self) -> Option<bool> {
|
|
self.ordered.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "ordered")]
|
|
pub fn set_ordered(&mut self, value: Option<bool>) {
|
|
self.ordered = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "grid")]
|
|
pub fn grid(&self) -> Option<WasmTableGrid> {
|
|
self.grid.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "grid")]
|
|
pub fn set_grid(&mut self, value: Option<WasmTableGrid>) {
|
|
self.grid = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "description")]
|
|
pub fn description(&self) -> Option<String> {
|
|
self.description.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "description")]
|
|
pub fn set_description(&mut self, value: Option<String>) {
|
|
self.description = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "imageIndex")]
|
|
pub fn image_index(&self) -> Option<u32> {
|
|
self.image_index.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "imageIndex")]
|
|
pub fn set_image_index(&mut self, value: Option<u32>) {
|
|
self.image_index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "src")]
|
|
pub fn src(&self) -> Option<String> {
|
|
self.src.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "src")]
|
|
pub fn set_src(&mut self, value: Option<String>) {
|
|
self.src = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "language")]
|
|
pub fn language(&self) -> Option<String> {
|
|
self.language.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "language")]
|
|
pub fn set_language(&mut self, value: Option<String>) {
|
|
self.language = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "label")]
|
|
pub fn label(&self) -> Option<String> {
|
|
self.label.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "label")]
|
|
pub fn set_label(&mut self, value: Option<String>) {
|
|
self.label = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "headingLevel")]
|
|
pub fn heading_level(&self) -> Option<u8> {
|
|
self.heading_level.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "headingLevel")]
|
|
pub fn set_heading_level(&mut self, value: Option<u8>) {
|
|
self.heading_level = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "headingText")]
|
|
pub fn heading_text(&self) -> Option<String> {
|
|
self.heading_text.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "headingText")]
|
|
pub fn set_heading_text(&mut self, value: Option<String>) {
|
|
self.heading_text = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "number")]
|
|
pub fn number(&self) -> Option<u32> {
|
|
self.number.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "number")]
|
|
pub fn set_number(&mut self, value: Option<u32>) {
|
|
self.number = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "title")]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "title")]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "term")]
|
|
pub fn term(&self) -> Option<String> {
|
|
self.term.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "term")]
|
|
pub fn set_term(&mut self, value: Option<String>) {
|
|
self.term = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "definition")]
|
|
pub fn definition(&self) -> Option<String> {
|
|
self.definition.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "definition")]
|
|
pub fn set_definition(&mut self, value: Option<String>) {
|
|
self.definition = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "key")]
|
|
pub fn key(&self) -> Option<String> {
|
|
self.key.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "key")]
|
|
pub fn set_key(&mut self, value: Option<String>) {
|
|
self.key = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "kind")]
|
|
pub fn kind(&self) -> Option<String> {
|
|
self.kind.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "kind")]
|
|
pub fn set_kind(&mut self, value: Option<String>) {
|
|
self.kind = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "format")]
|
|
pub fn format(&self) -> Option<String> {
|
|
self.format.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "format")]
|
|
pub fn set_format(&mut self, value: Option<String>) {
|
|
self.format = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "content")]
|
|
pub fn content(&self) -> Option<String> {
|
|
self.content.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "content")]
|
|
pub fn set_content(&mut self, value: Option<String>) {
|
|
self.content = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "entries")]
|
|
pub fn entries(&self) -> Option<JsValue> {
|
|
self.entries.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "entries")]
|
|
pub fn set_entries(&mut self, value: Option<JsValue>) {
|
|
self.entries = value;
|
|
}
|
|
}
|
|
|
|
/// Types of inline text annotations.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmAnnotationKind {
|
|
pub(crate) annotation_type: String,
|
|
pub(crate) url: Option<String>,
|
|
pub(crate) title: Option<String>,
|
|
pub(crate) value: Option<String>,
|
|
pub(crate) name: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmAnnotationKind {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmAnnotationKind {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmAnnotationKind {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "annotationType")]
|
|
pub fn annotation_type(&self) -> String {
|
|
self.annotation_type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "annotationType")]
|
|
pub fn set_annotation_type(&mut self, value: String) {
|
|
self.annotation_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "url")]
|
|
pub fn url(&self) -> Option<String> {
|
|
self.url.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "url")]
|
|
pub fn set_url(&mut self, value: Option<String>) {
|
|
self.url = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "title")]
|
|
pub fn title(&self) -> Option<String> {
|
|
self.title.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "title")]
|
|
pub fn set_title(&mut self, value: Option<String>) {
|
|
self.title = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "value")]
|
|
pub fn value(&self) -> Option<String> {
|
|
self.value.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "value")]
|
|
pub fn set_value(&mut self, value: Option<String>) {
|
|
self.value = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "name")]
|
|
pub fn name(&self) -> Option<String> {
|
|
self.name.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "name")]
|
|
pub fn set_name(&mut self, value: Option<String>) {
|
|
self.name = value;
|
|
}
|
|
}
|
|
|
|
/// How the extracted text was produced.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmExtractionMethod {
|
|
Native = 0,
|
|
Ocr = 1,
|
|
Mixed = 2,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmExtractionMethod {
|
|
fn default() -> Self {
|
|
Self::Native
|
|
}
|
|
}
|
|
|
|
impl WasmExtractionMethod {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Native => "native",
|
|
Self::Ocr => "ocr",
|
|
Self::Mixed => "mixed",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"native" => Some(Self::Native),
|
|
"ocr" => Some(Self::Ocr),
|
|
"mixed" => Some(Self::Mixed),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Semantic structural classification of a text chunk.
|
|
///
|
|
/// Assigned by the heuristic classifier in `chunking.classifier`.
|
|
/// Defaults to `Unknown` when no rule matches.
|
|
/// Designed to be extended in future versions without breaking changes.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmChunkType {
|
|
Heading = 0,
|
|
PartyList = 1,
|
|
Definitions = 2,
|
|
OperativeClause = 3,
|
|
SignatureBlock = 4,
|
|
Schedule = 5,
|
|
TableLike = 6,
|
|
Formula = 7,
|
|
CodeBlock = 8,
|
|
Image = 9,
|
|
OrgChart = 10,
|
|
Diagram = 11,
|
|
Unknown = 12,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmChunkType {
|
|
fn default() -> Self {
|
|
Self::Unknown
|
|
}
|
|
}
|
|
|
|
impl WasmChunkType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Heading => "heading",
|
|
Self::PartyList => "party_list",
|
|
Self::Definitions => "definitions",
|
|
Self::OperativeClause => "operative_clause",
|
|
Self::SignatureBlock => "signature_block",
|
|
Self::Schedule => "schedule",
|
|
Self::TableLike => "table_like",
|
|
Self::Formula => "formula",
|
|
Self::CodeBlock => "code_block",
|
|
Self::Image => "image",
|
|
Self::OrgChart => "org_chart",
|
|
Self::Diagram => "diagram",
|
|
Self::Unknown => "unknown",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"heading" => Some(Self::Heading),
|
|
"party_list" => Some(Self::PartyList),
|
|
"definitions" => Some(Self::Definitions),
|
|
"operative_clause" => Some(Self::OperativeClause),
|
|
"signature_block" => Some(Self::SignatureBlock),
|
|
"schedule" => Some(Self::Schedule),
|
|
"table_like" => Some(Self::TableLike),
|
|
"formula" => Some(Self::Formula),
|
|
"code_block" => Some(Self::CodeBlock),
|
|
"image" => Some(Self::Image),
|
|
"org_chart" => Some(Self::OrgChart),
|
|
"diagram" => Some(Self::Diagram),
|
|
"unknown" => Some(Self::Unknown),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Heuristic classification of what an image likely depicts.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmImageKind {
|
|
Photograph = 0,
|
|
Diagram = 1,
|
|
Chart = 2,
|
|
Drawing = 3,
|
|
TextBlock = 4,
|
|
Decoration = 5,
|
|
Logo = 6,
|
|
Icon = 7,
|
|
TileFragment = 8,
|
|
Mask = 9,
|
|
PageRaster = 10,
|
|
Unknown = 11,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmImageKind {
|
|
fn default() -> Self {
|
|
Self::Photograph
|
|
}
|
|
}
|
|
|
|
impl WasmImageKind {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Photograph => "photograph",
|
|
Self::Diagram => "diagram",
|
|
Self::Chart => "chart",
|
|
Self::Drawing => "drawing",
|
|
Self::TextBlock => "text_block",
|
|
Self::Decoration => "decoration",
|
|
Self::Logo => "logo",
|
|
Self::Icon => "icon",
|
|
Self::TileFragment => "tile_fragment",
|
|
Self::Mask => "mask",
|
|
Self::PageRaster => "page_raster",
|
|
Self::Unknown => "unknown",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"photograph" => Some(Self::Photograph),
|
|
"diagram" => Some(Self::Diagram),
|
|
"chart" => Some(Self::Chart),
|
|
"drawing" => Some(Self::Drawing),
|
|
"text_block" => Some(Self::TextBlock),
|
|
"decoration" => Some(Self::Decoration),
|
|
"logo" => Some(Self::Logo),
|
|
"icon" => Some(Self::Icon),
|
|
"tile_fragment" => Some(Self::TileFragment),
|
|
"mask" => Some(Self::Mask),
|
|
"page_raster" => Some(Self::PageRaster),
|
|
"unknown" => Some(Self::Unknown),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Result-shape selection for extraction results.
|
|
///
|
|
/// Distinct from `OutputFormat` (which controls rendering — Plain, Markdown,
|
|
/// HTML, etc.). `ResultFormat` controls the *shape* of the result: a unified content
|
|
/// blob vs. an element-based decomposition.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmResultFormat {
|
|
Unified = 0,
|
|
ElementBased = 1,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmResultFormat {
|
|
fn default() -> Self {
|
|
Self::Unified
|
|
}
|
|
}
|
|
|
|
impl WasmResultFormat {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Unified => "unified",
|
|
Self::ElementBased => "element_based",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"unified" => Some(Self::Unified),
|
|
"element_based" => Some(Self::ElementBased),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Semantic element type classification.
|
|
///
|
|
/// Categorizes text content into semantic units for downstream processing.
|
|
/// Supports the element types commonly found in Unstructured documents.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmElementType {
|
|
Title = 0,
|
|
NarrativeText = 1,
|
|
Heading = 2,
|
|
ListItem = 3,
|
|
Table = 4,
|
|
Image = 5,
|
|
PageBreak = 6,
|
|
CodeBlock = 7,
|
|
BlockQuote = 8,
|
|
Footer = 9,
|
|
Header = 10,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmElementType {
|
|
fn default() -> Self {
|
|
Self::Title
|
|
}
|
|
}
|
|
|
|
impl WasmElementType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Title => "title",
|
|
Self::NarrativeText => "narrative_text",
|
|
Self::Heading => "heading",
|
|
Self::ListItem => "list_item",
|
|
Self::Table => "table",
|
|
Self::Image => "image",
|
|
Self::PageBreak => "page_break",
|
|
Self::CodeBlock => "code_block",
|
|
Self::BlockQuote => "block_quote",
|
|
Self::Footer => "footer",
|
|
Self::Header => "header",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"title" => Some(Self::Title),
|
|
"narrative_text" => Some(Self::NarrativeText),
|
|
"heading" => Some(Self::Heading),
|
|
"list_item" => Some(Self::ListItem),
|
|
"table" => Some(Self::Table),
|
|
"image" => Some(Self::Image),
|
|
"page_break" => Some(Self::PageBreak),
|
|
"code_block" => Some(Self::CodeBlock),
|
|
"block_quote" => Some(Self::BlockQuote),
|
|
"footer" => Some(Self::Footer),
|
|
"header" => Some(Self::Header),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Format-specific metadata (discriminated union).
|
|
///
|
|
/// Only one format type can exist per extraction result. This provides
|
|
/// type-safe, clean metadata without nested optionals.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmFormatMetadata {
|
|
pub(crate) format_type: String,
|
|
pub(crate) _0: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmFormatMetadata {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmFormatMetadata {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmFormatMetadata {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "formatType")]
|
|
pub fn format_type(&self) -> String {
|
|
self.format_type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "formatType")]
|
|
pub fn set_format_type(&mut self, value: String) {
|
|
self.format_type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "0")]
|
|
pub fn field_0(&self) -> Option<JsValue> {
|
|
self._0.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "0")]
|
|
pub fn set_field_0(&mut self, value: Option<JsValue>) {
|
|
self._0 = value;
|
|
}
|
|
}
|
|
|
|
/// Text direction enumeration for HTML documents.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmTextDirection {
|
|
LeftToRight = 0,
|
|
RightToLeft = 1,
|
|
Auto = 2,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmTextDirection {
|
|
fn default() -> Self {
|
|
Self::LeftToRight
|
|
}
|
|
}
|
|
|
|
impl WasmTextDirection {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::LeftToRight => "ltr",
|
|
Self::RightToLeft => "rtl",
|
|
Self::Auto => "auto",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"ltr" => Some(Self::LeftToRight),
|
|
"rtl" => Some(Self::RightToLeft),
|
|
"auto" => Some(Self::Auto),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Link type classification.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmLinkType {
|
|
Anchor = 0,
|
|
Internal = 1,
|
|
External = 2,
|
|
Email = 3,
|
|
Phone = 4,
|
|
Other = 5,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmLinkType {
|
|
fn default() -> Self {
|
|
Self::Anchor
|
|
}
|
|
}
|
|
|
|
impl WasmLinkType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Anchor => "anchor",
|
|
Self::Internal => "internal",
|
|
Self::External => "external",
|
|
Self::Email => "email",
|
|
Self::Phone => "phone",
|
|
Self::Other => "other",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"anchor" => Some(Self::Anchor),
|
|
"internal" => Some(Self::Internal),
|
|
"external" => Some(Self::External),
|
|
"email" => Some(Self::Email),
|
|
"phone" => Some(Self::Phone),
|
|
"other" => Some(Self::Other),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Image type classification.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmImageType {
|
|
DataUri = 0,
|
|
InlineSvg = 1,
|
|
External = 2,
|
|
Relative = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmImageType {
|
|
fn default() -> Self {
|
|
Self::DataUri
|
|
}
|
|
}
|
|
|
|
impl WasmImageType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::DataUri => "data-uri",
|
|
Self::InlineSvg => "inline-svg",
|
|
Self::External => "external",
|
|
Self::Relative => "relative",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"data-uri" => Some(Self::DataUri),
|
|
"inline-svg" => Some(Self::InlineSvg),
|
|
"external" => Some(Self::External),
|
|
"relative" => Some(Self::Relative),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Structured data type classification.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmStructuredDataType {
|
|
JsonLd = 0,
|
|
Microdata = 1,
|
|
RDFa = 2,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmStructuredDataType {
|
|
fn default() -> Self {
|
|
Self::JsonLd
|
|
}
|
|
}
|
|
|
|
impl WasmStructuredDataType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::JsonLd => "json-ld",
|
|
Self::Microdata => "microdata",
|
|
Self::RDFa => "rdfa",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"json-ld" => Some(Self::JsonLd),
|
|
"microdata" => Some(Self::Microdata),
|
|
"rdfa" => Some(Self::RDFa),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Bounding geometry for an OCR element.
|
|
///
|
|
/// Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals
|
|
/// (from PaddleOCR and rotated text detection).
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmOcrBoundingGeometry {
|
|
pub(crate) r#type: String,
|
|
pub(crate) left: Option<u32>,
|
|
pub(crate) top: Option<u32>,
|
|
pub(crate) width: Option<u32>,
|
|
pub(crate) height: Option<u32>,
|
|
pub(crate) points: Option<JsValue>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmOcrBoundingGeometry {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmOcrBoundingGeometry {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmOcrBoundingGeometry {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "type")]
|
|
pub fn r#type(&self) -> String {
|
|
self.r#type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "type")]
|
|
pub fn set_type(&mut self, value: String) {
|
|
self.r#type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "left")]
|
|
pub fn left(&self) -> Option<u32> {
|
|
self.left.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "left")]
|
|
pub fn set_left(&mut self, value: Option<u32>) {
|
|
self.left = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "top")]
|
|
pub fn top(&self) -> Option<u32> {
|
|
self.top.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "top")]
|
|
pub fn set_top(&mut self, value: Option<u32>) {
|
|
self.top = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "width")]
|
|
pub fn width(&self) -> Option<u32> {
|
|
self.width.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "width")]
|
|
pub fn set_width(&mut self, value: Option<u32>) {
|
|
self.width = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "height")]
|
|
pub fn height(&self) -> Option<u32> {
|
|
self.height.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "height")]
|
|
pub fn set_height(&mut self, value: Option<u32>) {
|
|
self.height = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "points")]
|
|
pub fn points(&self) -> Option<JsValue> {
|
|
self.points.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "points")]
|
|
pub fn set_points(&mut self, value: Option<JsValue>) {
|
|
self.points = value;
|
|
}
|
|
}
|
|
|
|
/// Hierarchical level of an OCR element.
|
|
///
|
|
/// Maps to Tesseract's page segmentation hierarchy and provides
|
|
/// equivalent semantics for PaddleOCR.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmOcrElementLevel {
|
|
Word = 0,
|
|
Line = 1,
|
|
Block = 2,
|
|
Page = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmOcrElementLevel {
|
|
fn default() -> Self {
|
|
Self::Line
|
|
}
|
|
}
|
|
|
|
impl WasmOcrElementLevel {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Word => "word",
|
|
Self::Line => "line",
|
|
Self::Block => "block",
|
|
Self::Page => "page",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"word" => Some(Self::Word),
|
|
"line" => Some(Self::Line),
|
|
"block" => Some(Self::Block),
|
|
"page" => Some(Self::Page),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Type of paginated unit in a document.
|
|
///
|
|
/// Distinguishes between different types of "pages" (PDF pages, presentation slides, spreadsheet sheets).
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmPageUnitType {
|
|
Page = 0,
|
|
Slide = 1,
|
|
Sheet = 2,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmPageUnitType {
|
|
fn default() -> Self {
|
|
Self::Page
|
|
}
|
|
}
|
|
|
|
impl WasmPageUnitType {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Page => "page",
|
|
Self::Slide => "slide",
|
|
Self::Sheet => "sheet",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"page" => Some(Self::Page),
|
|
"slide" => Some(Self::Slide),
|
|
"sheet" => Some(Self::Sheet),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A single line in a unified-diff hunk.
|
|
///
|
|
/// Defined here (rather than only in `crate.diff`) so `RevisionDelta` can
|
|
/// reference it unconditionally, without requiring the `diff` Cargo feature.
|
|
/// `crate.diff` re-exports this type verbatim.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmDiffLine {
|
|
pub(crate) kind: String,
|
|
pub(crate) _0: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmDiffLine {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmDiffLine {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmDiffLine {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "kind")]
|
|
pub fn kind(&self) -> String {
|
|
self.kind.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "kind")]
|
|
pub fn set_kind(&mut self, value: String) {
|
|
self.kind = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "0")]
|
|
pub fn field_0(&self) -> Option<String> {
|
|
self._0.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "0")]
|
|
pub fn set_field_0(&mut self, value: Option<String>) {
|
|
self._0 = value;
|
|
}
|
|
}
|
|
|
|
/// Semantic classification of a tracked change.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmRevisionKind {
|
|
Insertion = 0,
|
|
Deletion = 1,
|
|
FormatChange = 2,
|
|
Comment = 3,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmRevisionKind {
|
|
fn default() -> Self {
|
|
Self::Insertion
|
|
}
|
|
}
|
|
|
|
impl WasmRevisionKind {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Insertion => "insertion",
|
|
Self::Deletion => "deletion",
|
|
Self::FormatChange => "format_change",
|
|
Self::Comment => "comment",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"insertion" => Some(Self::Insertion),
|
|
"deletion" => Some(Self::Deletion),
|
|
"format_change" => Some(Self::FormatChange),
|
|
"comment" => Some(Self::Comment),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Best-effort document location for a revision.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Default)]
|
|
pub struct WasmRevisionAnchor {
|
|
pub(crate) r#type: String,
|
|
pub(crate) index: Option<usize>,
|
|
pub(crate) row: Option<usize>,
|
|
pub(crate) col: Option<usize>,
|
|
pub(crate) table_index: Option<usize>,
|
|
pub(crate) name: Option<String>,
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
impl WasmRevisionAnchor {
|
|
#[wasm_bindgen(constructor)]
|
|
pub fn new() -> WasmRevisionAnchor {
|
|
Self::default()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
#[allow(clippy::should_implement_trait)]
|
|
pub fn default() -> WasmRevisionAnchor {
|
|
<Self as ::core::default::Default>::default()
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "type")]
|
|
pub fn r#type(&self) -> String {
|
|
self.r#type.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "type")]
|
|
pub fn set_type(&mut self, value: String) {
|
|
self.r#type = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "index")]
|
|
pub fn index(&self) -> Option<usize> {
|
|
self.index.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "index")]
|
|
pub fn set_index(&mut self, value: Option<usize>) {
|
|
self.index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "row")]
|
|
pub fn row(&self) -> Option<usize> {
|
|
self.row.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "row")]
|
|
pub fn set_row(&mut self, value: Option<usize>) {
|
|
self.row = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "col")]
|
|
pub fn col(&self) -> Option<usize> {
|
|
self.col.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "col")]
|
|
pub fn set_col(&mut self, value: Option<usize>) {
|
|
self.col = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "tableIndex")]
|
|
pub fn table_index(&self) -> Option<usize> {
|
|
self.table_index.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "tableIndex")]
|
|
pub fn set_table_index(&mut self, value: Option<usize>) {
|
|
self.table_index = value;
|
|
}
|
|
|
|
#[wasm_bindgen(getter, js_name = "name")]
|
|
pub fn name(&self) -> Option<String> {
|
|
self.name.clone()
|
|
}
|
|
#[wasm_bindgen(setter, js_name = "name")]
|
|
pub fn set_name(&mut self, value: Option<String>) {
|
|
self.name = value;
|
|
}
|
|
}
|
|
|
|
/// Semantic classification of an extracted URI.
|
|
|
|
#[wasm_bindgen]
|
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
pub enum WasmUriKind {
|
|
Hyperlink = 0,
|
|
Image = 1,
|
|
Anchor = 2,
|
|
Citation = 3,
|
|
Reference = 4,
|
|
Email = 5,
|
|
}
|
|
|
|
#[allow(clippy::derivable_impls)]
|
|
impl Default for WasmUriKind {
|
|
fn default() -> Self {
|
|
Self::Hyperlink
|
|
}
|
|
}
|
|
|
|
impl WasmUriKind {
|
|
/// Returns the serde wire string for this variant (e.g. `"stop"`, `"tool_calls"`).
|
|
pub fn to_api_str(self) -> &'static str {
|
|
match self {
|
|
Self::Hyperlink => "hyperlink",
|
|
Self::Image => "image",
|
|
Self::Anchor => "anchor",
|
|
Self::Citation => "citation",
|
|
Self::Reference => "reference",
|
|
Self::Email => "email",
|
|
}
|
|
}
|
|
|
|
/// Parses a serde wire string and returns the corresponding variant, or None if unrecognized.
|
|
pub fn from_api_str(s: &str) -> Option<Self> {
|
|
match s {
|
|
"hyperlink" => Some(Self::Hyperlink),
|
|
"image" => Some(Self::Image),
|
|
"anchor" => Some(Self::Anchor),
|
|
"citation" => Some(Self::Citation),
|
|
"reference" => Some(Self::Reference),
|
|
"email" => Some(Self::Email),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(serde::Deserialize, Default)]
|
|
#[serde(default)]
|
|
pub struct ExtractionConfigInput {
|
|
#[serde(rename = "useCache")]
|
|
pub use_cache: Option<bool>,
|
|
#[serde(rename = "enableQualityProcessing")]
|
|
pub enable_quality_processing: Option<bool>,
|
|
#[serde(rename = "ocr")]
|
|
pub ocr: Option<kreuzberg::OcrConfig>,
|
|
#[serde(rename = "forceOcr")]
|
|
pub force_ocr: Option<bool>,
|
|
#[serde(rename = "forceOcrPages")]
|
|
pub force_ocr_pages: Option<Vec<u32>>,
|
|
#[serde(rename = "disableOcr")]
|
|
pub disable_ocr: Option<bool>,
|
|
#[serde(rename = "chunking")]
|
|
pub chunking: Option<kreuzberg::ChunkingConfig>,
|
|
#[serde(rename = "contentFilter")]
|
|
pub content_filter: Option<kreuzberg::ContentFilterConfig>,
|
|
#[serde(rename = "images")]
|
|
pub images: Option<kreuzberg::ImageExtractionConfig>,
|
|
#[cfg(feature = "pdf")]
|
|
#[serde(skip)]
|
|
pub pdf_options: Option<kreuzberg::PdfConfig>,
|
|
#[serde(rename = "tokenReduction")]
|
|
pub token_reduction: Option<kreuzberg::TokenReductionOptions>,
|
|
#[serde(rename = "languageDetection")]
|
|
pub language_detection: Option<kreuzberg::LanguageDetectionConfig>,
|
|
#[serde(rename = "pages")]
|
|
pub pages: Option<kreuzberg::PageConfig>,
|
|
#[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
|
|
#[serde(skip)]
|
|
pub keywords: Option<kreuzberg::KeywordConfig>,
|
|
#[serde(rename = "postprocessor")]
|
|
pub postprocessor: Option<kreuzberg::PostProcessorConfig>,
|
|
#[cfg(feature = "html")]
|
|
#[serde(skip)]
|
|
pub html_options: Option<String>,
|
|
#[cfg(feature = "html")]
|
|
#[serde(skip)]
|
|
pub html_output: Option<kreuzberg::HtmlOutputConfig>,
|
|
#[serde(rename = "extractionTimeoutSecs")]
|
|
pub extraction_timeout_secs: Option<u64>,
|
|
#[serde(rename = "maxConcurrentExtractions")]
|
|
pub max_concurrent_extractions: Option<usize>,
|
|
#[serde(rename = "resultFormat")]
|
|
pub result_format: Option<kreuzberg::ResultFormat>,
|
|
#[serde(rename = "securityLimits")]
|
|
pub security_limits: Option<kreuzberg::SecurityLimits>,
|
|
#[serde(rename = "maxEmbeddedFileBytes")]
|
|
pub max_embedded_file_bytes: Option<u64>,
|
|
#[serde(rename = "outputFormat")]
|
|
pub output_format: Option<kreuzberg::OutputFormat>,
|
|
#[cfg(feature = "layout-types")]
|
|
#[serde(skip)]
|
|
pub layout: Option<kreuzberg::LayoutDetectionConfig>,
|
|
#[serde(rename = "useLayoutForMarkdown")]
|
|
pub use_layout_for_markdown: Option<bool>,
|
|
#[serde(rename = "includeDocumentStructure")]
|
|
pub include_document_structure: Option<bool>,
|
|
#[serde(rename = "acceleration")]
|
|
pub acceleration: Option<kreuzberg::AccelerationConfig>,
|
|
#[serde(rename = "cacheNamespace")]
|
|
pub cache_namespace: Option<String>,
|
|
#[serde(rename = "cacheTtlSecs")]
|
|
pub cache_ttl_secs: Option<u64>,
|
|
#[serde(rename = "email")]
|
|
pub email: Option<kreuzberg::EmailConfig>,
|
|
#[serde(rename = "concurrency")]
|
|
pub concurrency: Option<String>,
|
|
#[serde(rename = "maxArchiveDepth")]
|
|
pub max_archive_depth: Option<usize>,
|
|
#[cfg(feature = "tree-sitter")]
|
|
#[serde(skip)]
|
|
pub tree_sitter: Option<kreuzberg::TreeSitterConfig>,
|
|
#[serde(rename = "structuredExtraction")]
|
|
pub structured_extraction: Option<kreuzberg::StructuredExtractionConfig>,
|
|
#[serde(rename = "cancelToken")]
|
|
pub cancel_token: Option<String>,
|
|
}
|
|
|
|
impl From<ExtractionConfigInput> for kreuzberg::ExtractionConfig {
|
|
fn from(val: ExtractionConfigInput) -> Self {
|
|
let mut out = Self::default();
|
|
if let Some(v) = val.use_cache {
|
|
out.use_cache = v.into();
|
|
}
|
|
if let Some(v) = val.enable_quality_processing {
|
|
out.enable_quality_processing = v.into();
|
|
}
|
|
if let Some(v) = val.ocr {
|
|
out.ocr = v.into();
|
|
}
|
|
if let Some(v) = val.force_ocr {
|
|
out.force_ocr = v.into();
|
|
}
|
|
if let Some(v) = val.force_ocr_pages {
|
|
out.force_ocr_pages = v.into();
|
|
}
|
|
if let Some(v) = val.disable_ocr {
|
|
out.disable_ocr = v.into();
|
|
}
|
|
if let Some(v) = val.chunking {
|
|
out.chunking = v.into();
|
|
}
|
|
if let Some(v) = val.content_filter {
|
|
out.content_filter = v.into();
|
|
}
|
|
if let Some(v) = val.images {
|
|
out.images = v.into();
|
|
}
|
|
#[cfg(feature = "pdf")]
|
|
if let Some(v) = val.pdf_options {
|
|
out.pdf_options = v.into();
|
|
}
|
|
if let Some(v) = val.token_reduction {
|
|
out.token_reduction = v.into();
|
|
}
|
|
if let Some(v) = val.language_detection {
|
|
out.language_detection = v.into();
|
|
}
|
|
if let Some(v) = val.pages {
|
|
out.pages = v.into();
|
|
}
|
|
#[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
|
|
if let Some(v) = val.keywords {
|
|
out.keywords = v.into();
|
|
}
|
|
if let Some(v) = val.postprocessor {
|
|
out.postprocessor = v.into();
|
|
}
|
|
#[cfg(feature = "html")]
|
|
if let Some(v) = val.html_options {
|
|
out.html_options = serde_json::from_str(&v).unwrap_or_default();
|
|
}
|
|
#[cfg(feature = "html")]
|
|
if let Some(v) = val.html_output {
|
|
out.html_output = v.into();
|
|
}
|
|
if let Some(v) = val.extraction_timeout_secs {
|
|
out.extraction_timeout_secs = v.into();
|
|
}
|
|
if let Some(v) = val.max_concurrent_extractions {
|
|
out.max_concurrent_extractions = v.into();
|
|
}
|
|
if let Some(v) = val.result_format {
|
|
out.result_format = v.into();
|
|
}
|
|
if let Some(v) = val.security_limits {
|
|
out.security_limits = v.into();
|
|
}
|
|
if let Some(v) = val.max_embedded_file_bytes {
|
|
out.max_embedded_file_bytes = v.into();
|
|
}
|
|
if let Some(v) = val.output_format {
|
|
out.output_format = v.into();
|
|
}
|
|
#[cfg(feature = "layout-types")]
|
|
if let Some(v) = val.layout {
|
|
out.layout = v.into();
|
|
}
|
|
if let Some(v) = val.use_layout_for_markdown {
|
|
out.use_layout_for_markdown = v.into();
|
|
}
|
|
if let Some(v) = val.include_document_structure {
|
|
out.include_document_structure = v.into();
|
|
}
|
|
if let Some(v) = val.acceleration {
|
|
out.acceleration = v.into();
|
|
}
|
|
if let Some(v) = val.cache_namespace {
|
|
out.cache_namespace = v.into();
|
|
}
|
|
if let Some(v) = val.cache_ttl_secs {
|
|
out.cache_ttl_secs = v.into();
|
|
}
|
|
if let Some(v) = val.email {
|
|
out.email = v.into();
|
|
}
|
|
if let Some(v) = val.concurrency {
|
|
out.concurrency = serde_json::from_str(&v).unwrap_or_default();
|
|
}
|
|
if let Some(v) = val.max_archive_depth {
|
|
out.max_archive_depth = v.into();
|
|
}
|
|
#[cfg(feature = "tree-sitter")]
|
|
if let Some(v) = val.tree_sitter {
|
|
out.tree_sitter = v.into();
|
|
}
|
|
if let Some(v) = val.structured_extraction {
|
|
out.structured_extraction = v.into();
|
|
}
|
|
if let Some(v) = val.cancel_token {
|
|
out.cancel_token = serde_json::from_str(&v).unwrap_or_default();
|
|
}
|
|
out
|
|
}
|
|
}
|
|
|
|
/// Extract content from a byte array.
|
|
///
|
|
/// This is the main entry point for in-memory extraction. It performs the following steps:
|
|
/// 1. Validate MIME type
|
|
/// 2. Handle legacy format conversion if needed
|
|
/// 3. Select appropriate extractor from registry
|
|
/// 4. Extract content
|
|
/// 5. Run post-processing pipeline
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `content` - The byte array to extract
|
|
/// * `mime_type` - MIME type of the content
|
|
/// * `config` - Extraction configuration
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// An `ExtractionResult` containing the extracted content and metadata.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns `KreuzbergError.Validation` if MIME type is invalid.
|
|
/// Returns `KreuzbergError.UnsupportedFormat` if MIME type is not supported.
|
|
///
|
|
/// # Example
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "extractBytes")]
|
|
pub async fn extract_bytes(
|
|
content: Vec<u8>,
|
|
mime_type: String,
|
|
config: JsValue,
|
|
) -> Result<WasmExtractionResult, JsValue> {
|
|
let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() {
|
|
kreuzberg::ExtractionConfig::default()
|
|
} else {
|
|
serde_wasm_bindgen::from_value::<kreuzberg::ExtractionConfig>(config)
|
|
.map_err(|e| JsValue::from_str(&e.to_string()))?
|
|
};
|
|
let result = kreuzberg::extract_bytes(&content, &mime_type, &config_core)
|
|
.await
|
|
.map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(WasmExtractionResult::from(result))
|
|
}
|
|
|
|
/// Extract content from a file.
|
|
///
|
|
/// This is the main entry point for file-based extraction. It performs the following steps:
|
|
/// 1. Check cache for existing result (if caching enabled)
|
|
/// 2. Detect or validate MIME type
|
|
/// 3. Select appropriate extractor from registry
|
|
/// 4. Extract content
|
|
/// 5. Run post-processing pipeline
|
|
/// 6. Store result in cache (if caching enabled)
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `path` - Path to the file to extract
|
|
/// * `mime_type` - Optional MIME type override. If undefined, will be auto-detected
|
|
/// * `config` - Extraction configuration
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// An `ExtractionResult` containing the extracted content and metadata.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns `KreuzbergError.Io` if the file doesn't exist (NotFound) or for other file I/O errors.
|
|
/// Returns `KreuzbergError.UnsupportedFormat` if MIME type is not supported.
|
|
///
|
|
/// # Example
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "extractFile")]
|
|
pub async fn extract_file(
|
|
path: String,
|
|
mime_type: Option<String>,
|
|
config: JsValue,
|
|
) -> Result<WasmExtractionResult, JsValue> {
|
|
let config_core: kreuzberg::ExtractionConfig = if config.is_undefined() {
|
|
kreuzberg::ExtractionConfig::default()
|
|
} else {
|
|
serde_wasm_bindgen::from_value::<kreuzberg::ExtractionConfig>(config)
|
|
.map_err(|e| JsValue::from_str(&e.to_string()))?
|
|
};
|
|
let result = kreuzberg::extract_file(std::path::PathBuf::from(path), mime_type.as_deref(), &config_core)
|
|
.await
|
|
.map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(WasmExtractionResult::from(result))
|
|
}
|
|
|
|
/// Detect MIME type from raw file bytes.
|
|
///
|
|
/// Uses magic byte signatures to detect file type from content.
|
|
/// Falls back to `infer` crate for comprehensive detection.
|
|
///
|
|
/// For ZIP-based files, inspects contents to distinguish Office Open XML
|
|
/// formats (DOCX, XLSX, PPTX) from plain ZIP archives.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `content` - Raw file bytes
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// The detected MIME type string.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns `KreuzbergError.UnsupportedFormat` if MIME type cannot be determined.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "detectMimeTypeFromBytes")]
|
|
pub fn detect_mime_type_from_bytes(content: Vec<u8>) -> Result<String, JsValue> {
|
|
let result = kreuzberg::detect_mime_type_from_bytes(&content).map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// Get file extensions for a given MIME type.
|
|
///
|
|
/// Returns all known file extensions that map to the specified MIME type.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `mime_type` - The MIME type to look up
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// A vector of file extensions (without leading dot) for the MIME type.
|
|
///
|
|
/// # Example
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "getExtensionsForMime")]
|
|
pub fn get_extensions_for_mime(mime_type: String) -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::get_extensions_for_mime(&mime_type).map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List the names of all registered embedding backends.
|
|
///
|
|
/// Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language
|
|
/// bindings.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listEmbeddingBackends")]
|
|
pub fn list_embedding_backends() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_embedding_backends().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List names of all registered document extractors.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listDocumentExtractors")]
|
|
pub fn list_document_extractors() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_document_extractors().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List all registered OCR backends.
|
|
///
|
|
/// Returns the names of all OCR backends currently registered in the global registry.
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// A vector of OCR backend names.
|
|
///
|
|
/// # Example
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listOcrBackends")]
|
|
pub fn list_ocr_backends() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_ocr_backends().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List all registered post-processor names.
|
|
///
|
|
/// Returns a vector of all post-processor names currently registered in the
|
|
/// global registry.
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// - `Ok(Vec<String>)` - Vector of post-processor names
|
|
/// - `Err(...)` if the registry lock is poisoned
|
|
///
|
|
/// # Example
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listPostProcessors")]
|
|
pub fn list_post_processors() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_post_processors().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List names of all registered renderers.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns an error if the registry lock is poisoned.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listRenderers")]
|
|
pub fn list_renderers() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_renderers().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// List names of all registered validators.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "listValidators")]
|
|
pub fn list_validators() -> Result<Vec<String>, JsValue> {
|
|
let result = kreuzberg::list_validators().map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
/// Detect the MIME type of a file at the given path.
|
|
///
|
|
/// Uses the file extension and optionally the file content to determine the MIME type.
|
|
/// Set `check_exists` to `true` to verify the file exists before detection.
|
|
#[allow(clippy::missing_errors_doc)]
|
|
#[wasm_bindgen(js_name = "detectMimeType")]
|
|
pub fn detect_mime_type(path: String, check_exists: bool) -> Result<String, JsValue> {
|
|
let result = kreuzberg::detect_mime_type(path, check_exists).map_err(|e| JsValue::from_str(&e.to_string()))?;
|
|
Ok(result)
|
|
}
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_ocrbackend {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `OcrBackend` trait.
|
|
pub struct WasmOcrBackendBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmOcrBackendBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmOcrBackendBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmOcrBackendBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("processImage")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "process_image"));
|
|
}
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("supportsLanguage")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "supports_language"));
|
|
}
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("backendType")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "backend_type"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmOcrBackendBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait(?Send)]
|
|
impl kreuzberg::OcrBackend for WasmOcrBackendBridge {
|
|
async fn process_image(
|
|
&self,
|
|
image_bytes: &[u8],
|
|
config: &kreuzberg::OcrConfig,
|
|
) -> std::result::Result<kreuzberg::ExtractionResult, kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("processImage");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"process_image"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "process_image")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "process_image"))
|
|
})?;
|
|
|
|
let args = js_sys::Array::new();
|
|
args.push(&js_sys::Uint8Array::from(image_bytes).into());
|
|
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
let promise_val = func.apply(&self.inner, &args).map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "process_image"))
|
|
})?;
|
|
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "process_image"))
|
|
})?;
|
|
let result = wasm_bindgen_futures::JsFuture::from(promise)
|
|
.await
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
|
|
|
|
// Convert result
|
|
result
|
|
.as_string()
|
|
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string()))
|
|
.and_then(|s| {
|
|
serde_json::from_str::<kreuzberg::ExtractionResult>(&s)
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
}
|
|
|
|
fn supports_language(&self, lang: &str) -> bool {
|
|
let key = wasm_bindgen::JsValue::from_str("supportsLanguage");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
args.push(&wasm_bindgen::JsValue::from_str(lang));
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
// Convert JS boolean to Rust bool
|
|
result.as_bool().unwrap_or_default()
|
|
}
|
|
|
|
fn backend_type(&self) -> kreuzberg::OcrBackendType {
|
|
let key = wasm_bindgen::JsValue::from_str("backendType");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
// Convert bare enum string (non-JSON) to kreuzberg::OcrBackendType
|
|
result
|
|
.as_string()
|
|
.and_then(|s| {
|
|
serde_json::from_str::<kreuzberg::OcrBackendType>(&format!("\"{}\"", s))
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
.unwrap_or_default()
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerOcrBackend")]
|
|
pub fn register_ocr_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["processImage", "supportsLanguage", "backendType"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmOcrBackendBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::OcrBackend> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_ocr_backend_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterOcrBackend")]
|
|
pub fn unregister_ocr_backend(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::ocr_backend::unregister_ocr_backend(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearOcrBackends")]
|
|
pub fn clear_ocr_backends() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::ocr_backend::clear_ocr_backends()
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_ocrbackend::*;
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_postprocessor {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `PostProcessor` trait.
|
|
pub struct WasmPostProcessorBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmPostProcessorBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmPostProcessorBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmPostProcessorBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("process")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "process"));
|
|
}
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("processingStage")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "processing_stage"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmPostProcessorBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait(?Send)]
|
|
impl kreuzberg::PostProcessor for WasmPostProcessorBridge {
|
|
async fn process(
|
|
&self,
|
|
result: &mut kreuzberg::ExtractionResult,
|
|
config: &kreuzberg::ExtractionConfig,
|
|
) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("process");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"process"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "process")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "process")))?;
|
|
|
|
let args = js_sys::Array::new();
|
|
args.push(&serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
let promise_val = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "process")))?;
|
|
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "process"))
|
|
})?;
|
|
let result = wasm_bindgen_futures::JsFuture::from(promise)
|
|
.await
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn processing_stage(&self) -> kreuzberg::ProcessingStage {
|
|
let key = wasm_bindgen::JsValue::from_str("processingStage");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
// Convert bare enum string (non-JSON) to kreuzberg::ProcessingStage
|
|
result
|
|
.as_string()
|
|
.and_then(|s| {
|
|
serde_json::from_str::<kreuzberg::ProcessingStage>(&format!("\"{}\"", s))
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
.unwrap_or_default()
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerPostProcessor")]
|
|
pub fn register_post_processor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["process", "processingStage"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmPostProcessorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::PostProcessor> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_post_processor_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterPostProcessor")]
|
|
pub fn unregister_post_processor(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::post_processor::unregister_post_processor(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearPostProcessors")]
|
|
pub fn clear_post_processors() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::post_processor::clear_post_processors()
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_postprocessor::*;
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_validator {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `Validator` trait.
|
|
pub struct WasmValidatorBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmValidatorBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmValidatorBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmValidatorBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("validate")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "validate"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmValidatorBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait(?Send)]
|
|
impl kreuzberg::Validator for WasmValidatorBridge {
|
|
async fn validate(
|
|
&self,
|
|
result: &kreuzberg::ExtractionResult,
|
|
config: &kreuzberg::ExtractionConfig,
|
|
) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("validate");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"validate"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "validate")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "validate")))?;
|
|
|
|
let args = js_sys::Array::new();
|
|
args.push(&serde_wasm_bindgen::to_value(result).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
let promise_val = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "validate")))?;
|
|
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "validate"))
|
|
})?;
|
|
let result = wasm_bindgen_futures::JsFuture::from(promise)
|
|
.await
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerValidator")]
|
|
pub fn register_validator(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["validate"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmValidatorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::Validator> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_validator_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterValidator")]
|
|
pub fn unregister_validator(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::validator::unregister_validator(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearValidators")]
|
|
pub fn clear_validators() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::validator::clear_validators().map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_validator::*;
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_embeddingbackend {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `EmbeddingBackend` trait.
|
|
pub struct WasmEmbeddingBackendBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmEmbeddingBackendBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmEmbeddingBackendBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmEmbeddingBackendBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("dimensions")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "dimensions"));
|
|
}
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("embed")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "embed"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmEmbeddingBackendBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait(?Send)]
|
|
impl kreuzberg::EmbeddingBackend for WasmEmbeddingBackendBridge {
|
|
fn dimensions(&self) -> usize {
|
|
let key = wasm_bindgen::JsValue::from_str("dimensions");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
// Convert JS result to usize
|
|
result
|
|
.as_string()
|
|
.and_then(|s| {
|
|
serde_json::from_str::<usize>(&s)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
async fn embed(&self, texts: Vec<String>) -> std::result::Result<Vec<Vec<f32>>, kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("embed");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"embed"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "embed")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "embed")))?;
|
|
|
|
let args = js_sys::Array::new();
|
|
args.push(&serde_wasm_bindgen::to_value(&texts).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
let promise_val = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "embed")))?;
|
|
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "embed"))
|
|
})?;
|
|
let result = wasm_bindgen_futures::JsFuture::from(promise)
|
|
.await
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
|
|
|
|
// Convert result
|
|
result
|
|
.as_string()
|
|
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string()))
|
|
.and_then(|s| {
|
|
serde_json::from_str::<Vec<Vec<f32>>>(&s)
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerEmbeddingBackend")]
|
|
pub fn register_embedding_backend(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["dimensions", "embed"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmEmbeddingBackendBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::EmbeddingBackend> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_embedding_backend_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterEmbeddingBackend")]
|
|
pub fn unregister_embedding_backend(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::embedding_backend::unregister_embedding_backend(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearEmbeddingBackends")]
|
|
pub fn clear_embedding_backends() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::embedding_backend::clear_embedding_backends()
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_embeddingbackend::*;
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_documentextractor {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `DocumentExtractor` trait.
|
|
pub struct WasmDocumentExtractorBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmDocumentExtractorBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmDocumentExtractorBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmDocumentExtractorBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("extractBytes")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "extract_bytes"));
|
|
}
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("supportedMimeTypes")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "supported_mime_types"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmDocumentExtractorBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait(?Send)]
|
|
impl kreuzberg::DocumentExtractor for WasmDocumentExtractorBridge {
|
|
async fn extract_bytes(
|
|
&self,
|
|
content: &[u8],
|
|
mime_type: &str,
|
|
config: &kreuzberg::ExtractionConfig,
|
|
) -> std::result::Result<kreuzberg::InternalDocument, kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("extractBytes");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"extract_bytes"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "extract_bytes")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "extract_bytes"))
|
|
})?;
|
|
|
|
let args = js_sys::Array::new();
|
|
args.push(&js_sys::Uint8Array::from(content).into());
|
|
args.push(&wasm_bindgen::JsValue::from_str(mime_type));
|
|
args.push(&serde_wasm_bindgen::to_value(config).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
let promise_val = func.apply(&self.inner, &args).map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "extract_bytes"))
|
|
})?;
|
|
let promise: js_sys::Promise = promise_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' did not return a Promise", "extract_bytes"))
|
|
})?;
|
|
let result = wasm_bindgen_futures::JsFuture::from(promise)
|
|
.await
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Promise rejected: {:?}", e)))?;
|
|
|
|
// Convert result
|
|
result
|
|
.as_string()
|
|
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Failed to convert result".to_string()))
|
|
.and_then(|s| {
|
|
serde_json::from_str::<kreuzberg::InternalDocument>(&s)
|
|
.map_err(|e| kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e)))
|
|
})
|
|
}
|
|
|
|
fn supported_mime_types(&self) -> &[&str] {
|
|
let __types: Vec<String> = {
|
|
let key = wasm_bindgen::JsValue::from_str("supportedMimeTypes");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
// Convert JS result to Vec<String>
|
|
result
|
|
.as_string()
|
|
.and_then(|s| {
|
|
serde_json::from_str::<Vec<String>>(&s).map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Failed to deserialize result: {}", e))
|
|
})
|
|
})
|
|
.unwrap_or_default()
|
|
};
|
|
let __strs: Vec<&'static str> = __types
|
|
.into_iter()
|
|
.map(|s| -> &'static str { Box::leak(s.into_boxed_str()) })
|
|
.collect();
|
|
Box::leak(__strs.into_boxed_slice())
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerDocumentExtractor")]
|
|
pub fn register_document_extractor(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["extractBytes", "supportedMimeTypes"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmDocumentExtractorBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::DocumentExtractor> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_document_extractor_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterDocumentExtractor")]
|
|
pub fn unregister_document_extractor(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::document_extractor::unregister_document_extractor(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearDocumentExtractors")]
|
|
pub fn clear_document_extractors() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::document_extractor::clear_document_extractors()
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_documentextractor::*;
|
|
|
|
#[cfg(target_arch = "wasm32")]
|
|
mod __alef_wasm_bridge_renderer {
|
|
use super::*;
|
|
|
|
/// Wrapper that bridges a foreign Wasm object to the `Renderer` trait.
|
|
pub struct WasmRendererBridge {
|
|
inner: wasm_bindgen::JsValue,
|
|
cached_name: String,
|
|
}
|
|
|
|
impl std::fmt::Debug for WasmRendererBridge {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WasmRendererBridge")
|
|
}
|
|
}
|
|
|
|
impl WasmRendererBridge {
|
|
/// Create a new bridge wrapping a JS object.
|
|
///
|
|
/// Validates that the JS object provides all required methods.
|
|
pub fn new(js_obj: wasm_bindgen::JsValue) -> Result<Self, String> {
|
|
if !js_sys::Reflect::has(&js_obj, &wasm_bindgen::JsValue::from_str("render")).unwrap_or(false) {
|
|
return Err(format!("JS object missing required method: {}", "render"));
|
|
}
|
|
let cached_name = {
|
|
let key = wasm_bindgen::JsValue::from_str("name");
|
|
js_sys::Reflect::get(&js_obj, &key)
|
|
.ok()
|
|
.and_then(|v| v.dyn_into::<js_sys::Function>().ok())
|
|
.and_then(|f| f.apply(&js_obj, &js_sys::Array::new()).ok())
|
|
.and_then(|v| v.as_string())
|
|
.unwrap_or_else(|| "wasm_bridge".to_string())
|
|
};
|
|
|
|
Ok(Self {
|
|
inner: js_obj,
|
|
cached_name,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::plugins::Plugin for WasmRendererBridge {
|
|
fn name(&self) -> &str {
|
|
&self.cached_name
|
|
}
|
|
|
|
fn version(&self) -> String {
|
|
let key = wasm_bindgen::JsValue::from_str("version");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Default::default();
|
|
}
|
|
|
|
let func_val = match js_sys::Reflect::get(&self.inner, &key) {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
let func: js_sys::Function = match func_val.dyn_into() {
|
|
Ok(f) => f,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = match func.apply(&self.inner, &args) {
|
|
Ok(r) => r,
|
|
Err(_) => return Default::default(),
|
|
};
|
|
|
|
// Convert result
|
|
result.as_string().unwrap_or_default()
|
|
}
|
|
|
|
fn initialize(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("initialize");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"initialize"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "initialize")))?;
|
|
|
|
let func: js_sys::Function = func_val.dyn_into().map_err(|_| {
|
|
kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "initialize"))
|
|
})?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "initialize")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
|
|
fn shutdown(&self) -> std::result::Result<(), kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("shutdown");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"shutdown"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "shutdown")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "shutdown")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "shutdown")))?;
|
|
|
|
// Convert result
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl kreuzberg::Renderer for WasmRendererBridge {
|
|
fn render(&self, doc: &kreuzberg::InternalDocument) -> std::result::Result<String, kreuzberg::KreuzbergError> {
|
|
let key = wasm_bindgen::JsValue::from_str("render");
|
|
let has_method = js_sys::Reflect::has(&self.inner, &key).unwrap_or(false);
|
|
if !has_method {
|
|
return Err(kreuzberg::KreuzbergError::Other(format!(
|
|
"Method '{}' not found on JS object",
|
|
"render"
|
|
)));
|
|
}
|
|
|
|
let func_val = js_sys::Reflect::get(&self.inner, &key)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to get method '{}'", "render")))?;
|
|
|
|
let func: js_sys::Function = func_val
|
|
.dyn_into()
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Method '{}' is not a function", "render")))?;
|
|
|
|
// Build args array
|
|
let args = js_sys::Array::new();
|
|
args.push(&serde_wasm_bindgen::to_value(doc).unwrap_or(wasm_bindgen::JsValue::NULL));
|
|
|
|
// Call the function
|
|
let result = func
|
|
.apply(&self.inner, &args)
|
|
.map_err(|_| kreuzberg::KreuzbergError::Other(format!("Failed to call method '{}'", "render")))?;
|
|
|
|
// Convert result
|
|
result
|
|
.as_string()
|
|
.ok_or_else(|| kreuzberg::KreuzbergError::Other("Expected string return".to_string()))
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "registerRenderer")]
|
|
pub fn register_renderer(backend: wasm_bindgen::JsValue) -> Result<(), wasm_bindgen::JsValue> {
|
|
let required_methods = vec!["render"];
|
|
|
|
for method_name in required_methods {
|
|
if !js_sys::Reflect::has(&backend, &wasm_bindgen::JsValue::from_str(method_name)).unwrap_or(false) {
|
|
return Err(wasm_bindgen::JsValue::from_str(&format!(
|
|
"Backend missing required method: {}",
|
|
method_name
|
|
)));
|
|
}
|
|
}
|
|
let wrapper = WasmRendererBridge::new(backend).map_err(|e| wasm_bindgen::JsValue::from_str(&e))?;
|
|
let arc: std::sync::Arc<dyn kreuzberg::Renderer> = std::sync::Arc::new(wrapper);
|
|
|
|
let registry = kreuzberg::plugins::registry::get_renderer_registry();
|
|
let mut registry = registry.write();
|
|
registry
|
|
.register(arc)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "unregisterRenderer")]
|
|
pub fn unregister_renderer(name: String) -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::renderer::unregister_renderer(&name)
|
|
.map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
|
|
#[wasm_bindgen(js_name = "clearRenderers")]
|
|
pub fn clear_renderers() -> Result<(), wasm_bindgen::JsValue> {
|
|
kreuzberg::plugins::renderer::clear_renderers().map_err(|e| wasm_bindgen::JsValue::from_str(&e.to_string()))
|
|
}
|
|
}
|
|
#[cfg(target_arch = "wasm32")]
|
|
pub use __alef_wasm_bridge_renderer::*;
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::CacheStats> for WasmCacheStats {
|
|
fn from(val: kreuzberg::CacheStats) -> Self {
|
|
Self {
|
|
total_files: val.total_files,
|
|
total_size_mb: val.total_size_mb,
|
|
available_space_mb: val.available_space_mb,
|
|
oldest_file_age_days: val.oldest_file_age_days,
|
|
newest_file_age_days: val.newest_file_age_days,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmAccelerationConfig> for kreuzberg::AccelerationConfig {
|
|
fn from(val: WasmAccelerationConfig) -> Self {
|
|
Self {
|
|
provider: val.provider.into(),
|
|
device_id: val.device_id,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::AccelerationConfig> for WasmAccelerationConfig {
|
|
fn from(val: kreuzberg::AccelerationConfig) -> Self {
|
|
Self {
|
|
provider: val.provider.into(),
|
|
device_id: val.device_id,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmContentFilterConfig> for kreuzberg::ContentFilterConfig {
|
|
fn from(val: WasmContentFilterConfig) -> Self {
|
|
Self {
|
|
include_headers: val.include_headers,
|
|
include_footers: val.include_footers,
|
|
strip_repeating_text: val.strip_repeating_text,
|
|
include_watermarks: val.include_watermarks,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ContentFilterConfig> for WasmContentFilterConfig {
|
|
fn from(val: kreuzberg::ContentFilterConfig) -> Self {
|
|
Self {
|
|
include_headers: val.include_headers,
|
|
include_footers: val.include_footers,
|
|
strip_repeating_text: val.strip_repeating_text,
|
|
include_watermarks: val.include_watermarks,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmEmailConfig> for kreuzberg::EmailConfig {
|
|
fn from(val: WasmEmailConfig) -> Self {
|
|
Self {
|
|
msg_fallback_codepage: val.msg_fallback_codepage,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EmailConfig> for WasmEmailConfig {
|
|
fn from(val: kreuzberg::EmailConfig) -> Self {
|
|
Self {
|
|
msg_fallback_codepage: val.msg_fallback_codepage,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::needless_update)]
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmExtractionConfig> for kreuzberg::ExtractionConfig {
|
|
fn from(val: WasmExtractionConfig) -> Self {
|
|
Self {
|
|
use_cache: val.use_cache,
|
|
enable_quality_processing: val.enable_quality_processing,
|
|
ocr: val.ocr.map(Into::into),
|
|
force_ocr: val.force_ocr,
|
|
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
|
|
disable_ocr: val.disable_ocr,
|
|
chunking: val.chunking.map(Into::into),
|
|
content_filter: val.content_filter.map(Into::into),
|
|
images: val.images.map(Into::into),
|
|
token_reduction: val.token_reduction.map(Into::into),
|
|
language_detection: val.language_detection.map(Into::into),
|
|
pages: val.pages.map(Into::into),
|
|
postprocessor: val.postprocessor.map(Into::into),
|
|
html_options: Default::default(),
|
|
extraction_timeout_secs: val.extraction_timeout_secs,
|
|
max_concurrent_extractions: val.max_concurrent_extractions,
|
|
result_format: val.result_format.into(),
|
|
security_limits: val.security_limits.map(Into::into),
|
|
max_embedded_file_bytes: val.max_embedded_file_bytes,
|
|
output_format: val.output_format.into(),
|
|
use_layout_for_markdown: val.use_layout_for_markdown,
|
|
include_document_structure: val.include_document_structure,
|
|
acceleration: val.acceleration.map(Into::into),
|
|
cache_namespace: val.cache_namespace,
|
|
cache_ttl_secs: val.cache_ttl_secs,
|
|
email: val.email.map(Into::into),
|
|
concurrency: Default::default(),
|
|
max_archive_depth: val.max_archive_depth,
|
|
structured_extraction: val.structured_extraction.map(Into::into),
|
|
cancel_token: Default::default(),
|
|
..Default::default()
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExtractionConfig> for WasmExtractionConfig {
|
|
fn from(val: kreuzberg::ExtractionConfig) -> Self {
|
|
Self {
|
|
use_cache: val.use_cache,
|
|
enable_quality_processing: val.enable_quality_processing,
|
|
ocr: val.ocr.map(Into::into),
|
|
force_ocr: val.force_ocr,
|
|
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
|
|
disable_ocr: val.disable_ocr,
|
|
chunking: val.chunking.map(Into::into),
|
|
content_filter: val.content_filter.map(Into::into),
|
|
images: val.images.map(Into::into),
|
|
token_reduction: val.token_reduction.map(Into::into),
|
|
language_detection: val.language_detection.map(Into::into),
|
|
pages: val.pages.map(Into::into),
|
|
postprocessor: val.postprocessor.map(Into::into),
|
|
html_options: val.html_options.as_ref().map(|v| format!("{v:?}")),
|
|
extraction_timeout_secs: val.extraction_timeout_secs,
|
|
max_concurrent_extractions: val.max_concurrent_extractions,
|
|
result_format: val.result_format.into(),
|
|
security_limits: val.security_limits.map(Into::into),
|
|
max_embedded_file_bytes: val.max_embedded_file_bytes,
|
|
output_format: val.output_format.into(),
|
|
use_layout_for_markdown: val.use_layout_for_markdown,
|
|
include_document_structure: val.include_document_structure,
|
|
acceleration: val.acceleration.map(Into::into),
|
|
cache_namespace: val.cache_namespace,
|
|
cache_ttl_secs: val.cache_ttl_secs,
|
|
email: val.email.map(Into::into),
|
|
concurrency: val.concurrency.as_ref().map(|v| format!("{v:?}")),
|
|
max_archive_depth: val.max_archive_depth,
|
|
structured_extraction: val.structured_extraction.map(Into::into),
|
|
cancel_token: val.cancel_token.as_ref().map(|v| format!("{v:?}")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::needless_update)]
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmFileExtractionConfig> for kreuzberg::FileExtractionConfig {
|
|
fn from(val: WasmFileExtractionConfig) -> Self {
|
|
Self {
|
|
enable_quality_processing: val.enable_quality_processing,
|
|
ocr: val.ocr.map(Into::into),
|
|
force_ocr: val.force_ocr,
|
|
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
|
|
disable_ocr: val.disable_ocr,
|
|
chunking: val.chunking.map(Into::into),
|
|
content_filter: val.content_filter.map(Into::into),
|
|
images: val.images.map(Into::into),
|
|
token_reduction: val.token_reduction.map(Into::into),
|
|
language_detection: val.language_detection.map(Into::into),
|
|
pages: val.pages.map(Into::into),
|
|
postprocessor: val.postprocessor.map(Into::into),
|
|
html_options: Default::default(),
|
|
result_format: val.result_format.map(Into::into),
|
|
output_format: val.output_format.map(Into::into),
|
|
include_document_structure: val.include_document_structure,
|
|
timeout_secs: val.timeout_secs,
|
|
structured_extraction: val.structured_extraction.map(Into::into),
|
|
..Default::default()
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::FileExtractionConfig> for WasmFileExtractionConfig {
|
|
fn from(val: kreuzberg::FileExtractionConfig) -> Self {
|
|
Self {
|
|
enable_quality_processing: val.enable_quality_processing,
|
|
ocr: val.ocr.map(Into::into),
|
|
force_ocr: val.force_ocr,
|
|
force_ocr_pages: val.force_ocr_pages.map(|v| v.into_iter().collect()),
|
|
disable_ocr: val.disable_ocr,
|
|
chunking: val.chunking.map(Into::into),
|
|
content_filter: val.content_filter.map(Into::into),
|
|
images: val.images.map(Into::into),
|
|
token_reduction: val.token_reduction.map(Into::into),
|
|
language_detection: val.language_detection.map(Into::into),
|
|
pages: val.pages.map(Into::into),
|
|
postprocessor: val.postprocessor.map(Into::into),
|
|
html_options: val.html_options.as_ref().map(|v| format!("{v:?}")),
|
|
result_format: val.result_format.map(Into::into),
|
|
output_format: val.output_format.map(Into::into),
|
|
include_document_structure: val.include_document_structure,
|
|
timeout_secs: val.timeout_secs,
|
|
structured_extraction: val.structured_extraction.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmBatchBytesItem> for kreuzberg::BatchBytesItem {
|
|
fn from(val: WasmBatchBytesItem) -> Self {
|
|
Self {
|
|
content: val.content.to_vec().into(),
|
|
mime_type: val.mime_type,
|
|
config: val.config.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::BatchBytesItem> for WasmBatchBytesItem {
|
|
fn from(val: kreuzberg::BatchBytesItem) -> Self {
|
|
Self {
|
|
content: val.content.to_vec().into(),
|
|
mime_type: val.mime_type,
|
|
config: val.config.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmBatchFileItem> for kreuzberg::BatchFileItem {
|
|
fn from(val: WasmBatchFileItem) -> Self {
|
|
Self {
|
|
path: val.path.into(),
|
|
config: val.config.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::BatchFileItem> for WasmBatchFileItem {
|
|
fn from(val: kreuzberg::BatchFileItem) -> Self {
|
|
Self {
|
|
path: val.path.to_string_lossy().to_string(),
|
|
config: val.config.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmImageExtractionConfig> for kreuzberg::ImageExtractionConfig {
|
|
fn from(val: WasmImageExtractionConfig) -> Self {
|
|
Self {
|
|
extract_images: val.extract_images,
|
|
target_dpi: val.target_dpi,
|
|
max_image_dimension: val.max_image_dimension,
|
|
inject_placeholders: val.inject_placeholders,
|
|
auto_adjust_dpi: val.auto_adjust_dpi,
|
|
min_dpi: val.min_dpi,
|
|
max_dpi: val.max_dpi,
|
|
max_images_per_page: val.max_images_per_page,
|
|
classify: val.classify,
|
|
include_page_rasters: val.include_page_rasters,
|
|
run_ocr_on_images: val.run_ocr_on_images,
|
|
ocr_text_only: val.ocr_text_only,
|
|
append_ocr_text: val.append_ocr_text,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ImageExtractionConfig> for WasmImageExtractionConfig {
|
|
fn from(val: kreuzberg::ImageExtractionConfig) -> Self {
|
|
Self {
|
|
extract_images: val.extract_images,
|
|
target_dpi: val.target_dpi,
|
|
max_image_dimension: val.max_image_dimension,
|
|
inject_placeholders: val.inject_placeholders,
|
|
auto_adjust_dpi: val.auto_adjust_dpi,
|
|
min_dpi: val.min_dpi,
|
|
max_dpi: val.max_dpi,
|
|
max_images_per_page: val.max_images_per_page,
|
|
classify: val.classify,
|
|
include_page_rasters: val.include_page_rasters,
|
|
run_ocr_on_images: val.run_ocr_on_images,
|
|
ocr_text_only: val.ocr_text_only,
|
|
append_ocr_text: val.append_ocr_text,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTokenReductionOptions> for kreuzberg::TokenReductionOptions {
|
|
fn from(val: WasmTokenReductionOptions) -> Self {
|
|
Self {
|
|
mode: val.mode,
|
|
preserve_important_words: val.preserve_important_words,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TokenReductionOptions> for WasmTokenReductionOptions {
|
|
fn from(val: kreuzberg::TokenReductionOptions) -> Self {
|
|
Self {
|
|
mode: val.mode,
|
|
preserve_important_words: val.preserve_important_words,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmLanguageDetectionConfig> for kreuzberg::LanguageDetectionConfig {
|
|
fn from(val: WasmLanguageDetectionConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
min_confidence: val.min_confidence,
|
|
detect_multiple: val.detect_multiple,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::LanguageDetectionConfig> for WasmLanguageDetectionConfig {
|
|
fn from(val: kreuzberg::LanguageDetectionConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
min_confidence: val.min_confidence,
|
|
detect_multiple: val.detect_multiple,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmLlmConfig> for kreuzberg::LlmConfig {
|
|
fn from(val: WasmLlmConfig) -> Self {
|
|
Self {
|
|
model: val.model,
|
|
api_key: val.api_key,
|
|
base_url: val.base_url,
|
|
timeout_secs: val.timeout_secs,
|
|
max_retries: val.max_retries,
|
|
temperature: val.temperature,
|
|
max_tokens: val.max_tokens,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::LlmConfig> for WasmLlmConfig {
|
|
fn from(val: kreuzberg::LlmConfig) -> Self {
|
|
Self {
|
|
model: val.model,
|
|
api_key: val.api_key,
|
|
base_url: val.base_url,
|
|
timeout_secs: val.timeout_secs,
|
|
max_retries: val.max_retries,
|
|
temperature: val.temperature,
|
|
max_tokens: val.max_tokens,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmStructuredExtractionConfig> for kreuzberg::StructuredExtractionConfig {
|
|
fn from(val: WasmStructuredExtractionConfig) -> Self {
|
|
Self {
|
|
schema: serde_wasm_bindgen::from_value(val.schema.clone()).unwrap_or_default(),
|
|
schema_name: val.schema_name,
|
|
schema_description: val.schema_description,
|
|
strict: val.strict,
|
|
prompt: val.prompt,
|
|
llm: val.llm.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::StructuredExtractionConfig> for WasmStructuredExtractionConfig {
|
|
fn from(val: kreuzberg::StructuredExtractionConfig) -> Self {
|
|
Self {
|
|
schema: serde_wasm_bindgen::to_value(&val.schema).unwrap_or(JsValue::NULL),
|
|
schema_name: val.schema_name,
|
|
schema_description: val.schema_description,
|
|
strict: val.strict,
|
|
prompt: val.prompt,
|
|
llm: val.llm.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrQualityThresholds> for kreuzberg::OcrQualityThresholds {
|
|
fn from(val: WasmOcrQualityThresholds) -> Self {
|
|
Self {
|
|
min_total_non_whitespace: val.min_total_non_whitespace,
|
|
min_non_whitespace_per_page: val.min_non_whitespace_per_page,
|
|
min_meaningful_word_len: val.min_meaningful_word_len,
|
|
min_meaningful_words: val.min_meaningful_words,
|
|
min_alnum_ratio: val.min_alnum_ratio,
|
|
min_garbage_chars: val.min_garbage_chars,
|
|
max_fragmented_word_ratio: val.max_fragmented_word_ratio,
|
|
critical_fragmented_word_ratio: val.critical_fragmented_word_ratio,
|
|
min_avg_word_length: val.min_avg_word_length,
|
|
min_words_for_avg_length_check: val.min_words_for_avg_length_check,
|
|
min_consecutive_repeat_ratio: val.min_consecutive_repeat_ratio,
|
|
min_words_for_repeat_check: val.min_words_for_repeat_check,
|
|
substantive_min_chars: val.substantive_min_chars,
|
|
non_text_min_chars: val.non_text_min_chars,
|
|
alnum_ws_ratio_threshold: val.alnum_ws_ratio_threshold,
|
|
pipeline_min_quality: val.pipeline_min_quality,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrQualityThresholds> for WasmOcrQualityThresholds {
|
|
fn from(val: kreuzberg::OcrQualityThresholds) -> Self {
|
|
Self {
|
|
min_total_non_whitespace: val.min_total_non_whitespace,
|
|
min_non_whitespace_per_page: val.min_non_whitespace_per_page,
|
|
min_meaningful_word_len: val.min_meaningful_word_len,
|
|
min_meaningful_words: val.min_meaningful_words,
|
|
min_alnum_ratio: val.min_alnum_ratio,
|
|
min_garbage_chars: val.min_garbage_chars,
|
|
max_fragmented_word_ratio: val.max_fragmented_word_ratio,
|
|
critical_fragmented_word_ratio: val.critical_fragmented_word_ratio,
|
|
min_avg_word_length: val.min_avg_word_length,
|
|
min_words_for_avg_length_check: val.min_words_for_avg_length_check,
|
|
min_consecutive_repeat_ratio: val.min_consecutive_repeat_ratio,
|
|
min_words_for_repeat_check: val.min_words_for_repeat_check,
|
|
substantive_min_chars: val.substantive_min_chars,
|
|
non_text_min_chars: val.non_text_min_chars,
|
|
alnum_ws_ratio_threshold: val.alnum_ws_ratio_threshold,
|
|
pipeline_min_quality: val.pipeline_min_quality,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrPipelineStage> for kreuzberg::OcrPipelineStage {
|
|
fn from(val: WasmOcrPipelineStage) -> Self {
|
|
Self {
|
|
backend: val.backend,
|
|
priority: val.priority,
|
|
language: val.language,
|
|
tesseract_config: val.tesseract_config.map(Into::into),
|
|
paddle_ocr_config: val
|
|
.paddle_ocr_config
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
vlm_config: val.vlm_config.map(Into::into),
|
|
backend_options: val
|
|
.backend_options
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrPipelineStage> for WasmOcrPipelineStage {
|
|
fn from(val: kreuzberg::OcrPipelineStage) -> Self {
|
|
Self {
|
|
backend: val.backend,
|
|
priority: val.priority,
|
|
language: val.language,
|
|
tesseract_config: val.tesseract_config.map(Into::into),
|
|
paddle_ocr_config: val
|
|
.paddle_ocr_config
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
vlm_config: val.vlm_config.map(Into::into),
|
|
backend_options: val
|
|
.backend_options
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrPipelineConfig> for kreuzberg::OcrPipelineConfig {
|
|
fn from(val: WasmOcrPipelineConfig) -> Self {
|
|
Self {
|
|
stages: val.stages.into_iter().map(Into::into).collect(),
|
|
quality_thresholds: val.quality_thresholds.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrPipelineConfig> for WasmOcrPipelineConfig {
|
|
fn from(val: kreuzberg::OcrPipelineConfig) -> Self {
|
|
Self {
|
|
stages: val.stages.into_iter().map(Into::into).collect(),
|
|
quality_thresholds: val.quality_thresholds.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrConfig> for kreuzberg::OcrConfig {
|
|
fn from(val: WasmOcrConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
backend: val.backend,
|
|
language: val.language,
|
|
tesseract_config: val.tesseract_config.map(Into::into),
|
|
output_format: val.output_format.map(Into::into),
|
|
paddle_ocr_config: val
|
|
.paddle_ocr_config
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
backend_options: val
|
|
.backend_options
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
element_config: val.element_config.map(Into::into),
|
|
quality_thresholds: val.quality_thresholds.map(Into::into),
|
|
pipeline: val.pipeline.map(Into::into),
|
|
auto_rotate: val.auto_rotate,
|
|
vlm_config: val.vlm_config.map(Into::into),
|
|
vlm_prompt: val.vlm_prompt,
|
|
acceleration: val.acceleration.map(Into::into),
|
|
tessdata_bytes: val
|
|
.tessdata_bytes
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrConfig> for WasmOcrConfig {
|
|
fn from(val: kreuzberg::OcrConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
backend: val.backend,
|
|
language: val.language,
|
|
tesseract_config: val.tesseract_config.map(Into::into),
|
|
output_format: val.output_format.map(Into::into),
|
|
paddle_ocr_config: val
|
|
.paddle_ocr_config
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
backend_options: val
|
|
.backend_options
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
element_config: val.element_config.map(Into::into),
|
|
quality_thresholds: val.quality_thresholds.map(Into::into),
|
|
pipeline: val.pipeline.map(Into::into),
|
|
auto_rotate: val.auto_rotate,
|
|
vlm_config: val.vlm_config.map(Into::into),
|
|
vlm_prompt: val.vlm_prompt,
|
|
acceleration: val.acceleration.map(Into::into),
|
|
tessdata_bytes: val
|
|
.tessdata_bytes
|
|
.as_ref()
|
|
.and_then(|v| serde_json::to_string(v).ok())
|
|
.and_then(|s| js_sys::JSON::parse(&s).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageConfig> for kreuzberg::PageConfig {
|
|
fn from(val: WasmPageConfig) -> Self {
|
|
Self {
|
|
extract_pages: val.extract_pages,
|
|
insert_page_markers: val.insert_page_markers,
|
|
marker_format: val.marker_format,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageConfig> for WasmPageConfig {
|
|
fn from(val: kreuzberg::PageConfig) -> Self {
|
|
Self {
|
|
extract_pages: val.extract_pages,
|
|
insert_page_markers: val.insert_page_markers,
|
|
marker_format: val.marker_format,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPostProcessorConfig> for kreuzberg::PostProcessorConfig {
|
|
fn from(val: WasmPostProcessorConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
enabled_processors: val.enabled_processors.map(|v| v.into_iter().collect()),
|
|
disabled_processors: val.disabled_processors.map(|v| v.into_iter().collect()),
|
|
enabled_set: val.enabled_set.map(|v| v.into_iter().collect()),
|
|
disabled_set: val.disabled_set.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PostProcessorConfig> for WasmPostProcessorConfig {
|
|
fn from(val: kreuzberg::PostProcessorConfig) -> Self {
|
|
Self {
|
|
enabled: val.enabled,
|
|
enabled_processors: val.enabled_processors.map(|v| v.into_iter().collect()),
|
|
disabled_processors: val.disabled_processors.map(|v| v.into_iter().collect()),
|
|
enabled_set: val.enabled_set.map(|v| v.into_iter().collect()),
|
|
disabled_set: val.disabled_set.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmChunkingConfig> for kreuzberg::ChunkingConfig {
|
|
fn from(val: WasmChunkingConfig) -> Self {
|
|
Self {
|
|
max_characters: val.max_characters,
|
|
overlap: val.overlap,
|
|
trim: val.trim,
|
|
chunker_type: val.chunker_type.into(),
|
|
embedding: val.embedding.map(Into::into),
|
|
preset: val.preset,
|
|
sizing: serde_wasm_bindgen::from_value(val.sizing.clone()).unwrap_or_default(),
|
|
prepend_heading_context: val.prepend_heading_context,
|
|
topic_threshold: val.topic_threshold,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ChunkingConfig> for WasmChunkingConfig {
|
|
fn from(val: kreuzberg::ChunkingConfig) -> Self {
|
|
Self {
|
|
max_characters: val.max_characters,
|
|
overlap: val.overlap,
|
|
trim: val.trim,
|
|
chunker_type: val.chunker_type.into(),
|
|
embedding: val.embedding.map(Into::into),
|
|
preset: val.preset,
|
|
sizing: serde_wasm_bindgen::to_value(&val.sizing).unwrap_or(JsValue::NULL),
|
|
prepend_heading_context: val.prepend_heading_context,
|
|
topic_threshold: val.topic_threshold,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmEmbeddingConfig> for kreuzberg::EmbeddingConfig {
|
|
fn from(val: WasmEmbeddingConfig) -> Self {
|
|
Self {
|
|
model: serde_wasm_bindgen::from_value(val.model.clone()).unwrap_or_default(),
|
|
normalize: val.normalize,
|
|
batch_size: val.batch_size,
|
|
show_download_progress: val.show_download_progress,
|
|
cache_dir: val.cache_dir.map(Into::into),
|
|
acceleration: val.acceleration.map(Into::into),
|
|
max_embed_duration_secs: val.max_embed_duration_secs,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EmbeddingConfig> for WasmEmbeddingConfig {
|
|
fn from(val: kreuzberg::EmbeddingConfig) -> Self {
|
|
Self {
|
|
model: serde_wasm_bindgen::to_value(&val.model).unwrap_or(JsValue::NULL),
|
|
normalize: val.normalize,
|
|
batch_size: val.batch_size,
|
|
show_download_progress: val.show_download_progress,
|
|
cache_dir: val.cache_dir.map(|p| p.to_string_lossy().to_string()),
|
|
acceleration: val.acceleration.map(Into::into),
|
|
max_embed_duration_secs: val.max_embed_duration_secs,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::SupportedFormat> for WasmSupportedFormat {
|
|
fn from(val: kreuzberg::SupportedFormat) -> Self {
|
|
Self {
|
|
extension: val.extension,
|
|
mime_type: val.mime_type,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::extraction::structured::StructuredDataResult> for WasmStructuredDataResult {
|
|
fn from(val: kreuzberg::extraction::structured::StructuredDataResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
format: val.format.to_string(),
|
|
metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
text_fields: val.text_fields.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties> for WasmXlsxAppProperties {
|
|
fn from(val: kreuzberg::extraction::office_metadata::app_properties::XlsxAppProperties) -> Self {
|
|
Self {
|
|
application: val.application,
|
|
app_version: val.app_version,
|
|
doc_security: val.doc_security,
|
|
scale_crop: val.scale_crop,
|
|
links_up_to_date: val.links_up_to_date,
|
|
shared_doc: val.shared_doc,
|
|
hyperlinks_changed: val.hyperlinks_changed,
|
|
company: val.company,
|
|
worksheet_names: val.worksheet_names.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties> for WasmPptxAppProperties {
|
|
fn from(val: kreuzberg::extraction::office_metadata::app_properties::PptxAppProperties) -> Self {
|
|
Self {
|
|
application: val.application,
|
|
app_version: val.app_version,
|
|
total_time: val.total_time,
|
|
company: val.company,
|
|
doc_security: val.doc_security,
|
|
scale_crop: val.scale_crop,
|
|
links_up_to_date: val.links_up_to_date,
|
|
shared_doc: val.shared_doc,
|
|
hyperlinks_changed: val.hyperlinks_changed,
|
|
slides: val.slides,
|
|
notes: val.notes,
|
|
hidden_slides: val.hidden_slides,
|
|
multimedia_clips: val.multimedia_clips,
|
|
presentation_format: val.presentation_format,
|
|
slide_titles: val.slide_titles.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmSecurityLimits> for kreuzberg::SecurityLimits {
|
|
fn from(val: WasmSecurityLimits) -> Self {
|
|
Self {
|
|
max_archive_size: val.max_archive_size,
|
|
max_compression_ratio: val.max_compression_ratio,
|
|
max_files_in_archive: val.max_files_in_archive,
|
|
max_nesting_depth: val.max_nesting_depth,
|
|
max_entity_length: val.max_entity_length,
|
|
max_content_size: val.max_content_size,
|
|
max_iterations: val.max_iterations,
|
|
max_xml_depth: val.max_xml_depth,
|
|
max_table_cells: val.max_table_cells,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::SecurityLimits> for WasmSecurityLimits {
|
|
fn from(val: kreuzberg::SecurityLimits) -> Self {
|
|
Self {
|
|
max_archive_size: val.max_archive_size,
|
|
max_compression_ratio: val.max_compression_ratio,
|
|
max_files_in_archive: val.max_files_in_archive,
|
|
max_nesting_depth: val.max_nesting_depth,
|
|
max_entity_length: val.max_entity_length,
|
|
max_content_size: val.max_content_size,
|
|
max_iterations: val.max_iterations,
|
|
max_xml_depth: val.max_xml_depth,
|
|
max_table_cells: val.max_table_cells,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPdfAnnotation> for kreuzberg::PdfAnnotation {
|
|
fn from(val: WasmPdfAnnotation) -> Self {
|
|
Self {
|
|
annotation_type: val.annotation_type.into(),
|
|
content: val.content,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PdfAnnotation> for WasmPdfAnnotation {
|
|
fn from(val: kreuzberg::PdfAnnotation) -> Self {
|
|
Self {
|
|
annotation_type: val.annotation_type.into(),
|
|
content: val.content,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDjotContent> for kreuzberg::DjotContent {
|
|
fn from(val: WasmDjotContent) -> Self {
|
|
Self {
|
|
plain_text: val.plain_text,
|
|
blocks: val.blocks.into_iter().map(Into::into).collect(),
|
|
metadata: val.metadata.into(),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
images: val.images.into_iter().map(Into::into).collect(),
|
|
links: val.links.into_iter().map(Into::into).collect(),
|
|
footnotes: val.footnotes.into_iter().map(Into::into).collect(),
|
|
attributes: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DjotContent> for WasmDjotContent {
|
|
fn from(val: kreuzberg::DjotContent) -> Self {
|
|
Self {
|
|
plain_text: val.plain_text,
|
|
blocks: val.blocks.into_iter().map(Into::into).collect(),
|
|
metadata: val.metadata.into(),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
images: val.images.into_iter().map(Into::into).collect(),
|
|
links: val.links.into_iter().map(Into::into).collect(),
|
|
footnotes: val.footnotes.into_iter().map(Into::into).collect(),
|
|
attributes: val.attributes.iter().map(|i| format!("{:?}", i)).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmFormattedBlock> for kreuzberg::FormattedBlock {
|
|
fn from(val: WasmFormattedBlock) -> Self {
|
|
Self {
|
|
block_type: val.block_type.into(),
|
|
level: val.level,
|
|
inline_content: val.inline_content.into_iter().map(Into::into).collect(),
|
|
attributes: Default::default(),
|
|
language: val.language,
|
|
code: val.code,
|
|
children: val.children.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::FormattedBlock> for WasmFormattedBlock {
|
|
fn from(val: kreuzberg::FormattedBlock) -> Self {
|
|
Self {
|
|
block_type: val.block_type.into(),
|
|
level: val.level,
|
|
inline_content: val.inline_content.into_iter().map(Into::into).collect(),
|
|
attributes: val.attributes.as_ref().map(|v| format!("{v:?}")),
|
|
language: val.language,
|
|
code: val.code,
|
|
children: val.children.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmInlineElement> for kreuzberg::InlineElement {
|
|
fn from(val: WasmInlineElement) -> Self {
|
|
Self {
|
|
element_type: val.element_type.into(),
|
|
content: val.content,
|
|
attributes: Default::default(),
|
|
metadata: val
|
|
.metadata
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::InlineElement> for WasmInlineElement {
|
|
fn from(val: kreuzberg::InlineElement) -> Self {
|
|
Self {
|
|
element_type: val.element_type.into(),
|
|
content: val.content,
|
|
attributes: val.attributes.as_ref().map(|v| format!("{v:?}")),
|
|
metadata: val
|
|
.metadata
|
|
.as_ref()
|
|
.and_then(|v| serde_json::to_string(v).ok())
|
|
.and_then(|s| js_sys::JSON::parse(&s).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDjotImage> for kreuzberg::DjotImage {
|
|
fn from(val: WasmDjotImage) -> Self {
|
|
Self {
|
|
src: val.src,
|
|
alt: val.alt,
|
|
title: val.title,
|
|
attributes: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DjotImage> for WasmDjotImage {
|
|
fn from(val: kreuzberg::DjotImage) -> Self {
|
|
Self {
|
|
src: val.src,
|
|
alt: val.alt,
|
|
title: val.title,
|
|
attributes: val.attributes.as_ref().map(|v| format!("{v:?}")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDjotLink> for kreuzberg::DjotLink {
|
|
fn from(val: WasmDjotLink) -> Self {
|
|
Self {
|
|
url: val.url,
|
|
text: val.text,
|
|
title: val.title,
|
|
attributes: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DjotLink> for WasmDjotLink {
|
|
fn from(val: kreuzberg::DjotLink) -> Self {
|
|
Self {
|
|
url: val.url,
|
|
text: val.text,
|
|
title: val.title,
|
|
attributes: val.attributes.as_ref().map(|v| format!("{v:?}")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmFootnote> for kreuzberg::Footnote {
|
|
fn from(val: WasmFootnote) -> Self {
|
|
Self {
|
|
label: val.label,
|
|
content: val.content.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::Footnote> for WasmFootnote {
|
|
fn from(val: kreuzberg::Footnote) -> Self {
|
|
Self {
|
|
label: val.label,
|
|
content: val.content.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDocumentStructure> for kreuzberg::DocumentStructure {
|
|
fn from(val: WasmDocumentStructure) -> Self {
|
|
Self {
|
|
nodes: val.nodes.into_iter().map(Into::into).collect(),
|
|
source_format: val.source_format,
|
|
relationships: val.relationships.into_iter().map(Into::into).collect(),
|
|
node_types: val.node_types.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DocumentStructure> for WasmDocumentStructure {
|
|
fn from(val: kreuzberg::DocumentStructure) -> Self {
|
|
Self {
|
|
nodes: val.nodes.into_iter().map(Into::into).collect(),
|
|
source_format: val.source_format,
|
|
relationships: val.relationships.into_iter().map(Into::into).collect(),
|
|
node_types: val.node_types.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDocumentRelationship> for kreuzberg::DocumentRelationship {
|
|
fn from(val: WasmDocumentRelationship) -> Self {
|
|
Self {
|
|
source: kreuzberg::NodeIndex(val.source),
|
|
target: kreuzberg::NodeIndex(val.target),
|
|
kind: val.kind.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DocumentRelationship> for WasmDocumentRelationship {
|
|
fn from(val: kreuzberg::DocumentRelationship) -> Self {
|
|
Self {
|
|
source: val.source.0,
|
|
target: val.target.0,
|
|
kind: val.kind.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDocumentNode> for kreuzberg::DocumentNode {
|
|
fn from(val: WasmDocumentNode) -> Self {
|
|
Self {
|
|
id: Default::default(),
|
|
content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(),
|
|
parent: (val.parent).map(kreuzberg::NodeIndex),
|
|
children: (val.children.into_iter().collect::<Vec<_>>())
|
|
.into_iter()
|
|
.map(kreuzberg::NodeIndex)
|
|
.collect(),
|
|
content_layer: val.content_layer.into(),
|
|
page: val.page,
|
|
page_end: val.page_end,
|
|
bbox: val.bbox.map(Into::into),
|
|
annotations: val.annotations.into_iter().map(Into::into).collect(),
|
|
attributes: val
|
|
.attributes
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DocumentNode> for WasmDocumentNode {
|
|
fn from(val: kreuzberg::DocumentNode) -> Self {
|
|
Self {
|
|
id: format!("{:?}", val.id),
|
|
content: serde_wasm_bindgen::to_value(&val.content).unwrap_or(JsValue::NULL),
|
|
parent: val.parent.map(|v| v.0),
|
|
children: val
|
|
.children
|
|
.iter()
|
|
.map(|v| v.0)
|
|
.collect::<Vec<_>>()
|
|
.into_iter()
|
|
.collect(),
|
|
content_layer: val.content_layer.into(),
|
|
page: val.page,
|
|
page_end: val.page_end,
|
|
bbox: val.bbox.map(Into::into),
|
|
annotations: val.annotations.into_iter().map(Into::into).collect(),
|
|
attributes: val
|
|
.attributes
|
|
.as_ref()
|
|
.and_then(|v| serde_json::to_string(v).ok())
|
|
.and_then(|s| js_sys::JSON::parse(&s).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTableGrid> for kreuzberg::TableGrid {
|
|
fn from(val: WasmTableGrid) -> Self {
|
|
Self {
|
|
rows: val.rows,
|
|
cols: val.cols,
|
|
cells: val.cells.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TableGrid> for WasmTableGrid {
|
|
fn from(val: kreuzberg::TableGrid) -> Self {
|
|
Self {
|
|
rows: val.rows,
|
|
cols: val.cols,
|
|
cells: val.cells.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmGridCell> for kreuzberg::GridCell {
|
|
fn from(val: WasmGridCell) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
row: val.row,
|
|
col: val.col,
|
|
row_span: val.row_span,
|
|
col_span: val.col_span,
|
|
is_header: val.is_header,
|
|
bbox: val.bbox.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::GridCell> for WasmGridCell {
|
|
fn from(val: kreuzberg::GridCell) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
row: val.row,
|
|
col: val.col,
|
|
row_span: val.row_span,
|
|
col_span: val.col_span,
|
|
is_header: val.is_header,
|
|
bbox: val.bbox.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTextAnnotation> for kreuzberg::TextAnnotation {
|
|
fn from(val: WasmTextAnnotation) -> Self {
|
|
Self {
|
|
start: val.start,
|
|
end: val.end,
|
|
kind: serde_wasm_bindgen::from_value(val.kind.clone()).unwrap_or_default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TextAnnotation> for WasmTextAnnotation {
|
|
fn from(val: kreuzberg::TextAnnotation) -> Self {
|
|
Self {
|
|
start: val.start,
|
|
end: val.end,
|
|
kind: serde_wasm_bindgen::to_value(&val.kind).unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::needless_update)]
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmExtractionResult> for kreuzberg::ExtractionResult {
|
|
fn from(val: WasmExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
mime_type: val.mime_type.into(),
|
|
metadata: val.metadata.into(),
|
|
extraction_method: val.extraction_method.map(Into::into),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
detected_languages: val.detected_languages.map(|v| v.into_iter().collect()),
|
|
chunks: val.chunks.map(|v| v.into_iter().map(Into::into).collect()),
|
|
images: val.images.map(|v| v.into_iter().map(Into::into).collect()),
|
|
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
|
|
elements: val.elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
djot_content: val.djot_content.map(Into::into),
|
|
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
document: val.document.map(Into::into),
|
|
quality_score: val.quality_score,
|
|
processing_warnings: val.processing_warnings.into_iter().map(Into::into).collect(),
|
|
annotations: val.annotations.map(|v| v.into_iter().map(Into::into).collect()),
|
|
children: val.children.map(|v| v.into_iter().map(Into::into).collect()),
|
|
uris: val.uris.map(|v| v.into_iter().map(Into::into).collect()),
|
|
revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
structured_output: val
|
|
.structured_output
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
code_intelligence: val
|
|
.code_intelligence
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
llm_usage: val.llm_usage.map(|v| v.into_iter().map(Into::into).collect()),
|
|
formatted_content: val.formatted_content,
|
|
ocr_internal_document: Default::default(),
|
|
..Default::default()
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExtractionResult> for WasmExtractionResult {
|
|
fn from(val: kreuzberg::ExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
mime_type: val.mime_type.to_string(),
|
|
metadata: val.metadata.into(),
|
|
extraction_method: val.extraction_method.map(Into::into),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
detected_languages: val.detected_languages.map(|v| v.into_iter().collect()),
|
|
chunks: val.chunks.map(|v| v.into_iter().map(Into::into).collect()),
|
|
images: val.images.map(|v| v.into_iter().map(Into::into).collect()),
|
|
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
|
|
elements: val.elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
djot_content: val.djot_content.map(Into::into),
|
|
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
document: val.document.map(Into::into),
|
|
quality_score: val.quality_score,
|
|
processing_warnings: val.processing_warnings.into_iter().map(Into::into).collect(),
|
|
annotations: val.annotations.map(|v| v.into_iter().map(Into::into).collect()),
|
|
children: val.children.map(|v| v.into_iter().map(Into::into).collect()),
|
|
uris: val.uris.map(|v| v.into_iter().map(Into::into).collect()),
|
|
revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
structured_output: val
|
|
.structured_output
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
code_intelligence: val
|
|
.code_intelligence
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
llm_usage: val.llm_usage.map(|v| v.into_iter().map(Into::into).collect()),
|
|
formatted_content: val.formatted_content,
|
|
ocr_internal_document: val.ocr_internal_document.as_ref().map(|v| format!("{v:?}")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmArchiveEntry> for kreuzberg::ArchiveEntry {
|
|
fn from(val: WasmArchiveEntry) -> Self {
|
|
Self {
|
|
path: val.path,
|
|
mime_type: val.mime_type,
|
|
result: Box::new(val.result.into()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ArchiveEntry> for WasmArchiveEntry {
|
|
fn from(val: kreuzberg::ArchiveEntry) -> Self {
|
|
Self {
|
|
path: val.path,
|
|
mime_type: val.mime_type,
|
|
result: (*val.result).into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmProcessingWarning> for kreuzberg::ProcessingWarning {
|
|
fn from(val: WasmProcessingWarning) -> Self {
|
|
Self {
|
|
source: val.source.into(),
|
|
message: val.message.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ProcessingWarning> for WasmProcessingWarning {
|
|
fn from(val: kreuzberg::ProcessingWarning) -> Self {
|
|
Self {
|
|
source: val.source.to_string(),
|
|
message: val.message.to_string(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmLlmUsage> for kreuzberg::LlmUsage {
|
|
fn from(val: WasmLlmUsage) -> Self {
|
|
Self {
|
|
model: val.model,
|
|
source: val.source,
|
|
input_tokens: val.input_tokens,
|
|
output_tokens: val.output_tokens,
|
|
total_tokens: val.total_tokens,
|
|
estimated_cost: val.estimated_cost,
|
|
finish_reason: val.finish_reason,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::LlmUsage> for WasmLlmUsage {
|
|
fn from(val: kreuzberg::LlmUsage) -> Self {
|
|
Self {
|
|
model: val.model,
|
|
source: val.source,
|
|
input_tokens: val.input_tokens,
|
|
output_tokens: val.output_tokens,
|
|
total_tokens: val.total_tokens,
|
|
estimated_cost: val.estimated_cost,
|
|
finish_reason: val.finish_reason,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmChunk> for kreuzberg::Chunk {
|
|
fn from(val: WasmChunk) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
chunk_type: val.chunk_type.into(),
|
|
embedding: val.embedding.map(|v| v.into_iter().collect()),
|
|
metadata: val.metadata.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::Chunk> for WasmChunk {
|
|
fn from(val: kreuzberg::Chunk) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
chunk_type: val.chunk_type.into(),
|
|
embedding: val.embedding.map(|v| v.into_iter().collect()),
|
|
metadata: val.metadata.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmHeadingContext> for kreuzberg::HeadingContext {
|
|
fn from(val: WasmHeadingContext) -> Self {
|
|
Self {
|
|
headings: val.headings.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::HeadingContext> for WasmHeadingContext {
|
|
fn from(val: kreuzberg::HeadingContext) -> Self {
|
|
Self {
|
|
headings: val.headings.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmHeadingLevel> for kreuzberg::HeadingLevel {
|
|
fn from(val: WasmHeadingLevel) -> Self {
|
|
Self {
|
|
level: val.level,
|
|
text: val.text,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::HeadingLevel> for WasmHeadingLevel {
|
|
fn from(val: kreuzberg::HeadingLevel) -> Self {
|
|
Self {
|
|
level: val.level,
|
|
text: val.text,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmChunkMetadata> for kreuzberg::ChunkMetadata {
|
|
fn from(val: WasmChunkMetadata) -> Self {
|
|
Self {
|
|
byte_start: val.byte_start,
|
|
byte_end: val.byte_end,
|
|
token_count: val.token_count,
|
|
chunk_index: val.chunk_index,
|
|
total_chunks: val.total_chunks,
|
|
first_page: val.first_page,
|
|
last_page: val.last_page,
|
|
heading_context: val.heading_context.map(Into::into),
|
|
image_indices: val.image_indices.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ChunkMetadata> for WasmChunkMetadata {
|
|
fn from(val: kreuzberg::ChunkMetadata) -> Self {
|
|
Self {
|
|
byte_start: val.byte_start,
|
|
byte_end: val.byte_end,
|
|
token_count: val.token_count,
|
|
chunk_index: val.chunk_index,
|
|
total_chunks: val.total_chunks,
|
|
first_page: val.first_page,
|
|
last_page: val.last_page,
|
|
heading_context: val.heading_context.map(Into::into),
|
|
image_indices: val.image_indices.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmExtractedImage> for kreuzberg::ExtractedImage {
|
|
fn from(val: WasmExtractedImage) -> Self {
|
|
Self {
|
|
data: val.data.to_vec().into(),
|
|
format: val.format.into(),
|
|
image_index: val.image_index,
|
|
page_number: val.page_number,
|
|
width: val.width,
|
|
height: val.height,
|
|
colorspace: val.colorspace,
|
|
bits_per_component: val.bits_per_component,
|
|
is_mask: val.is_mask,
|
|
description: val.description,
|
|
ocr_result: val.ocr_result.map(Into::into).map(Box::new),
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
source_path: val.source_path,
|
|
image_kind: val.image_kind.map(Into::into),
|
|
kind_confidence: val.kind_confidence,
|
|
cluster_id: val.cluster_id,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExtractedImage> for WasmExtractedImage {
|
|
fn from(val: kreuzberg::ExtractedImage) -> Self {
|
|
Self {
|
|
data: val.data.to_vec().into(),
|
|
format: val.format.to_string(),
|
|
image_index: val.image_index,
|
|
page_number: val.page_number,
|
|
width: val.width,
|
|
height: val.height,
|
|
colorspace: val.colorspace,
|
|
bits_per_component: val.bits_per_component,
|
|
is_mask: val.is_mask,
|
|
description: val.description,
|
|
ocr_result: val.ocr_result.map(|v| (*v).into()),
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
source_path: val.source_path,
|
|
image_kind: val.image_kind.map(Into::into),
|
|
kind_confidence: val.kind_confidence,
|
|
cluster_id: val.cluster_id,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmBoundingBox> for kreuzberg::BoundingBox {
|
|
fn from(val: WasmBoundingBox) -> Self {
|
|
Self {
|
|
x0: val.x0,
|
|
y0: val.y0,
|
|
x1: val.x1,
|
|
y1: val.y1,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::BoundingBox> for WasmBoundingBox {
|
|
fn from(val: kreuzberg::BoundingBox) -> Self {
|
|
Self {
|
|
x0: val.x0,
|
|
y0: val.y0,
|
|
x1: val.x1,
|
|
y1: val.y1,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmElementMetadata> for kreuzberg::ElementMetadata {
|
|
fn from(val: WasmElementMetadata) -> Self {
|
|
Self {
|
|
page_number: val.page_number,
|
|
filename: val.filename,
|
|
coordinates: val.coordinates.map(Into::into),
|
|
element_index: val.element_index,
|
|
additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ElementMetadata> for WasmElementMetadata {
|
|
fn from(val: kreuzberg::ElementMetadata) -> Self {
|
|
Self {
|
|
page_number: val.page_number,
|
|
filename: val.filename,
|
|
coordinates: val.coordinates.map(Into::into),
|
|
element_index: val.element_index,
|
|
additional: js_sys::JSON::parse(&serde_json::to_string(&val.additional).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmElement> for kreuzberg::Element {
|
|
fn from(val: WasmElement) -> Self {
|
|
Self {
|
|
element_id: Default::default(),
|
|
element_type: val.element_type.into(),
|
|
text: val.text,
|
|
metadata: val.metadata.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::Element> for WasmElement {
|
|
fn from(val: kreuzberg::Element) -> Self {
|
|
Self {
|
|
element_id: format!("{:?}", val.element_id),
|
|
element_type: val.element_type.into(),
|
|
text: val.text,
|
|
metadata: val.metadata.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExcelWorkbook> for WasmExcelWorkbook {
|
|
fn from(val: kreuzberg::ExcelWorkbook) -> Self {
|
|
Self {
|
|
sheets: val.sheets.into_iter().map(Into::into).collect(),
|
|
metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExcelSheet> for WasmExcelSheet {
|
|
fn from(val: kreuzberg::ExcelSheet) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
markdown: val.markdown,
|
|
row_count: val.row_count,
|
|
col_count: val.col_count,
|
|
cell_count: val.cell_count,
|
|
table_cells: val
|
|
.table_cells
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::XmlExtractionResult> for WasmXmlExtractionResult {
|
|
fn from(val: kreuzberg::XmlExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
element_count: val.element_count,
|
|
unique_elements: val.unique_elements.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TextExtractionResult> for WasmTextExtractionResult {
|
|
fn from(val: kreuzberg::TextExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
line_count: val.line_count,
|
|
word_count: val.word_count,
|
|
character_count: val.character_count,
|
|
headers: val.headers.map(|v| v.into_iter().collect()),
|
|
links: val.links.as_ref().and_then(|v| {
|
|
serde_wasm_bindgen::to_value(
|
|
&v.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.ok()
|
|
}),
|
|
code_blocks: val.code_blocks.as_ref().and_then(|v| {
|
|
serde_wasm_bindgen::to_value(
|
|
&v.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.ok()
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PptxExtractionResult> for WasmPptxExtractionResult {
|
|
fn from(val: kreuzberg::PptxExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
metadata: val.metadata.into(),
|
|
slide_count: val.slide_count,
|
|
image_count: val.image_count,
|
|
table_count: val.table_count,
|
|
images: val.images.into_iter().map(Into::into).collect(),
|
|
page_structure: val.page_structure.map(Into::into),
|
|
page_contents: val.page_contents.map(|v| v.into_iter().map(Into::into).collect()),
|
|
document: val.document.map(Into::into),
|
|
hyperlinks: val.hyperlinks.iter().map(|i| format!("{:?}", i)).collect(),
|
|
office_metadata: js_sys::JSON::parse(&serde_json::to_string(&val.office_metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
revisions: val.revisions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EmailExtractionResult> for WasmEmailExtractionResult {
|
|
fn from(val: kreuzberg::EmailExtractionResult) -> Self {
|
|
Self {
|
|
subject: val.subject,
|
|
from_email: val.from_email,
|
|
to_emails: val.to_emails.into_iter().collect(),
|
|
cc_emails: val.cc_emails.into_iter().collect(),
|
|
bcc_emails: val.bcc_emails.into_iter().collect(),
|
|
date: val.date,
|
|
message_id: val.message_id,
|
|
plain_text: val.plain_text,
|
|
html_content: val.html_content,
|
|
content: val.content,
|
|
attachments: val.attachments.into_iter().map(Into::into).collect(),
|
|
metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EmailAttachment> for WasmEmailAttachment {
|
|
fn from(val: kreuzberg::EmailAttachment) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
filename: val.filename,
|
|
mime_type: val.mime_type,
|
|
size: val.size,
|
|
is_image: val.is_image,
|
|
data: val.data.map(|v| v.to_vec().into()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrExtractionResult> for kreuzberg::OcrExtractionResult {
|
|
fn from(val: WasmOcrExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
mime_type: val.mime_type,
|
|
metadata: serde_wasm_bindgen::from_value(val.metadata.clone()).unwrap_or_default(),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
internal_document: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrExtractionResult> for WasmOcrExtractionResult {
|
|
fn from(val: kreuzberg::OcrExtractionResult) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
mime_type: val.mime_type,
|
|
metadata: js_sys::JSON::parse(&serde_json::to_string(&val.metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
tables: val.tables.into_iter().map(Into::into).collect(),
|
|
ocr_elements: val.ocr_elements.map(|v| v.into_iter().map(Into::into).collect()),
|
|
internal_document: val.internal_document.as_ref().map(|v| format!("{v:?}")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrTable> for kreuzberg::OcrTable {
|
|
fn from(val: WasmOcrTable) -> Self {
|
|
Self {
|
|
cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(),
|
|
markdown: val.markdown,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrTable> for WasmOcrTable {
|
|
fn from(val: kreuzberg::OcrTable) -> Self {
|
|
Self {
|
|
cells: serde_wasm_bindgen::to_value(&val.cells).unwrap_or(JsValue::NULL),
|
|
markdown: val.markdown,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrTableBoundingBox> for kreuzberg::OcrTableBoundingBox {
|
|
fn from(val: WasmOcrTableBoundingBox) -> Self {
|
|
Self {
|
|
left: val.left,
|
|
top: val.top,
|
|
right: val.right,
|
|
bottom: val.bottom,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrTableBoundingBox> for WasmOcrTableBoundingBox {
|
|
fn from(val: kreuzberg::OcrTableBoundingBox) -> Self {
|
|
Self {
|
|
left: val.left,
|
|
top: val.top,
|
|
right: val.right,
|
|
bottom: val.bottom,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmImagePreprocessingConfig> for kreuzberg::ImagePreprocessingConfig {
|
|
fn from(val: WasmImagePreprocessingConfig) -> Self {
|
|
Self {
|
|
target_dpi: val.target_dpi,
|
|
auto_rotate: val.auto_rotate,
|
|
deskew: val.deskew,
|
|
denoise: val.denoise,
|
|
contrast_enhance: val.contrast_enhance,
|
|
binarization_method: val.binarization_method,
|
|
invert_colors: val.invert_colors,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ImagePreprocessingConfig> for WasmImagePreprocessingConfig {
|
|
fn from(val: kreuzberg::ImagePreprocessingConfig) -> Self {
|
|
Self {
|
|
target_dpi: val.target_dpi,
|
|
auto_rotate: val.auto_rotate,
|
|
deskew: val.deskew,
|
|
denoise: val.denoise,
|
|
contrast_enhance: val.contrast_enhance,
|
|
binarization_method: val.binarization_method,
|
|
invert_colors: val.invert_colors,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTesseractConfig> for kreuzberg::TesseractConfig {
|
|
fn from(val: WasmTesseractConfig) -> Self {
|
|
Self {
|
|
language: val.language,
|
|
psm: val.psm,
|
|
output_format: val.output_format,
|
|
oem: val.oem,
|
|
min_confidence: val.min_confidence,
|
|
preprocessing: val.preprocessing.map(Into::into),
|
|
enable_table_detection: val.enable_table_detection,
|
|
table_min_confidence: val.table_min_confidence,
|
|
table_column_threshold: val.table_column_threshold,
|
|
table_row_threshold_ratio: val.table_row_threshold_ratio,
|
|
use_cache: val.use_cache,
|
|
classify_use_pre_adapted_templates: val.classify_use_pre_adapted_templates,
|
|
language_model_ngram_on: val.language_model_ngram_on,
|
|
tessedit_dont_blkrej_good_wds: val.tessedit_dont_blkrej_good_wds,
|
|
tessedit_dont_rowrej_good_wds: val.tessedit_dont_rowrej_good_wds,
|
|
tessedit_enable_dict_correction: val.tessedit_enable_dict_correction,
|
|
tessedit_char_whitelist: val.tessedit_char_whitelist,
|
|
tessedit_char_blacklist: val.tessedit_char_blacklist,
|
|
tessedit_use_primary_params_model: val.tessedit_use_primary_params_model,
|
|
textord_space_size_is_variable: val.textord_space_size_is_variable,
|
|
thresholding_method: val.thresholding_method,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TesseractConfig> for WasmTesseractConfig {
|
|
fn from(val: kreuzberg::TesseractConfig) -> Self {
|
|
Self {
|
|
language: val.language,
|
|
psm: val.psm,
|
|
output_format: val.output_format,
|
|
oem: val.oem,
|
|
min_confidence: val.min_confidence,
|
|
preprocessing: val.preprocessing.map(Into::into),
|
|
enable_table_detection: val.enable_table_detection,
|
|
table_min_confidence: val.table_min_confidence,
|
|
table_column_threshold: val.table_column_threshold,
|
|
table_row_threshold_ratio: val.table_row_threshold_ratio,
|
|
use_cache: val.use_cache,
|
|
classify_use_pre_adapted_templates: val.classify_use_pre_adapted_templates,
|
|
language_model_ngram_on: val.language_model_ngram_on,
|
|
tessedit_dont_blkrej_good_wds: val.tessedit_dont_blkrej_good_wds,
|
|
tessedit_dont_rowrej_good_wds: val.tessedit_dont_rowrej_good_wds,
|
|
tessedit_enable_dict_correction: val.tessedit_enable_dict_correction,
|
|
tessedit_char_whitelist: val.tessedit_char_whitelist,
|
|
tessedit_char_blacklist: val.tessedit_char_blacklist,
|
|
tessedit_use_primary_params_model: val.tessedit_use_primary_params_model,
|
|
textord_space_size_is_variable: val.textord_space_size_is_variable,
|
|
thresholding_method: val.thresholding_method,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmImagePreprocessingMetadata> for kreuzberg::ImagePreprocessingMetadata {
|
|
fn from(val: WasmImagePreprocessingMetadata) -> Self {
|
|
Self {
|
|
original_dimensions: Default::default(),
|
|
original_dpi: Default::default(),
|
|
target_dpi: val.target_dpi,
|
|
scale_factor: val.scale_factor,
|
|
auto_adjusted: val.auto_adjusted,
|
|
final_dpi: val.final_dpi,
|
|
new_dimensions: Default::default(),
|
|
resample_method: val.resample_method,
|
|
dimension_clamped: val.dimension_clamped,
|
|
calculated_dpi: val.calculated_dpi,
|
|
skipped_resize: val.skipped_resize,
|
|
resize_error: val.resize_error,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ImagePreprocessingMetadata> for WasmImagePreprocessingMetadata {
|
|
fn from(val: kreuzberg::ImagePreprocessingMetadata) -> Self {
|
|
Self {
|
|
original_dimensions: vec![val.original_dimensions.0 as _, val.original_dimensions.1 as _],
|
|
original_dpi: vec![val.original_dpi.0 as _, val.original_dpi.1 as _],
|
|
target_dpi: val.target_dpi,
|
|
scale_factor: val.scale_factor,
|
|
auto_adjusted: val.auto_adjusted,
|
|
final_dpi: val.final_dpi,
|
|
new_dimensions: val.new_dimensions.map(|t| {
|
|
let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
|
|
arr
|
|
}),
|
|
resample_method: val.resample_method,
|
|
dimension_clamped: val.dimension_clamped,
|
|
calculated_dpi: val.calculated_dpi,
|
|
skipped_resize: val.skipped_resize,
|
|
resize_error: val.resize_error,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmMetadata> for kreuzberg::Metadata {
|
|
fn from(val: WasmMetadata) -> Self {
|
|
Self {
|
|
title: val.title,
|
|
subject: val.subject,
|
|
authors: val.authors.map(|v| v.into_iter().collect()),
|
|
keywords: val.keywords.map(|v| v.into_iter().collect()),
|
|
language: val.language,
|
|
created_at: val.created_at,
|
|
modified_at: val.modified_at,
|
|
created_by: val.created_by,
|
|
modified_by: val.modified_by,
|
|
pages: val.pages.map(Into::into),
|
|
format: val
|
|
.format
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
image_preprocessing: val.image_preprocessing.map(Into::into),
|
|
json_schema: val
|
|
.json_schema
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
error: val.error.map(Into::into),
|
|
extraction_duration_ms: val.extraction_duration_ms,
|
|
category: val.category,
|
|
tags: val.tags.map(|v| v.into_iter().collect()),
|
|
document_version: val.document_version,
|
|
abstract_text: val.abstract_text,
|
|
output_format: val.output_format,
|
|
ocr_used: val.ocr_used,
|
|
additional: serde_wasm_bindgen::from_value(val.additional.clone()).unwrap_or_default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::Metadata> for WasmMetadata {
|
|
fn from(val: kreuzberg::Metadata) -> Self {
|
|
Self {
|
|
title: val.title,
|
|
subject: val.subject,
|
|
authors: val.authors.map(|v| v.into_iter().collect()),
|
|
keywords: val.keywords.map(|v| v.into_iter().collect()),
|
|
language: val.language,
|
|
created_at: val.created_at,
|
|
modified_at: val.modified_at,
|
|
created_by: val.created_by,
|
|
modified_by: val.modified_by,
|
|
pages: val.pages.map(Into::into),
|
|
format: val.format.as_ref().and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
image_preprocessing: val.image_preprocessing.map(Into::into),
|
|
json_schema: val
|
|
.json_schema
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
error: val.error.map(Into::into),
|
|
extraction_duration_ms: val.extraction_duration_ms,
|
|
category: val.category,
|
|
tags: val.tags.map(|v| v.into_iter().collect()),
|
|
document_version: val.document_version,
|
|
abstract_text: val.abstract_text,
|
|
output_format: val.output_format,
|
|
ocr_used: val.ocr_used,
|
|
additional: js_sys::JSON::parse(&serde_json::to_string(&val.additional).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmExcelMetadata> for kreuzberg::ExcelMetadata {
|
|
fn from(val: WasmExcelMetadata) -> Self {
|
|
Self {
|
|
sheet_count: val.sheet_count,
|
|
sheet_names: val.sheet_names.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExcelMetadata> for WasmExcelMetadata {
|
|
fn from(val: kreuzberg::ExcelMetadata) -> Self {
|
|
Self {
|
|
sheet_count: val.sheet_count,
|
|
sheet_names: val.sheet_names.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmEmailMetadata> for kreuzberg::EmailMetadata {
|
|
fn from(val: WasmEmailMetadata) -> Self {
|
|
Self {
|
|
from_email: val.from_email,
|
|
from_name: val.from_name,
|
|
to_emails: val.to_emails.into_iter().collect(),
|
|
cc_emails: val.cc_emails.into_iter().collect(),
|
|
bcc_emails: val.bcc_emails.into_iter().collect(),
|
|
message_id: val.message_id,
|
|
attachments: val.attachments.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EmailMetadata> for WasmEmailMetadata {
|
|
fn from(val: kreuzberg::EmailMetadata) -> Self {
|
|
Self {
|
|
from_email: val.from_email,
|
|
from_name: val.from_name,
|
|
to_emails: val.to_emails.into_iter().collect(),
|
|
cc_emails: val.cc_emails.into_iter().collect(),
|
|
bcc_emails: val.bcc_emails.into_iter().collect(),
|
|
message_id: val.message_id,
|
|
attachments: val.attachments.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmArchiveMetadata> for kreuzberg::ArchiveMetadata {
|
|
fn from(val: WasmArchiveMetadata) -> Self {
|
|
Self {
|
|
format: val.format.into(),
|
|
file_count: val.file_count,
|
|
file_list: val.file_list.into_iter().collect(),
|
|
total_size: val.total_size,
|
|
compressed_size: val.compressed_size,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ArchiveMetadata> for WasmArchiveMetadata {
|
|
fn from(val: kreuzberg::ArchiveMetadata) -> Self {
|
|
Self {
|
|
format: val.format.to_string(),
|
|
file_count: val.file_count,
|
|
file_list: val.file_list.into_iter().collect(),
|
|
total_size: val.total_size,
|
|
compressed_size: val.compressed_size,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmImageMetadata> for kreuzberg::ImageMetadata {
|
|
fn from(val: WasmImageMetadata) -> Self {
|
|
Self {
|
|
width: val.width,
|
|
height: val.height,
|
|
format: val.format,
|
|
exif: serde_wasm_bindgen::from_value(val.exif.clone()).unwrap_or_default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ImageMetadata> for WasmImageMetadata {
|
|
fn from(val: kreuzberg::ImageMetadata) -> Self {
|
|
Self {
|
|
width: val.width,
|
|
height: val.height,
|
|
format: val.format,
|
|
exif: js_sys::JSON::parse(&serde_json::to_string(&val.exif).unwrap_or_default()).unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmXmlMetadata> for kreuzberg::XmlMetadata {
|
|
fn from(val: WasmXmlMetadata) -> Self {
|
|
Self {
|
|
element_count: val.element_count,
|
|
unique_elements: val.unique_elements.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::XmlMetadata> for WasmXmlMetadata {
|
|
fn from(val: kreuzberg::XmlMetadata) -> Self {
|
|
Self {
|
|
element_count: val.element_count,
|
|
unique_elements: val.unique_elements.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTextMetadata> for kreuzberg::TextMetadata {
|
|
fn from(val: WasmTextMetadata) -> Self {
|
|
Self {
|
|
line_count: val.line_count,
|
|
word_count: val.word_count,
|
|
character_count: val.character_count,
|
|
headers: val.headers.map(|v| v.into_iter().collect()),
|
|
links: Default::default(),
|
|
code_blocks: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TextMetadata> for WasmTextMetadata {
|
|
fn from(val: kreuzberg::TextMetadata) -> Self {
|
|
Self {
|
|
line_count: val.line_count,
|
|
word_count: val.word_count,
|
|
character_count: val.character_count,
|
|
headers: val.headers.map(|v| v.into_iter().collect()),
|
|
links: val.links.as_ref().and_then(|v| {
|
|
serde_wasm_bindgen::to_value(
|
|
&v.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.ok()
|
|
}),
|
|
code_blocks: val.code_blocks.as_ref().and_then(|v| {
|
|
serde_wasm_bindgen::to_value(
|
|
&v.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.ok()
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmHeaderMetadata> for kreuzberg::HeaderMetadata {
|
|
fn from(val: WasmHeaderMetadata) -> Self {
|
|
Self {
|
|
level: val.level,
|
|
text: val.text,
|
|
id: val.id,
|
|
depth: val.depth,
|
|
html_offset: val.html_offset,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::HeaderMetadata> for WasmHeaderMetadata {
|
|
fn from(val: kreuzberg::HeaderMetadata) -> Self {
|
|
Self {
|
|
level: val.level,
|
|
text: val.text,
|
|
id: val.id,
|
|
depth: val.depth,
|
|
html_offset: val.html_offset,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmLinkMetadata> for kreuzberg::LinkMetadata {
|
|
fn from(val: WasmLinkMetadata) -> Self {
|
|
Self {
|
|
href: val.href,
|
|
text: val.text,
|
|
title: val.title,
|
|
link_type: val.link_type.into(),
|
|
rel: val.rel.into_iter().collect(),
|
|
attributes: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::LinkMetadata> for WasmLinkMetadata {
|
|
fn from(val: kreuzberg::LinkMetadata) -> Self {
|
|
Self {
|
|
href: val.href,
|
|
text: val.text,
|
|
title: val.title,
|
|
link_type: val.link_type.into(),
|
|
rel: val.rel.into_iter().collect(),
|
|
attributes: serde_wasm_bindgen::to_value(
|
|
&val.attributes
|
|
.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmImageMetadataType> for kreuzberg::ImageMetadataType {
|
|
fn from(val: WasmImageMetadataType) -> Self {
|
|
Self {
|
|
src: val.src,
|
|
alt: val.alt,
|
|
title: val.title,
|
|
dimensions: Default::default(),
|
|
image_type: val.image_type.into(),
|
|
attributes: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ImageMetadataType> for WasmImageMetadataType {
|
|
fn from(val: kreuzberg::ImageMetadataType) -> Self {
|
|
Self {
|
|
src: val.src,
|
|
alt: val.alt,
|
|
title: val.title,
|
|
dimensions: val.dimensions.map(|t| {
|
|
let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
|
|
arr
|
|
}),
|
|
image_type: val.image_type.into(),
|
|
attributes: serde_wasm_bindgen::to_value(
|
|
&val.attributes
|
|
.iter()
|
|
.map(|(a, b)| vec![a.to_string(), b.to_string()])
|
|
.collect::<Vec<Vec<String>>>(),
|
|
)
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmStructuredData> for kreuzberg::StructuredData {
|
|
fn from(val: WasmStructuredData) -> Self {
|
|
Self {
|
|
data_type: val.data_type.into(),
|
|
raw_json: val.raw_json,
|
|
schema_type: val.schema_type,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::StructuredData> for WasmStructuredData {
|
|
fn from(val: kreuzberg::StructuredData) -> Self {
|
|
Self {
|
|
data_type: val.data_type.into(),
|
|
raw_json: val.raw_json,
|
|
schema_type: val.schema_type,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmHtmlMetadata> for kreuzberg::HtmlMetadata {
|
|
fn from(val: WasmHtmlMetadata) -> Self {
|
|
Self {
|
|
title: val.title,
|
|
description: val.description,
|
|
keywords: val.keywords.into_iter().collect(),
|
|
author: val.author,
|
|
canonical_url: val.canonical_url,
|
|
base_href: val.base_href,
|
|
language: val.language,
|
|
text_direction: val.text_direction.map(Into::into),
|
|
open_graph: serde_wasm_bindgen::from_value(val.open_graph.clone()).unwrap_or_default(),
|
|
twitter_card: serde_wasm_bindgen::from_value(val.twitter_card.clone()).unwrap_or_default(),
|
|
meta_tags: serde_wasm_bindgen::from_value(val.meta_tags.clone()).unwrap_or_default(),
|
|
headers: val.headers.into_iter().map(Into::into).collect(),
|
|
links: val.links.into_iter().map(Into::into).collect(),
|
|
images: val.images.into_iter().map(Into::into).collect(),
|
|
structured_data: val.structured_data.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::HtmlMetadata> for WasmHtmlMetadata {
|
|
fn from(val: kreuzberg::HtmlMetadata) -> Self {
|
|
Self {
|
|
title: val.title,
|
|
description: val.description,
|
|
keywords: val.keywords.into_iter().collect(),
|
|
author: val.author,
|
|
canonical_url: val.canonical_url,
|
|
base_href: val.base_href,
|
|
language: val.language,
|
|
text_direction: val.text_direction.map(Into::into),
|
|
open_graph: js_sys::JSON::parse(&serde_json::to_string(&val.open_graph).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
twitter_card: js_sys::JSON::parse(&serde_json::to_string(&val.twitter_card).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
meta_tags: js_sys::JSON::parse(&serde_json::to_string(&val.meta_tags).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
headers: val.headers.into_iter().map(Into::into).collect(),
|
|
links: val.links.into_iter().map(Into::into).collect(),
|
|
images: val.images.into_iter().map(Into::into).collect(),
|
|
structured_data: val.structured_data.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrMetadata> for kreuzberg::OcrMetadata {
|
|
fn from(val: WasmOcrMetadata) -> Self {
|
|
Self {
|
|
language: val.language,
|
|
psm: val.psm,
|
|
output_format: val.output_format,
|
|
table_count: val.table_count,
|
|
table_rows: val.table_rows,
|
|
table_cols: val.table_cols,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrMetadata> for WasmOcrMetadata {
|
|
fn from(val: kreuzberg::OcrMetadata) -> Self {
|
|
Self {
|
|
language: val.language,
|
|
psm: val.psm,
|
|
output_format: val.output_format,
|
|
table_count: val.table_count,
|
|
table_rows: val.table_rows,
|
|
table_cols: val.table_cols,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmErrorMetadata> for kreuzberg::ErrorMetadata {
|
|
fn from(val: WasmErrorMetadata) -> Self {
|
|
Self {
|
|
error_type: val.error_type,
|
|
message: val.message,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ErrorMetadata> for WasmErrorMetadata {
|
|
fn from(val: kreuzberg::ErrorMetadata) -> Self {
|
|
Self {
|
|
error_type: val.error_type,
|
|
message: val.message,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPptxMetadata> for kreuzberg::PptxMetadata {
|
|
fn from(val: WasmPptxMetadata) -> Self {
|
|
Self {
|
|
slide_count: val.slide_count,
|
|
slide_names: val.slide_names.into_iter().collect(),
|
|
image_count: val.image_count,
|
|
table_count: val.table_count,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PptxMetadata> for WasmPptxMetadata {
|
|
fn from(val: kreuzberg::PptxMetadata) -> Self {
|
|
Self {
|
|
slide_count: val.slide_count,
|
|
slide_names: val.slide_names.into_iter().collect(),
|
|
image_count: val.image_count,
|
|
table_count: val.table_count,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmCsvMetadata> for kreuzberg::CsvMetadata {
|
|
fn from(val: WasmCsvMetadata) -> Self {
|
|
Self {
|
|
row_count: val.row_count,
|
|
column_count: val.column_count,
|
|
delimiter: val.delimiter,
|
|
has_header: val.has_header,
|
|
column_types: val.column_types.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::CsvMetadata> for WasmCsvMetadata {
|
|
fn from(val: kreuzberg::CsvMetadata) -> Self {
|
|
Self {
|
|
row_count: val.row_count,
|
|
column_count: val.column_count,
|
|
delimiter: val.delimiter,
|
|
has_header: val.has_header,
|
|
column_types: val.column_types.map(|v| v.into_iter().collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmBibtexMetadata> for kreuzberg::BibtexMetadata {
|
|
fn from(val: WasmBibtexMetadata) -> Self {
|
|
Self {
|
|
entry_count: val.entry_count,
|
|
citation_keys: val.citation_keys.into_iter().collect(),
|
|
authors: val.authors.into_iter().collect(),
|
|
year_range: val.year_range.map(Into::into),
|
|
entry_types: val
|
|
.entry_types
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::BibtexMetadata> for WasmBibtexMetadata {
|
|
fn from(val: kreuzberg::BibtexMetadata) -> Self {
|
|
Self {
|
|
entry_count: val.entry_count,
|
|
citation_keys: val.citation_keys.into_iter().collect(),
|
|
authors: val.authors.into_iter().collect(),
|
|
year_range: val.year_range.map(Into::into),
|
|
entry_types: val
|
|
.entry_types
|
|
.as_ref()
|
|
.and_then(|v| serde_json::to_string(v).ok())
|
|
.and_then(|s| js_sys::JSON::parse(&s).ok()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmCitationMetadata> for kreuzberg::CitationMetadata {
|
|
fn from(val: WasmCitationMetadata) -> Self {
|
|
Self {
|
|
citation_count: val.citation_count,
|
|
format: val.format,
|
|
authors: val.authors.into_iter().collect(),
|
|
year_range: val.year_range.map(Into::into),
|
|
dois: val.dois.into_iter().collect(),
|
|
keywords: val.keywords.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::CitationMetadata> for WasmCitationMetadata {
|
|
fn from(val: kreuzberg::CitationMetadata) -> Self {
|
|
Self {
|
|
citation_count: val.citation_count,
|
|
format: val.format,
|
|
authors: val.authors.into_iter().collect(),
|
|
year_range: val.year_range.map(Into::into),
|
|
dois: val.dois.into_iter().collect(),
|
|
keywords: val.keywords.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmYearRange> for kreuzberg::YearRange {
|
|
fn from(val: WasmYearRange) -> Self {
|
|
Self {
|
|
min: val.min,
|
|
max: val.max,
|
|
years: val.years.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::YearRange> for WasmYearRange {
|
|
fn from(val: kreuzberg::YearRange) -> Self {
|
|
Self {
|
|
min: val.min,
|
|
max: val.max,
|
|
years: val.years.into_iter().collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmFictionBookMetadata> for kreuzberg::FictionBookMetadata {
|
|
fn from(val: WasmFictionBookMetadata) -> Self {
|
|
Self {
|
|
genres: val.genres.into_iter().collect(),
|
|
sequences: val.sequences.into_iter().collect(),
|
|
annotation: val.annotation,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::FictionBookMetadata> for WasmFictionBookMetadata {
|
|
fn from(val: kreuzberg::FictionBookMetadata) -> Self {
|
|
Self {
|
|
genres: val.genres.into_iter().collect(),
|
|
sequences: val.sequences.into_iter().collect(),
|
|
annotation: val.annotation,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDbfMetadata> for kreuzberg::DbfMetadata {
|
|
fn from(val: WasmDbfMetadata) -> Self {
|
|
Self {
|
|
record_count: val.record_count,
|
|
field_count: val.field_count,
|
|
fields: val.fields.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DbfMetadata> for WasmDbfMetadata {
|
|
fn from(val: kreuzberg::DbfMetadata) -> Self {
|
|
Self {
|
|
record_count: val.record_count,
|
|
field_count: val.field_count,
|
|
fields: val.fields.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDbfFieldInfo> for kreuzberg::DbfFieldInfo {
|
|
fn from(val: WasmDbfFieldInfo) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
field_type: val.field_type,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DbfFieldInfo> for WasmDbfFieldInfo {
|
|
fn from(val: kreuzberg::DbfFieldInfo) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
field_type: val.field_type,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmJatsMetadata> for kreuzberg::JatsMetadata {
|
|
fn from(val: WasmJatsMetadata) -> Self {
|
|
Self {
|
|
copyright: val.copyright,
|
|
license: val.license,
|
|
history_dates: serde_wasm_bindgen::from_value(val.history_dates.clone()).unwrap_or_default(),
|
|
contributor_roles: val.contributor_roles.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::JatsMetadata> for WasmJatsMetadata {
|
|
fn from(val: kreuzberg::JatsMetadata) -> Self {
|
|
Self {
|
|
copyright: val.copyright,
|
|
license: val.license,
|
|
history_dates: js_sys::JSON::parse(&serde_json::to_string(&val.history_dates).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
contributor_roles: val.contributor_roles.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmContributorRole> for kreuzberg::ContributorRole {
|
|
fn from(val: WasmContributorRole) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
role: val.role,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ContributorRole> for WasmContributorRole {
|
|
fn from(val: kreuzberg::ContributorRole) -> Self {
|
|
Self {
|
|
name: val.name,
|
|
role: val.role,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmEpubMetadata> for kreuzberg::EpubMetadata {
|
|
fn from(val: WasmEpubMetadata) -> Self {
|
|
Self {
|
|
coverage: val.coverage,
|
|
dc_format: val.dc_format,
|
|
relation: val.relation,
|
|
source: val.source,
|
|
dc_type: val.dc_type,
|
|
cover_image: val.cover_image,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::EpubMetadata> for WasmEpubMetadata {
|
|
fn from(val: kreuzberg::EpubMetadata) -> Self {
|
|
Self {
|
|
coverage: val.coverage,
|
|
dc_format: val.dc_format,
|
|
relation: val.relation,
|
|
source: val.source,
|
|
dc_type: val.dc_type,
|
|
cover_image: val.cover_image,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPstMetadata> for kreuzberg::PstMetadata {
|
|
fn from(val: WasmPstMetadata) -> Self {
|
|
Self {
|
|
message_count: val.message_count,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PstMetadata> for WasmPstMetadata {
|
|
fn from(val: kreuzberg::PstMetadata) -> Self {
|
|
Self {
|
|
message_count: val.message_count,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrConfidence> for kreuzberg::OcrConfidence {
|
|
fn from(val: WasmOcrConfidence) -> Self {
|
|
Self {
|
|
detection: val.detection,
|
|
recognition: val.recognition,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrConfidence> for WasmOcrConfidence {
|
|
fn from(val: kreuzberg::OcrConfidence) -> Self {
|
|
Self {
|
|
detection: val.detection,
|
|
recognition: val.recognition,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrRotation> for kreuzberg::OcrRotation {
|
|
fn from(val: WasmOcrRotation) -> Self {
|
|
Self {
|
|
angle_degrees: val.angle_degrees,
|
|
confidence: val.confidence,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrRotation> for WasmOcrRotation {
|
|
fn from(val: kreuzberg::OcrRotation) -> Self {
|
|
Self {
|
|
angle_degrees: val.angle_degrees,
|
|
confidence: val.confidence,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrElement> for kreuzberg::OcrElement {
|
|
fn from(val: WasmOcrElement) -> Self {
|
|
Self {
|
|
text: val.text,
|
|
geometry: serde_wasm_bindgen::from_value(val.geometry.clone()).unwrap_or_default(),
|
|
confidence: val.confidence.into(),
|
|
level: val.level.into(),
|
|
rotation: val.rotation.map(Into::into),
|
|
page_number: val.page_number,
|
|
parent_id: val.parent_id,
|
|
backend_metadata: serde_wasm_bindgen::from_value(val.backend_metadata.clone()).unwrap_or_default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrElement> for WasmOcrElement {
|
|
fn from(val: kreuzberg::OcrElement) -> Self {
|
|
Self {
|
|
text: val.text,
|
|
geometry: serde_wasm_bindgen::to_value(&val.geometry).unwrap_or(JsValue::NULL),
|
|
confidence: val.confidence.into(),
|
|
level: val.level.into(),
|
|
rotation: val.rotation.map(Into::into),
|
|
page_number: val.page_number,
|
|
parent_id: val.parent_id,
|
|
backend_metadata: js_sys::JSON::parse(&serde_json::to_string(&val.backend_metadata).unwrap_or_default())
|
|
.unwrap_or(JsValue::NULL),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmOcrElementConfig> for kreuzberg::OcrElementConfig {
|
|
fn from(val: WasmOcrElementConfig) -> Self {
|
|
Self {
|
|
include_elements: val.include_elements,
|
|
min_level: val.min_level.into(),
|
|
min_confidence: val.min_confidence,
|
|
build_hierarchy: val.build_hierarchy,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::OcrElementConfig> for WasmOcrElementConfig {
|
|
fn from(val: kreuzberg::OcrElementConfig) -> Self {
|
|
Self {
|
|
include_elements: val.include_elements,
|
|
min_level: val.min_level.into(),
|
|
min_confidence: val.min_confidence,
|
|
build_hierarchy: val.build_hierarchy,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageStructure> for kreuzberg::PageStructure {
|
|
fn from(val: WasmPageStructure) -> Self {
|
|
Self {
|
|
total_count: val.total_count,
|
|
unit_type: val.unit_type.into(),
|
|
boundaries: val.boundaries.map(|v| v.into_iter().map(Into::into).collect()),
|
|
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageStructure> for WasmPageStructure {
|
|
fn from(val: kreuzberg::PageStructure) -> Self {
|
|
Self {
|
|
total_count: val.total_count,
|
|
unit_type: val.unit_type.into(),
|
|
boundaries: val.boundaries.map(|v| v.into_iter().map(Into::into).collect()),
|
|
pages: val.pages.map(|v| v.into_iter().map(Into::into).collect()),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageBoundary> for kreuzberg::PageBoundary {
|
|
fn from(val: WasmPageBoundary) -> Self {
|
|
Self {
|
|
byte_start: val.byte_start,
|
|
byte_end: val.byte_end,
|
|
page_number: val.page_number,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageBoundary> for WasmPageBoundary {
|
|
fn from(val: kreuzberg::PageBoundary) -> Self {
|
|
Self {
|
|
byte_start: val.byte_start,
|
|
byte_end: val.byte_end,
|
|
page_number: val.page_number,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageInfo> for kreuzberg::PageInfo {
|
|
fn from(val: WasmPageInfo) -> Self {
|
|
Self {
|
|
number: val.number,
|
|
title: val.title,
|
|
dimensions: Default::default(),
|
|
image_count: val.image_count,
|
|
table_count: val.table_count,
|
|
hidden: val.hidden,
|
|
is_blank: val.is_blank,
|
|
has_vector_graphics: val.has_vector_graphics,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageInfo> for WasmPageInfo {
|
|
fn from(val: kreuzberg::PageInfo) -> Self {
|
|
Self {
|
|
number: val.number,
|
|
title: val.title,
|
|
dimensions: val.dimensions.map(|t| {
|
|
let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
|
|
arr
|
|
}),
|
|
image_count: val.image_count,
|
|
table_count: val.table_count,
|
|
hidden: val.hidden,
|
|
is_blank: val.is_blank,
|
|
has_vector_graphics: val.has_vector_graphics,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageContent> for kreuzberg::PageContent {
|
|
fn from(val: WasmPageContent) -> Self {
|
|
Self {
|
|
page_number: val.page_number,
|
|
content: val.content,
|
|
tables: val.tables.into_iter().map(|v| std::sync::Arc::new(v.into())).collect(),
|
|
image_indices: val.image_indices.into_iter().collect(),
|
|
hierarchy: val.hierarchy.map(Into::into),
|
|
is_blank: val.is_blank,
|
|
layout_regions: val.layout_regions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
speaker_notes: val.speaker_notes,
|
|
section_name: val.section_name,
|
|
sheet_name: val.sheet_name,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageContent> for WasmPageContent {
|
|
fn from(val: kreuzberg::PageContent) -> Self {
|
|
Self {
|
|
page_number: val.page_number,
|
|
content: val.content,
|
|
tables: val.tables.into_iter().map(|v| (*v).clone().into()).collect(),
|
|
image_indices: val.image_indices.into_iter().collect(),
|
|
hierarchy: val.hierarchy.map(Into::into),
|
|
is_blank: val.is_blank,
|
|
layout_regions: val.layout_regions.map(|v| v.into_iter().map(Into::into).collect()),
|
|
speaker_notes: val.speaker_notes,
|
|
section_name: val.section_name,
|
|
sheet_name: val.sheet_name,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmLayoutRegion> for kreuzberg::LayoutRegion {
|
|
fn from(val: WasmLayoutRegion) -> Self {
|
|
Self {
|
|
class_name: val.class_name,
|
|
confidence: val.confidence,
|
|
bounding_box: val.bounding_box.into(),
|
|
area_fraction: val.area_fraction,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::LayoutRegion> for WasmLayoutRegion {
|
|
fn from(val: kreuzberg::LayoutRegion) -> Self {
|
|
Self {
|
|
class_name: val.class_name,
|
|
confidence: val.confidence,
|
|
bounding_box: val.bounding_box.into(),
|
|
area_fraction: val.area_fraction,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmPageHierarchy> for kreuzberg::PageHierarchy {
|
|
fn from(val: WasmPageHierarchy) -> Self {
|
|
Self {
|
|
block_count: val.block_count,
|
|
blocks: val.blocks.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::PageHierarchy> for WasmPageHierarchy {
|
|
fn from(val: kreuzberg::PageHierarchy) -> Self {
|
|
Self {
|
|
block_count: val.block_count,
|
|
blocks: val.blocks.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmHierarchicalBlock> for kreuzberg::HierarchicalBlock {
|
|
fn from(val: WasmHierarchicalBlock) -> Self {
|
|
Self {
|
|
text: val.text,
|
|
font_size: val.font_size,
|
|
level: val.level,
|
|
bbox: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::HierarchicalBlock> for WasmHierarchicalBlock {
|
|
fn from(val: kreuzberg::HierarchicalBlock) -> Self {
|
|
Self {
|
|
text: val.text,
|
|
font_size: val.font_size,
|
|
level: val.level,
|
|
bbox: val.bbox.map(|t| {
|
|
let arr: Vec<_> = [t.0, t.1].into_iter().map(|v| v as _).collect();
|
|
arr
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmCellChange> for kreuzberg::CellChange {
|
|
fn from(val: WasmCellChange) -> Self {
|
|
Self {
|
|
row: val.row,
|
|
col: val.col,
|
|
from: val.from,
|
|
to: val.to,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::CellChange> for WasmCellChange {
|
|
fn from(val: kreuzberg::CellChange) -> Self {
|
|
Self {
|
|
row: val.row,
|
|
col: val.col,
|
|
from: val.from,
|
|
to: val.to,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmDocumentRevision> for kreuzberg::DocumentRevision {
|
|
fn from(val: WasmDocumentRevision) -> Self {
|
|
Self {
|
|
revision_id: val.revision_id,
|
|
author: val.author,
|
|
timestamp: val.timestamp,
|
|
kind: val.kind.into(),
|
|
anchor: val
|
|
.anchor
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value(v.clone()).ok()),
|
|
delta: val.delta.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::DocumentRevision> for WasmDocumentRevision {
|
|
fn from(val: kreuzberg::DocumentRevision) -> Self {
|
|
Self {
|
|
revision_id: val.revision_id,
|
|
author: val.author,
|
|
timestamp: val.timestamp,
|
|
kind: val.kind.into(),
|
|
anchor: val.anchor.as_ref().and_then(|v| serde_wasm_bindgen::to_value(v).ok()),
|
|
delta: val.delta.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmRevisionDelta> for kreuzberg::RevisionDelta {
|
|
fn from(val: WasmRevisionDelta) -> Self {
|
|
Self {
|
|
content: serde_wasm_bindgen::from_value(val.content.clone()).unwrap_or_default(),
|
|
table_changes: val.table_changes.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::RevisionDelta> for WasmRevisionDelta {
|
|
fn from(val: kreuzberg::RevisionDelta) -> Self {
|
|
Self {
|
|
content: serde_wasm_bindgen::to_value(&val.content).unwrap_or(JsValue::NULL),
|
|
table_changes: val.table_changes.into_iter().map(Into::into).collect(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmTable> for kreuzberg::Table {
|
|
fn from(val: WasmTable) -> Self {
|
|
Self {
|
|
cells: serde_wasm_bindgen::from_value(val.cells.clone()).unwrap_or_default(),
|
|
markdown: val.markdown,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::Table> for WasmTable {
|
|
fn from(val: kreuzberg::Table) -> Self {
|
|
Self {
|
|
cells: serde_wasm_bindgen::to_value(&val.cells).unwrap_or(JsValue::NULL),
|
|
markdown: val.markdown,
|
|
page_number: val.page_number,
|
|
bounding_box: val.bounding_box.map(Into::into),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::TableCell> for WasmTableCell {
|
|
fn from(val: kreuzberg::TableCell) -> Self {
|
|
Self {
|
|
content: val.content,
|
|
row_span: val.row_span,
|
|
col_span: val.col_span,
|
|
is_header: val.is_header,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<WasmExtractedUri> for kreuzberg::ExtractedUri {
|
|
fn from(val: WasmExtractedUri) -> Self {
|
|
Self {
|
|
url: val.url,
|
|
label: val.label,
|
|
page: val.page,
|
|
kind: val.kind.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
impl From<kreuzberg::ExtractedUri> for WasmExtractedUri {
|
|
fn from(val: kreuzberg::ExtractedUri) -> Self {
|
|
Self {
|
|
url: val.url,
|
|
label: val.label,
|
|
page: val.page,
|
|
kind: val.kind.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Maps each WASM execution-provider variant onto the identically named core variant.
impl From<WasmExecutionProviderType> for kreuzberg::ExecutionProviderType {
    fn from(val: WasmExecutionProviderType) -> Self {
        type Src = WasmExecutionProviderType;
        match val {
            Src::Auto => Self::Auto,
            Src::Cpu => Self::Cpu,
            Src::CoreMl => Self::CoreMl,
            Src::Cuda => Self::Cuda,
            Src::TensorRt => Self::TensorRt,
        }
    }
}
|
|
|
|
/// Maps each core execution-provider variant onto the identically named WASM variant.
impl From<kreuzberg::ExecutionProviderType> for WasmExecutionProviderType {
    fn from(val: kreuzberg::ExecutionProviderType) -> Self {
        type Core = kreuzberg::ExecutionProviderType;
        match val {
            Core::Auto => Self::Auto,
            Core::Cpu => Self::Cpu,
            Core::CoreMl => Self::CoreMl,
            Core::Cuda => Self::Cuda,
            Core::TensorRt => Self::TensorRt,
        }
    }
}
|
|
|
|
/// Maps each WASM output-format variant onto the matching core variant.
///
/// The WASM `Custom` variant carries no payload, so the core `Custom` variant
/// is built with a default payload.
impl From<WasmOutputFormat> for kreuzberg::OutputFormat {
    fn from(val: WasmOutputFormat) -> Self {
        type Src = WasmOutputFormat;
        match val {
            Src::Plain => Self::Plain,
            Src::Markdown => Self::Markdown,
            Src::Djot => Self::Djot,
            Src::Html => Self::Html,
            Src::Json => Self::Json,
            Src::Structured => Self::Structured,
            Src::Custom => Self::Custom(Default::default()),
        }
    }
}
|
|
|
|
/// Maps each core output-format variant onto the matching WASM variant.
///
/// The payload of the core `Custom` variant is discarded because the WASM
/// `Custom` variant has no fields.
impl From<kreuzberg::OutputFormat> for WasmOutputFormat {
    fn from(val: kreuzberg::OutputFormat) -> Self {
        type Core = kreuzberg::OutputFormat;
        match val {
            Core::Plain => Self::Plain,
            Core::Markdown => Self::Markdown,
            Core::Djot => Self::Djot,
            Core::Html => Self::Html,
            Core::Json => Self::Json,
            Core::Structured => Self::Structured,
            Core::Custom(..) => Self::Custom,
        }
    }
}
|
|
|
|
/// Maps each WASM chunker variant onto the identically named core variant.
impl From<WasmChunkerType> for kreuzberg::ChunkerType {
    fn from(val: WasmChunkerType) -> Self {
        type Src = WasmChunkerType;
        match val {
            Src::Text => Self::Text,
            Src::Markdown => Self::Markdown,
            Src::Yaml => Self::Yaml,
            Src::Semantic => Self::Semantic,
        }
    }
}
|
|
|
|
/// Maps each core chunker variant onto the identically named WASM variant.
impl From<kreuzberg::ChunkerType> for WasmChunkerType {
    fn from(val: kreuzberg::ChunkerType) -> Self {
        type Core = kreuzberg::ChunkerType;
        match val {
            Core::Text => Self::Text,
            Core::Markdown => Self::Markdown,
            Core::Yaml => Self::Yaml,
            Core::Semantic => Self::Semantic,
        }
    }
}
|
|
|
|
impl From<WasmChunkSizing> for kreuzberg::ChunkSizing {
|
|
fn from(val: WasmChunkSizing) -> Self {
|
|
match val.r#type.as_str() {
|
|
"characters" => Self::Characters,
|
|
"tokenizer" => Self::Tokenizer {
|
|
model: val.model.clone().unwrap_or_default(),
|
|
cache_dir: val.cache_dir.clone().map(Into::into),
|
|
},
|
|
_ => Self::Characters,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Flattens the core `kreuzberg::ChunkSizing` enum into the string-tagged WASM struct.
///
/// `Characters` yields the tag `"characters"` with no model or cache directory;
/// `Tokenizer` yields the tag `"tokenizer"`, the model name, and the cache
/// directory rendered lossily as a string when present.
impl From<kreuzberg::ChunkSizing> for WasmChunkSizing {
    fn from(val: kreuzberg::ChunkSizing) -> Self {
        type Core = kreuzberg::ChunkSizing;
        let (tag, model, cache_dir) = match val {
            Core::Characters => ("characters", None, None),
            Core::Tokenizer { model, cache_dir } => (
                "tokenizer",
                Some(model),
                cache_dir.map(|dir| dir.to_string_lossy().into_owned()),
            ),
        };
        Self {
            r#type: String::from(tag),
            cache_dir,
            model,
        }
    }
}
|
|
|
|
impl From<WasmEmbeddingModelType> for kreuzberg::EmbeddingModelType {
|
|
fn from(val: WasmEmbeddingModelType) -> Self {
|
|
match val.r#type.as_str() {
|
|
"preset" => Self::Preset {
|
|
name: val.name.clone().unwrap_or_default(),
|
|
},
|
|
"custom" => Self::Custom {
|
|
model_id: val.model_id.clone().unwrap_or_default(),
|
|
dimensions: val.dimensions.clone().unwrap_or_default(),
|
|
},
|
|
"llm" => Self::Llm {
|
|
llm: val.llm.clone().map(Into::into).unwrap_or_default(),
|
|
},
|
|
"plugin" => Self::Plugin {
|
|
name: val.name.clone().unwrap_or_default(),
|
|
},
|
|
_ => Self::Preset {
|
|
name: Default::default(),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Flattens the core `kreuzberg::EmbeddingModelType` enum into the
/// string-tagged WASM struct.
///
/// The `type` tag names the source variant; only the fields that variant
/// carries are populated, and every other optional field is `None`.
impl From<kreuzberg::EmbeddingModelType> for WasmEmbeddingModelType {
    fn from(val: kreuzberg::EmbeddingModelType) -> Self {
        type Core = kreuzberg::EmbeddingModelType;
        match val {
            Core::Preset { name } => Self {
                r#type: String::from("preset"),
                name: Some(name),
                model_id: None,
                dimensions: None,
                llm: None,
            },
            Core::Custom { model_id, dimensions } => Self {
                r#type: String::from("custom"),
                name: None,
                model_id: Some(model_id),
                dimensions: Some(dimensions),
                llm: None,
            },
            Core::Llm { llm } => Self {
                r#type: String::from("llm"),
                name: None,
                model_id: None,
                dimensions: None,
                llm: Some(llm.into()),
            },
            Core::Plugin { name } => Self {
                r#type: String::from("plugin"),
                name: Some(name),
                model_id: None,
                dimensions: None,
                llm: None,
            },
        }
    }
}
|
|
|
|
/// Maps each core list-type variant onto the identically named WASM variant.
impl From<kreuzberg::extraction::transform::ListType> for WasmListType {
    fn from(val: kreuzberg::extraction::transform::ListType) -> Self {
        type Core = kreuzberg::extraction::transform::ListType;
        match val {
            Core::Bullet => Self::Bullet,
            Core::Numbered => Self::Numbered,
            Core::Lettered => Self::Lettered,
            Core::Indented => Self::Indented,
        }
    }
}
|
|
|
|
/// Maps each core OCR-backend variant onto the identically named WASM variant.
impl From<kreuzberg::OcrBackendType> for WasmOcrBackendType {
    fn from(val: kreuzberg::OcrBackendType) -> Self {
        type Core = kreuzberg::OcrBackendType;
        match val {
            Core::Tesseract => Self::Tesseract,
            Core::EasyOCR => Self::EasyOCR,
            Core::PaddleOCR => Self::PaddleOCR,
            Core::Custom => Self::Custom,
        }
    }
}
|
|
|
|
/// Maps each core processing-stage variant onto the identically named WASM variant.
impl From<kreuzberg::ProcessingStage> for WasmProcessingStage {
    fn from(val: kreuzberg::ProcessingStage) -> Self {
        type Core = kreuzberg::ProcessingStage;
        match val {
            Core::Early => Self::Early,
            Core::Middle => Self::Middle,
            Core::Late => Self::Late,
        }
    }
}
|
|
|
|
/// Maps each WASM PDF-annotation variant onto the identically named core variant.
impl From<WasmPdfAnnotationType> for kreuzberg::PdfAnnotationType {
    fn from(val: WasmPdfAnnotationType) -> Self {
        type Src = WasmPdfAnnotationType;
        match val {
            Src::Text => Self::Text,
            Src::Highlight => Self::Highlight,
            Src::Link => Self::Link,
            Src::Stamp => Self::Stamp,
            Src::Underline => Self::Underline,
            Src::StrikeOut => Self::StrikeOut,
            Src::Other => Self::Other,
        }
    }
}
|
|
|
|
/// Maps each core PDF-annotation variant onto the identically named WASM variant.
impl From<kreuzberg::PdfAnnotationType> for WasmPdfAnnotationType {
    fn from(val: kreuzberg::PdfAnnotationType) -> Self {
        type Core = kreuzberg::PdfAnnotationType;
        match val {
            Core::Text => Self::Text,
            Core::Highlight => Self::Highlight,
            Core::Link => Self::Link,
            Core::Stamp => Self::Stamp,
            Core::Underline => Self::Underline,
            Core::StrikeOut => Self::StrikeOut,
            Core::Other => Self::Other,
        }
    }
}
|
|
|
|
/// Maps each WASM block-type variant onto the identically named core variant.
impl From<WasmBlockType> for kreuzberg::BlockType {
    fn from(val: WasmBlockType) -> Self {
        type Src = WasmBlockType;
        match val {
            Src::Paragraph => Self::Paragraph,
            Src::Heading => Self::Heading,
            Src::Blockquote => Self::Blockquote,
            Src::CodeBlock => Self::CodeBlock,
            Src::ListItem => Self::ListItem,
            Src::OrderedList => Self::OrderedList,
            Src::BulletList => Self::BulletList,
            Src::TaskList => Self::TaskList,
            Src::DefinitionList => Self::DefinitionList,
            Src::DefinitionTerm => Self::DefinitionTerm,
            Src::DefinitionDescription => Self::DefinitionDescription,
            Src::Div => Self::Div,
            Src::Section => Self::Section,
            Src::ThematicBreak => Self::ThematicBreak,
            Src::RawBlock => Self::RawBlock,
            Src::MathDisplay => Self::MathDisplay,
        }
    }
}
|
|
|
|
/// Maps each core block-type variant onto the identically named WASM variant.
impl From<kreuzberg::BlockType> for WasmBlockType {
    fn from(val: kreuzberg::BlockType) -> Self {
        type Core = kreuzberg::BlockType;
        match val {
            Core::Paragraph => Self::Paragraph,
            Core::Heading => Self::Heading,
            Core::Blockquote => Self::Blockquote,
            Core::CodeBlock => Self::CodeBlock,
            Core::ListItem => Self::ListItem,
            Core::OrderedList => Self::OrderedList,
            Core::BulletList => Self::BulletList,
            Core::TaskList => Self::TaskList,
            Core::DefinitionList => Self::DefinitionList,
            Core::DefinitionTerm => Self::DefinitionTerm,
            Core::DefinitionDescription => Self::DefinitionDescription,
            Core::Div => Self::Div,
            Core::Section => Self::Section,
            Core::ThematicBreak => Self::ThematicBreak,
            Core::RawBlock => Self::RawBlock,
            Core::MathDisplay => Self::MathDisplay,
        }
    }
}
|
|
|
|
/// Maps each WASM inline-type variant onto the identically named core variant.
impl From<WasmInlineType> for kreuzberg::InlineType {
    fn from(val: WasmInlineType) -> Self {
        type Src = WasmInlineType;
        match val {
            Src::Text => Self::Text,
            Src::Strong => Self::Strong,
            Src::Emphasis => Self::Emphasis,
            Src::Highlight => Self::Highlight,
            Src::Subscript => Self::Subscript,
            Src::Superscript => Self::Superscript,
            Src::Insert => Self::Insert,
            Src::Delete => Self::Delete,
            Src::Code => Self::Code,
            Src::Link => Self::Link,
            Src::Image => Self::Image,
            Src::Span => Self::Span,
            Src::Math => Self::Math,
            Src::RawInline => Self::RawInline,
            Src::FootnoteRef => Self::FootnoteRef,
            Src::Symbol => Self::Symbol,
        }
    }
}
|
|
|
|
/// Maps each core inline-type variant onto the identically named WASM variant.
impl From<kreuzberg::InlineType> for WasmInlineType {
    fn from(val: kreuzberg::InlineType) -> Self {
        type Core = kreuzberg::InlineType;
        match val {
            Core::Text => Self::Text,
            Core::Strong => Self::Strong,
            Core::Emphasis => Self::Emphasis,
            Core::Highlight => Self::Highlight,
            Core::Subscript => Self::Subscript,
            Core::Superscript => Self::Superscript,
            Core::Insert => Self::Insert,
            Core::Delete => Self::Delete,
            Core::Code => Self::Code,
            Core::Link => Self::Link,
            Core::Image => Self::Image,
            Core::Span => Self::Span,
            Core::Math => Self::Math,
            Core::RawInline => Self::RawInline,
            Core::FootnoteRef => Self::FootnoteRef,
            Core::Symbol => Self::Symbol,
        }
    }
}
|
|
|
|
impl From<WasmRelationshipKind> for kreuzberg::RelationshipKind {
|
|
fn from(val: WasmRelationshipKind) -> Self {
|
|
match val {
|
|
WasmRelationshipKind::FootnoteReference => Self::FootnoteReference,
|
|
WasmRelationshipKind::CitationReference => Self::CitationReference,
|
|
WasmRelationshipKind::InternalLink => Self::InternalLink,
|
|
WasmRelationshipKind::Caption => Self::Caption,
|
|
WasmRelationshipKind::Label => Self::Label,
|
|
WasmRelationshipKind::TocEntry => Self::TocEntry,
|
|
WasmRelationshipKind::CrossReference => Self::CrossReference,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::RelationshipKind> for WasmRelationshipKind {
|
|
fn from(val: kreuzberg::RelationshipKind) -> Self {
|
|
match val {
|
|
kreuzberg::RelationshipKind::FootnoteReference => Self::FootnoteReference,
|
|
kreuzberg::RelationshipKind::CitationReference => Self::CitationReference,
|
|
kreuzberg::RelationshipKind::InternalLink => Self::InternalLink,
|
|
kreuzberg::RelationshipKind::Caption => Self::Caption,
|
|
kreuzberg::RelationshipKind::Label => Self::Label,
|
|
kreuzberg::RelationshipKind::TocEntry => Self::TocEntry,
|
|
kreuzberg::RelationshipKind::CrossReference => Self::CrossReference,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmContentLayer> for kreuzberg::ContentLayer {
|
|
fn from(val: WasmContentLayer) -> Self {
|
|
match val {
|
|
WasmContentLayer::Body => Self::Body,
|
|
WasmContentLayer::Header => Self::Header,
|
|
WasmContentLayer::Footer => Self::Footer,
|
|
WasmContentLayer::Footnote => Self::Footnote,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ContentLayer> for WasmContentLayer {
|
|
fn from(val: kreuzberg::ContentLayer) -> Self {
|
|
match val {
|
|
kreuzberg::ContentLayer::Body => Self::Body,
|
|
kreuzberg::ContentLayer::Header => Self::Header,
|
|
kreuzberg::ContentLayer::Footer => Self::Footer,
|
|
kreuzberg::ContentLayer::Footnote => Self::Footnote,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmNodeContent> for kreuzberg::NodeContent {
|
|
fn from(val: WasmNodeContent) -> Self {
|
|
match val.node_type.as_str() {
|
|
"title" => Self::Title {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"heading" => Self::Heading {
|
|
level: val.level.clone().unwrap_or_default(),
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"paragraph" => Self::Paragraph {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"list" => Self::List {
|
|
ordered: val.ordered.clone().unwrap_or_default(),
|
|
},
|
|
"list_item" => Self::ListItem {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"table" => Self::Table {
|
|
grid: val.grid.clone().map(Into::into).unwrap_or_default(),
|
|
},
|
|
"image" => Self::Image {
|
|
description: val.description.clone(),
|
|
image_index: val.image_index.clone(),
|
|
src: val.src.clone(),
|
|
},
|
|
"code" => Self::Code {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
language: val.language.clone(),
|
|
},
|
|
"quote" => Self::Quote,
|
|
"formula" => Self::Formula {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"footnote" => Self::Footnote {
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"group" => Self::Group {
|
|
label: val.label.clone(),
|
|
heading_level: val.heading_level.clone(),
|
|
heading_text: val.heading_text.clone(),
|
|
},
|
|
"page_break" => Self::PageBreak,
|
|
"slide" => Self::Slide {
|
|
number: val.number.clone().unwrap_or_default(),
|
|
title: val.title.clone(),
|
|
},
|
|
"definition_list" => Self::DefinitionList,
|
|
"definition_item" => Self::DefinitionItem {
|
|
term: val.term.clone().unwrap_or_default(),
|
|
definition: val.definition.clone().unwrap_or_default(),
|
|
},
|
|
"citation" => Self::Citation {
|
|
key: val.key.clone().unwrap_or_default(),
|
|
text: val.text.clone().unwrap_or_default(),
|
|
},
|
|
"admonition" => Self::Admonition {
|
|
kind: val.kind.clone().unwrap_or_default(),
|
|
title: val.title.clone(),
|
|
},
|
|
"raw_block" => Self::RawBlock {
|
|
format: val.format.clone().unwrap_or_default(),
|
|
content: val.content.clone().unwrap_or_default(),
|
|
},
|
|
"metadata_block" => Self::MetadataBlock {
|
|
entries: val
|
|
.entries
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<Vec<(String, String)>>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
},
|
|
_ => Self::Title {
|
|
text: Default::default(),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::NodeContent> for WasmNodeContent {
|
|
fn from(val: kreuzberg::NodeContent) -> Self {
|
|
match val {
|
|
kreuzberg::NodeContent::Title { text } => Self {
|
|
node_type: "title".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Heading { level, text } => Self {
|
|
node_type: "heading".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: Some(level),
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Paragraph { text } => Self {
|
|
node_type: "paragraph".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::List { ordered } => Self {
|
|
node_type: "list".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: Some(ordered),
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::ListItem { text } => Self {
|
|
node_type: "list_item".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Table { grid } => Self {
|
|
node_type: "table".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: Some(grid.into()),
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Image {
|
|
description,
|
|
image_index,
|
|
src,
|
|
} => Self {
|
|
node_type: "image".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Code { text, language } => Self {
|
|
node_type: "code".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Quote => Self {
|
|
node_type: "quote".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Formula { text } => Self {
|
|
node_type: "formula".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Footnote { text } => Self {
|
|
node_type: "footnote".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Group {
|
|
label,
|
|
heading_level,
|
|
heading_text,
|
|
} => Self {
|
|
node_type: "group".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level,
|
|
heading_text,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::PageBreak => Self {
|
|
node_type: "page_break".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Slide { number, title } => Self {
|
|
node_type: "slide".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: Some(number),
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title,
|
|
},
|
|
kreuzberg::NodeContent::DefinitionList => Self {
|
|
node_type: "definition_list".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::DefinitionItem { term, definition } => Self {
|
|
node_type: "definition_item".to_string(),
|
|
content: None,
|
|
definition: Some(definition),
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: Some(term),
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Citation { key, text } => Self {
|
|
node_type: "citation".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: Some(key),
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: Some(text),
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::Admonition { kind, title } => Self {
|
|
node_type: "admonition".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: Some(kind),
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title,
|
|
},
|
|
kreuzberg::NodeContent::RawBlock { format, content } => Self {
|
|
node_type: "raw_block".to_string(),
|
|
content: Some(content),
|
|
definition: None,
|
|
description: None,
|
|
entries: None,
|
|
format: Some(format),
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
kreuzberg::NodeContent::MetadataBlock { entries } => Self {
|
|
node_type: "metadata_block".to_string(),
|
|
content: None,
|
|
definition: None,
|
|
description: None,
|
|
entries: serde_wasm_bindgen::to_value(&entries).ok(),
|
|
format: None,
|
|
grid: None,
|
|
heading_level: None,
|
|
heading_text: None,
|
|
image_index: None,
|
|
key: None,
|
|
kind: None,
|
|
label: None,
|
|
language: None,
|
|
level: None,
|
|
number: None,
|
|
ordered: None,
|
|
src: None,
|
|
term: None,
|
|
text: None,
|
|
title: None,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Rebuilds a core `kreuzberg::AnnotationKind` from the flattened `WasmAnnotationKind` record.
///
/// The variant is selected by the `annotation_type` string. Payload fields a
/// variant needs as plain values fall back to the type's default when absent.
impl From<WasmAnnotationKind> for kreuzberg::AnnotationKind {
    fn from(annotation: WasmAnnotationKind) -> Self {
        match annotation.annotation_type.as_str() {
            "bold" => kreuzberg::AnnotationKind::Bold,
            "italic" => kreuzberg::AnnotationKind::Italic,
            "underline" => kreuzberg::AnnotationKind::Underline,
            "strikethrough" => kreuzberg::AnnotationKind::Strikethrough,
            "code" => kreuzberg::AnnotationKind::Code,
            "subscript" => kreuzberg::AnnotationKind::Subscript,
            "superscript" => kreuzberg::AnnotationKind::Superscript,
            "link" => kreuzberg::AnnotationKind::Link {
                url: annotation.url.clone().unwrap_or_default(),
                title: annotation.title.clone(),
            },
            "highlight" => kreuzberg::AnnotationKind::Highlight,
            "color" => kreuzberg::AnnotationKind::Color {
                value: annotation.value.clone().unwrap_or_default(),
            },
            "font_size" => kreuzberg::AnnotationKind::FontSize {
                value: annotation.value.clone().unwrap_or_default(),
            },
            "custom" => kreuzberg::AnnotationKind::Custom {
                name: annotation.name.clone().unwrap_or_default(),
                // `Custom` keeps its value optional, so no default is substituted.
                value: annotation.value.clone(),
            },
            // Any unrecognised `annotation_type` falls back to `Bold`.
            _ => kreuzberg::AnnotationKind::Bold,
        }
    }
}
|
|
|
|
/// Flattens a core `kreuzberg::AnnotationKind` into the WASM-facing `WasmAnnotationKind` record.
///
/// `annotation_type` receives the snake_case tag for the variant; payload fields
/// the variant does not carry are `None`.
impl From<kreuzberg::AnnotationKind> for WasmAnnotationKind {
    fn from(val: kreuzberg::AnnotationKind) -> Self {
        // Record for a variant that carries no payload: only the tag is set.
        let plain = |tag: &str| Self {
            annotation_type: tag.to_string(),
            name: None,
            title: None,
            url: None,
            value: None,
        };

        match val {
            kreuzberg::AnnotationKind::Bold => plain("bold"),
            kreuzberg::AnnotationKind::Italic => plain("italic"),
            kreuzberg::AnnotationKind::Underline => plain("underline"),
            kreuzberg::AnnotationKind::Strikethrough => plain("strikethrough"),
            kreuzberg::AnnotationKind::Code => plain("code"),
            kreuzberg::AnnotationKind::Subscript => plain("subscript"),
            kreuzberg::AnnotationKind::Superscript => plain("superscript"),
            kreuzberg::AnnotationKind::Link { url, title } => Self {
                annotation_type: "link".to_string(),
                name: None,
                title,
                url: Some(url),
                value: None,
            },
            kreuzberg::AnnotationKind::Highlight => plain("highlight"),
            kreuzberg::AnnotationKind::Color { value } => Self {
                annotation_type: "color".to_string(),
                name: None,
                title: None,
                url: None,
                value: Some(value),
            },
            kreuzberg::AnnotationKind::FontSize { value } => Self {
                annotation_type: "font_size".to_string(),
                name: None,
                title: None,
                url: None,
                value: Some(value),
            },
            kreuzberg::AnnotationKind::Custom { name, value } => Self {
                annotation_type: "custom".to_string(),
                name: Some(name),
                title: None,
                url: None,
                // Already optional on the core side; passed through unchanged.
                value,
            },
        }
    }
}
|
|
|
|
impl From<WasmExtractionMethod> for kreuzberg::ExtractionMethod {
|
|
fn from(val: WasmExtractionMethod) -> Self {
|
|
match val {
|
|
WasmExtractionMethod::Native => Self::Native,
|
|
WasmExtractionMethod::Ocr => Self::Ocr,
|
|
WasmExtractionMethod::Mixed => Self::Mixed,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ExtractionMethod> for WasmExtractionMethod {
|
|
fn from(val: kreuzberg::ExtractionMethod) -> Self {
|
|
match val {
|
|
kreuzberg::ExtractionMethod::Native => Self::Native,
|
|
kreuzberg::ExtractionMethod::Ocr => Self::Ocr,
|
|
kreuzberg::ExtractionMethod::Mixed => Self::Mixed,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmChunkType> for kreuzberg::ChunkType {
|
|
fn from(val: WasmChunkType) -> Self {
|
|
match val {
|
|
WasmChunkType::Heading => Self::Heading,
|
|
WasmChunkType::PartyList => Self::PartyList,
|
|
WasmChunkType::Definitions => Self::Definitions,
|
|
WasmChunkType::OperativeClause => Self::OperativeClause,
|
|
WasmChunkType::SignatureBlock => Self::SignatureBlock,
|
|
WasmChunkType::Schedule => Self::Schedule,
|
|
WasmChunkType::TableLike => Self::TableLike,
|
|
WasmChunkType::Formula => Self::Formula,
|
|
WasmChunkType::CodeBlock => Self::CodeBlock,
|
|
WasmChunkType::Image => Self::Image,
|
|
WasmChunkType::OrgChart => Self::OrgChart,
|
|
WasmChunkType::Diagram => Self::Diagram,
|
|
WasmChunkType::Unknown => Self::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ChunkType> for WasmChunkType {
|
|
fn from(val: kreuzberg::ChunkType) -> Self {
|
|
match val {
|
|
kreuzberg::ChunkType::Heading => Self::Heading,
|
|
kreuzberg::ChunkType::PartyList => Self::PartyList,
|
|
kreuzberg::ChunkType::Definitions => Self::Definitions,
|
|
kreuzberg::ChunkType::OperativeClause => Self::OperativeClause,
|
|
kreuzberg::ChunkType::SignatureBlock => Self::SignatureBlock,
|
|
kreuzberg::ChunkType::Schedule => Self::Schedule,
|
|
kreuzberg::ChunkType::TableLike => Self::TableLike,
|
|
kreuzberg::ChunkType::Formula => Self::Formula,
|
|
kreuzberg::ChunkType::CodeBlock => Self::CodeBlock,
|
|
kreuzberg::ChunkType::Image => Self::Image,
|
|
kreuzberg::ChunkType::OrgChart => Self::OrgChart,
|
|
kreuzberg::ChunkType::Diagram => Self::Diagram,
|
|
kreuzberg::ChunkType::Unknown => Self::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmImageKind> for kreuzberg::ImageKind {
|
|
fn from(val: WasmImageKind) -> Self {
|
|
match val {
|
|
WasmImageKind::Photograph => Self::Photograph,
|
|
WasmImageKind::Diagram => Self::Diagram,
|
|
WasmImageKind::Chart => Self::Chart,
|
|
WasmImageKind::Drawing => Self::Drawing,
|
|
WasmImageKind::TextBlock => Self::TextBlock,
|
|
WasmImageKind::Decoration => Self::Decoration,
|
|
WasmImageKind::Logo => Self::Logo,
|
|
WasmImageKind::Icon => Self::Icon,
|
|
WasmImageKind::TileFragment => Self::TileFragment,
|
|
WasmImageKind::Mask => Self::Mask,
|
|
WasmImageKind::PageRaster => Self::PageRaster,
|
|
WasmImageKind::Unknown => Self::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ImageKind> for WasmImageKind {
|
|
fn from(val: kreuzberg::ImageKind) -> Self {
|
|
match val {
|
|
kreuzberg::ImageKind::Photograph => Self::Photograph,
|
|
kreuzberg::ImageKind::Diagram => Self::Diagram,
|
|
kreuzberg::ImageKind::Chart => Self::Chart,
|
|
kreuzberg::ImageKind::Drawing => Self::Drawing,
|
|
kreuzberg::ImageKind::TextBlock => Self::TextBlock,
|
|
kreuzberg::ImageKind::Decoration => Self::Decoration,
|
|
kreuzberg::ImageKind::Logo => Self::Logo,
|
|
kreuzberg::ImageKind::Icon => Self::Icon,
|
|
kreuzberg::ImageKind::TileFragment => Self::TileFragment,
|
|
kreuzberg::ImageKind::Mask => Self::Mask,
|
|
kreuzberg::ImageKind::PageRaster => Self::PageRaster,
|
|
kreuzberg::ImageKind::Unknown => Self::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmResultFormat> for kreuzberg::ResultFormat {
|
|
fn from(val: WasmResultFormat) -> Self {
|
|
match val {
|
|
WasmResultFormat::Unified => Self::Unified,
|
|
WasmResultFormat::ElementBased => Self::ElementBased,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ResultFormat> for WasmResultFormat {
|
|
fn from(val: kreuzberg::ResultFormat) -> Self {
|
|
match val {
|
|
kreuzberg::ResultFormat::Unified => Self::Unified,
|
|
kreuzberg::ResultFormat::ElementBased => Self::ElementBased,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmElementType> for kreuzberg::ElementType {
|
|
fn from(val: WasmElementType) -> Self {
|
|
match val {
|
|
WasmElementType::Title => Self::Title,
|
|
WasmElementType::NarrativeText => Self::NarrativeText,
|
|
WasmElementType::Heading => Self::Heading,
|
|
WasmElementType::ListItem => Self::ListItem,
|
|
WasmElementType::Table => Self::Table,
|
|
WasmElementType::Image => Self::Image,
|
|
WasmElementType::PageBreak => Self::PageBreak,
|
|
WasmElementType::CodeBlock => Self::CodeBlock,
|
|
WasmElementType::BlockQuote => Self::BlockQuote,
|
|
WasmElementType::Footer => Self::Footer,
|
|
WasmElementType::Header => Self::Header,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ElementType> for WasmElementType {
|
|
fn from(val: kreuzberg::ElementType) -> Self {
|
|
match val {
|
|
kreuzberg::ElementType::Title => Self::Title,
|
|
kreuzberg::ElementType::NarrativeText => Self::NarrativeText,
|
|
kreuzberg::ElementType::Heading => Self::Heading,
|
|
kreuzberg::ElementType::ListItem => Self::ListItem,
|
|
kreuzberg::ElementType::Table => Self::Table,
|
|
kreuzberg::ElementType::Image => Self::Image,
|
|
kreuzberg::ElementType::PageBreak => Self::PageBreak,
|
|
kreuzberg::ElementType::CodeBlock => Self::CodeBlock,
|
|
kreuzberg::ElementType::BlockQuote => Self::BlockQuote,
|
|
kreuzberg::ElementType::Footer => Self::Footer,
|
|
kreuzberg::ElementType::Header => Self::Header,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmFormatMetadata> for kreuzberg::FormatMetadata {
|
|
fn from(val: WasmFormatMetadata) -> Self {
|
|
match val.format_type.as_str() {
|
|
"pdf" => Self::Pdf(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PdfMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"docx" => Self::Docx(Box::new(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::DocxMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
)),
|
|
"excel" => Self::Excel(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ExcelMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"email" => Self::Email(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::EmailMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"pptx" => Self::Pptx(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PptxMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"archive" => Self::Archive(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ArchiveMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"image" => Self::Image(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::ImageMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"xml" => Self::Xml(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::XmlMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"text" => Self::Text(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::TextMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"html" => Self::Html(Box::new(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::HtmlMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
)),
|
|
"ocr" => Self::Ocr(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::OcrMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"csv" => Self::Csv(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::CsvMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"bibtex" => Self::Bibtex(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::BibtexMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"citation" => Self::Citation(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::CitationMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"fiction_book" => Self::FictionBook(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::FictionBookMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"dbf" => Self::Dbf(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::DbfMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"jats" => Self::Jats(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::JatsMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"epub" => Self::Epub(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::EpubMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"pst" => Self::Pst(
|
|
val._0
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<kreuzberg::PstMetadata>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
),
|
|
"code" => Self::Code(Default::default()),
|
|
_ => Self::Pdf(Default::default()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::FormatMetadata> for WasmFormatMetadata {
|
|
fn from(val: kreuzberg::FormatMetadata) -> Self {
|
|
match val {
|
|
kreuzberg::FormatMetadata::Pdf(field0) => Self {
|
|
format_type: "pdf".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Docx(field0) => Self {
|
|
format_type: "docx".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Excel(field0) => Self {
|
|
format_type: "excel".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Email(field0) => Self {
|
|
format_type: "email".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Pptx(field0) => Self {
|
|
format_type: "pptx".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Archive(field0) => Self {
|
|
format_type: "archive".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Image(field0) => Self {
|
|
format_type: "image".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Xml(field0) => Self {
|
|
format_type: "xml".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Text(field0) => Self {
|
|
format_type: "text".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Html(field0) => Self {
|
|
format_type: "html".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Ocr(field0) => Self {
|
|
format_type: "ocr".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Csv(field0) => Self {
|
|
format_type: "csv".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Bibtex(field0) => Self {
|
|
format_type: "bibtex".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Citation(field0) => Self {
|
|
format_type: "citation".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::FictionBook(field0) => Self {
|
|
format_type: "fiction_book".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Dbf(field0) => Self {
|
|
format_type: "dbf".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Jats(field0) => Self {
|
|
format_type: "jats".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Epub(field0) => Self {
|
|
format_type: "epub".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Pst(field0) => Self {
|
|
format_type: "pst".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
kreuzberg::FormatMetadata::Code(field0) => Self {
|
|
format_type: "code".to_string(),
|
|
_0: serde_wasm_bindgen::to_value(&field0).ok(),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmTextDirection> for kreuzberg::TextDirection {
|
|
fn from(val: WasmTextDirection) -> Self {
|
|
match val {
|
|
WasmTextDirection::LeftToRight => Self::LeftToRight,
|
|
WasmTextDirection::RightToLeft => Self::RightToLeft,
|
|
WasmTextDirection::Auto => Self::Auto,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::TextDirection> for WasmTextDirection {
|
|
fn from(val: kreuzberg::TextDirection) -> Self {
|
|
match val {
|
|
kreuzberg::TextDirection::LeftToRight => Self::LeftToRight,
|
|
kreuzberg::TextDirection::RightToLeft => Self::RightToLeft,
|
|
kreuzberg::TextDirection::Auto => Self::Auto,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmLinkType> for kreuzberg::LinkType {
|
|
fn from(val: WasmLinkType) -> Self {
|
|
match val {
|
|
WasmLinkType::Anchor => Self::Anchor,
|
|
WasmLinkType::Internal => Self::Internal,
|
|
WasmLinkType::External => Self::External,
|
|
WasmLinkType::Email => Self::Email,
|
|
WasmLinkType::Phone => Self::Phone,
|
|
WasmLinkType::Other => Self::Other,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::LinkType> for WasmLinkType {
|
|
fn from(val: kreuzberg::LinkType) -> Self {
|
|
match val {
|
|
kreuzberg::LinkType::Anchor => Self::Anchor,
|
|
kreuzberg::LinkType::Internal => Self::Internal,
|
|
kreuzberg::LinkType::External => Self::External,
|
|
kreuzberg::LinkType::Email => Self::Email,
|
|
kreuzberg::LinkType::Phone => Self::Phone,
|
|
kreuzberg::LinkType::Other => Self::Other,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmImageType> for kreuzberg::ImageType {
|
|
fn from(val: WasmImageType) -> Self {
|
|
match val {
|
|
WasmImageType::DataUri => Self::DataUri,
|
|
WasmImageType::InlineSvg => Self::InlineSvg,
|
|
WasmImageType::External => Self::External,
|
|
WasmImageType::Relative => Self::Relative,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::ImageType> for WasmImageType {
|
|
fn from(val: kreuzberg::ImageType) -> Self {
|
|
match val {
|
|
kreuzberg::ImageType::DataUri => Self::DataUri,
|
|
kreuzberg::ImageType::InlineSvg => Self::InlineSvg,
|
|
kreuzberg::ImageType::External => Self::External,
|
|
kreuzberg::ImageType::Relative => Self::Relative,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmStructuredDataType> for kreuzberg::StructuredDataType {
|
|
fn from(val: WasmStructuredDataType) -> Self {
|
|
match val {
|
|
WasmStructuredDataType::JsonLd => Self::JsonLd,
|
|
WasmStructuredDataType::Microdata => Self::Microdata,
|
|
WasmStructuredDataType::RDFa => Self::RDFa,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::StructuredDataType> for WasmStructuredDataType {
|
|
fn from(val: kreuzberg::StructuredDataType) -> Self {
|
|
match val {
|
|
kreuzberg::StructuredDataType::JsonLd => Self::JsonLd,
|
|
kreuzberg::StructuredDataType::Microdata => Self::Microdata,
|
|
kreuzberg::StructuredDataType::RDFa => Self::RDFa,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmOcrBoundingGeometry> for kreuzberg::OcrBoundingGeometry {
|
|
fn from(val: WasmOcrBoundingGeometry) -> Self {
|
|
match val.r#type.as_str() {
|
|
"rectangle" => Self::Rectangle {
|
|
left: val.left.clone().unwrap_or_default(),
|
|
top: val.top.clone().unwrap_or_default(),
|
|
width: val.width.clone().unwrap_or_default(),
|
|
height: val.height.clone().unwrap_or_default(),
|
|
},
|
|
"quadrilateral" => Self::Quadrilateral {
|
|
points: val
|
|
.points
|
|
.as_ref()
|
|
.and_then(|v| serde_wasm_bindgen::from_value::<[(u32, u32); 4]>(v.clone()).ok())
|
|
.unwrap_or_default(),
|
|
},
|
|
_ => Self::Rectangle {
|
|
left: Default::default(),
|
|
top: Default::default(),
|
|
width: Default::default(),
|
|
height: Default::default(),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::OcrBoundingGeometry> for WasmOcrBoundingGeometry {
|
|
fn from(val: kreuzberg::OcrBoundingGeometry) -> Self {
|
|
match val {
|
|
kreuzberg::OcrBoundingGeometry::Rectangle {
|
|
left,
|
|
top,
|
|
width,
|
|
height,
|
|
} => Self {
|
|
r#type: "rectangle".to_string(),
|
|
height: Some(height),
|
|
left: Some(left),
|
|
points: None,
|
|
top: Some(top),
|
|
width: Some(width),
|
|
},
|
|
kreuzberg::OcrBoundingGeometry::Quadrilateral { points } => Self {
|
|
r#type: "quadrilateral".to_string(),
|
|
height: None,
|
|
left: None,
|
|
points: serde_wasm_bindgen::to_value(&points).ok(),
|
|
top: None,
|
|
width: None,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmOcrElementLevel> for kreuzberg::OcrElementLevel {
|
|
fn from(val: WasmOcrElementLevel) -> Self {
|
|
match val {
|
|
WasmOcrElementLevel::Word => Self::Word,
|
|
WasmOcrElementLevel::Line => Self::Line,
|
|
WasmOcrElementLevel::Block => Self::Block,
|
|
WasmOcrElementLevel::Page => Self::Page,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::OcrElementLevel> for WasmOcrElementLevel {
|
|
fn from(val: kreuzberg::OcrElementLevel) -> Self {
|
|
match val {
|
|
kreuzberg::OcrElementLevel::Word => Self::Word,
|
|
kreuzberg::OcrElementLevel::Line => Self::Line,
|
|
kreuzberg::OcrElementLevel::Block => Self::Block,
|
|
kreuzberg::OcrElementLevel::Page => Self::Page,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmPageUnitType> for kreuzberg::PageUnitType {
|
|
fn from(val: WasmPageUnitType) -> Self {
|
|
match val {
|
|
WasmPageUnitType::Page => Self::Page,
|
|
WasmPageUnitType::Slide => Self::Slide,
|
|
WasmPageUnitType::Sheet => Self::Sheet,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::PageUnitType> for WasmPageUnitType {
|
|
fn from(val: kreuzberg::PageUnitType) -> Self {
|
|
match val {
|
|
kreuzberg::PageUnitType::Page => Self::Page,
|
|
kreuzberg::PageUnitType::Slide => Self::Slide,
|
|
kreuzberg::PageUnitType::Sheet => Self::Sheet,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Rebuilds the core `kreuzberg::DiffLine` from its tag-plus-payload WASM form.
///
/// The `kind` tag (`"context"`, `"added"`, `"removed"`) selects the variant and
/// `_0` supplies the payload, defaulting when it is `None`. Any other tag yields
/// `Context` with a default payload, ignoring `_0`.
impl From<WasmDiffLine> for kreuzberg::DiffLine {
    fn from(line: WasmDiffLine) -> Self {
        // Evaluated lazily so the fallback arm never touches the payload.
        let payload = || line._0.clone().unwrap_or_default();
        match line.kind.as_str() {
            "context" => kreuzberg::DiffLine::Context(payload()),
            "added" => kreuzberg::DiffLine::Added(payload()),
            "removed" => kreuzberg::DiffLine::Removed(payload()),
            _ => kreuzberg::DiffLine::Context(Default::default()),
        }
    }
}
|
|
|
|
impl From<kreuzberg::DiffLine> for WasmDiffLine {
|
|
fn from(val: kreuzberg::DiffLine) -> Self {
|
|
match val {
|
|
kreuzberg::DiffLine::Context(field0) => Self {
|
|
kind: "context".to_string(),
|
|
_0: Some(field0),
|
|
},
|
|
kreuzberg::DiffLine::Added(field0) => Self {
|
|
kind: "added".to_string(),
|
|
_0: Some(field0),
|
|
},
|
|
kreuzberg::DiffLine::Removed(field0) => Self {
|
|
kind: "removed".to_string(),
|
|
_0: Some(field0),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmRevisionKind> for kreuzberg::RevisionKind {
|
|
fn from(val: WasmRevisionKind) -> Self {
|
|
match val {
|
|
WasmRevisionKind::Insertion => Self::Insertion,
|
|
WasmRevisionKind::Deletion => Self::Deletion,
|
|
WasmRevisionKind::FormatChange => Self::FormatChange,
|
|
WasmRevisionKind::Comment => Self::Comment,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::RevisionKind> for WasmRevisionKind {
|
|
fn from(val: kreuzberg::RevisionKind) -> Self {
|
|
match val {
|
|
kreuzberg::RevisionKind::Insertion => Self::Insertion,
|
|
kreuzberg::RevisionKind::Deletion => Self::Deletion,
|
|
kreuzberg::RevisionKind::FormatChange => Self::FormatChange,
|
|
kreuzberg::RevisionKind::Comment => Self::Comment,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Rebuilds the core `kreuzberg::RevisionAnchor` from its flattened,
/// tag-discriminated WASM representation.
///
/// The `type` tag selects the variant (`"paragraph"`, `"table_cell"`, `"page"`,
/// `"slide"`, `"sheet"`). Optional fields that are `None` fall back to their
/// type's default, except `name`, which is passed through unchanged for sheets.
/// Any other tag yields a `Paragraph` with a default index.
impl From<WasmRevisionAnchor> for kreuzberg::RevisionAnchor {
    fn from(anchor: WasmRevisionAnchor) -> Self {
        // Shared by every index-carrying variant; evaluated only when needed.
        let index = || anchor.index.clone().unwrap_or_default();
        match anchor.r#type.as_str() {
            "paragraph" => kreuzberg::RevisionAnchor::Paragraph { index: index() },
            "table_cell" => kreuzberg::RevisionAnchor::TableCell {
                row: anchor.row.clone().unwrap_or_default(),
                col: anchor.col.clone().unwrap_or_default(),
                table_index: anchor.table_index.clone().unwrap_or_default(),
            },
            "page" => kreuzberg::RevisionAnchor::Page { index: index() },
            "slide" => kreuzberg::RevisionAnchor::Slide { index: index() },
            "sheet" => kreuzberg::RevisionAnchor::Sheet {
                index: index(),
                name: anchor.name.clone(),
            },
            // Unknown tags cannot be reported through `From`, so they collapse
            // to a paragraph anchor with a default index.
            _ => kreuzberg::RevisionAnchor::Paragraph {
                index: Default::default(),
            },
        }
    }
}
|
|
|
|
impl From<kreuzberg::RevisionAnchor> for WasmRevisionAnchor {
|
|
fn from(val: kreuzberg::RevisionAnchor) -> Self {
|
|
match val {
|
|
kreuzberg::RevisionAnchor::Paragraph { index } => Self {
|
|
r#type: "paragraph".to_string(),
|
|
col: None,
|
|
index: Some(index),
|
|
name: None,
|
|
row: None,
|
|
table_index: None,
|
|
},
|
|
kreuzberg::RevisionAnchor::TableCell { row, col, table_index } => Self {
|
|
r#type: "table_cell".to_string(),
|
|
col: Some(col),
|
|
index: None,
|
|
name: None,
|
|
row: Some(row),
|
|
table_index: Some(table_index),
|
|
},
|
|
kreuzberg::RevisionAnchor::Page { index } => Self {
|
|
r#type: "page".to_string(),
|
|
col: None,
|
|
index: Some(index),
|
|
name: None,
|
|
row: None,
|
|
table_index: None,
|
|
},
|
|
kreuzberg::RevisionAnchor::Slide { index } => Self {
|
|
r#type: "slide".to_string(),
|
|
col: None,
|
|
index: Some(index),
|
|
name: None,
|
|
row: None,
|
|
table_index: None,
|
|
},
|
|
kreuzberg::RevisionAnchor::Sheet { index, name } => Self {
|
|
r#type: "sheet".to_string(),
|
|
col: None,
|
|
index: Some(index),
|
|
name,
|
|
row: None,
|
|
table_index: None,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<WasmUriKind> for kreuzberg::UriKind {
|
|
fn from(val: WasmUriKind) -> Self {
|
|
match val {
|
|
WasmUriKind::Hyperlink => Self::Hyperlink,
|
|
WasmUriKind::Image => Self::Image,
|
|
WasmUriKind::Anchor => Self::Anchor,
|
|
WasmUriKind::Citation => Self::Citation,
|
|
WasmUriKind::Reference => Self::Reference,
|
|
WasmUriKind::Email => Self::Email,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<kreuzberg::UriKind> for WasmUriKind {
|
|
fn from(val: kreuzberg::UriKind) -> Self {
|
|
match val {
|
|
kreuzberg::UriKind::Hyperlink => Self::Hyperlink,
|
|
kreuzberg::UriKind::Image => Self::Image,
|
|
kreuzberg::UriKind::Anchor => Self::Anchor,
|
|
kreuzberg::UriKind::Citation => Self::Citation,
|
|
kreuzberg::UriKind::Reference => Self::Reference,
|
|
kreuzberg::UriKind::Email => Self::Email,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Return the error code string for a `kreuzberg::error::KreuzbergError` variant.
|
|
#[allow(dead_code)]
|
|
fn kreuzberg_error_error_code(e: &kreuzberg::error::KreuzbergError) -> &'static str {
|
|
#[allow(unreachable_patterns)]
|
|
match e {
|
|
kreuzberg::error::KreuzbergError::Io(..) => "io",
|
|
kreuzberg::error::KreuzbergError::Parsing { .. } => "parsing",
|
|
kreuzberg::error::KreuzbergError::Ocr { .. } => "ocr",
|
|
kreuzberg::error::KreuzbergError::Validation { .. } => "validation",
|
|
kreuzberg::error::KreuzbergError::Cache { .. } => "cache",
|
|
kreuzberg::error::KreuzbergError::ImageProcessing { .. } => "image_processing",
|
|
kreuzberg::error::KreuzbergError::Serialization { .. } => "serialization",
|
|
kreuzberg::error::KreuzbergError::MissingDependency(..) => "missing_dependency",
|
|
kreuzberg::error::KreuzbergError::Plugin { .. } => "plugin",
|
|
kreuzberg::error::KreuzbergError::LockPoisoned(..) => "lock_poisoned",
|
|
kreuzberg::error::KreuzbergError::UnsupportedFormat(..) => "unsupported_format",
|
|
kreuzberg::error::KreuzbergError::Embedding { .. } => "embedding",
|
|
kreuzberg::error::KreuzbergError::Timeout { .. } => "timeout",
|
|
kreuzberg::error::KreuzbergError::Cancelled => "cancelled",
|
|
kreuzberg::error::KreuzbergError::Security { .. } => "security",
|
|
kreuzberg::error::KreuzbergError::Other(..) => "other",
|
|
_ => "kreuzberg_error",
|
|
}
|
|
}
|
|
|
|
/// Convert a `kreuzberg::error::KreuzbergError` error to a `JsValue` object with `code` and `message` fields.
|
|
#[allow(dead_code)]
|
|
fn kreuzberg_error_to_js_value(e: kreuzberg::error::KreuzbergError) -> wasm_bindgen::JsValue {
|
|
let code = kreuzberg_error_error_code(&e);
|
|
let message = e.to_string();
|
|
let obj = js_sys::Object::new();
|
|
js_sys::Reflect::set(&obj, &"code".into(), &code.into()).ok();
|
|
js_sys::Reflect::set(&obj, &"message".into(), &message.into()).ok();
|
|
obj.into()
|
|
}
|