docs/snippets/rust/metadata/pdf_metadata_extractor.md

```rust title="Rust"
use kreuzberg::plugins::{Plugin, PostProcessor, ProcessingStage};
use kreuzberg::{Result, ExtractionResult, ExtractionConfig};
use async_trait::async_trait;
use std::sync::atomic::{AtomicUsize, Ordering};

struct PdfMetadataExtractor {
    processed_count: AtomicUsize,
}

impl PdfMetadataExtractor {
    fn new() -> Self {
        Self {
            processed_count: AtomicUsize::new(0),
        }
    }
}

impl Plugin for PdfMetadataExtractor {
    fn name(&self) -> &str { "pdf-metadata-extractor" }
    fn version(&self) -> String { "1.0.0".to_string() }
    fn description(&self) -> &str {
        "Extracts and enriches PDF metadata"
    }
    fn initialize(&self) -> Result<()> {
        log::info!("PDF metadata extractor initialized");
        Ok(())
    }
    fn shutdown(&self) -> Result<()> {
        let count = self.processed_count.load(Ordering::Acquire);
        log::info!("Processed {} PDFs", count);
        Ok(())
    }
}

#[async_trait]
impl PostProcessor for PdfMetadataExtractor {
    async fn process(
        &self,
        result: &mut ExtractionResult,
        _config: &ExtractionConfig,
    ) -> Result<()> {
        self.processed_count.fetch_add(1, Ordering::AcqRel);

        result.processing_warnings.push(ProcessingWarning {
            source: "pdf-metadata-extractor".to_string(),
            message: "PDF metadata extracted successfully".to_string()
        });

        Ok(())
    }

    fn processing_stage(&self) -> ProcessingStage {
        ProcessingStage::Early
    }

    fn should_process(
        &self,
        result: &ExtractionResult,
        _config: &ExtractionConfig,
    ) -> bool {
        result.mime_type == "application/pdf"
    }

    fn estimated_duration_ms(&self, _result: &ExtractionResult) -> u64 {
        10
    }
}

use kreuzberg::plugins::registry::get_post_processor_registry;
use std::sync::Arc;

fn register() -> Result<()> {
    let processor = Arc::new(PdfMetadataExtractor::new());
    let registry = get_post_processor_registry();
    registry.register(processor, 50)?;
    Ok(())
}
```
Nomad changes 2026-06-01 23:40:55 +02:00			```rust title="Rust"
			`use kreuzberg::plugins::{Plugin, PostProcessor, ProcessingStage};`
			`use kreuzberg::{Result, ExtractionResult, ExtractionConfig};`
			`use async_trait::async_trait;`
			`use std::sync::atomic::{AtomicUsize, Ordering};`

			`struct PdfMetadataExtractor {`
			`processed_count: AtomicUsize,`
			`}`

			`impl PdfMetadataExtractor {`
			`fn new() -> Self {`
			`Self {`
			`processed_count: AtomicUsize::new(0),`
			`}`
			`}`
			`}`

			`impl Plugin for PdfMetadataExtractor {`
			`fn name(&self) -> &str { "pdf-metadata-extractor" }`
			`fn version(&self) -> String { "1.0.0".to_string() }`
			`fn description(&self) -> &str {`
			`"Extracts and enriches PDF metadata"`
			`}`
			`fn initialize(&self) -> Result<()> {`
			`log::info!("PDF metadata extractor initialized");`
			`Ok(())`
			`}`
			`fn shutdown(&self) -> Result<()> {`
			`let count = self.processed_count.load(Ordering::Acquire);`
			`log::info!("Processed {} PDFs", count);`
			`Ok(())`
			`}`
			`}`

			`#[async_trait]`
			`impl PostProcessor for PdfMetadataExtractor {`
			`async fn process(`
			`&self,`
			`result: &mut ExtractionResult,`
			`_config: &ExtractionConfig,`
			`) -> Result<()> {`
			`self.processed_count.fetch_add(1, Ordering::AcqRel);`

			`result.processing_warnings.push(ProcessingWarning {`
			`source: "pdf-metadata-extractor".to_string(),`
			`message: "PDF metadata extracted successfully".to_string()`
			`});`

			`Ok(())`
			`}`

			`fn processing_stage(&self) -> ProcessingStage {`
			`ProcessingStage::Early`
			`}`

			`fn should_process(`
			`&self,`
			`result: &ExtractionResult,`
			`_config: &ExtractionConfig,`
			`) -> bool {`
			`result.mime_type == "application/pdf"`
			`}`

			`fn estimated_duration_ms(&self, _result: &ExtractionResult) -> u64 {`
			`10`
			`}`
			`}`

			`use kreuzberg::plugins::registry::get_post_processor_registry;`
			`use std::sync::Arc;`

			`fn register() -> Result<()> {`
			`let processor = Arc::new(PdfMetadataExtractor::new());`
			`let registry = get_post_processor_registry();`
			`registry.register(processor, 50)?;`
			`Ok(())`
			`}`
			```