Files
fil/docs/snippets/csharp/config/pdf_hierarchy_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.0 KiB

using Kreuzberg;

// PDF hierarchy configuration examples.
//
// Each example below builds its own ExtractionConfig and runs one extraction.
// Every result is stored in a uniquely named local so that all four examples
// can live in the same top-level scope; redeclaring a single `result` variable
// in one scope is a compile error (CS0128).

// Basic hierarchy configuration with properties
var config = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        ExtractImages = true,
        Hierarchy = new HierarchyConfig
        {
            Enabled = true,
            KClusters = 6,
            IncludeBbox = true,
            OcrCoverageThreshold = 0.8f
        }
    }
};

var basicResult = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
Console.WriteLine($"Content length: {basicResult.Content.Length}");

// Advanced hierarchy detection with custom parameters
var advancedConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        ExtractImages = true,
        Hierarchy = new HierarchyConfig
        {
            Enabled = true,
            KClusters = 12,           // More clusters for detailed hierarchy
            IncludeBbox = true,       // Include bounding box coordinates
            // Lower value than the 0.8f used in the basic example above.
            // NOTE(review): confirm against the HierarchyConfig docs how this
            // threshold affects detection before describing it as stricter or looser.
            OcrCoverageThreshold = 0.7f
        }
    }
};

var advancedResult = await KreuzbergLib.ExtractFileAsync("complex_document.pdf", advancedConfig);
Console.WriteLine($"Advanced hierarchy detection completed: {advancedResult.Content.Length} chars");

// Minimal configuration with only enabled flag
var minimalConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        Hierarchy = new HierarchyConfig
        {
            Enabled = true,
            // Other properties use defaults:
            // KClusters = 6
            // IncludeBbox = true
        }
    }
};

var minimalResult = await KreuzbergLib.ExtractFileAsync("document.pdf", minimalConfig);
Console.WriteLine("Extraction with default hierarchy settings complete");

// Disabling hierarchy detection
var noHierarchyConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        Hierarchy = new HierarchyConfig
        {
            Enabled = false
        }
    }
};

var noHierarchyResult = await KreuzbergLib.ExtractFileAsync("document.pdf", noHierarchyConfig);
Console.WriteLine("Extraction without hierarchy detection complete");