This commit is contained in:
74
docs/snippets/csharp/config/pdf_hierarchy_config.md
Normal file
74
docs/snippets/csharp/config/pdf_hierarchy_config.md
Normal file
@@ -0,0 +1,74 @@
|
||||
```csharp title="C#"
|
||||
using Kreuzberg;
|
||||
|
||||
// Basic setup: hierarchy detection switched on with every option spelled out.
// The configuration is assembled from the innermost object outwards.
var hierarchy = new HierarchyConfig
{
    Enabled = true,
    KClusters = 6,
    IncludeBbox = true,
    OcrCoverageThreshold = 0.8f
};

var pdfOptions = new PdfConfig
{
    ExtractImages = true,
    Hierarchy = hierarchy
};

var config = new ExtractionConfig { PdfOptions = pdfOptions };

// Run the extraction and report how much text came back.
var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
Console.WriteLine($"Content length: {result.Content.Length}");
|
||||
|
||||
// Advanced hierarchy detection with custom parameters
var advancedConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        ExtractImages = true,
        Hierarchy = new HierarchyConfig
        {
            Enabled = true,
            KClusters = 12,              // More clusters for detailed hierarchy
            IncludeBbox = true,          // Include bounding box coordinates
            OcrCoverageThreshold = 0.7f  // Lower than the 0.8 used in the basic example
        }
    }
};

// Uses its own variable name: every example in this snippet shares one scope,
// so declaring `result` a second time would not compile (CS0128).
var advancedResult = await KreuzbergLib.ExtractFileAsync("complex_document.pdf", advancedConfig);
Console.WriteLine($"Advanced hierarchy detection completed: {advancedResult.Content.Length} chars");
|
||||
|
||||
// Minimal configuration with only enabled flag
var minimalConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        Hierarchy = new HierarchyConfig
        {
            Enabled = true,
            // Other properties use defaults:
            // KClusters = 6
            // IncludeBbox = true
        }
    }
};

// Uses its own variable name: every example in this snippet shares one scope,
// so declaring `result` again would not compile (CS0128).
var minimalResult = await KreuzbergLib.ExtractFileAsync("document.pdf", minimalConfig);
Console.WriteLine("Extraction with default hierarchy settings complete");
|
||||
|
||||
// Disabling hierarchy detection
var noHierarchyConfig = new ExtractionConfig
{
    PdfOptions = new PdfConfig
    {
        Hierarchy = new HierarchyConfig
        {
            Enabled = false
        }
    }
};

// Uses its own variable name: every example in this snippet shares one scope,
// so declaring `result` again would not compile (CS0128).
var noHierarchyResult = await KreuzbergLib.ExtractFileAsync("document.pdf", noHierarchyConfig);
Console.WriteLine("Extraction without hierarchy detection complete");
|
||||
```
|
||||
Reference in New Issue
Block a user