This commit is contained in:
33
docs/snippets/dart/config/pdf_hierarchy_config.md
Normal file
33
docs/snippets/dart/config/pdf_hierarchy_config.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
|
||||
final config = ExtractionConfig(
|
||||
useCache: true,
|
||||
enableQualityProcessing: true,
|
||||
forceOcr: false,
|
||||
disableOcr: false,
|
||||
pdfOptions: const PdfConfig(
|
||||
extractImages: false,
|
||||
extractMetadata: true,
|
||||
extractAnnotations: false,
|
||||
allowSingleColumnTables: false,
|
||||
hierarchy: HierarchyConfig(
|
||||
enabled: true,
|
||||
kClusters: 5,
|
||||
includeBbox: true,
|
||||
ocrCoverageThreshold: 0.8,
|
||||
),
|
||||
),
|
||||
resultFormat: ResultFormat.unified,
|
||||
outputFormat: OutputFormat.plain(),
|
||||
includeDocumentStructure: false,
|
||||
maxArchiveDepth: 3,
|
||||
useLayoutForMarkdown: false,
|
||||
);
|
||||
|
||||
final result = await KreuzbergBridge.extractFile('document.pdf', null, config);
|
||||
final pages = result.pages ?? const [];
|
||||
print('Pages with hierarchy: ${pages.where((p) => p.hierarchy != null).length}');
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user