This commit is contained in:
32
docs/snippets/dart/ocr/cloud_ocr_backend.md
Normal file
32
docs/snippets/dart/ocr/cloud_ocr_backend.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Cloud OCR backends are registered in the Rust core; Dart code only picks
  // one of them by name. Call `KreuzbergBridge.listOcrBackends()` at runtime
  // to discover which backends are available.
  final backends = await KreuzbergBridge.listOcrBackends();
  print('Available OCR backends: $backends');

  // OCR settings that select the backend named 'cloud'.
  const cloudOcr = OcrConfig(
    enabled: true,
    backend: 'cloud',
    language: 'en',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: cloudOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
31
docs/snippets/dart/ocr/image_extraction.md
Normal file
31
docs/snippets/dart/ocr/image_extraction.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Image extraction settings: extraction enabled, 300 DPI target with
  // automatic adjustment between 150 and 600, placeholders and
  // classification switched off.
  const imageSettings = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    classify: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageSettings,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `images` is nullable on the result, so fall back to an empty list.
  final images = extraction.images ?? const [];
  print('Extracted images: ${images.length}');
}
|
||||
```
|
||||
32
docs/snippets/dart/ocr/image_preprocessing.md
Normal file
32
docs/snippets/dart/ocr/image_preprocessing.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Image settings for this example: placeholders and classification are
  // switched on, and at most 20 images are requested per page.
  const imageSettings = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: true,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    maxImagesPerPage: 20,
    classify: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageSettings,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `images` is nullable on the result, so fall back to an empty list.
  final images = extraction.images ?? const [];
  print('Preprocessed images: ${images.length}');
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_easyocr.md
Normal file
26
docs/snippets/dart/ocr/ocr_easyocr.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select the backend named 'easyocr' with language 'en'.
  const easyOcr = OcrConfig(
    enabled: true,
    backend: 'easyocr',
    language: 'en',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: easyOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
38
docs/snippets/dart/ocr/ocr_elements.md
Normal file
38
docs/snippets/dart/ocr/ocr_elements.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select the 'paddleocr' backend and request OCR
  // elements from word level upward, with a 0.0 minimum confidence and no
  // hierarchy building.
  const elementOcr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
    elementConfig: OcrElementConfig(
      includeElements: true,
      minLevel: OcrElementLevel.word,
      minConfidence: 0.0,
      buildHierarchy: false,
    ),
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: elementOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );

  // `ocrElements` is nullable on the result; iterate an empty list instead
  // when it is absent.
  for (final element in extraction.ocrElements ?? const <OcrElement>[]) {
    print('Text: ${element.text}');
    print('Confidence: ${element.confidence.recognition.toStringAsFixed(2)}');
    print('Level: ${element.level}');
    print('Page: ${element.pageNumber}');
  }
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_extraction.md
Normal file
26
docs/snippets/dart/ocr/ocr_extraction.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select the backend named 'tesseract' with language
  // 'eng'.
  const tesseractOcr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: tesseractOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_force_all_pages.md
Normal file
26
docs/snippets/dart/ocr/ocr_force_all_pages.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select the backend named 'tesseract' with language
  // 'eng'.
  const tesseractOcr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  // This example differs from the basic OCR setup by setting `forceOcr` to
  // true.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: true,
    disableOcr: false,
    ocr: tesseractOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_multi_language.md
Normal file
26
docs/snippets/dart/ocr/ocr_multi_language.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select 'tesseract' and pass several language codes
  // joined with '+' in a single string.
  const multiLanguageOcr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu+fra',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: multiLanguageOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'multilingual.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_paddleocr.md
Normal file
26
docs/snippets/dart/ocr/ocr_paddleocr.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // OCR settings that select the backend named 'paddleocr' with language
  // 'en'.
  const paddleOcr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: paddleOcr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null —
  // presumably an optional MIME type hint; confirm against the bridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
Reference in New Issue
Block a user