Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/python/ocr/image_preprocessing.md
+++ b/docs/snippets/python/ocr/image_preprocessing.md
@@ -0,0 +1,29 @@
+```python title="Python"
+from kreuzberg import (
+    extract_file_sync,
+    ExtractionConfig,
+    ImagePreprocessingConfig,
+    OcrConfig,
+    TesseractConfig,
+)
+
+preprocessing: ImagePreprocessingConfig = ImagePreprocessingConfig(
+    target_dpi=300,
+    denoise=True,
+    deskew=True,
+    contrast_enhance=True,
+    binarization_method="otsu",
+)
+
+config: ExtractionConfig = ExtractionConfig(
+    ocr=OcrConfig(
+        backend="tesseract",
+        language="eng",
+        tesseract_config=TesseractConfig(preprocessing=preprocessing),
+    )
+)
+
+result = extract_file_sync("document.pdf", config=config)
+
+print(f"Content length: {len(result.content)} characters")
+```