Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/python/ocr/ocr_paddleocr.md
+++ b/docs/snippets/python/ocr/ocr_paddleocr.md
@@ -0,0 +1,16 @@
+```python title="Python"
+from kreuzberg import extract_file_sync, ExtractionConfig, OcrConfig
+
+config: ExtractionConfig = ExtractionConfig(
+    ocr=OcrConfig(backend="paddleocr", language="en")  # model_tier="server" for max accuracy
+)
+
+result = extract_file_sync("scanned.pdf", config=config)  # config carries the PaddleOCR settings built above
+
+content: str = result.content
+preview: str = content[:100]  # at most the first 100 characters of the content
+total_length: int = len(content)  # length of the full content, not of the preview
+
+print(f"Extracted content (preview): {preview}")
+print(f"Total characters: {total_length}")
+```