This commit is contained in:
26
docs/snippets/python/config/ocr_dpi_config.md
Normal file
26
docs/snippets/python/config/ocr_dpi_config.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```python title="Python"
|
||||
from kreuzberg import (
|
||||
extract_file_sync,
|
||||
ExtractionConfig,
|
||||
OcrConfig,
|
||||
TesseractConfig,
|
||||
ImagePreprocessingConfig,
|
||||
)
|
||||
|
||||
# Build the configuration from the innermost setting outwards.
# Image preprocessing is asked for a target DPI of 300.
preprocessing = ImagePreprocessingConfig(target_dpi=300)
tesseract_settings = TesseractConfig(preprocessing=preprocessing)

# Select the "tesseract" OCR backend and hand it the settings above.
ocr_settings = OcrConfig(backend="tesseract", tesseract_config=tesseract_settings)
config: ExtractionConfig = ExtractionConfig(ocr=ocr_settings)

# Extract "scanned.pdf" using the configuration built above.
result = extract_file_sync("scanned.pdf", config=config)

# Report the length of the extracted content and the number of tables
# found on the result.
content_length: int = len(result.content)
table_count: int = len(result.tables)
print(f"Content length: {content_length} characters")
print(f"Tables detected: {table_count}")
|
||||
```
|
||||
Reference in New Issue
Block a user