This commit is contained in:
14
docs/snippets/python/getting-started/basic_usage.md
Normal file
14
docs/snippets/python/getting-started/basic_usage.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from kreuzberg import extract_file, ExtractionConfig
|
||||
|
||||
async def main() -> None:
    """Extract ``document.pdf`` with caching and quality processing enabled and print its content."""
    # Both switches are turned on explicitly so the example shows how options are passed.
    options = ExtractionConfig(use_cache=True, enable_quality_processing=True)
    extraction = await extract_file("document.pdf", config=options)
    print(extraction.content)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
16
docs/snippets/python/getting-started/extract_file.md
Normal file
16
docs/snippets/python/getting-started/extract_file.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from kreuzberg import extract_file
|
||||
|
||||
async def main() -> None:
    """Extract ``document.pdf`` from a ``Path`` and print its content, format type and table count."""
    source: Path = Path("document.pdf")
    extraction = await extract_file(source)

    print(f"Content: {extraction.content}")

    # The format entry can be falsy/absent; report None rather than dereferencing it.
    format_info = extraction.metadata.format
    detected_format = format_info.format_type if format_info else None
    print(f"Format: {detected_format}")

    table_count = len(extraction.tables)
    print(f"Tables: {table_count}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
19
docs/snippets/python/getting-started/extract_with_ocr.md
Normal file
19
docs/snippets/python/getting-started/extract_with_ocr.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from kreuzberg import extract_file, ExtractionConfig, OcrConfig, TesseractConfig
|
||||
|
||||
async def main() -> None:
    """Extract ``scanned.pdf`` with OCR forced on, then print the content and detected languages."""
    # Build the nested configuration from the innermost object outwards.
    tesseract_options = TesseractConfig(psm=3)
    ocr_options = OcrConfig(
        backend="tesseract",
        language="eng",
        tesseract_config=tesseract_options,
    )
    options = ExtractionConfig(force_ocr=True, ocr=ocr_options)

    extraction = await extract_file("scanned.pdf", config=options)
    print(extraction.content)
    print(f"Detected Languages: {extraction.detected_languages}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
10
docs/snippets/python/getting-started/hello_world.md
Normal file
10
docs/snippets/python/getting-started/hello_world.md
Normal file
@@ -0,0 +1,10 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from kreuzberg import extract_file
|
||||
|
||||
async def main() -> None:
    """Extract ``document.pdf`` without an explicit config and print the extracted content."""
    extraction = await extract_file("document.pdf")
    print(extraction.content)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
12
docs/snippets/python/getting-started/install_verify.md
Normal file
12
docs/snippets/python/getting-started/install_verify.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from kreuzberg import extract_file, __version__
|
||||
|
||||
async def main() -> None:
    """Print the installed Kreuzberg version and whether a test extraction produced any content."""
    print(f"Kreuzberg version: {__version__}")

    extraction = await extract_file("document.pdf")
    # Non-empty content is used as the success signal for this smoke check.
    produced_content = len(extraction.content) > 0
    print(f"Extraction successful: {produced_content}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
17
docs/snippets/python/getting-started/read_content.md
Normal file
17
docs/snippets/python/getting-started/read_content.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```python title="Python"
|
||||
import asyncio
|
||||
from kreuzberg import extract_file
|
||||
|
||||
async def main() -> None:
    """Extract ``document.pdf`` and print its content length, table count and format type."""
    extraction = await extract_file("document.pdf")

    text: str = extraction.content
    table_count: int = len(extraction.tables)

    # The format entry can be falsy/absent; in that case the format type stays None.
    format_info = extraction.metadata.format
    detected_format: str | None = None
    if format_info:
        detected_format = format_info.format_type

    print(f"Content length: {len(text)} characters")
    print(f"Tables found: {table_count}")
    print(f"Format: {detected_format}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
Reference in New Issue
Block a user