Files
fil/docs/snippets/python/metadata/page_boundaries.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

653 B

from kreuzberg import extract_file_sync, ExtractionConfig

# Extract the document synchronously with a default ExtractionConfig.
result = extract_file_sync("document.pdf", config=ExtractionConfig())

# Only continue when page metadata exists and its boundary list is non-empty.
pages = result.metadata.pages
if pages and pages.boundaries:
    # byte_start/byte_end are applied to the UTF-8 encoding of the content,
    # so the slicing is done on the encoded bytes, not on the str.
    encoded = result.content.encode("utf-8")

    # Report just the first three boundaries.
    for span in pages.boundaries[:3]:
        start, end = span.byte_start, span.byte_end
        # Decode the page's byte slice back to text, keeping at most
        # the first 100 characters for the preview.
        preview = encoded[start:end].decode("utf-8")[:100]

        print(f"Page {span.page_number}:")
        print(f"  Byte range: {start}-{end}")
        print(f"  Preview: {preview}...")