Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

497
packages/python/kreuzberg/__init__.py generated Normal file
View File

@@ -0,0 +1,497 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
"""Public API for _kreuzberg.
Version: 5.0.0-rc.3
"""
from ._kreuzberg import (
AnnotationKind,
ArchiveEntry,
BatchBytesItem,
BatchFileItem,
BBox,
BlockType,
CacheStats,
CellChange,
Chunk,
ChunkerType,
ChunkMetadata,
ChunkSizing,
ChunkType,
CodeContentMode,
ContentLayer,
ContributorRole,
DbfFieldInfo,
DetectionResult,
DetectResponse,
DiffHunk,
DiffLine,
DjotContent,
DjotImage,
DjotLink,
DocumentNode,
DocumentRelationship,
DocumentRevision,
Element,
ElementMetadata,
ElementType,
EmailAttachment,
EmailExtractionResult,
EmbeddedChanges,
EmbeddedDiff,
EmbeddedFile,
EmbeddingModelType,
EmbeddingPreset,
ErrorMetadata,
ExcelSheet,
ExcelWorkbook,
ExecutionProviderType,
ExtractedImage,
ExtractedUri,
ExtractionDiff,
ExtractionMethod,
Footnote,
FormatMetadata,
FormattedBlock,
GridCell,
HeaderMetadata,
HeadingContext,
HeadingLevel,
HierarchicalBlock,
HtmlTheme,
ImageKind,
ImageMetadataType,
ImagePreprocessingMetadata,
ImageType,
InlineElement,
InlineType,
Keyword,
KeywordAlgorithm,
LayoutClass,
LayoutDetection,
LinkMetadata,
LinkType,
ListType,
ModelPaths,
NodeContent,
OcrBackendType,
OcrBoundingGeometry,
OcrElementLevel,
OcrExtractionResult,
OcrPipelineConfig,
OcrPipelineStage,
OcrRotation,
OcrTable,
OcrTableBoundingBox,
OrientationResult,
OutputFormat,
PaddleLanguage,
PageBoundary,
PageContent,
PageHierarchy,
PageInfo,
PageStructure,
PageUnitType,
PdfAnnotation,
PdfAnnotationType,
PptxExtractionResult,
ProcessingStage,
ProcessingWarning,
PSMMode,
RecognizedTable,
ReductionLevel,
RelationshipKind,
ResultFormat,
RevisionAnchor,
RevisionKind,
StructuredData,
StructuredDataResult,
StructuredDataType,
StructuredExtractionConfig,
SupportedFormat,
TableDiff,
TableModel,
TextAnnotation,
TextDirection,
TextExtractionResult,
UriKind,
XmlExtractionResult,
YearRange,
)
from .api import (
batch_extract_bytes,
batch_extract_bytes_sync,
batch_extract_files,
batch_extract_files_sync,
clear_document_extractors,
clear_embedding_backends,
clear_ocr_backends,
clear_post_processors,
clear_renderers,
clear_validators,
compare,
detect_mime_type,
detect_mime_type_from_bytes,
embed_texts,
embed_texts_async,
extract_bytes,
extract_bytes_sync,
extract_file,
extract_file_sync,
get_embedding_preset,
get_extensions_for_mime,
list_document_extractors,
list_embedding_backends,
list_embedding_presets,
list_ocr_backends,
list_post_processors,
list_renderers,
list_validators,
register_document_extractor,
register_embedding_backend,
register_ocr_backend,
register_post_processor,
register_renderer,
register_validator,
render_pdf_page_to_png,
unregister_document_extractor,
unregister_embedding_backend,
unregister_ocr_backend,
unregister_post_processor,
unregister_renderer,
unregister_validator,
)
from .exceptions import (
CacheError,
CancelledError,
EmbeddingError,
ImageProcessingError,
IoError,
KreuzbergError,
KreuzbergTimeoutError,
LockPoisonedError,
MissingDependencyError,
OcrError,
OtherError,
ParsingError,
PluginError,
SecurityError,
SerializationError,
UnsupportedFormatError,
ValidationError,
)
from .options import (
AccelerationConfig,
ArchiveMetadata,
BibtexMetadata,
BoundingBox,
ChunkingConfig,
CitationMetadata,
ContentFilterConfig,
CoreProperties,
CsvMetadata,
DbfMetadata,
DiffOptions,
DocumentStructure,
DocxAppProperties,
DocxMetadata,
EmailConfig,
EmailMetadata,
EmbeddingConfig,
EpubMetadata,
ExcelMetadata,
ExtractionConfig,
ExtractionResult,
FictionBookMetadata,
FileExtractionConfig,
HierarchyConfig,
HtmlMetadata,
HtmlOutputConfig,
ImageExtractionConfig,
ImageMetadata,
ImagePreprocessingConfig,
JatsMetadata,
KeywordConfig,
LanguageDetectionConfig,
LayoutDetectionConfig,
LayoutRegion,
LlmConfig,
LlmUsage,
Metadata,
OcrConfidence,
OcrConfig,
OcrElement,
OcrElementConfig,
OcrMetadata,
OcrQualityThresholds,
PaddleOcrConfig,
PageConfig,
PdfConfig,
PdfMetadata,
PostProcessorConfig,
PptxAppProperties,
PptxMetadata,
PstMetadata,
RakeParams,
RevisionDelta,
SecurityLimits,
ServerConfig,
Table,
TableCell,
TableGrid,
TesseractConfig,
TextMetadata,
TokenReductionConfig,
TokenReductionOptions,
TreeSitterConfig,
TreeSitterProcessConfig,
XlsxAppProperties,
XmlMetadata,
YakeParams,
)
# Explicit public API of the package: every name imported above from
# ._kreuzberg, .api, .exceptions and .options appears here exactly once,
# in case-sensitive sorted order (uppercase names first, then the lowercase
# function names). __version__ is not part of this list.
__all__ = [
"AccelerationConfig",
"AnnotationKind",
"ArchiveEntry",
"ArchiveMetadata",
"BBox",
"BatchBytesItem",
"BatchFileItem",
"BibtexMetadata",
"BlockType",
"BoundingBox",
"CacheError",
"CacheStats",
"CancelledError",
"CellChange",
"Chunk",
"ChunkMetadata",
"ChunkSizing",
"ChunkType",
"ChunkerType",
"ChunkingConfig",
"CitationMetadata",
"CodeContentMode",
"ContentFilterConfig",
"ContentLayer",
"ContributorRole",
"CoreProperties",
"CsvMetadata",
"DbfFieldInfo",
"DbfMetadata",
"DetectResponse",
"DetectionResult",
"DiffHunk",
"DiffLine",
"DiffOptions",
"DjotContent",
"DjotImage",
"DjotLink",
"DocumentNode",
"DocumentRelationship",
"DocumentRevision",
"DocumentStructure",
"DocxAppProperties",
"DocxMetadata",
"Element",
"ElementMetadata",
"ElementType",
"EmailAttachment",
"EmailConfig",
"EmailExtractionResult",
"EmailMetadata",
"EmbeddedChanges",
"EmbeddedDiff",
"EmbeddedFile",
"EmbeddingConfig",
"EmbeddingError",
"EmbeddingModelType",
"EmbeddingPreset",
"EpubMetadata",
"ErrorMetadata",
"ExcelMetadata",
"ExcelSheet",
"ExcelWorkbook",
"ExecutionProviderType",
"ExtractedImage",
"ExtractedUri",
"ExtractionConfig",
"ExtractionDiff",
"ExtractionMethod",
"ExtractionResult",
"FictionBookMetadata",
"FileExtractionConfig",
"Footnote",
"FormatMetadata",
"FormattedBlock",
"GridCell",
"HeaderMetadata",
"HeadingContext",
"HeadingLevel",
"HierarchicalBlock",
"HierarchyConfig",
"HtmlMetadata",
"HtmlOutputConfig",
"HtmlTheme",
"ImageExtractionConfig",
"ImageKind",
"ImageMetadata",
"ImageMetadataType",
"ImagePreprocessingConfig",
"ImagePreprocessingMetadata",
"ImageProcessingError",
"ImageType",
"InlineElement",
"InlineType",
"IoError",
"JatsMetadata",
"Keyword",
"KeywordAlgorithm",
"KeywordConfig",
"KreuzbergError",
"KreuzbergTimeoutError",
"LanguageDetectionConfig",
"LayoutClass",
"LayoutDetection",
"LayoutDetectionConfig",
"LayoutRegion",
"LinkMetadata",
"LinkType",
"ListType",
"LlmConfig",
"LlmUsage",
"LockPoisonedError",
"Metadata",
"MissingDependencyError",
"ModelPaths",
"NodeContent",
"OcrBackendType",
"OcrBoundingGeometry",
"OcrConfidence",
"OcrConfig",
"OcrElement",
"OcrElementConfig",
"OcrElementLevel",
"OcrError",
"OcrExtractionResult",
"OcrMetadata",
"OcrPipelineConfig",
"OcrPipelineStage",
"OcrQualityThresholds",
"OcrRotation",
"OcrTable",
"OcrTableBoundingBox",
"OrientationResult",
"OtherError",
"OutputFormat",
"PSMMode",
"PaddleLanguage",
"PaddleOcrConfig",
"PageBoundary",
"PageConfig",
"PageContent",
"PageHierarchy",
"PageInfo",
"PageStructure",
"PageUnitType",
"ParsingError",
"PdfAnnotation",
"PdfAnnotationType",
"PdfConfig",
"PdfMetadata",
"PluginError",
"PostProcessorConfig",
"PptxAppProperties",
"PptxExtractionResult",
"PptxMetadata",
"ProcessingStage",
"ProcessingWarning",
"PstMetadata",
"RakeParams",
"RecognizedTable",
"ReductionLevel",
"RelationshipKind",
"ResultFormat",
"RevisionAnchor",
"RevisionDelta",
"RevisionKind",
"SecurityError",
"SecurityLimits",
"SerializationError",
"ServerConfig",
"StructuredData",
"StructuredDataResult",
"StructuredDataType",
"StructuredExtractionConfig",
"SupportedFormat",
"Table",
"TableCell",
"TableDiff",
"TableGrid",
"TableModel",
"TesseractConfig",
"TextAnnotation",
"TextDirection",
"TextExtractionResult",
"TextMetadata",
"TokenReductionConfig",
"TokenReductionOptions",
"TreeSitterConfig",
"TreeSitterProcessConfig",
"UnsupportedFormatError",
"UriKind",
"ValidationError",
"XlsxAppProperties",
"XmlExtractionResult",
"XmlMetadata",
"YakeParams",
"YearRange",
"batch_extract_bytes",
"batch_extract_bytes_sync",
"batch_extract_files",
"batch_extract_files_sync",
"clear_document_extractors",
"clear_embedding_backends",
"clear_ocr_backends",
"clear_post_processors",
"clear_renderers",
"clear_validators",
"compare",
"detect_mime_type",
"detect_mime_type_from_bytes",
"embed_texts",
"embed_texts_async",
"extract_bytes",
"extract_bytes_sync",
"extract_file",
"extract_file_sync",
"get_embedding_preset",
"get_extensions_for_mime",
"list_document_extractors",
"list_embedding_backends",
"list_embedding_presets",
"list_ocr_backends",
"list_post_processors",
"list_renderers",
"list_validators",
"register_document_extractor",
"register_embedding_backend",
"register_ocr_backend",
"register_post_processor",
"register_renderer",
"register_validator",
"render_pdf_page_to_png",
"unregister_document_extractor",
"unregister_embedding_backend",
"unregister_ocr_backend",
"unregister_post_processor",
"unregister_renderer",
"unregister_validator",
]
# Package version string; same value as the "Version:" line in the module
# docstring at the top of this file.
__version__ = "5.0.0-rc.3"

3057
packages/python/kreuzberg/_kreuzberg.pyi generated Normal file

File diff suppressed because it is too large Load Diff

1171
packages/python/kreuzberg/api.py generated Normal file

File diff suppressed because it is too large Load Diff

74
packages/python/kreuzberg/exceptions.py generated Normal file
View File

@@ -0,0 +1,74 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
"""Exception hierarchy."""
class KreuzbergError(Exception):
    """Main error type for all Kreuzberg operations.

    Derives directly from the built-in ``Exception``; the other exception
    classes in this module use it as their base class.
    """
class IoError(KreuzbergError):
    """I/O error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class ParsingError(KreuzbergError):
    """Parsing error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class OcrError(KreuzbergError):
    """OCR error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class ValidationError(KreuzbergError):
    """Validation error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class CacheError(KreuzbergError):
    """Cache error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class ImageProcessingError(KreuzbergError):
    """Image-processing error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class SerializationError(KreuzbergError):
    """Serialization error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class MissingDependencyError(KreuzbergError):
    """Missing-dependency error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class PluginError(KreuzbergError):
    """Plugin error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class LockPoisonedError(KreuzbergError):
    """Lock-poisoned error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class UnsupportedFormatError(KreuzbergError):
    """Unsupported-format error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class EmbeddingError(KreuzbergError):
    """Embedding error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class KreuzbergTimeoutError(KreuzbergError):
    """Kreuzberg timeout error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class CancelledError(KreuzbergError):
    """Cancelled error.

    Can be caught as ``KreuzbergError``, its base class. This class is
    defined here and is separate from any standard-library exception of
    the same name.
    """
class SecurityError(KreuzbergError):
    """Security error.

    Can be caught as ``KreuzbergError``, its base class.
    """
class OtherError(KreuzbergError):
    """Other error.

    Can be caught as ``KreuzbergError``, its base class.
    """

2671
packages/python/kreuzberg/options.py generated Normal file

File diff suppressed because it is too large Load Diff

0
packages/python/kreuzberg/py.typed generated Normal file
View File