This commit is contained in:
497
packages/python/kreuzberg/__init__.py
generated
Normal file
497
packages/python/kreuzberg/__init__.py
generated
Normal file
@@ -0,0 +1,497 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
"""Public API for _kreuzberg.
|
||||
|
||||
Version: 5.0.0-rc.3
|
||||
"""
|
||||
|
||||
from ._kreuzberg import (
|
||||
AnnotationKind,
|
||||
ArchiveEntry,
|
||||
BatchBytesItem,
|
||||
BatchFileItem,
|
||||
BBox,
|
||||
BlockType,
|
||||
CacheStats,
|
||||
CellChange,
|
||||
Chunk,
|
||||
ChunkerType,
|
||||
ChunkMetadata,
|
||||
ChunkSizing,
|
||||
ChunkType,
|
||||
CodeContentMode,
|
||||
ContentLayer,
|
||||
ContributorRole,
|
||||
DbfFieldInfo,
|
||||
DetectionResult,
|
||||
DetectResponse,
|
||||
DiffHunk,
|
||||
DiffLine,
|
||||
DjotContent,
|
||||
DjotImage,
|
||||
DjotLink,
|
||||
DocumentNode,
|
||||
DocumentRelationship,
|
||||
DocumentRevision,
|
||||
Element,
|
||||
ElementMetadata,
|
||||
ElementType,
|
||||
EmailAttachment,
|
||||
EmailExtractionResult,
|
||||
EmbeddedChanges,
|
||||
EmbeddedDiff,
|
||||
EmbeddedFile,
|
||||
EmbeddingModelType,
|
||||
EmbeddingPreset,
|
||||
ErrorMetadata,
|
||||
ExcelSheet,
|
||||
ExcelWorkbook,
|
||||
ExecutionProviderType,
|
||||
ExtractedImage,
|
||||
ExtractedUri,
|
||||
ExtractionDiff,
|
||||
ExtractionMethod,
|
||||
Footnote,
|
||||
FormatMetadata,
|
||||
FormattedBlock,
|
||||
GridCell,
|
||||
HeaderMetadata,
|
||||
HeadingContext,
|
||||
HeadingLevel,
|
||||
HierarchicalBlock,
|
||||
HtmlTheme,
|
||||
ImageKind,
|
||||
ImageMetadataType,
|
||||
ImagePreprocessingMetadata,
|
||||
ImageType,
|
||||
InlineElement,
|
||||
InlineType,
|
||||
Keyword,
|
||||
KeywordAlgorithm,
|
||||
LayoutClass,
|
||||
LayoutDetection,
|
||||
LinkMetadata,
|
||||
LinkType,
|
||||
ListType,
|
||||
ModelPaths,
|
||||
NodeContent,
|
||||
OcrBackendType,
|
||||
OcrBoundingGeometry,
|
||||
OcrElementLevel,
|
||||
OcrExtractionResult,
|
||||
OcrPipelineConfig,
|
||||
OcrPipelineStage,
|
||||
OcrRotation,
|
||||
OcrTable,
|
||||
OcrTableBoundingBox,
|
||||
OrientationResult,
|
||||
OutputFormat,
|
||||
PaddleLanguage,
|
||||
PageBoundary,
|
||||
PageContent,
|
||||
PageHierarchy,
|
||||
PageInfo,
|
||||
PageStructure,
|
||||
PageUnitType,
|
||||
PdfAnnotation,
|
||||
PdfAnnotationType,
|
||||
PptxExtractionResult,
|
||||
ProcessingStage,
|
||||
ProcessingWarning,
|
||||
PSMMode,
|
||||
RecognizedTable,
|
||||
ReductionLevel,
|
||||
RelationshipKind,
|
||||
ResultFormat,
|
||||
RevisionAnchor,
|
||||
RevisionKind,
|
||||
StructuredData,
|
||||
StructuredDataResult,
|
||||
StructuredDataType,
|
||||
StructuredExtractionConfig,
|
||||
SupportedFormat,
|
||||
TableDiff,
|
||||
TableModel,
|
||||
TextAnnotation,
|
||||
TextDirection,
|
||||
TextExtractionResult,
|
||||
UriKind,
|
||||
XmlExtractionResult,
|
||||
YearRange,
|
||||
)
|
||||
from .api import (
|
||||
batch_extract_bytes,
|
||||
batch_extract_bytes_sync,
|
||||
batch_extract_files,
|
||||
batch_extract_files_sync,
|
||||
clear_document_extractors,
|
||||
clear_embedding_backends,
|
||||
clear_ocr_backends,
|
||||
clear_post_processors,
|
||||
clear_renderers,
|
||||
clear_validators,
|
||||
compare,
|
||||
detect_mime_type,
|
||||
detect_mime_type_from_bytes,
|
||||
embed_texts,
|
||||
embed_texts_async,
|
||||
extract_bytes,
|
||||
extract_bytes_sync,
|
||||
extract_file,
|
||||
extract_file_sync,
|
||||
get_embedding_preset,
|
||||
get_extensions_for_mime,
|
||||
list_document_extractors,
|
||||
list_embedding_backends,
|
||||
list_embedding_presets,
|
||||
list_ocr_backends,
|
||||
list_post_processors,
|
||||
list_renderers,
|
||||
list_validators,
|
||||
register_document_extractor,
|
||||
register_embedding_backend,
|
||||
register_ocr_backend,
|
||||
register_post_processor,
|
||||
register_renderer,
|
||||
register_validator,
|
||||
render_pdf_page_to_png,
|
||||
unregister_document_extractor,
|
||||
unregister_embedding_backend,
|
||||
unregister_ocr_backend,
|
||||
unregister_post_processor,
|
||||
unregister_renderer,
|
||||
unregister_validator,
|
||||
)
|
||||
from .exceptions import (
|
||||
CacheError,
|
||||
CancelledError,
|
||||
EmbeddingError,
|
||||
ImageProcessingError,
|
||||
IoError,
|
||||
KreuzbergError,
|
||||
KreuzbergTimeoutError,
|
||||
LockPoisonedError,
|
||||
MissingDependencyError,
|
||||
OcrError,
|
||||
OtherError,
|
||||
ParsingError,
|
||||
PluginError,
|
||||
SecurityError,
|
||||
SerializationError,
|
||||
UnsupportedFormatError,
|
||||
ValidationError,
|
||||
)
|
||||
from .options import (
|
||||
AccelerationConfig,
|
||||
ArchiveMetadata,
|
||||
BibtexMetadata,
|
||||
BoundingBox,
|
||||
ChunkingConfig,
|
||||
CitationMetadata,
|
||||
ContentFilterConfig,
|
||||
CoreProperties,
|
||||
CsvMetadata,
|
||||
DbfMetadata,
|
||||
DiffOptions,
|
||||
DocumentStructure,
|
||||
DocxAppProperties,
|
||||
DocxMetadata,
|
||||
EmailConfig,
|
||||
EmailMetadata,
|
||||
EmbeddingConfig,
|
||||
EpubMetadata,
|
||||
ExcelMetadata,
|
||||
ExtractionConfig,
|
||||
ExtractionResult,
|
||||
FictionBookMetadata,
|
||||
FileExtractionConfig,
|
||||
HierarchyConfig,
|
||||
HtmlMetadata,
|
||||
HtmlOutputConfig,
|
||||
ImageExtractionConfig,
|
||||
ImageMetadata,
|
||||
ImagePreprocessingConfig,
|
||||
JatsMetadata,
|
||||
KeywordConfig,
|
||||
LanguageDetectionConfig,
|
||||
LayoutDetectionConfig,
|
||||
LayoutRegion,
|
||||
LlmConfig,
|
||||
LlmUsage,
|
||||
Metadata,
|
||||
OcrConfidence,
|
||||
OcrConfig,
|
||||
OcrElement,
|
||||
OcrElementConfig,
|
||||
OcrMetadata,
|
||||
OcrQualityThresholds,
|
||||
PaddleOcrConfig,
|
||||
PageConfig,
|
||||
PdfConfig,
|
||||
PdfMetadata,
|
||||
PostProcessorConfig,
|
||||
PptxAppProperties,
|
||||
PptxMetadata,
|
||||
PstMetadata,
|
||||
RakeParams,
|
||||
RevisionDelta,
|
||||
SecurityLimits,
|
||||
ServerConfig,
|
||||
Table,
|
||||
TableCell,
|
||||
TableGrid,
|
||||
TesseractConfig,
|
||||
TextMetadata,
|
||||
TokenReductionConfig,
|
||||
TokenReductionOptions,
|
||||
TreeSitterConfig,
|
||||
TreeSitterProcessConfig,
|
||||
XlsxAppProperties,
|
||||
XmlMetadata,
|
||||
YakeParams,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AccelerationConfig",
|
||||
"AnnotationKind",
|
||||
"ArchiveEntry",
|
||||
"ArchiveMetadata",
|
||||
"BBox",
|
||||
"BatchBytesItem",
|
||||
"BatchFileItem",
|
||||
"BibtexMetadata",
|
||||
"BlockType",
|
||||
"BoundingBox",
|
||||
"CacheError",
|
||||
"CacheStats",
|
||||
"CancelledError",
|
||||
"CellChange",
|
||||
"Chunk",
|
||||
"ChunkMetadata",
|
||||
"ChunkSizing",
|
||||
"ChunkType",
|
||||
"ChunkerType",
|
||||
"ChunkingConfig",
|
||||
"CitationMetadata",
|
||||
"CodeContentMode",
|
||||
"ContentFilterConfig",
|
||||
"ContentLayer",
|
||||
"ContributorRole",
|
||||
"CoreProperties",
|
||||
"CsvMetadata",
|
||||
"DbfFieldInfo",
|
||||
"DbfMetadata",
|
||||
"DetectResponse",
|
||||
"DetectionResult",
|
||||
"DiffHunk",
|
||||
"DiffLine",
|
||||
"DiffOptions",
|
||||
"DjotContent",
|
||||
"DjotImage",
|
||||
"DjotLink",
|
||||
"DocumentNode",
|
||||
"DocumentRelationship",
|
||||
"DocumentRevision",
|
||||
"DocumentStructure",
|
||||
"DocxAppProperties",
|
||||
"DocxMetadata",
|
||||
"Element",
|
||||
"ElementMetadata",
|
||||
"ElementType",
|
||||
"EmailAttachment",
|
||||
"EmailConfig",
|
||||
"EmailExtractionResult",
|
||||
"EmailMetadata",
|
||||
"EmbeddedChanges",
|
||||
"EmbeddedDiff",
|
||||
"EmbeddedFile",
|
||||
"EmbeddingConfig",
|
||||
"EmbeddingError",
|
||||
"EmbeddingModelType",
|
||||
"EmbeddingPreset",
|
||||
"EpubMetadata",
|
||||
"ErrorMetadata",
|
||||
"ExcelMetadata",
|
||||
"ExcelSheet",
|
||||
"ExcelWorkbook",
|
||||
"ExecutionProviderType",
|
||||
"ExtractedImage",
|
||||
"ExtractedUri",
|
||||
"ExtractionConfig",
|
||||
"ExtractionDiff",
|
||||
"ExtractionMethod",
|
||||
"ExtractionResult",
|
||||
"FictionBookMetadata",
|
||||
"FileExtractionConfig",
|
||||
"Footnote",
|
||||
"FormatMetadata",
|
||||
"FormattedBlock",
|
||||
"GridCell",
|
||||
"HeaderMetadata",
|
||||
"HeadingContext",
|
||||
"HeadingLevel",
|
||||
"HierarchicalBlock",
|
||||
"HierarchyConfig",
|
||||
"HtmlMetadata",
|
||||
"HtmlOutputConfig",
|
||||
"HtmlTheme",
|
||||
"ImageExtractionConfig",
|
||||
"ImageKind",
|
||||
"ImageMetadata",
|
||||
"ImageMetadataType",
|
||||
"ImagePreprocessingConfig",
|
||||
"ImagePreprocessingMetadata",
|
||||
"ImageProcessingError",
|
||||
"ImageType",
|
||||
"InlineElement",
|
||||
"InlineType",
|
||||
"IoError",
|
||||
"JatsMetadata",
|
||||
"Keyword",
|
||||
"KeywordAlgorithm",
|
||||
"KeywordConfig",
|
||||
"KreuzbergError",
|
||||
"KreuzbergTimeoutError",
|
||||
"LanguageDetectionConfig",
|
||||
"LayoutClass",
|
||||
"LayoutDetection",
|
||||
"LayoutDetectionConfig",
|
||||
"LayoutRegion",
|
||||
"LinkMetadata",
|
||||
"LinkType",
|
||||
"ListType",
|
||||
"LlmConfig",
|
||||
"LlmUsage",
|
||||
"LockPoisonedError",
|
||||
"Metadata",
|
||||
"MissingDependencyError",
|
||||
"ModelPaths",
|
||||
"NodeContent",
|
||||
"OcrBackendType",
|
||||
"OcrBoundingGeometry",
|
||||
"OcrConfidence",
|
||||
"OcrConfig",
|
||||
"OcrElement",
|
||||
"OcrElementConfig",
|
||||
"OcrElementLevel",
|
||||
"OcrError",
|
||||
"OcrExtractionResult",
|
||||
"OcrMetadata",
|
||||
"OcrPipelineConfig",
|
||||
"OcrPipelineStage",
|
||||
"OcrQualityThresholds",
|
||||
"OcrRotation",
|
||||
"OcrTable",
|
||||
"OcrTableBoundingBox",
|
||||
"OrientationResult",
|
||||
"OtherError",
|
||||
"OutputFormat",
|
||||
"PSMMode",
|
||||
"PaddleLanguage",
|
||||
"PaddleOcrConfig",
|
||||
"PageBoundary",
|
||||
"PageConfig",
|
||||
"PageContent",
|
||||
"PageHierarchy",
|
||||
"PageInfo",
|
||||
"PageStructure",
|
||||
"PageUnitType",
|
||||
"ParsingError",
|
||||
"PdfAnnotation",
|
||||
"PdfAnnotationType",
|
||||
"PdfConfig",
|
||||
"PdfMetadata",
|
||||
"PluginError",
|
||||
"PostProcessorConfig",
|
||||
"PptxAppProperties",
|
||||
"PptxExtractionResult",
|
||||
"PptxMetadata",
|
||||
"ProcessingStage",
|
||||
"ProcessingWarning",
|
||||
"PstMetadata",
|
||||
"RakeParams",
|
||||
"RecognizedTable",
|
||||
"ReductionLevel",
|
||||
"RelationshipKind",
|
||||
"ResultFormat",
|
||||
"RevisionAnchor",
|
||||
"RevisionDelta",
|
||||
"RevisionKind",
|
||||
"SecurityError",
|
||||
"SecurityLimits",
|
||||
"SerializationError",
|
||||
"ServerConfig",
|
||||
"StructuredData",
|
||||
"StructuredDataResult",
|
||||
"StructuredDataType",
|
||||
"StructuredExtractionConfig",
|
||||
"SupportedFormat",
|
||||
"Table",
|
||||
"TableCell",
|
||||
"TableDiff",
|
||||
"TableGrid",
|
||||
"TableModel",
|
||||
"TesseractConfig",
|
||||
"TextAnnotation",
|
||||
"TextDirection",
|
||||
"TextExtractionResult",
|
||||
"TextMetadata",
|
||||
"TokenReductionConfig",
|
||||
"TokenReductionOptions",
|
||||
"TreeSitterConfig",
|
||||
"TreeSitterProcessConfig",
|
||||
"UnsupportedFormatError",
|
||||
"UriKind",
|
||||
"ValidationError",
|
||||
"XlsxAppProperties",
|
||||
"XmlExtractionResult",
|
||||
"XmlMetadata",
|
||||
"YakeParams",
|
||||
"YearRange",
|
||||
"batch_extract_bytes",
|
||||
"batch_extract_bytes_sync",
|
||||
"batch_extract_files",
|
||||
"batch_extract_files_sync",
|
||||
"clear_document_extractors",
|
||||
"clear_embedding_backends",
|
||||
"clear_ocr_backends",
|
||||
"clear_post_processors",
|
||||
"clear_renderers",
|
||||
"clear_validators",
|
||||
"compare",
|
||||
"detect_mime_type",
|
||||
"detect_mime_type_from_bytes",
|
||||
"embed_texts",
|
||||
"embed_texts_async",
|
||||
"extract_bytes",
|
||||
"extract_bytes_sync",
|
||||
"extract_file",
|
||||
"extract_file_sync",
|
||||
"get_embedding_preset",
|
||||
"get_extensions_for_mime",
|
||||
"list_document_extractors",
|
||||
"list_embedding_backends",
|
||||
"list_embedding_presets",
|
||||
"list_ocr_backends",
|
||||
"list_post_processors",
|
||||
"list_renderers",
|
||||
"list_validators",
|
||||
"register_document_extractor",
|
||||
"register_embedding_backend",
|
||||
"register_ocr_backend",
|
||||
"register_post_processor",
|
||||
"register_renderer",
|
||||
"register_validator",
|
||||
"render_pdf_page_to_png",
|
||||
"unregister_document_extractor",
|
||||
"unregister_embedding_backend",
|
||||
"unregister_ocr_backend",
|
||||
"unregister_post_processor",
|
||||
"unregister_renderer",
|
||||
"unregister_validator",
|
||||
]
|
||||
|
||||
# Package version string; same value as the "Version:" line in this module's docstring.
# NOTE(review): presumably both are written by the generator from one source — confirm before editing by hand.
__version__ = "5.0.0-rc.3"
|
||||
3057
packages/python/kreuzberg/_kreuzberg.pyi
generated
Normal file
3057
packages/python/kreuzberg/_kreuzberg.pyi
generated
Normal file
File diff suppressed because it is too large
Load Diff
1171
packages/python/kreuzberg/api.py
generated
Normal file
1171
packages/python/kreuzberg/api.py
generated
Normal file
File diff suppressed because it is too large
Load Diff
74
packages/python/kreuzberg/exceptions.py
generated
Normal file
74
packages/python/kreuzberg/exceptions.py
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
"""Exception hierarchy."""
|
||||
|
||||
|
||||
class KreuzbergError(Exception):
    """Main error type for all Kreuzberg operations.

    Every other exception class in this module derives directly from this
    one, so ``except KreuzbergError`` catches any of them.
    """


class IoError(KreuzbergError):
    """I/O error.

    Derives only from :class:`KreuzbergError`; it is not a subclass of the
    built-in ``OSError``, so ``except OSError`` does not catch it.
    """


class ParsingError(KreuzbergError):
    """Parsing error."""


class OcrError(KreuzbergError):
    """OCR error."""


class ValidationError(KreuzbergError):
    """Validation error.

    Derives only from :class:`KreuzbergError`; it is not a subclass of the
    built-in ``ValueError``.
    """


class CacheError(KreuzbergError):
    """Cache error."""


class ImageProcessingError(KreuzbergError):
    """Image processing error."""


class SerializationError(KreuzbergError):
    """Serialization error."""


class MissingDependencyError(KreuzbergError):
    """Missing dependency error.

    Derives only from :class:`KreuzbergError`; it is not a subclass of the
    built-in ``ImportError``.
    """


class PluginError(KreuzbergError):
    """Plugin error."""


class LockPoisonedError(KreuzbergError):
    """Lock poisoned error."""


class UnsupportedFormatError(KreuzbergError):
    """Unsupported format error."""


class EmbeddingError(KreuzbergError):
    """Embedding error."""


class KreuzbergTimeoutError(KreuzbergError):
    """Kreuzberg timeout error.

    Derives only from :class:`KreuzbergError`; it is not a subclass of the
    built-in ``TimeoutError``, so ``except TimeoutError`` does not catch it.
    """


class CancelledError(KreuzbergError):
    """Cancelled error.

    Shares its name with ``asyncio.CancelledError`` and
    ``concurrent.futures.CancelledError`` but is unrelated to both: this
    class derives from :class:`KreuzbergError` (and therefore from
    ``Exception``), so a handler for the stdlib classes does not catch it.
    """


class SecurityError(KreuzbergError):
    """Security error."""


class OtherError(KreuzbergError):
    """Other error."""
|
||||
2671
packages/python/kreuzberg/options.py
generated
Normal file
2671
packages/python/kreuzberg/options.py
generated
Normal file
File diff suppressed because it is too large
Load Diff
0
packages/python/kreuzberg/py.typed
generated
Normal file
0
packages/python/kreuzberg/py.typed
generated
Normal file
Reference in New Issue
Block a user