Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,75 @@
import asyncio
from kreuzberg import (
ExtractionConfig,
KeywordConfig,
KeywordAlgorithm,
YakeParams,
RakeParams,
extract_file,
)
# Example 1: Basic YAKE configuration
# Uses YAKE algorithm with default parameters and English stopword filtering
async def basic_yake() -> None:
config = ExtractionConfig(
keywords=KeywordConfig(
algorithm=KeywordAlgorithm.YAKE,
max_keywords=10,
min_score=0.0,
ngram_range=(1, 3),
language="en",
yake_params=None,
rake_params=None,
)
)
result = await extract_file("document.pdf", config=config)
print(f"Keywords: {result.keywords}")
# Example 2: Advanced YAKE with custom parameters
# Fine-tunes YAKE with custom window size for co-occurrence analysis
async def advanced_yake() -> None:
config = ExtractionConfig(
keywords=KeywordConfig(
algorithm=KeywordAlgorithm.YAKE,
max_keywords=15,
min_score=0.1,
ngram_range=(1, 2),
language="en",
yake_params=YakeParams(
window_size=1,
),
rake_params=None,
)
)
result = await extract_file("document.pdf", config=config)
print(f"Keywords: {result.keywords}")
# Example 3: RAKE configuration
# Uses RAKE algorithm for rapid keyword extraction with phrase constraints
async def rake_config() -> None:
config = ExtractionConfig(
keywords=KeywordConfig(
algorithm=KeywordAlgorithm.RAKE,
max_keywords=10,
min_score=5.0,
ngram_range=(1, 3),
language="en",
yake_params=None,
rake_params=RakeParams(
min_word_length=1,
max_words_per_phrase=3,
),
)
)
result = await extract_file("document.pdf", config=config)
print(f"Keywords: {result.keywords}")
if __name__ == "__main__":
asyncio.run(basic_yake())