23 lines
633 B
Markdown
23 lines
633 B
Markdown
```python title="Python"
import asyncio

from kreuzberg import extract_file, ExtractionConfig, KeywordConfig, KeywordAlgorithm


async def main() -> None:
    """Extract keywords from ``research_paper.pdf`` and print each with its score.

    Builds an ``ExtractionConfig`` whose keyword settings select the YAKE
    algorithm with ``max_keywords=10`` and ``min_score=0.3``, awaits
    ``extract_file`` with that config, then prints one ``text: score`` line
    per returned keyword, with the score formatted to three decimals.
    """
    config: ExtractionConfig = ExtractionConfig(
        keywords=KeywordConfig(
            algorithm=KeywordAlgorithm.YAKE,
            max_keywords=10,
            min_score=0.3,
        )
    )
    result = await extract_file("research_paper.pdf", config=config)

    # ``extracted_keywords`` may be falsy (e.g. None); iterate an empty list then.
    keywords: list = result.extracted_keywords or []
    for kw in keywords:
        # Fall back to neutral values so the formatted print never sees None.
        score: float = kw.score or 0.0
        text: str = kw.text or ""
        print(f"{text}: {score:.3f}")


asyncio.run(main())
```