19 lines
540 B
Markdown
19 lines
540 B
Markdown
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;

// Assemble the chunking settings on their own so the extraction config
// below reads as a single flat builder chain.
ChunkingConfig chunking = ChunkingConfig.builder()
    .maxChars(512)
    .maxOverlap(50)
    .embedding("balanced")
    .build();

// Attach the chunking settings to the extraction configuration.
ExtractionConfig config = ExtractionConfig.builder()
    .chunking(chunking)
    .build();

// Run the extraction on the sample document with the configuration above.
ExtractionResult result = Kreuzberg.extractFile("document.pdf", config);

// Report how long the extracted content is.
String summary = "Extracted content: " + result.getContent().length() + " characters";
System.out.println(summary);
```