Files
fil/docs/snippets/java/config/chunking_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

61 lines
1.8 KiB
Markdown

```java title="Java"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
// Build the chunking settings on their own (maxChars = 1000, maxOverlap = 200),
// then attach them to the extraction configuration.
ChunkingConfig chunking = ChunkingConfig.builder()
    .maxChars(1000)
    .maxOverlap(200)
    .build();

ExtractionConfig config = ExtractionConfig.builder()
    .chunking(chunking)
    .build();
```
```java title="Java - Markdown with Heading Context"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.HeadingContext;
import dev.kreuzberg.HeadingLevel;
// Select the "markdown" chunker with maxChars = 500 and maxOverlap = 50,
// and name "Xenova/gpt-4o" as the sizing tokenizer.
ExtractionConfig config = ExtractionConfig.builder()
    .chunking(ChunkingConfig.builder()
        .chunkerType("markdown")
        .maxChars(500)
        .maxOverlap(50)
        .sizingTokenizer("Xenova/gpt-4o")
        .build())
    .build();

ExtractionResult result = KreuzbergClient.extractFile("document.md", config);

// For every chunk whose heading context is present, print the level and text
// of each heading it lists. Chunks without a heading context print nothing.
// ifPresent replaces the isPresent()/get() pair so the value is never
// unwrapped outside the presence check.
result.getChunks().forEach(chunk ->
    chunk.getMetadata().getHeadingContext().ifPresent(context -> {
        System.out.println("Headings:");
        context.getHeadings().forEach(heading ->
            System.out.println(" Level " + heading.getLevel() + ": " + heading.getText()));
    }));
```
```java title="Java - Prepend Heading Context"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
// "markdown" chunker with maxChars = 500, maxOverlap = 50 and
// prependHeadingContext switched on.
ChunkingConfig chunking = ChunkingConfig.builder()
    .chunkerType("markdown")
    .maxChars(500)
    .maxOverlap(50)
    .prependHeadingContext(true)
    .build();

ExtractionConfig config = ExtractionConfig.builder()
    .chunking(chunking)
    .build();

ExtractionResult result = KreuzbergClient.extractFile("document.md", config);

result.getChunks().forEach(chunk -> {
    // Each chunk's content is prefixed with its heading breadcrumb;
    // print at most the first 100 characters of it as a preview.
    var content = chunk.getContent();
    int previewLength = Math.min(100, content.length());
    System.out.println(content.substring(0, previewLength));
});
```