This commit is contained in:
41
packages/java/dev/kreuzberg/ChunkSizing.java
generated
Normal file
41
packages/java/dev/kreuzberg/ChunkSizing.java
generated
Normal file
@@ -0,0 +1,41 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
package dev.kreuzberg;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* How chunk size is measured.
|
||||
*
|
||||
* Defaults to {@code Characters} (Unicode character count). When using token-based sizing,
|
||||
* chunks are sized by token count according to the specified tokenizer.
|
||||
*
|
||||
* Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer
|
||||
* available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers
|
||||
* (e.g., {@code Xenova/gpt-4o}, {@code Xenova/cl100k_base}).
|
||||
*/
|
||||
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", visible = false)
|
||||
@JsonSubTypes({
|
||||
@JsonSubTypes.Type(value = ChunkSizing.Characters.class, name = "characters"),
|
||||
@JsonSubTypes.Type(value = ChunkSizing.Tokenizer.class, name = "tokenizer")
|
||||
})
|
||||
@com.fasterxml.jackson.annotation.JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public sealed interface ChunkSizing {
|
||||
|
||||
/** Size measured in Unicode characters (default). */
|
||||
record Characters() implements ChunkSizing {
|
||||
}
|
||||
|
||||
/** Size measured in tokens from a HuggingFace tokenizer. */
|
||||
record Tokenizer(
|
||||
@JsonProperty("model") String model,
|
||||
@JsonProperty("cache_dir") Optional<java.nio.file.Path> cacheDir
|
||||
) implements ChunkSizing {
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user