Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf with chunking enabled (MaxChars=1000,
// MaxOverlap=200) and prints each chunk's position plus a short preview.
func main() {
	maxChars := 1000
	maxOverlap := 200

	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
		},
	}

	result, err := kreuzberg.ExtractFileSync("document.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	for i, chunk := range result.Chunks {
		fmt.Printf("Chunk %d/%d (%d-%d)\n", i+1, chunk.Metadata.TotalChunks, chunk.Metadata.CharStart, chunk.Metadata.CharEnd)

		// Truncate by runes rather than bytes: slicing the string at a byte
		// offset can cut a multi-byte UTF-8 character in half and print an
		// invalid sequence at the end of the preview.
		preview := []rune(chunk.Content)
		fmt.Printf("%s...\n", string(preview[:min(len(preview), 100)]))
	}
}
// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
```

View File

@@ -0,0 +1,45 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts research_paper.pdf with chunking (MaxChars=500,
// MaxOverlap=50) and an embedding configuration, then prints each chunk's
// position, a short preview, and the embedding length when one is present.
func main() {
	maxChars := 500
	maxOverlap := 50

	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
			Embedding: &kreuzberg.EmbeddingConfig{
				Model:     "balanced",
				Normalize: true,
			},
		},
	}

	result, err := kreuzberg.ExtractFileSync("research_paper.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	for i, chunk := range result.Chunks {
		fmt.Printf("Chunk %d/%d (%d-%d)\n", i+1, chunk.Metadata.TotalChunks, chunk.Metadata.CharStart, chunk.Metadata.CharEnd)

		// Truncate by runes rather than bytes: slicing the string at a byte
		// offset can cut a multi-byte UTF-8 character in half and print an
		// invalid sequence at the end of the preview.
		preview := []rune(chunk.Content)
		fmt.Printf("Content: %s...\n", string(preview[:min(len(preview), 100)]))

		// Only report dimensions when the chunk actually carries an embedding.
		if chunk.Embedding != nil {
			fmt.Printf("Embedding: %d dimensions\n", len(chunk.Embedding))
		}
	}
}
// min reports the lesser of a and b; when they are equal that shared value
// is returned.
func min(a, b int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	return smallest
}
```

View File

@@ -0,0 +1,32 @@
```go title="Go"
package main
import (
"fmt"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main assembles an extraction config with chunking and embedding settings
// and prints a one-line summary of it. No extraction is performed here.
func main() {
	chunkSize := 1024
	overlap := 100
	embedBatch := int32(32)

	// Build the nested configuration inside-out so each level reads on its own.
	embedding := &kreuzberg.EmbeddingConfig{
		Model:                "balanced",
		Normalize:            true,
		BatchSize:            &embedBatch,
		ShowDownloadProgress: false,
	}
	chunking := &kreuzberg.ChunkingConfig{
		MaxChars:   &chunkSize,
		MaxOverlap: &overlap,
		Embedding:  embedding,
	}
	config := &kreuzberg.ExtractionConfig{Chunking: chunking}

	fmt.Printf("Config: MaxChars=%d, MaxOverlap=%d, Model=%s\n",
		*config.Chunking.MaxChars,
		*config.Chunking.MaxOverlap,
		config.Chunking.Embedding.Model)
}
```

View File

@@ -0,0 +1,29 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts research_paper.pdf with a keyword configuration and prints
// the "keywords" entry of the result's additional metadata, if present.
func main() {
	keywordCfg := &kreuzberg.KeywordConfig{
		Algorithm:   "YAKE",
		MaxKeywords: 10,
		MinScore:    0.3,
	}
	config := &kreuzberg.ExtractionConfig{Keywords: keywordCfg}

	result, err := kreuzberg.ExtractFileSync("research_paper.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	// Nothing to print when the metadata carries no "keywords" entry.
	keywords, found := result.Metadata.Additional["keywords"]
	if !found {
		return
	}
	fmt.Printf("Keywords: %v\n", keywords)
}
```

View File

@@ -0,0 +1,33 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts scanned_document.pdf with quality processing enabled and
// reports the resulting quality score, warning when it is below 0.5.
func main() {
	config := &kreuzberg.ExtractionConfig{
		EnableQualityProcessing: true,
	}

	result, err := kreuzberg.ExtractFileSync("scanned_document.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	// A nil score means no score was reported. Treating it as 0.0 would
	// print a misleading "low quality (0.00)" warning, so report it as
	// unavailable instead.
	if result.QualityScore == nil {
		fmt.Println("Quality score not available")
		return
	}

	qualityScore := *result.QualityScore
	if qualityScore < 0.5 {
		fmt.Printf("Warning: Low quality extraction (%.2f)\n", qualityScore)
		fmt.Println("Consider re-scanning with higher DPI or adjusting OCR settings")
		return
	}
	fmt.Printf("Quality score: %.2f\n", qualityScore)
}
```

View File

@@ -0,0 +1,36 @@
```go title="Go"
package main
import (
"fmt"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main generates embeddings for a few sample strings with the "balanced"
// preset, first through EmbedTexts and then through EmbedTextsAsync, and
// prints the sizes of the returned slices.
func main() {
	presetName := "balanced"
	normalizeOutput := true

	config := kreuzberg.EmbeddingConfig{
		Model:     kreuzberg.EmbeddingModelType{Type: "preset", Name: &presetName},
		Normalize: &normalizeOutput,
	}

	// Synchronous
	batch := []string{"Hello, world!", "Kreuzberg is fast"}
	vectors, err := kreuzberg.EmbedTexts(batch, config)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(vectors))    // 2
	fmt.Println(len(vectors[0])) // 768

	// Asynchronous
	vectors, err = kreuzberg.EmbedTextsAsync([]string{"Hello, world!"}, config)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(vectors[0])) // 768
}
```

View File

@@ -0,0 +1,24 @@
```go title="Go"
package main
import (
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf with token reduction in "moderate" mode and
// PreserveImportantWords set, then logs the length of the extracted content.
func main() {
	keepImportant := true

	config := &kreuzberg.ExtractionConfig{
		TokenReduction: &kreuzberg.TokenReductionConfig{
			Mode:                   "moderate",
			PreserveImportantWords: &keepImportant,
		},
	}

	result, err := kreuzberg.ExtractFileSync("document.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	log.Println("content length:", len(result.Content))
}
```

View File

@@ -0,0 +1,28 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts verbose_document.pdf with token reduction in "moderate" mode
// and PreserveMarkdown enabled, then prints three token statistics read from
// the result's additional metadata.
func main() {
	reduction := &kreuzberg.TokenReductionConfig{
		Mode:             "moderate",
		PreserveMarkdown: true,
	}
	config := &kreuzberg.ExtractionConfig{TokenReduction: reduction}

	result, err := kreuzberg.ExtractFileSync("verbose_document.pdf", config)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	// All three statistics are looked up in the same metadata map; a missing
	// key prints as that map's zero value.
	stats := result.Metadata.Additional
	fmt.Printf("Original tokens: %v\n", stats["original_token_count"])
	fmt.Printf("Reduced tokens: %v\n", stats["token_count"])
	fmt.Printf("Reduction ratio: %v\n", stats["token_reduction_ratio"])
}
```