Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/v5"
)
// main extracts document.pdf with chunking enabled and prints, for every
// chunk that carries page information, a short content preview followed by
// the page (or page range) the chunk covers.
func main() {
	maxChars := uint(500)
	overlap := uint(50)
	// NOTE(review): the other snippets in this change set configure chunking
	// through MaxChars/MaxOverlap (*int) — confirm these field names and types.
	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxCharacters: &maxChars,
			Overlap:       &overlap,
		},
	}

	result, err := kreuzberg.ExtractFileSync("document.pdf", config)
	if err != nil {
		log.Fatal(err)
	}

	for _, chunk := range result.Chunks {
		first := chunk.Metadata.FirstPage
		last := chunk.Metadata.LastPage
		// Without a first page there is nothing to report for this chunk.
		if first == nil {
			continue
		}

		pageRange := fmt.Sprintf("Page %d", *first)
		if last != nil && *first != *last {
			pageRange = fmt.Sprintf("Pages %d-%d", *first, *last)
		}

		// Truncate on rune boundaries: slicing the string by byte offset can
		// cut a multi-byte UTF-8 character in half and print invalid text.
		preview := chunk.Content
		if runes := []rune(preview); len(runes) > 50 {
			preview = string(runes[:50])
		}
		fmt.Printf("Chunk: %s... (%s)\n", preview, pageRange)
	}
}
```

View File

@@ -0,0 +1,27 @@
```go title="Go"
package main
import (
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main assembles an ExtractionConfig whose chunking section carries an
// embedding configuration based on the "all-minilm-l6-v2" preset. The config
// is only constructed, never used, so the program has no observable output.
func main() {
	var (
		chunkSize    = 1000
		chunkOverlap = 200
		normalized   = true
		embedBatch   = int32(32)
	)

	// Build the nested embedding settings first, then attach them.
	embedding := &kreuzberg.EmbeddingConfig{
		Model:     kreuzberg.EmbeddingModelType_Preset("all-minilm-l6-v2"),
		Normalize: &normalized,
		BatchSize: &embedBatch,
	}
	chunking := &kreuzberg.ChunkingConfig{
		MaxChars:   &chunkSize,
		MaxOverlap: &chunkOverlap,
		Embedding:  embedding,
	}

	config := &kreuzberg.ExtractionConfig{Chunking: chunking}
	_ = config
}
```

View File

@@ -0,0 +1,46 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts research_paper.pdf with chunking and embeddings enabled
// (preset "all-mpnet-base-v2"), reports how many chunks were produced, and
// prints a preview of at most the first three chunks.
func main() {
	maxChars := 500
	maxOverlap := 50
	normalize := true
	batchSize := int32(16)
	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
			Embedding: &kreuzberg.EmbeddingConfig{
				Model:     kreuzberg.EmbeddingModelType_Preset("all-mpnet-base-v2"),
				Normalize: &normalize,
				BatchSize: &batchSize,
			},
		},
	}

	result, err := kreuzberg.ExtractFileSync("research_paper.pdf", config)
	if err != nil {
		log.Fatalf("RAG extraction failed: %v", err)
	}

	chunks := result.Chunks
	fmt.Printf("Found %d chunks for RAG pipeline\n", len(chunks))

	for i := 0; i < len(chunks) && i < 3; i++ {
		// Truncate on rune boundaries: slicing the string by byte offset can
		// cut a multi-byte UTF-8 character in half and print invalid text.
		content := chunks[i].Content
		if runes := []rune(content); len(runes) > 80 {
			content = string(runes[:80])
		}
		fmt.Printf("Chunk %d: %s...\n", i, content)
	}
}
```

View File

@@ -0,0 +1,49 @@
```go title="Go"
package main
import (
"fmt"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf with chunking and embeddings enabled (preset
// "balanced"), then prints a short preview of every chunk and, when the chunk
// has an embedding, the number of dimensions in it.
func main() {
	maxChars := 512
	maxOverlap := 50
	normalize := true
	batchSize := int32(32)
	showProgress := false
	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
			Embedding: &kreuzberg.EmbeddingConfig{
				Model:                kreuzberg.EmbeddingModelType_Preset("balanced"),
				Normalize:            &normalize,
				BatchSize:            &batchSize,
				ShowDownloadProgress: &showProgress,
			},
		},
	}

	result, err := kreuzberg.ExtractFileSync("document.pdf", config)
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	for index, chunk := range result.Chunks {
		chunkID := fmt.Sprintf("doc_chunk_%d", index)

		// Truncate on rune boundaries: slicing the string by byte offset can
		// cut a multi-byte UTF-8 character in half and print invalid text.
		content := chunk.Content
		if runes := []rune(content); len(runes) > 50 {
			content = string(runes[:50])
		}
		fmt.Printf("Chunk %s: %s\n", chunkID, content)

		// len of a nil slice is 0, so no separate nil check is required.
		if len(chunk.Embedding) > 0 {
			fmt.Printf(" Embedding dimensions: %d\n", len(chunk.Embedding))
		}
	}
}
```

View File

@@ -0,0 +1,23 @@
```go title="Go"
package main
import (
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main assembles an ExtractionConfig that selects the YAKE keyword algorithm
// with a keyword limit, a minimum score and a language. The config is only
// constructed, never used, so the program has no observable output.
func main() {
	var (
		keywordLimit = int32(10)
		scoreFloor   = 0.3
		lang         = "en"
	)

	keywordCfg := &kreuzberg.KeywordConfig{
		Algorithm:   kreuzberg.KeywordAlgorithm_YAKE,
		MaxKeywords: &keywordLimit,
		MinScore:    &scoreFloor,
		Language:    &lang,
	}

	config := &kreuzberg.ExtractionConfig{Keywords: keywordCfg}
	_ = config
}
```

View File

@@ -0,0 +1,37 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts research_paper.pdf with YAKE keyword extraction enabled and
// prints every keyword found under the "keywords" metadata entry together
// with its score. Nothing is printed when the entry is absent.
func main() {
	maxKeywords := int32(10)
	minScore := 0.3
	config := &kreuzberg.ExtractionConfig{
		Keywords: &kreuzberg.KeywordConfig{
			Algorithm:   kreuzberg.KeywordAlgorithm_YAKE,
			MaxKeywords: &maxKeywords,
			MinScore:    &minScore,
		},
	}

	result, err := kreuzberg.ExtractFileSync("research_paper.pdf", config)
	if err != nil {
		log.Fatalf("extraction failed: %v", err)
	}

	raw, ok := result.Metadata["keywords"]
	if !ok {
		return
	}

	// Metadata values are dynamically typed; use checked assertions so an
	// unexpected shape yields a clear message instead of a runtime panic.
	keywordList, ok := raw.([]map[string]interface{})
	if !ok {
		log.Fatalf("unexpected keywords metadata type %T", raw)
	}

	for _, kw := range keywordList {
		text, okText := kw["text"].(string)
		score, okScore := kw["score"].(float64)
		// Skip entries that lack a textual keyword or a numeric score.
		if !okText || !okScore {
			continue
		}
		fmt.Printf("%s: %.3f\n", text, score)
	}
}
```

View File

@@ -0,0 +1,22 @@
```go title="Go"
package main
import (
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main assembles an ExtractionConfig with language detection switched on,
// multi-language detection switched off and a minimum confidence of 0.8. The
// config is only constructed, never used, so the program has no output.
func main() {
	var (
		detectionOn   = true
		multiLanguage = false
		threshold     = 0.8
	)

	detection := &kreuzberg.LanguageDetectionConfig{
		Enabled:        &detectionOn,
		MinConfidence:  &threshold,
		DetectMultiple: &multiLanguage,
	}

	config := &kreuzberg.ExtractionConfig{LanguageDetection: detection}
	_ = config
}
```

View File

@@ -0,0 +1,40 @@
```go title="Go"
package main
import (
"fmt"
"log"
"strings"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts multilingual_document.pdf with multi-language detection
// enabled, then prints the detected languages (or a notice when there are
// none), the length of the extracted content and the MIME type.
func main() {
	var (
		detectionOn   = true
		multiLanguage = true
		threshold     = 0.8
	)

	detection := &kreuzberg.LanguageDetectionConfig{
		Enabled:        &detectionOn,
		MinConfidence:  &threshold,
		DetectMultiple: &multiLanguage,
	}
	config := &kreuzberg.ExtractionConfig{LanguageDetection: detection}

	result, err := kreuzberg.ExtractFileSync("multilingual_document.pdf", config)
	if err != nil {
		log.Fatalf("Processing failed: %v", err)
	}

	// Handle the empty case first so the common path reads straight through.
	detected := result.DetectedLanguages
	if len(detected) == 0 {
		fmt.Println("No languages detected")
	} else {
		joined := strings.Join(detected, ", ")
		fmt.Printf("Detected %d language(s): %s\n", len(detected), joined)
	}

	fmt.Printf("Total content: %d characters\n", len(result.Content))
	fmt.Printf("MIME type: %s\n", result.MimeType)
}
```

View File

@@ -0,0 +1,16 @@
```go title="Go"
package main
import (
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main creates an ExtractionConfig and sets EnableQualityProcessing to true.
// The config is only constructed, never used, so the program has no output.
func main() {
	qualityOn := true

	config := new(kreuzberg.ExtractionConfig)
	config.EnableQualityProcessing = &qualityOn

	_ = config
}
```

View File

@@ -0,0 +1,35 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts scanned_document.pdf with quality processing enabled and
// reports the resulting quality score: a warning when it is below 0.5, the
// plain score otherwise, and an explicit notice when no score was produced.
func main() {
	enableQualityProcessing := true
	config := &kreuzberg.ExtractionConfig{
		EnableQualityProcessing: &enableQualityProcessing,
	}

	result, err := kreuzberg.ExtractFileSync("scanned_document.pdf", config)
	if err != nil {
		log.Fatalf("extraction failed: %v", err)
	}

	// A missing score is not the same as a score of zero: treating nil as 0.0
	// would emit a misleading "low quality" warning.
	switch score := result.QualityScore; {
	case score == nil:
		fmt.Println("Quality score unavailable")
	case *score < 0.5:
		fmt.Printf("Warning: Low quality extraction (%.2f)\n", *score)
		fmt.Println("Consider re-scanning with higher DPI or adjusting OCR settings")
	default:
		fmt.Printf("Quality score: %.2f\n", *score)
	}
}
```

View File

@@ -0,0 +1,24 @@
```go title="Go"
package main
import (
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main assembles an ExtractionConfig whose token-reduction section uses the
// "moderate" mode, keeps Markdown and code, and passes "eng" as the language
// hint. The config is only constructed, never used, so there is no output.
func main() {
	var (
		keepMarkdown  = true
		keepCode      = true
		reductionMode = "moderate"
		hint          = "eng"
	)

	tokenReduction := &kreuzberg.TokenReductionConfig{
		Mode:             &reductionMode,
		PreserveMarkdown: &keepMarkdown,
		PreserveCode:     &keepCode,
		LanguageHint:     &hint,
	}

	config := &kreuzberg.ExtractionConfig{TokenReduction: tokenReduction}
	_ = config
}
```

View File

@@ -0,0 +1,46 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts verbose_document.pdf with "moderate" token reduction and
// prints the original and reduced token counts plus the reduction ratio as
// read from the result metadata. Missing or non-numeric entries count as 0.
func main() {
	preserveMarkdown := true
	mode := "moderate"
	config := &kreuzberg.ExtractionConfig{
		TokenReduction: &kreuzberg.TokenReductionConfig{
			Mode:             &mode,
			PreserveMarkdown: &preserveMarkdown,
		},
	}

	result, err := kreuzberg.ExtractFileSync("verbose_document.pdf", config)
	if err != nil {
		log.Fatalf("extraction failed: %v", err)
	}

	// toInt converts a dynamically typed metadata value to int. An unchecked
	// val.(int) panics whenever the value has another numeric type (numbers
	// decoded from JSON, for instance, arrive as float64), so accept the
	// common numeric types and fall back to 0 for anything else.
	toInt := func(v interface{}) int {
		switch n := v.(type) {
		case int:
			return n
		case int64:
			return int(n)
		case float64:
			return int(n)
		default:
			return 0
		}
	}

	original := 0
	reduced := 0
	ratio := 0.0
	if val, ok := result.Metadata["original_token_count"]; ok {
		original = toInt(val)
	}
	if val, ok := result.Metadata["token_count"]; ok {
		reduced = toInt(val)
	}
	if val, ok := result.Metadata["token_reduction_ratio"]; ok {
		// Checked assertion: leave ratio at 0 if the value is not a float64.
		if f, isFloat := val.(float64); isFloat {
			ratio = f
		}
	}

	fmt.Printf("Reduced from %d to %d tokens\n", original, reduced)
	fmt.Printf("Reduction: %.1f%%\n", ratio*100)
}
```

View File

@@ -0,0 +1,67 @@
```go title="Go"
package main
import (
"fmt"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// VectorRecord is one entry handed to storeInVectorDatabase: a single
// document chunk together with its embedding and descriptive metadata.
type VectorRecord struct {
// ID identifies the record; extractAndVectorize builds it as
// "<documentID>_chunk_<index>".
ID string
// Embedding is the chunk's embedding vector; it may be empty, in which
// case storeInVectorDatabase skips the record.
Embedding []float32
// Content is the chunk text.
Content string
// Metadata holds string key/value pairs; extractAndVectorize fills in
// "document_id", "chunk_index" and "content_length".
Metadata map[string]string
}
// extractAndVectorize extracts documentPath with chunking and embeddings
// enabled (preset "balanced"), turns every resulting chunk into a
// VectorRecord whose ID and metadata are derived from documentID and the
// chunk index, passes the records to storeInVectorDatabase, and returns them.
//
// On extraction failure it returns a nil slice and an error that wraps the
// underlying one, so callers can still match it with errors.Is / errors.As.
func extractAndVectorize(documentPath string, documentID string) ([]VectorRecord, error) {
	maxChars := 512
	maxOverlap := 50
	normalize := true
	batchSize := int32(32)
	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
			Embedding: &kreuzberg.EmbeddingConfig{
				Model:     kreuzberg.EmbeddingModelType_Preset("balanced"),
				Normalize: &normalize,
				BatchSize: &batchSize,
			},
		},
	}

	result, err := kreuzberg.ExtractFileSync(documentPath, config)
	if err != nil {
		// Add the path so the caller can tell which document failed.
		return nil, fmt.Errorf("extracting %q: %w", documentPath, err)
	}

	// The number of records is known up front, so allocate once.
	vectorRecords := make([]VectorRecord, 0, len(result.Chunks))
	for index, chunk := range result.Chunks {
		vectorRecords = append(vectorRecords, VectorRecord{
			ID:        fmt.Sprintf("%s_chunk_%d", documentID, index),
			Content:   chunk.Content,
			Embedding: chunk.Embedding,
			Metadata: map[string]string{
				"document_id":    documentID,
				"chunk_index":    fmt.Sprintf("%d", index),
				"content_length": fmt.Sprintf("%d", len(chunk.Content)),
			},
		})
	}

	storeInVectorDatabase(vectorRecords)
	return vectorRecords, nil
}
// storeInVectorDatabase prints one summary line (ID, content length and
// embedding length) for every record that has a non-empty embedding. Records
// without an embedding are skipped silently.
func storeInVectorDatabase(records []VectorRecord) {
	for i := range records {
		rec := &records[i]
		if len(rec.Embedding) == 0 {
			continue
		}

		contentLen, dims := len(rec.Content), len(rec.Embedding)
		fmt.Printf("Storing %s: %d chars, %d dims\n", rec.ID, contentLen, dims)
	}
}
```