Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
```go title="Go"
package main
import (
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf with language detection turned on and logs the
// byte length of the extracted content.
func main() {
	// Enabled and MinConfidence are passed by pointer, so their values need
	// addressable variables.
	detect := true
	threshold := 0.9

	langCfg := kreuzberg.LanguageDetectionConfig{
		Enabled:        &detect,
		MinConfidence:  &threshold,
		DetectMultiple: true,
	}
	cfg := kreuzberg.ExtractionConfig{LanguageDetection: &langCfg}

	// NOTE(review): this call passes three arguments and the config by value;
	// confirm the argument list (and the meaning of the nil second argument)
	// against the v5 ExtractFileSync signature.
	doc, err := kreuzberg.ExtractFileSync("document.pdf", nil, cfg)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	log.Println("content length:", len(doc.Content))
}
```

View File

@@ -0,0 +1,29 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts multilingual_document.pdf with language detection configured
// and prints the languages reported on the extraction result.
func main() {
	// MinConfidence is passed by pointer, so it needs an addressable variable.
	threshold := 0.8

	detection := kreuzberg.LanguageDetectionConfig{
		Enabled:        true,
		MinConfidence:  &threshold,
		DetectMultiple: true,
	}
	cfg := &kreuzberg.ExtractionConfig{LanguageDetection: &detection}

	doc, err := kreuzberg.ExtractFileSync("multilingual_document.pdf", cfg)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	// Example of the list printed for a document mixing three languages:
	// [eng fra deu]
	fmt.Printf("Detected languages: %v\n", doc.DetectedLanguages)
}
```

View File

@@ -0,0 +1,115 @@
```go title="Go"
package main
import (
"fmt"
"log"
"strings"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main prints selected metadata for document.pdf and then for page.html.
// A failed extraction of either file ends the program through log.Fatalf.
func main() {
	pdfDoc, err := kreuzberg.ExtractFileSync("document.pdf", nil)
	if err != nil {
		log.Fatalf("extract pdf: %v", err)
	}

	// PDF metadata: every field used here is a pointer and is printed only
	// when it is set.
	if meta, ok := pdfDoc.Metadata.PdfMetadata(); ok {
		if meta.PageCount != nil {
			fmt.Printf("Pages: %d\n", *meta.PageCount)
		}
		if meta.Author != nil {
			fmt.Printf("Author: %s\n", *meta.Author)
		}
		if meta.Title != nil {
			fmt.Printf("Title: %s\n", *meta.Title)
		}
	}

	htmlDoc, err := kreuzberg.ExtractFileSync("page.html", nil)
	if err != nil {
		log.Fatalf("extract html: %v", err)
	}

	// Nothing else follows the HTML section, so stop here when the result
	// carries no HTML metadata.
	page, ok := htmlDoc.Metadata.HTMLMetadata()
	if !ok {
		return
	}

	if page.Title != nil {
		fmt.Printf("Title: %s\n", *page.Title)
	}
	if page.Description != nil {
		fmt.Printf("Description: %s\n", *page.Description)
	}

	// Keywords are exposed as a list of strings.
	if len(page.Keywords) > 0 {
		fmt.Printf("Keywords: %s\n", strings.Join(page.Keywords, ", "))
	}

	// Canonical URL (this field was renamed from "canonical").
	if page.CanonicalURL != nil {
		fmt.Printf("Canonical URL: %s\n", *page.CanonicalURL)
	}

	// Open Graph fields live in a map; a lookup on an empty map simply
	// reports "not found", so no length check is needed first.
	if img, found := page.OpenGraph["image"]; found {
		fmt.Printf("Open Graph Image: %s\n", img)
	}
	if title, found := page.OpenGraph["title"]; found {
		fmt.Printf("Open Graph Title: %s\n", title)
	}
	if kind, found := page.OpenGraph["type"]; found {
		fmt.Printf("Open Graph Type: %s\n", kind)
	}

	// Twitter Card fields are looked up in their map the same way.
	if card, found := page.TwitterCard["card"]; found {
		fmt.Printf("Twitter Card Type: %s\n", card)
	}
	if creator, found := page.TwitterCard["creator"]; found {
		fmt.Printf("Twitter Creator: %s\n", creator)
	}

	// Newer fields: document language and text direction.
	if page.Language != nil {
		fmt.Printf("Language: %s\n", *page.Language)
	}
	if page.TextDirection != nil {
		fmt.Printf("Text Direction: %s\n", *page.TextDirection)
	}

	// Headers: gather the text of each header and print them on one line.
	if n := len(page.Headers); n > 0 {
		texts := make([]string, 0, n)
		for _, hdr := range page.Headers {
			texts = append(texts, hdr.Text)
		}
		fmt.Printf("Headers: %s\n", strings.Join(texts, ", "))
	}

	// Links and images: ranging over an empty list prints nothing.
	for _, ln := range page.Links {
		fmt.Printf("Link: %s (%s)\n", ln.Href, ln.Text)
	}
	for _, img := range page.Images {
		fmt.Printf("Image: %s\n", img.Src)
	}

	// Structured data: only the number of items is reported.
	if n := len(page.StructuredData); n > 0 {
		fmt.Printf("Structured data items: %d\n", n)
	}
}
```

View File

@@ -0,0 +1,37 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/v5"
)
func main() {
result, err := kreuzberg.ExtractFileSync("document.pdf", nil)
if err != nil {
log.Fatal(err)
}
if result.Metadata.Pages == nil || result.Metadata.Pages.Boundaries == nil {
return
}
contentBytes := []byte(result.Content)
for i, boundary := range result.Metadata.Pages.Boundaries {
if i >= 3 {
break
}
pageText := string(contentBytes[boundary.ByteStart:boundary.ByteEnd])
preview := pageText
if len(preview) > 100 {
preview = preview[:100]
}
fmt.Printf("Page %d:\n", boundary.PageNumber)
fmt.Printf(" Byte range: %d-%d\n", boundary.ByteStart, boundary.ByteEnd)
fmt.Printf(" Preview: %s...\n", preview)
}
}
```

View File

@@ -0,0 +1,29 @@
Package main
Import (
"fmt"
"Kreuzberg"
)
Func main() {
config := &kreuzberg.ExtractionConfig{
Pages: &kreuzberg.PageConfig{
ExtractPages: true,
},
}
result, err := kreuzberg.ExtractFileSync("document.pdf", config)
if err != nil {
panic(err)
}
if result.Pages != nil {
for _, page := range result.Pages {
fmt.Printf("Page %d:\n", page.PageNumber)
fmt.Printf(" Content: %d chars\n", len(page.Content))
fmt.Printf(" Tables: %d\n", len(page.Tables))
fmt.Printf(" Images: %d\n", len(page.Images))
}
}
}

View File

@@ -0,0 +1,28 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf and, for each table in the result, prints the
// number of rows, the table's Markdown rendering, and then every row of cells.
func main() {
	doc, err := kreuzberg.ExtractFileSync("document.pdf", nil)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	for _, tbl := range doc.Tables {
		rows := tbl.Cells

		fmt.Printf("Table with %d rows\n", len(rows))
		// Markdown holds the table already rendered as Markdown text.
		fmt.Println(tbl.Markdown)

		// Print the raw cells one row at a time.
		for _, cells := range rows {
			fmt.Println(cells)
		}
	}
}
```

View File

@@ -0,0 +1,39 @@
```go title="Go"
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main extracts document.pdf with chunking and embeddings configured, then
// prints the embedding length of every chunk that carries one.
func main() {
	// MaxChars and MaxOverlap are passed by pointer, so their values need
	// addressable variables.
	chunkSize, overlap := 512, 50

	embedding := kreuzberg.EmbeddingConfig{
		Model:     "balanced",
		Normalize: true,
	}
	cfg := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &chunkSize,
			MaxOverlap: &overlap,
			Embedding:  &embedding,
		},
	}

	doc, err := kreuzberg.ExtractFileSync("document.pdf", cfg)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	// Ranging over a nil Chunks list does nothing, so no outer guard is needed.
	for idx, chunk := range doc.Chunks {
		if chunk.Embedding == nil {
			continue // this chunk has no embedding attached
		}
		fmt.Printf("Chunk %d: %d dimensions\n", idx, len(chunk.Embedding))
		// Store in vector database
	}
}
```