Files
fil/docs/snippets/go/config/tesseract_config.md

38 lines
829 B
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```go title="Go"
package main
import (
"log"
"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)
// main runs a synchronous extraction of "document.pdf" using the "tesseract"
// OCR backend with an explicit TesseractConfig, then logs the length of the
// extracted content. Any extraction error terminates the program via
// log.Fatalf.
func main() {
	// Psm, Oem and EnableTableDetection are assigned by address below, so
	// their values have to live in addressable locals first.
	segMode, engineMode := int32(6), int32(1)
	detectTables := true

	// Characters handed to TesseditCharWhitelist: ASCII letters, digits,
	// the space character and a few punctuation marks.
	allowedChars := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?"

	// Assemble the configuration from the innermost struct outwards.
	tesseractCfg := &kreuzberg.TesseractConfig{
		Psm:                   &segMode,
		Oem:                   &engineMode,
		MinConfidence:         0.8,
		EnableTableDetection:  &detectTables,
		TesseditCharWhitelist: allowedChars,
	}
	ocrCfg := &kreuzberg.OcrConfig{
		Backend:         "tesseract",
		Language:        "eng+fra+deu",
		TesseractConfig: tesseractCfg,
	}
	extractionCfg := kreuzberg.ExtractionConfig{Ocr: ocrCfg}

	extracted, extractErr := kreuzberg.ExtractFileSync("document.pdf", nil, extractionCfg)
	if extractErr != nil {
		log.Fatalf("extract failed: %v", extractErr)
	}

	log.Println("content length:", len(extracted.Content))
}
```