Files
fil/docs/snippets/go/config/tesseract_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

829 B

package main

import (
	"log"

	"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)

// main runs OCR-backed extraction on "document.pdf" using the tesseract
// backend with a customised Tesseract configuration, then logs the length of
// the extracted content. Any extraction error terminates the program.
func main() {
	// Psm, Oem and EnableTableDetection are set through pointers, so each
	// value needs an addressable local.
	// NOTE(review): 6 and 1 presumably correspond to Tesseract's --psm and
	// --oem settings; confirm against the Tesseract documentation.
	var (
		pageSegMode  int32 = 6
		engineMode   int32 = 1
		detectTables       = true
	)

	// Characters the recognizer is restricted to: ASCII letters, digits,
	// space and a handful of punctuation marks.
	const allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?"

	// Assemble the configuration from the innermost struct outwards.
	tess := &kreuzberg.TesseractConfig{
		Psm:                   &pageSegMode,
		Oem:                   &engineMode,
		MinConfidence:         0.8,
		EnableTableDetection:  &detectTables,
		TesseditCharWhitelist: allowedChars,
	}

	ocr := &kreuzberg.OcrConfig{
		Backend:         "tesseract",
		Language:        "eng+fra+deu",
		TesseractConfig: tess,
	}

	cfg := kreuzberg.ExtractionConfig{Ocr: ocr}

	extracted, err := kreuzberg.ExtractFileSync("document.pdf", nil, cfg)
	if err != nil {
		log.Fatalf("extract failed: %v", err)
	}

	log.Println("content length:", len(extracted.Content))
}