Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
```ruby title="Ruby"
require 'json'
require 'net/http'
require 'kreuzberg'
# OCR backend that delegates text recognition to a remote HTTP service.
#
# The object exposes three methods: +name+, +supported_languages+ and
# +process_image+.
class CloudOcrBackend
  # URL the image bytes are POSTed to.
  ENDPOINT = 'https://api.example.com/ocr'

  # @return [String] identifier of this backend
  def name
    'cloud-ocr'
  end

  # @return [Array<String>] language codes this backend advertises
  def supported_languages
    %w[eng fra deu]
  end

  # Sends the image to the remote service and returns the recognised text.
  #
  # @param image_data [String] payload used verbatim as the request body
  #   (presumably raw image bytes; confirm against the caller)
  # @param language [String] requested language; accepted for interface
  #   compatibility but NOT forwarded to the remote service
  # @return [Hash] +{ content: <value of the "text" key in the JSON reply> }+
  # @raise [KeyError] if the OCR_API_KEY environment variable is not set
  # @raise [StandardError] if the service answers with a non-2xx status;
  #   network and JSON parsing errors propagate with their original class
  def process_image(image_data, language)
    uri = URI(ENDPOINT)
    request = Net::HTTP::Post.new(uri)
    # Fail fast on a missing key instead of sending an empty bearer token.
    request['Authorization'] = "Bearer #{ENV.fetch('OCR_API_KEY')}"
    request.body = image_data

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      http.request(request)
    end
    raise StandardError, response.message unless response.is_a?(Net::HTTPSuccess)

    { content: JSON.parse(response.body)['text'] }
  end
end
# Register an instance of the custom backend with Kreuzberg.
Kreuzberg.register_ocr_backend(CloudOcrBackend.new)

# Build an extraction config whose OCR section names the 'cloud-ocr' backend.
ocr_settings = Kreuzberg::OcrConfig.new(backend: 'cloud-ocr')
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)

# Run a synchronous extraction of doc.pdf with that config.
Kreuzberg.extract_file_sync('doc.pdf', config: config)
```

View File

@@ -0,0 +1,13 @@
```ruby title="Ruby"
require 'kreuzberg'
# Image-extraction options, built separately and then attached to the
# top-level extraction config.
image_settings = Kreuzberg::ImageExtractionConfig.new(
  extract_images: true,
  target_dpi: 200,
  max_image_dimension: 2048,
  # Set inject_placeholders to false to extract images without markdown references.
  inject_placeholders: true,
  auto_adjust_dpi: true
)
config = Kreuzberg::ExtractionConfig.new(images: image_settings)
```

View File

@@ -0,0 +1,17 @@
```ruby title="Ruby"
require 'kreuzberg'
# Assemble the nested configuration from the innermost object outwards:
# preprocessing -> Tesseract -> OCR -> extraction.
preprocessing = Kreuzberg::ImagePreprocessingConfig.new(
  target_dpi: 300,
  denoise: true,
  deskew: true,
  contrast_enhance: true,
  binarization_method: 'otsu'
)
tesseract = Kreuzberg::TesseractConfig.new(preprocessing: preprocessing)
ocr_settings = Kreuzberg::OcrConfig.new(tesseract_config: tesseract)
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)
```

View File

@@ -0,0 +1,14 @@
```ruby title="Ruby"
require 'kreuzberg'
# OCR section: 'easyocr' backend with language 'eng'.
ocr_settings = Kreuzberg::OcrConfig.new(backend: 'easyocr', language: 'eng')
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)

result = Kreuzberg.extract_file_sync('scanned.pdf', config: config)

# Print a preview (indices 0 through 100 of the content) and the full length.
puts result.content[0..100]
puts "Total length: #{result.content.length}"
```

View File

@@ -0,0 +1,22 @@
```ruby title="Ruby"
require 'kreuzberg'
# OCR section: 'paddleocr' backend with language 'eng'.
ocr_settings = Kreuzberg::OcrConfig.new(backend: 'paddleocr', language: 'eng')
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)

result = Kreuzberg.extract_file_sync('scanned.pdf', config: config)

# ocr_elements may be nil, hence the safe-navigation call.
result.ocr_elements&.each do |element|
  puts "Text: #{element.text}"
  puts "Confidence: #{format('%.2f', element.confidence.recognition)}"
  puts "Geometry: #{element.geometry}"
  # Rotation is only printed when the element carries one.
  puts "Rotation: #{element.rotation.angle}°" if element.rotation
  puts
end
```

View File

@@ -0,0 +1,12 @@
```ruby title="Ruby"
require 'kreuzberg'
# Extraction config with the OCR section built inline:
# 'tesseract' backend, language 'eng'.
config = Kreuzberg::ExtractionConfig.new(
  ocr: Kreuzberg::OcrConfig.new(backend: 'tesseract', language: 'eng')
)

# Extract synchronously and print the resulting content.
result = Kreuzberg.extract_file_sync('scanned.pdf', config: config)
puts result.content
```

View File

@@ -0,0 +1,11 @@
```ruby title="Ruby"
require 'kreuzberg'
# 'tesseract' backend with force_ocr enabled on the extraction config.
ocr_settings = Kreuzberg::OcrConfig.new(backend: 'tesseract')
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings, force_ocr: true)

# Extract synchronously and print the resulting content.
result = Kreuzberg.extract_file_sync('document.pdf', config: config)
puts result.content
```

View File

@@ -0,0 +1,13 @@
```ruby title="Ruby"
require 'kreuzberg'
# 'tesseract' backend with several languages joined by '+' in one string.
ocr_settings = Kreuzberg::OcrConfig.new(
  backend: 'tesseract',
  language: 'eng+deu+fra'
)
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)

# Extract synchronously and print the resulting content.
result = Kreuzberg.extract_file_sync('multilingual.pdf', config: config)
puts result.content
```

View File

@@ -0,0 +1,15 @@
```ruby title="Ruby"
require 'kreuzberg'
# OCR section: 'paddleocr' backend with language 'eng'.
ocr_settings = Kreuzberg::OcrConfig.new(
  backend: 'paddleocr',
  language: 'eng'
  # model_tier: 'server' # for max accuracy
)
config = Kreuzberg::ExtractionConfig.new(ocr: ocr_settings)

result = Kreuzberg.extract_file_sync('scanned.pdf', config: config)

# Print a preview (indices 0 through 100 of the content) and the full length.
puts result.content[0..100]
puts "Total length: #{result.content.length}"
```