Files
fil/docs/snippets/ruby/config/element_based_output.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

32 lines
877 B
Markdown

```ruby title="Element-Based Output (Ruby)"
require 'kreuzberg'

# Build an extraction config that requests the 'element_based' output format.
config = Kreuzberg::ExtractionConfig.new(output_format: 'element_based')

# Synchronously extract 'document.pdf' using that config.
result = Kreuzberg.extract_file_sync('document.pdf', config: config)

# Print a short report for each extracted element, separated by '---'.
result.elements.each do |item|
  puts "Type: #{item.element_type}"
  # Only the leading slice of the text (indices 0...100) is printed.
  puts "Text: #{item.text[0...100]}"

  meta = item.metadata

  # The page number is optional: the line is skipped when it is nil/false.
  page = meta.page_number
  puts "Page: #{page}" if page

  # Coordinates are optional too; when present, print the two corners.
  box = meta.coordinates
  puts "Coords: (#{box.left}, #{box.top}) - (#{box.right}, #{box.bottom})" if box

  puts "---"
end

# Keep only elements whose type is 'title' and print each one with its level.
result.elements
      .select { |candidate| candidate.element_type == 'title' }
      .each do |heading|
  # Fall back to 'unknown' when the 'level' entry is missing.
  depth = heading.metadata.additional['level'] || 'unknown'
  puts "[#{depth}] #{heading.text}"
end
```