Files
fil/docs/snippets/ruby/config/element_based_output.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

877 B

require 'kreuzberg'

# Build an extraction config that requests the 'element_based' output format.
config = Kreuzberg::ExtractionConfig.new(
  output_format: 'element_based'
)

# Run the extraction on the input document with that config; `result` is
# consumed by the statements that follow.
source_path = 'document.pdf'
result = Kreuzberg.extract_file_sync(source_path, config: config)

# Print a short report for every extracted element: its type, the first
# 100 characters of its text, and -- only when present -- the page number
# and the bounding coordinates. Each report ends with a '---' separator.
result.elements.each do |el|
  puts "Type: #{el.element_type}"
  puts "Text: #{el.text.slice(0...100)}"

  meta = el.metadata

  # Page number is optional; skip the line when it is nil/false.
  page = meta.page_number
  puts "Page: #{page}" if page

  # Coordinates are optional as well; printed as (left, top) - (right, bottom).
  box = meta.coordinates
  puts "Coords: (#{box.left}, #{box.top}) - (#{box.right}, #{box.bottom})" if box

  puts "---"
end

# Keep only the elements whose type is 'title'.
titles = result.elements.select do |candidate|
  candidate.element_type == 'title'
end

# Print each title as "[level] text"; the level is read from the element's
# additional metadata and shown as 'unknown' when that entry is missing.
titles.each do |heading|
  extras = heading.metadata.additional
  puts "[#{extras['level'] || 'unknown'}] #{heading.text}"
end