Files
fil/e2e/ruby/spec/format_specific_spec.rb
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

48 lines
1.8 KiB
Ruby
Generated

# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# frozen_string_literal: true
require 'kreuzberg'
require 'json'
require 'spec_helper'
# Format-specific extraction checks: each example hands one fixture document
# to Kreuzberg and asserts on the returned result.
#
# Fixture paths are relative, so they resolve against the process working
# directory (presumably arranged by spec_helper — TODO confirm).
#
# The byte-based examples load fixtures with File.binread rather than
# File.read: DOCX, HWPX and PDF are binary formats, and a text-mode read on
# Windows converts CRLF to LF and treats 0x1A as end-of-file, which would
# corrupt or truncate the payload before it reaches the extractor.
#
# NOTE(review): the file header marks this spec as generated by alef; mirror
# the binread change in the generator so `alef generate` does not revert it.
RSpec.describe 'format_specific' do
  it 'format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync' do
    content = File.binread('docx/fake.docx').bytes
    result = Kreuzberg.extract_bytes_sync(
      content,
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    )
    expect(result.content.length).to be >= 20
  end

  it 'format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync' do
    content = File.binread('hwpx/simple.hwpx').bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/haansofthwpx')
    expect(result.content.length).to be >= 20
    expect(result.content.to_s).to include('Hello from HWPX')
  end

  it 'format_pdf_text: Standalone PDF text extraction using extract_bytes_sync' do
    content = File.binread('pdf/fake_memo.pdf').bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/pdf')
    expect(result.content.length).to be >= 50
    # Passes when the extracted text contains at least one of the expected tokens.
    expect(%w[Mallori May].any? { |v| result.content.to_s.include?(v) }).to be(true)
  end

  it 'format_pptx: PPTX presentation extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync(
      'pptx/simple.pptx',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    )
    expect(result).not_to be_nil
  end

  it 'format_xlsx: XLSX spreadsheet extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync(
      'xlsx/stanley_cups.xlsx',
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )
    expect(result).not_to be_nil
  end
end