48 lines
1.8 KiB
Ruby
48 lines
1.8 KiB
Ruby
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# frozen_string_literal: true
|
||
|
|
|
||
|
|
require 'kreuzberg'
|
||
|
|
require 'json'
|
||
|
|
require 'spec_helper'
|
||
|
|
|
||
|
|
# Format-specific extraction checks for the Kreuzberg Ruby binding.
#
# Each example hands one fixture document to Kreuzberg, either as a byte array
# (extract_bytes_sync) or by path (extract_file_sync), and asserts on the result.
# Fixture paths are relative, so they resolve against the process working
# directory. NOTE(review): presumably spec_helper arranges that directory — confirm.
RSpec.describe 'format_specific' do
  it 'format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync' do
    # binread: the fixture is binary, so read it without text-mode translation.
    content = File.binread('docx/fake.docx').bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')
    expect(result.content.length).to be >= 20
  end

  it 'format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync' do
    # binread: the fixture is binary, so read it without text-mode translation.
    content = File.binread('hwpx/simple.hwpx').bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/haansofthwpx')
    expect(result.content.length).to be >= 20
    expect(result.content.to_s).to include('Hello from HWPX')
  end

  it 'format_pdf_text: Standalone PDF text extraction using extract_bytes_sync' do
    # binread: the fixture is binary, so read it without text-mode translation.
    content = File.binread('pdf/fake_memo.pdf').bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/pdf')
    expect(result.content.length).to be >= 50
    # Passes when at least one of the expected words appears in the extracted text.
    expect(%w[Mallori May].any? { |v| result.content.to_s.include?(v) }).to be(true)
  end

  it 'format_pptx: PPTX presentation extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync('pptx/simple.pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation')

    # Only checks that a result object came back; content is not inspected.
    expect(result).not_to be_nil
  end

  it 'format_xlsx: XLSX spreadsheet extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync('xlsx/stanley_cups.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')

    # Only checks that a result object came back; content is not inspected.
    expect(result).not_to be_nil
  end
end
|