# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef

# frozen_string_literal: true

require 'kreuzberg'
require 'json'
require 'spec_helper'

# Format-specific extraction examples: each one feeds a fixture document to
# Kreuzberg (either as raw bytes or by path, together with an explicit MIME
# type) and checks the extraction result.
#
# Fixture paths are relative, so they resolve against the process working
# directory at the time the spec runs.
RSpec.describe 'format_specific' do
  it 'format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync' do
    # binread: the fixture is a binary container, so it must be read without
    # text-mode newline/EOF translation (matters on Windows).
    content = File.binread("docx/fake.docx").bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')

    # Only a lower bound on the extracted text length is asserted.
    expect(result.content.length).to be >= 20
  end

  it 'format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync' do
    # binread: see the DOCX example — binary fixture, no text-mode translation.
    content = File.binread("hwpx/simple.hwpx").bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/haansofthwpx')

    expect(result.content.length).to be >= 20
    expect(result.content.to_s).to include('Hello from HWPX')
  end

  it 'format_pdf_text: Standalone PDF text extraction using extract_bytes_sync' do
    # binread: PDF is binary; text-mode reads could corrupt the byte stream.
    content = File.binread("pdf/fake_memo.pdf").bytes
    result = Kreuzberg.extract_bytes_sync(content, 'application/pdf')

    expect(result.content.length).to be >= 50
    # Passes when at least one of the candidate substrings is present.
    expect(['Mallori', 'May'].any? { |v| result.content.to_s.include?(v) }).to be(true)
  end

  it 'format_pptx: PPTX presentation extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync('pptx/simple.pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation')

    # Smoke check only: the call must return a non-nil result.
    expect(result).not_to be_nil
  end

  it 'format_xlsx: XLSX spreadsheet extraction using extract_file_sync' do
    result = Kreuzberg.extract_file_sync('xlsx/stanley_cups.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')

    # Smoke check only: the call must return a non-nil result.
    expect(result).not_to be_nil
  end
end