This commit is contained in:
24
docs/snippets/ruby/api/batch_extract_bytes_sync.md
Normal file
24
docs/snippets/ruby/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
items = [
|
||||
Kreuzberg::BatchBytesItem.new(
|
||||
content: File.binread('doc1.pdf'), # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
mime_type: 'application/pdf'
|
||||
),
|
||||
Kreuzberg::BatchBytesItem.new(
|
||||
content: File.binread('doc2.docx'), # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
mime_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
||||
),
|
||||
Kreuzberg::BatchBytesItem.new(
|
||||
content: File.binread('doc3.xlsx'), # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
mime_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
)
|
||||
]
|
||||
|
||||
config = Kreuzberg::ExtractionConfig.new(use_cache: true)
|
||||
|
||||
results = Kreuzberg.batch_extract_bytes_sync(items, config: config)
|
||||
|
||||
results.each { |result| puts "Extracted: #{result.content.length} chars" }
|
||||
```
|
||||
20
docs/snippets/ruby/api/batch_extract_files_sync.md
Normal file
20
docs/snippets/ruby/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
items = [
|
||||
Kreuzberg::BatchFileItem.new(path: 'doc1.pdf'),
|
||||
Kreuzberg::BatchFileItem.new(path: 'doc2.docx'),
|
||||
Kreuzberg::BatchFileItem.new(path: 'doc3.pptx')
|
||||
]
|
||||
|
||||
config = Kreuzberg::ExtractionConfig.new(use_cache: true)
|
||||
|
||||
results = Kreuzberg.batch_extract_files_sync(items, config: config)
|
||||
|
||||
results.each_with_index do |result, idx|
|
||||
puts "Document #{idx + 1}:"
|
||||
puts " Extracted: #{result.content.length} characters"
|
||||
puts " Quality: #{result.quality_score}"
|
||||
puts " MIME: #{result.mime_type}"
|
||||
end
|
||||
```
|
||||
36
docs/snippets/ruby/api/client_chunk_text.md
Normal file
36
docs/snippets/ruby/api/client_chunk_text.md
Normal file
@@ -0,0 +1,36 @@
|
||||
```ruby title="Ruby"
|
||||
require 'net/http'
|
||||
require 'json'
|
||||
|
||||
uri = URI('http://localhost:8000/chunk')
|
||||
http = Net::HTTP.new(uri.host, uri.port)
|
||||
|
||||
request = Net::HTTP::Post.new(uri)
|
||||
request['Content-Type'] = 'application/json'
|
||||
|
||||
payload = {
|
||||
text: 'Your long text content here...',
|
||||
chunker_type: 'text',
|
||||
config: {
|
||||
max_characters: 1000,
|
||||
overlap: 50,
|
||||
trim: true
|
||||
}
|
||||
}
|
||||
|
||||
request.body = JSON.generate(payload)
|
||||
|
||||
response = http.request(request)
|
||||
|
||||
if response.is_a?(Net::HTTPSuccess)
|
||||
result = JSON.parse(response.body)
|
||||
puts "Created #{result['chunk_count']} chunks"
|
||||
|
||||
result['chunks'].each do |chunk|
|
||||
preview = chunk['content'][0..49]
|
||||
puts "Chunk #{chunk['chunk_index']}: #{preview}..."
|
||||
end
|
||||
else
|
||||
puts "Error: #{response.code} #{response.message}"
|
||||
end
|
||||
```
|
||||
24
docs/snippets/ruby/api/client_extract_single_file.md
Normal file
24
docs/snippets/ruby/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```ruby title="Ruby"
|
||||
require 'net/http'
|
||||
require 'json'
|
||||
|
||||
uri = URI('http://localhost:8000/extract')
|
||||
http = Net::HTTP.new(uri.host, uri.port)
|
||||
|
||||
request = Net::HTTP::Post.new(uri)
|
||||
|
||||
File.open('document.pdf', 'rb') do |file|
|
||||
body = file.read
|
||||
request['Content-Type'] = 'application/octet-stream'
|
||||
request.body = body
|
||||
|
||||
response = http.request(request)
|
||||
|
||||
if response.is_a?(Net::HTTPSuccess)
|
||||
data = JSON.parse(response.body)
|
||||
puts JSON.pretty_generate(data)
|
||||
else
|
||||
puts "Error: #{response.code} #{response.message}"
|
||||
end
|
||||
end
|
||||
```
|
||||
45
docs/snippets/ruby/api/combining_all_features.md
Normal file
45
docs/snippets/ruby/api/combining_all_features.md
Normal file
@@ -0,0 +1,45 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
config = Kreuzberg::ExtractionConfig.new(
|
||||
enable_quality_processing: true,
|
||||
|
||||
language_detection: Kreuzberg::LanguageDetectionConfig.new(
|
||||
enabled: true,
|
||||
detect_multiple: true,
|
||||
min_confidence: 0.8
|
||||
),
|
||||
|
||||
token_reduction: Kreuzberg::TokenReductionOptions.new(
|
||||
mode: 'moderate',
|
||||
preserve_important_words: true
|
||||
),
|
||||
|
||||
chunking: Kreuzberg::ChunkingConfig.new(
|
||||
max_characters: 512,
|
||||
overlap: 50,
|
||||
embedding: Kreuzberg::EmbeddingConfig.new(
|
||||
model: { type: 'preset', name: 'text-embedding-all-minilm-l6-v2' }
|
||||
)
|
||||
),
|
||||
|
||||
keywords: Kreuzberg::KeywordConfig.new(
|
||||
algorithm: 'yake',
|
||||
max_keywords: 10
|
||||
)
|
||||
)
|
||||
|
||||
result = Kreuzberg.extract_file_sync('document.pdf', config: config)
|
||||
|
||||
puts "Content length: #{result.content.length} characters"
|
||||
puts "Quality score: #{result.quality_score}"
|
||||
puts "Detected languages: #{result.detected_languages&.join(', ')}"
|
||||
puts "Total chunks: #{result.chunks&.length || 0}"
|
||||
puts "Keywords: #{result.extracted_keywords&.map(&:text)&.join(', ')}"
|
||||
|
||||
if result.chunks && result.chunks.length > 0
|
||||
first_chunk = result.chunks[0]
|
||||
puts "First chunk size: #{first_chunk.content.length} chars"
|
||||
puts "Embedding dims: #{first_chunk.embedding&.length || 0}"
|
||||
end
|
||||
```
|
||||
21
docs/snippets/ruby/api/error_handling.md
Normal file
21
docs/snippets/ruby/api/error_handling.md
Normal file
@@ -0,0 +1,21 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
begin
|
||||
result = Kreuzberg.extract_file_sync('missing.pdf')
|
||||
puts result.content
|
||||
rescue RuntimeError => e
|
||||
# All extraction errors are raised as RuntimeError
|
||||
# Check error message for specific error details
|
||||
case e.message
|
||||
when /validation/i
|
||||
puts "Validation error: #{e.message}"
|
||||
when %r{\bi/?o\b|not found}i # word boundaries: a bare /io/ also matches "extraction", "configuration", ...
|
||||
puts "IO error: #{e.message}"
|
||||
raise
|
||||
else
|
||||
puts "Extraction failed: #{e.message}"
|
||||
raise
|
||||
end
|
||||
end
|
||||
```
|
||||
24
docs/snippets/ruby/api/error_handling_extract.md
Normal file
24
docs/snippets/ruby/api/error_handling_extract.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
begin
|
||||
pdf_bytes = File.binread('document.pdf') # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
config = Kreuzberg::ExtractionConfig.new
|
||||
|
||||
result = Kreuzberg.extract_bytes_sync(pdf_bytes, 'application/pdf', config: config)
|
||||
puts "Extracted #{result.content.length} characters"
|
||||
rescue RuntimeError => e
|
||||
# All extraction errors are raised as RuntimeError
|
||||
# Check error message for details
|
||||
case e.message
|
||||
when /parse|parsing/i
|
||||
puts "Failed to parse document: #{e.message}"
|
||||
when /ocr/i
|
||||
puts "OCR processing failed: #{e.message}"
|
||||
when /validation|invalid/i
|
||||
puts "Invalid configuration: #{e.message}"
|
||||
else
|
||||
puts "Extraction error: #{e.message}"
|
||||
end
|
||||
end
|
||||
```
|
||||
18
docs/snippets/ruby/api/extract_bytes_async.md
Normal file
18
docs/snippets/ruby/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
pdf_bytes = File.binread('document.pdf') # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
config = Kreuzberg::ExtractionConfig.new(
|
||||
enable_quality_processing: true
|
||||
)
|
||||
|
||||
result = Kreuzberg.extract_bytes_async(
|
||||
pdf_bytes,
|
||||
'application/pdf',
|
||||
config: config
|
||||
)
|
||||
|
||||
puts "Async bytes extraction done"
|
||||
puts "Content preview: #{result.content[0..100]}"
|
||||
puts "Quality score: #{result.quality_score}"
|
||||
```
|
||||
17
docs/snippets/ruby/api/extract_bytes_sync.md
Normal file
17
docs/snippets/ruby/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
pdf_bytes = File.binread('document.pdf') # binread: binary-safe (no newline translation, ASCII-8BIT encoding)
|
||||
config = Kreuzberg::ExtractionConfig.new(
|
||||
use_cache: true
|
||||
)
|
||||
|
||||
result = Kreuzberg.extract_bytes_sync(
|
||||
pdf_bytes,
|
||||
'application/pdf',
|
||||
config: config
|
||||
)
|
||||
|
||||
puts "Extracted #{result.content.length} characters"
|
||||
puts "Detected MIME: #{result.mime_type}"
|
||||
```
|
||||
14
docs/snippets/ruby/api/extract_file_async.md
Normal file
14
docs/snippets/ruby/api/extract_file_async.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
config = Kreuzberg::ExtractionConfig.new(
|
||||
use_cache: false,
|
||||
enable_quality_processing: true
|
||||
)
|
||||
|
||||
result = Kreuzberg.extract_file_async('document.pdf', config: config)
|
||||
|
||||
puts "Async extraction complete"
|
||||
puts "Extracted #{result.content.length} characters"
|
||||
puts "Quality: #{result.quality_score}"
|
||||
```
|
||||
14
docs/snippets/ruby/api/extract_file_sync.md
Normal file
14
docs/snippets/ruby/api/extract_file_sync.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
config = Kreuzberg::ExtractionConfig.new(
|
||||
use_cache: true,
|
||||
enable_quality_processing: true
|
||||
)
|
||||
|
||||
result = Kreuzberg.extract_file_sync('document.pdf', config: config)
|
||||
|
||||
puts "Extracted #{result.content.length} characters"
|
||||
puts "MIME type: #{result.mime_type}"
|
||||
puts "Quality score: #{result.quality_score}"
|
||||
```
|
||||
Reference in New Issue
Block a user