This commit is contained in:
27
docs/snippets/ruby/plugins/pdf_metadata_extractor.md
Normal file
27
docs/snippets/ruby/plugins/pdf_metadata_extractor.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```ruby title="Ruby"
|
||||
require 'kreuzberg'
|
||||
|
||||
# Post-processor that numbers PDF extraction results in the order this
# instance sees them.
#
# Each result whose 'mime_type' is 'application/pdf' gets a 1-based
# 'pdf_order' entry written into its 'metadata' hash; every other result is
# passed through untouched.
#
# NOTE(review): assumes +result+ is Hash-like with String keys
# ('mime_type', 'metadata') — confirm against the post-processor contract.
class PdfMetadataExtractor
  def initialize
    # Number of PDF results processed so far by this instance.
    @count = 0
  end

  # Tags a PDF result with its processing order.
  #
  # @param result [Hash] extraction result; mutated in place for PDFs
  # @return [Hash] the same +result+ object that was passed in
  def call(result)
    return result unless result['mime_type'] == 'application/pdf'

    @count += 1
    # Create the metadata container only when it is missing (nil/false), so
    # existing metadata entries are preserved.
    result['metadata'] ||= {}
    result['metadata']['pdf_order'] = @count
    result
  end
end
|
||||
|
||||
# Register a single extractor instance under the name 'pdf_metadata'; the
# instance keeps its own running PDF counter between calls.
extractor = PdfMetadataExtractor.new
Kreuzberg.register_post_processor('pdf_metadata', extractor)

# Build an extraction config with the postprocessor option enabled.
# NOTE(review): presumably this is what makes registered post-processors
# run during extraction — confirm against the Kreuzberg configuration docs.
config = Kreuzberg::ExtractionConfig.new(
  postprocessor: { enabled: true }
)

# Extract the file synchronously and print the resulting metadata.
result = Kreuzberg.extract_file_sync('report.pdf', config: config)
puts "Metadata: #{result.metadata.inspect}"
|
||||
```
|
||||
Reference in New Issue
Block a user