Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

60
packages/ruby/.rubocop.yml generated Normal file
View File

@@ -0,0 +1,60 @@
# RuboCop configuration for packages/ruby: plugins, global settings, then per-cop overrides.
plugins:
- rubocop-performance
- rubocop-rspec
AllCops:
TargetRubyVersion: 3.2
NewCops: enable
SuggestExtensions: false
# Paths that no cop inspects.
Exclude:
- "vendor/**/*"
- "tmp/**/*"
- "lib/**/*.bundle"
- "lib/**/*.rb"
- "ext/**/*"
Style/FrozenStringLiteralComment:
Enabled: true
EnforcedStyle: always
Style/StringLiterals:
Enabled: true
EnforcedStyle: double_quotes
Style/StringLiteralsInInterpolation:
Enabled: true
EnforcedStyle: double_quotes
Style/Documentation:
Enabled: false
Layout/LineLength:
Max: 120
# Lines consisting only of a comment are exempt from the length limit.
AllowedPatterns:
- '\A\s*#'
Exclude:
- "spec/**/*"
Metrics/MethodLength:
Max: 20
Exclude:
- "spec/**/*"
Metrics/BlockLength:
Enabled: true
Max: 350
# Comment lines do not count toward the block length.
CountComments: false
Metrics/AbcSize:
Max: 20
Exclude:
- "spec/**/*"
# Relaxed limits for the RSpec cops.
RSpec/ExampleLength:
Max: 50
RSpec/MultipleExpectations:
Max: 25
RSpec/NestedGroups:
Max: 6

15
packages/ruby/Gemfile generated Normal file
View File

@@ -0,0 +1,15 @@
# frozen_string_literal: true

# Bundler manifest for working on the kreuzberg gem.
source "https://rubygems.org"

# Pull in the dependencies declared in the gemspec.
gemspec

# Development-only tooling, each entry tagged with the :development group.
gem "rake-compiler", "~> 1.2", group: :development
gem "rb_sys", "~> 0.9", group: :development
gem "rspec", "~> 3.0", group: :development
gem "rubocop", "~> 1.0", group: :development
gem "rubocop-performance", "~> 1.0", group: :development
gem "rubocop-rspec", "~> 3.0", group: :development
gem "steep", "~> 1.0", group: :development

232
packages/ruby/Gemfile.lock generated Normal file
View File

@@ -0,0 +1,232 @@
PATH
remote: .
specs:
kreuzberg (5.0.0.pre.rc.3)
rb_sys (~> 0.9)
sorbet-runtime (~> 0.5)
GEM
remote: https://rubygems.org/
specs:
activesupport (8.1.3)
base64
bigdecimal
concurrent-ruby (~> 1.0, >= 1.3.1)
connection_pool (>= 2.2.5)
drb
i18n (>= 1.6, < 2)
json
logger (>= 1.4.2)
minitest (>= 5.1)
securerandom (>= 0.3)
tzinfo (~> 2.0, >= 2.0.5)
uri (>= 0.13.1)
ast (2.4.3)
base64 (0.3.0)
bigdecimal (4.1.2)
concurrent-ruby (1.3.6)
connection_pool (3.0.2)
csv (3.3.5)
diff-lcs (1.6.2)
drb (2.2.3)
ffi (1.17.4)
ffi (1.17.4-aarch64-linux-gnu)
ffi (1.17.4-aarch64-linux-musl)
ffi (1.17.4-arm-linux-gnu)
ffi (1.17.4-arm-linux-musl)
ffi (1.17.4-arm64-darwin)
ffi (1.17.4-x86-linux-gnu)
ffi (1.17.4-x86-linux-musl)
ffi (1.17.4-x86_64-darwin)
ffi (1.17.4-x86_64-linux-gnu)
ffi (1.17.4-x86_64-linux-musl)
fileutils (1.8.0)
i18n (1.14.8)
concurrent-ruby (~> 1.0)
json (2.19.7)
language_server-protocol (3.17.0.5)
lint_roller (1.1.0)
listen (3.10.0)
logger
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
logger (1.7.0)
minitest (6.0.6)
drb (~> 2.0)
prism (~> 1.5)
mutex_m (0.3.0)
parallel (2.1.0)
parser (3.3.11.1)
ast (~> 2.4.1)
racc
prism (1.9.0)
racc (1.8.1)
rainbow (3.1.1)
rake (13.4.2)
rake-compiler (1.3.1)
rake
rake-compiler-dock (1.12.0)
rb-fsevent (0.11.2)
rb-inotify (0.11.1)
ffi (~> 1.0)
rb_sys (0.9.128)
rake-compiler-dock (= 1.12.0)
rbs (3.10.4)
logger
tsort
regexp_parser (2.12.0)
rspec (3.13.2)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.6)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.5)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.8)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.7)
rubocop (1.86.2)
json (~> 2.3)
language_server-protocol (~> 3.17.0.2)
lint_roller (~> 1.1.0)
parallel (>= 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 2.9.3, < 3.0)
rubocop-ast (>= 1.49.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 4.0)
rubocop-ast (1.49.1)
parser (>= 3.3.7.2)
prism (~> 1.7)
rubocop-performance (1.26.1)
lint_roller (~> 1.1)
rubocop (>= 1.75.0, < 2.0)
rubocop-ast (>= 1.47.1, < 2.0)
rubocop-rspec (3.9.0)
lint_roller (~> 1.1)
rubocop (~> 1.81)
ruby-progressbar (1.13.0)
securerandom (0.4.1)
sorbet-runtime (0.6.13261)
steep (1.10.0)
activesupport (>= 5.1)
concurrent-ruby (>= 1.1.10)
csv (>= 3.0.9)
fileutils (>= 1.1.0)
json (>= 2.1.0)
language_server-protocol (>= 3.17.0.4, < 4.0)
listen (~> 3.0)
logger (>= 1.3.0)
mutex_m (>= 0.3.0)
parser (>= 3.1)
rainbow (>= 2.2.2, < 4.0)
rbs (~> 3.9)
securerandom (>= 0.1)
strscan (>= 1.0.0)
terminal-table (>= 2, < 5)
uri (>= 0.12.0)
strscan (3.1.8)
terminal-table (4.0.0)
unicode-display_width (>= 1.1.1, < 4)
tsort (0.2.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unicode-display_width (3.2.0)
unicode-emoji (~> 4.1)
unicode-emoji (4.2.0)
uri (1.1.1)
PLATFORMS
aarch64-linux-gnu
aarch64-linux-musl
arm-linux-gnu
arm-linux-musl
arm64-darwin
ruby
x86-linux-gnu
x86-linux-musl
x86_64-darwin
x86_64-linux-gnu
x86_64-linux-musl
DEPENDENCIES
kreuzberg!
rake-compiler (~> 1.2)
rb_sys (~> 0.9)
rspec (~> 3.0)
rubocop (~> 1.0)
rubocop-performance (~> 1.0)
rubocop-rspec (~> 3.0)
steep (~> 1.0)
CHECKSUMS
activesupport (8.1.3) sha256=21a5e0dfbd4c3ddd9e1317ec6a4d782fa226e7867dc70b0743acda81a1dca20e
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
drb (2.2.3) sha256=0b00d6fdb50995fe4a45dea13663493c841112e4068656854646f418fda13373
ffi (1.17.4) sha256=bcd1642e06f0d16fc9e09ac6d49c3a7298b9789bcb58127302f934e437d60acf
ffi (1.17.4-aarch64-linux-gnu) sha256=b208f06f91ffd8f5e1193da3cae3d2ccfc27fc36fba577baf698d26d91c080df
ffi (1.17.4-aarch64-linux-musl) sha256=9286b7a615f2676245283aef0a0a3b475ae3aae2bb5448baace630bb77b91f39
ffi (1.17.4-arm-linux-gnu) sha256=d6dbddf7cb77bf955411af5f187a65b8cd378cb003c15c05697f5feee1cb1564
ffi (1.17.4-arm-linux-musl) sha256=9d4838ded0465bef6e2426935f6bcc93134b6616785a84ffd2a3d82bc3cf6f95
ffi (1.17.4-arm64-darwin) sha256=19071aaf1419251b0a46852abf960e77330a3b334d13a4ab51d58b31a937001b
ffi (1.17.4-x86-linux-gnu) sha256=38e150df5f4ca555e25beca4090823ae09657bceded154e3c52f8631c1ed72cf
ffi (1.17.4-x86-linux-musl) sha256=fbeec0fc7c795bcf86f623bb18d31ea1820f7bd580e1703a3d3740d527437809
ffi (1.17.4-x86_64-darwin) sha256=aa70390523cf3235096cf64962b709b4cfbd5c082a2cb2ae714eb0fe2ccda496
ffi (1.17.4-x86_64-linux-gnu) sha256=9d3db14c2eae074b382fa9c083fe95aec6e0a1451da249eab096c34002bc752d
ffi (1.17.4-x86_64-linux-musl) sha256=3fdf9888483de005f8ef8d1cf2d3b20d86626af206cbf780f6a6a12439a9c49e
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
json (2.19.7) sha256=fe432c8639f6efff69f9d73b518a3705d9581ab93156f981ea72806e1e5bcc3e
kreuzberg (5.0.0.pre.rc.3)
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
minitest (6.0.6) sha256=153ea36d1d987a62942382b61075745042a2b3123b1cd48f4c3675af9cc7d6f1
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
parallel (2.1.0) sha256=b35258865c2e31134c5ecb708beaaf6772adf9d5efae28e93e99260877b09356
parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
rake (13.4.2) sha256=cb825b2bd5f1f8e91ca37bddb4b9aaf345551b4731da62949be002fa89283701
rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
rake-compiler-dock (1.12.0) sha256=f13205c2738f3d2053afcd03491a9e4541b22a59a0bfc53fc8bc883bd8188023
rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
rb_sys (0.9.128) sha256=9ab81f4d6d4e1895de18762232362d1264475aa7035756b50441e442130538fd
rbs (3.10.4) sha256=b17d7c4be4bb31a11a3b529830f0aa206a807ca42f2e7921a3027dfc6b7e5ce8
regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
rubocop (1.86.2) sha256=bb2e97f635eda42c448f2588f4a6ff78f221b8bdfdf65b1e9b07fbd57521b45d
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
sorbet-runtime (0.6.13261) sha256=880c1575676b9f3ddfd42328ea5dd13739e24186c35bac63adad41589ee3c763
steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
strscan (3.1.8) sha256=aae2db611a225559f21ffbb71765c9a4e60fd262534a9ea84f4f11c7f32f679e
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
tsort (0.2.0) sha256=9650a793f6859a43b6641671278f79cfead60ac714148aabe4e3f0060480089f
tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6
BUNDLED WITH
4.0.7

93
packages/ruby/LICENSE generated Normal file
View File

@@ -0,0 +1,93 @@
Elastic License 2.0 (ELv2)
Copyright 2025-2026 Kreuzberg, Inc.
Acceptance
By using the software, you agree to all of the terms and conditions below.
Copyright License
The licensor grants you a non-exclusive, royalty-free, worldwide,
non-sublicensable, non-transferable license to use, copy, distribute, make
available, and prepare derivative works of the software, in each case subject to
the limitations and conditions below.
Limitations
You may not provide the software to third parties as a hosted or managed
service, where the service provides users with access to any substantial set of
the features or functionality of the software.
You may not move, change, disable, or circumvent the license key functionality
in the software, and you may not remove or obscure any functionality in the
software that is protected by the license key.
You may not alter, remove, or obscure any licensing, copyright, or other notices
of the licensor in the software. Any use of the licensor's trademarks is subject
to applicable law.
Patents
The licensor grants you a license, under any patent claims the licensor can
license, or becomes able to license, to make, have made, use, sell, offer for
sale, import and have imported the software, in each case subject to the
limitations and conditions in this license. This license does not cover any
patent claims that you cause to be infringed by modifications or additions to the
software. If you or your company make any written claim that the software
infringes or contributes to infringement of any patent, your patent license for
the software granted under these terms ends immediately. If your company makes
such a claim, your patent license ends immediately for work on behalf of your
company.
Notices
You must ensure that anyone who gets a copy of any part of the software from you
also gets a copy of these terms.
If you modify the software, you must include in any modified copies of the
software prominent notices stating that you have modified the software.
No Other Rights
These terms do not imply any licenses other than those expressly granted in
these terms.
Termination
If you use the software in violation of these terms, such use is not licensed,
and your licenses will automatically terminate. If the licensor provides you with
a notice of your violation, and you cease all violation of this license no later
than 30 days after you receive that notice, your licenses will be reinstated
retroactively. However, if you violate these terms after such reinstatement, any
additional violation of these terms will cause your licenses to terminate
automatically and permanently.
No Liability
As far as the law allows, the software comes as is, without any warranty or
condition, and the licensor will not be liable to you for any damages arising out
of these terms or the use or nature of the software, under any kind of legal
claim.
Definitions
The licensor is the entity offering these terms, and the software is the
software the licensor makes available under these terms, including any portion
of it.
you refers to the individual or entity agreeing to these terms.
your company is any legal entity, sole proprietorship, or other kind of
organization that you work for, plus all organizations that have control over,
are under the control of, or are under common control with that organization.
control means ownership of substantially all the assets of an entity, or the
power to direct its management and policies by vote, contract, or otherwise.
Control can be direct or indirect.
your licenses are all the licenses granted to you for the software under these
terms.
use means anything you do with the software requiring one of your licenses.
trademark means trademarks, service marks, and similar rights.

467
packages/ruby/README.md generated Normal file
View File

@@ -0,0 +1,467 @@
# Kreuzberg for Ruby
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
<a href="https://github.com/kreuzberg-dev/alef">
<img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings">
</a>
<!-- Language Bindings -->
<a href="https://crates.io/crates/kreuzberg">
<img src="https://img.shields.io/crates/v/kreuzberg?label=Rust&color=007ec6" alt="Rust">
</a>
<a href="https://pypi.org/project/kreuzberg/">
<img src="https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6" alt="Python">
</a>
<a href="https://www.npmjs.com/package/@kreuzberg/node">
<img src="https://img.shields.io/npm/v/@kreuzberg/node?label=Node.js&color=007ec6" alt="Node.js">
</a>
<a href="https://www.npmjs.com/package/@kreuzberg/wasm">
<img src="https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6" alt="WASM">
</a>
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg">
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/go/v5">
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v5*" alt="Go">
</a>
<a href="https://www.nuget.org/packages/Kreuzberg/">
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
</a>
<a href="https://packagist.org/packages/kreuzberg/kreuzberg">
<img src="https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6" alt="PHP">
</a>
<a href="https://rubygems.org/gems/kreuzberg">
<img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
</a>
<a href="https://hex.pm/packages/kreuzberg">
<img src="https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6" alt="Elixir">
</a>
<a href="https://kreuzberg-dev.r-universe.dev/kreuzberg">
<img src="https://img.shields.io/badge/R-kreuzberg-007ec6" alt="R">
</a>
<a href="https://pub.dev/packages/kreuzberg">
<img src="https://img.shields.io/pub/v/kreuzberg?label=Dart&color=007ec6" alt="Dart">
</a>
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg-android">
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg-android?label=Kotlin&color=007ec6" alt="Kotlin">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/swift">
<img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/zig">
<img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
<img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
<img src="https://img.shields.io/badge/Docker-ghcr.io-007ec6?logo=docker&logoColor=white" alt="Docker">
</a>
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/charts%2Fkreuzberg">
<img src="https://img.shields.io/badge/Helm-ghcr.io-007ec6?logo=helm&logoColor=white" alt="Helm">
</a>
<!-- Project Info -->
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
<img src="https://img.shields.io/badge/License-Elastic--2.0-007ec6" alt="License">
</a>
<a href="https://docs.kreuzberg.dev">
<img src="https://img.shields.io/badge/Docs-kreuzberg-007ec6" alt="Documentation">
</a>
<a href="https://huggingface.co/Kreuzberg">
<img src="https://img.shields.io/badge/Hugging%20Face-Kreuzberg-007ec6" alt="Hugging Face">
</a>
</div>
<div align="center" style="margin: 24px 0 0;">
<a href="https://kreuzberg.dev">
<img alt="Kreuzberg" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
</a>
</div>
<div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px;">
<a href="https://discord.gg/xt9WY3GnKR">
<img height="22" src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white" alt="Join Discord">
</a>
<a href="https://docs.kreuzberg.dev/demo.html">
<img height="22" src="https://img.shields.io/badge/Live%20Demo-Open-007ec6?logo=webassembly&logoColor=white" alt="Live Demo">
</a>
</div>
Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages, including PDF, Office documents, and images. Ruby bindings with an idiomatic Ruby API and native performance.
## What This Package Provides
- **Ruby-native extraction** — idiomatic Ruby objects over the shared Rust document engine.
- **Structured results** — text, tables, images, metadata, language detection, chunks, and warnings.
- **OCR support** — Tesseract and PaddleOCR through the same configuration model as other bindings.
- **Cross-binding parity** — output matches the Python, Node.js, Go, Java, .NET, PHP, Elixir, R, Dart, Swift, Zig, WASM, and C FFI packages.
## Installation
Add to your Gemfile:
```ruby
gem 'kreuzberg'
```
Then execute:
```bash
bundle install
```
Or install it directly:
```bash
gem install kreuzberg
```
## Quick Start
### Basic Usage
```ruby
require 'kreuzberg'
# Simple synchronous extraction
result = Kreuzberg.extract_file("document.pdf")
puts result.content
```
### Async Extraction
```ruby
require 'kreuzberg'
# Using Fiber for concurrency (Ruby 3.0+)
Fiber.new do
result = Kreuzberg.extract_file_async("document.pdf")
puts result.content
end.resume
```
### Batch Processing
```ruby
require 'kreuzberg'
files = ["doc1.pdf", "doc2.docx", "doc3.xlsx"]
results = files.map { |file| Kreuzberg.extract_file(file) }
results.each do |result|
puts "Content length: #{result.content.length}"
end
```
## Configuration
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
use_cache: true,
enable_quality_processing: true,
ocr: Kreuzberg::OcrConfig.new(
backend: 'tesseract',
language: 'eng'
)
)
result = Kreuzberg.extract_file("document.pdf", config: config)
puts result.content
```
## OCR Support
### Tesseract Configuration
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
ocr: Kreuzberg::OcrConfig.new(
backend: 'tesseract',
language: 'eng',
tesseract_config: Kreuzberg::TesseractConfig.new(
psm: 6,
enable_table_detection: true
)
)
)
result = Kreuzberg.extract_file("scanned.pdf", config: config)
puts result.content
```
## Table Extraction
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
ocr: Kreuzberg::OcrConfig.new(
backend: 'tesseract',
tesseract_config: Kreuzberg::TesseractConfig.new(
enable_table_detection: true
)
)
)
result = Kreuzberg.extract_file("invoice.pdf", config: config)
result.tables.each_with_index do |table, index|
puts "Table #{index}:"
puts table.markdown
end
```
## Metadata Extraction
```ruby
require 'kreuzberg'
result = Kreuzberg.extract_file("document.pdf")
# PDF metadata
if result.metadata[:pdf]
pdf_meta = result.metadata[:pdf]
puts "Title: #{pdf_meta[:title]}"
puts "Author: #{pdf_meta[:author]}"
puts "Pages: #{pdf_meta[:page_count]}"
end
# Detected languages
puts "Languages: #{result.detected_languages}"
# Images
if result.images
puts "Images found: #{result.images.count}"
end
```
## Text Chunking
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
chunking: Kreuzberg::ChunkingConfig.new(
max_chars: 1000,
max_overlap: 200
)
)
result = Kreuzberg.extract_file("long_document.pdf", config: config)
result.chunks.each_with_index do |chunk, index|
puts "Chunk #{index}: #{chunk.length} characters"
end
```
## Password-Protected PDFs
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
pdf_options: Kreuzberg::PdfConfig.new(
passwords: ["password1", "password2"]
)
)
result = Kreuzberg.extract_file("protected.pdf", config: config)
puts result.content
```
## Language Detection
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
language_detection: Kreuzberg::LanguageDetectionConfig.new(
enabled: true
)
)
result = Kreuzberg.extract_file("multilingual.pdf", config: config)
puts "Detected languages: #{result.detected_languages}"
```
## API Reference
### Main Methods
- `Kreuzberg.extract_file(path, config: nil)` — Extract from file
- `Kreuzberg.extract_file_async(path, config: nil)` — Async extraction
- `Kreuzberg.extract_bytes(data, mime_type, config: nil)` — Extract from bytes
- `Kreuzberg.batch_extract_files(paths, config: nil)` — Batch processing
### Configuration Classes
- `ExtractionConfig` — Main configuration
- `OcrConfig` — OCR settings
- `TesseractConfig` — Tesseract-specific options
- `ChunkingConfig` — Text chunking settings
- `PdfConfig` — PDF-specific options
- `LanguageDetectionConfig` — Language detection settings
### Result Object
- `content` — Extracted text
- `metadata` — File metadata as Hash
- `tables` — Array of ExtractedTable objects
- `detected_languages` — Array of language codes
- `chunks` — Array of text chunks
- `images` — Array of extracted images (if enabled)
## System Requirements
### Ruby Version
- **Ruby 3.2.0 or higher** (including Ruby 4.x)
- Ruby 4.0+ is fully supported with no code changes required
- Magnus bindings compile successfully on all supported Ruby versions
### Required
- Rust toolchain (for native extension compilation)
### Optional
```bash
# Tesseract OCR
brew install tesseract # macOS
sudo apt-get install tesseract-ocr # Ubuntu/Debian
```
### Ruby 4.0 Compatibility
Kreuzberg is fully compatible with Ruby 4.0 (released December 25, 2025) and later. Key Ruby 4.0 features that work seamlessly:
- **Ruby Box** - Experimental isolation of definitions (`Ruby::Box`, opt-in via `RUBY_BOX=1`)
- **ZJIT Compiler** - Enhanced JIT compilation for faster execution
- **Ractor Improvements** - Better multi-threaded document processing
- **Set Promoted to Core** - No changes needed for Kreuzberg
All tests pass on Ruby 4.0.1, and the gem compiles there without any breaking changes.
## Development
Clone and setup:
```bash
git clone https://github.com/kreuzberg-dev/kreuzberg.git
cd kreuzberg/packages/ruby
bundle install
```
Run tests:
```bash
bundle exec rake spec
```
## Troubleshooting
### Native extension compilation error
Ensure build tools are installed:
```bash
# macOS
xcode-select --install
# Ubuntu/Debian
sudo apt-get install build-essential ruby-dev
# Windows (via RubyInstaller)
ridk install
```
### "Could not find Kreuzberg"
Reinstall the gem:
```bash
gem uninstall kreuzberg
gem install kreuzberg --no-document
```
### OCR not working
Verify Tesseract is installed:
```bash
tesseract --version
```
## Examples
### Process Directory of PDFs
```ruby
require 'kreuzberg'
require 'pathname'
Dir.glob("documents/*.pdf").each do |file|
puts "Processing: #{file}"
result = Kreuzberg.extract_file(file)
puts " Content length: #{result.content.length}"
puts " Language: #{result.detected_languages}"
end
```
### Extract and Parse Structured Data
```ruby
require 'kreuzberg'
require 'json'
result = Kreuzberg.extract_file("data.pdf")
# Parse content as JSON (if applicable)
begin
data = JSON.parse(result.content)
puts "Parsed data: #{data}"
rescue JSON::ParserError
puts "Content is not JSON"
end
```
### Save Extracted Images
```ruby
require 'kreuzberg'
config = Kreuzberg::ExtractionConfig.new(
images: Kreuzberg::ImageExtractionConfig.new(
extract_images: true
)
)
result = Kreuzberg.extract_file("document.pdf", config: config)
result.images&.each_with_index do |image, index|
File.write("image_#{index}.png", image.data)
end
```
## Documentation
For comprehensive documentation, visit [https://kreuzberg.dev](https://kreuzberg.dev)
## Part of Kreuzberg.dev
- [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
- [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
- [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
- [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
- [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — tree-sitter grammars and code-intelligence primitives.
- [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces this README and all per-language bindings.
- [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
## License
Elastic-2.0 License - see [LICENSE](../../LICENSE) for details.

21
packages/ruby/Rakefile generated Normal file
View File

@@ -0,0 +1,21 @@
# frozen_string_literal: true

# Rake tasks for the kreuzberg gem: gem packaging helpers, native-extension
# compilation, and the RSpec suite.
require "bundler"
Bundler::GemHelper.install_tasks name: "kreuzberg"

require "rake/extensiontask"
require "rspec/core/rake_task"

GEM_ROOT = __dir__
GEMSPEC = Gem::Specification.load(File.expand_path("kreuzberg.gemspec", GEM_ROOT))

# NOTE(review): the task is named "kreuzberg-rb" while extconf.rb passes
# "kreuzberg_rb" to create_rust_makefile — confirm the built library name is
# the one rake-compiler expects.
Rake::ExtensionTask.new("kreuzberg-rb", GEMSPEC) do |ext|
  ext.lib_dir = "lib"
  ext.ext_dir = "ext/kreuzberg_rb/native"
  # Files inside ext_dir that the compiled binary depends on. An empty brace
  # list ("*.{}") matches nothing, so list the Rust and Cargo file extensions.
  ext.source_pattern = "*.{rs,toml}"
  ext.platform = "ruby"
end

RSpec::Core::RakeTask.new(:spec)

# Compile the extension before running the specs; `rake` alone runs the specs.
task spec: :compile
task default: :spec

14
packages/ruby/Steepfile generated Normal file
View File

@@ -0,0 +1,14 @@
# frozen_string_literal: true

# Steep target for the gem: check `lib` against the signatures under `sig`.
target(:lib) do
  signature("sig")
  check("lib")

  # Skip the generated `lib/kreuzberg/native.rb`. Its tagged-enum variant Data
  # classes carry inline Sorbet `sig { ... }` blocks, which exist at runtime
  # through `extend T::Sig`. Steep works from RBS rather than Sorbet sigs, does
  # not understand that extension, and flags every block with
  # `Type `self` does not have method `sig``. The same surface is described in
  # `sig/types.rbs`, so Steep is pointed at the RBS file by ignoring the .rb.
  ignore("lib/kreuzberg/native.rb")
end

View File

@@ -0,0 +1,11 @@
# frozen_string_literal: true

# Writes the Makefile for the "kreuzberg_rb" Rust extension via rb_sys.
require "mkmf"
require "rb_sys/mkmf"

create_rust_makefile("kreuzberg_rb") do |config|
  # Cargo profile is taken from CARGO_PROFILE, falling back to "release".
  config.profile = ENV.fetch("CARGO_PROFILE", "release").to_sym
  config.ext_dir = "native"
end

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,25 @@
# Cargo manifest for the kreuzberg-rb crate, built as the `kreuzberg_rb` cdylib.
[package]
name = "kreuzberg-rb"
version = "5.0.0-rc.3"
edition = "2024"
license = "Elastic-2.0"
description = "High-performance document intelligence library"
readme = false
keywords = ["document", "extraction", "ocr", "pdf", "text"]
categories = ["text-processing"]
# Settings read by cargo-machete: these dependencies are not reported as unused.
[package.metadata.cargo-machete]
ignored = ["tokio", "async-trait"]
# The library target uses an underscore name and takes its source from ../src/lib.rs.
[lib]
name = "kreuzberg_rb"
path = "../src/lib.rs"
crate-type = ["cdylib"]
[dependencies]
async-trait = "0.1"
# Core crate, referenced by local path with an explicit feature list.
kreuzberg = { version = "5.0.0-rc.3", path = "../../../../../crates/kreuzberg", features = ["full", "pdf", "ocr", "paddle-ocr", "paddle-ocr-types", "layout-detection", "layout-types", "embeddings", "embedding-presets", "chunking", "keywords-yake", "keywords-rake", "language-detection", "html", "tree-sitter", "office", "email", "archives", "stopwords", "auto-rotate", "auto-rotate-types", "tokio-runtime", "api", "mcp", "liter-llm", "quality"] }
magnus = "0.8"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["rt-multi-thread"] }

View File

@@ -0,0 +1,11 @@
# frozen_string_literal: true

# Build script: asks rb_sys to generate the Makefile for the Rust extension.
require "mkmf"
require "rb_sys/mkmf"

# CARGO_PROFILE selects the Cargo profile; "release" is used when it is unset.
cargo_profile = ENV.fetch("CARGO_PROFILE", "release").to_sym

create_rust_makefile("kreuzberg_rb") do |makefile|
  makefile.ext_dir = "native"
  makefile.profile = cargo_profile
end

27093
packages/ruby/ext/kreuzberg_rb/src/lib.rs generated Normal file

File diff suppressed because it is too large Load Diff

21
packages/ruby/kreuzberg.gemspec generated Normal file
View File

@@ -0,0 +1,21 @@
# frozen_string_literal: true

# Gem specification for kreuzberg: metadata, packaged files, the native
# extension entry point, and runtime dependencies.
Gem::Specification.new do |spec|
  spec.name = "kreuzberg"
  spec.version = "5.0.0.pre.rc.3"
  # Author name and contact address are separate RubyGems fields.
  spec.authors = ["Na'aman Hirschfeld"]
  spec.email = ["naaman@kreuzberg.dev"]
  spec.summary = "High-performance document intelligence library"
  spec.description = "High-performance document intelligence library"
  spec.homepage = "https://github.com/kreuzberg-dev/kreuzberg"
  spec.license = "Elastic-2.0"
  spec.required_ruby_version = ">= 3.2.0"
  spec.metadata["keywords"] = %w[document extraction ocr pdf text].join(",")
  spec.metadata["rubygems_mfa_required"] = "true"

  # Package regular files only (globs also return directories), and leave out
  # anything under native/target or native/tmp.
  spec.files = Dir.glob(%w[README* LICENSE* lib/**/* ext/**/* sig/**/* Steepfile])
                  .select { |f| File.file?(f) }
                  .reject { |f| f.include?("/native/target/") || f.include?("/native/tmp/") }
  spec.require_paths = ["lib"]
  # extconf.rb that is run at install time to build the extension.
  spec.extensions = ["ext/kreuzberg_rb/native/extconf.rb"]

  spec.add_dependency "rb_sys", "~> 0.9"
  spec.add_dependency "sorbet-runtime", "~> 0.5"
end

13
packages/ruby/lib/kreuzberg.rb generated Normal file
View File

@@ -0,0 +1,13 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# frozen_string_literal: true
# Entry point of the gem: loads the version constant and the native bindings.
require_relative "kreuzberg/version"
require_relative "kreuzberg/native"
module Kreuzberg
# Re-export all types and functions from native extension
# The body is intentionally empty: nothing is defined in this file itself.
# NOTE(review): the public API is presumably defined by kreuzberg/native — confirm.
end

4497
packages/ruby/lib/kreuzberg/native.rb generated Normal file

File diff suppressed because it is too large Load Diff

10
packages/ruby/lib/kreuzberg/version.rb generated Normal file
View File

@@ -0,0 +1,10 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# frozen_string_literal: true

module Kreuzberg
  # Version of the kreuzberg gem.
  VERSION = "5.0.0.pre.rc.3"
end

1842
packages/ruby/sig/types.rbs generated Normal file

File diff suppressed because it is too large Load Diff