This commit is contained in:
22
docs/snippets/zig/api/batch_extract_bytes_sync.md
Normal file
22
docs/snippets/zig/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Runs a batch extraction over two in-memory documents and prints the JSON result.
pub fn main() !void {
    // An empty JSON object is passed as the configuration argument.
    const options_json = "{}";

    // The batch crosses the FFI boundary as a JSON-encoded array.
    // Each `content` value is base64-encoded bytes, per the FFI schema for BatchBytesItem.
    const batch_json =
        \\[
        \\ {"content": "SGVsbG8sIHdvcmxkIQ==", "mime_type": "text/plain", "config": null},
        \\ {"content": "IyBIZWFkaW5nCgpQYXJhZ3JhcGggdGV4dC4=", "mime_type": "text/markdown", "config": null}
        \\]
    ;

    const output_json = try kreuzberg.batch_extract_bytes_sync(batch_json, options_json);
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
22
docs/snippets/zig/api/batch_extract_files_sync.md
Normal file
22
docs/snippets/zig/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Runs a batch extraction over three file paths and prints the JSON result.
pub fn main() !void {
    // An empty JSON object is passed as the configuration argument.
    const options_json = "{}";

    // The list of inputs crosses the FFI boundary as a JSON-encoded array,
    // one object per file path.
    const batch_json =
        \\[
        \\ {"path": "doc1.pdf", "config": null},
        \\ {"path": "doc2.docx", "config": null},
        \\ {"path": "report.pdf", "config": null}
        \\]
    ;

    const output_json = try kreuzberg.batch_extract_files_sync(batch_json, options_json);
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
53
docs/snippets/zig/api/client_chunk_text.md
Normal file
53
docs/snippets/zig/api/client_chunk_text.md
Normal file
@@ -0,0 +1,53 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
|
||||
/// Uploads `document.pdf` plus a chunking configuration to a local server's
/// `/extract` endpoint as multipart/form-data and prints the response body.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Read the whole document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024);
    defer allocator.free(pdf_bytes);

    const boundary = "----kreuzberg-zig-boundary";

    // Build the multipart payload in memory so its length is known up front.
    var payload = std.ArrayList(u8).init(allocator);
    defer payload.deinit();
    const payload_writer = payload.writer();

    // Part 1: the file itself.
    try payload_writer.print(
        "--{s}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"document.pdf\"\r\n" ++
            "Content-Type: application/pdf\r\n\r\n",
        .{boundary},
    );
    try payload.appendSlice(pdf_bytes);

    // Part 2: the `chunking` field (JSON), followed by the closing boundary.
    // The doubled braces are format-string escapes for literal `{` and `}`.
    try payload_writer.print(
        "\r\n--{s}\r\nContent-Disposition: form-data; name=\"chunking\"\r\n\r\n" ++
            "{{\"max_characters\":800,\"overlap\":100}}\r\n--{s}--\r\n",
        .{ boundary, boundary },
    );

    var http_client = std.http.Client{ .allocator = allocator };
    defer http_client.deinit();

    const endpoint = try std.Uri.parse("http://localhost:8000/extract");
    var server_headers: [4096]u8 = undefined;
    var request = try http_client.open(.POST, endpoint, .{
        .server_header_buffer = &server_headers,
        .extra_headers = &.{
            .{ .name = "content-type", .value = "multipart/form-data; boundary=" ++ boundary },
        },
    });
    defer request.deinit();

    // Declare the exact body length, then send the head, the body, and wait for the response.
    request.transfer_encoding = .{ .content_length = payload.items.len };
    try request.send();
    try request.writeAll(payload.items);
    try request.finish();
    try request.wait();

    // Read the full response body (capped at 16 MiB).
    const reply = try request.reader().readAllAlloc(allocator, 16 * 1024 * 1024);
    defer allocator.free(reply);

    try std.io.getStdOut().writer().print("{s}\n", .{reply});
}
|
||||
```
|
||||
49
docs/snippets/zig/api/client_extract_single_file.md
Normal file
49
docs/snippets/zig/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,49 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
|
||||
/// Uploads `document.pdf` to a local server's `/extract` endpoint as
/// multipart/form-data and prints the response body.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Read the whole document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024);
    defer allocator.free(pdf_bytes);

    const boundary = "----kreuzberg-zig-boundary";

    // Build the multipart payload in memory so its length is known up front.
    var payload = std.ArrayList(u8).init(allocator);
    defer payload.deinit();
    const payload_writer = payload.writer();

    // Single part: the file, followed by the closing boundary.
    try payload_writer.print(
        "--{s}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"document.pdf\"\r\n" ++
            "Content-Type: application/pdf\r\n\r\n",
        .{boundary},
    );
    try payload.appendSlice(pdf_bytes);
    try payload_writer.print("\r\n--{s}--\r\n", .{boundary});

    var http_client = std.http.Client{ .allocator = allocator };
    defer http_client.deinit();

    const endpoint = try std.Uri.parse("http://localhost:8000/extract");
    var server_headers: [4096]u8 = undefined;
    var request = try http_client.open(.POST, endpoint, .{
        .server_header_buffer = &server_headers,
        .extra_headers = &.{
            .{ .name = "content-type", .value = "multipart/form-data; boundary=" ++ boundary },
        },
    });
    defer request.deinit();

    // Declare the exact body length, then send the head, the body, and wait for the response.
    request.transfer_encoding = .{ .content_length = payload.items.len };
    try request.send();
    try request.writeAll(payload.items);
    try request.finish();
    try request.wait();

    // Read the full response body (capped at 16 MiB).
    const reply = try request.reader().readAllAlloc(allocator, 16 * 1024 * 1024);
    defer allocator.free(reply);

    try std.io.getStdOut().writer().print("{s}\n", .{reply});
}
|
||||
```
|
||||
37
docs/snippets/zig/api/combining_all_features.md
Normal file
37
docs/snippets/zig/api/combining_all_features.md
Normal file
@@ -0,0 +1,37 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `report.pdf` with a configuration that enables several features at once
/// and prints the size and contents of the JSON result.
pub fn main() !void {
    // The configuration crosses the FFI as one JSON document. This one sets
    // caching, OCR, chunking, image extraction, and the output format together.
    const options_json =
        \\{
        \\ "use_cache": true,
        \\ "enable_quality_processing": true,
        \\ "force_ocr": false,
        \\ "ocr": {
        \\ "backend": "tesseract",
        \\ "language": "eng"
        \\ },
        \\ "chunking": {
        \\ "max_characters": 800,
        \\ "overlap": 100,
        \\ "chunker_type": "markdown",
        \\ "prepend_heading_context": true
        \\ },
        \\ "images": {
        \\ "extract_images": true
        \\ },
        \\ "output_format": "markdown",
        \\ "include_document_structure": true
        \\}
    ;

    // The second argument is passed as null here.
    const output_json = try kreuzberg.extract_file_sync("report.pdf", null, options_json);
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print(
        "Result ({d} bytes of JSON):\n{s}\n",
        .{ output_json.len, output_json },
    );
}
|
||||
```
|
||||
28
docs/snippets/zig/api/error_handling.md
Normal file
28
docs/snippets/zig/api/error_handling.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` and prints the JSON result. On failure it writes a
/// message for the error to stderr, followed by any extra context the binding
/// reports, and returns without propagating the error.
pub fn main() !void {
    const output_json = kreuzberg.extract_file_sync("document.pdf", null, "{}") catch |err| {
        const stderr = std.io.getStdErr().writer();

        // Known errors map to a fixed message; everything else falls back to the error name.
        const fixed_message: ?[]const u8 = switch (err) {
            error.Io => "File error\n",
            error.UnsupportedFormat => "Unsupported format\n",
            error.Parsing => "Corrupt or invalid document\n",
            error.MissingDependency => "Missing dependency — install required backend\n",
            error.Ocr => "OCR processing failed\n",
            error.OutOfMemory => "Out of memory\n",
            else => null,
        };
        if (fixed_message) |text| {
            try stderr.writeAll(text);
        } else {
            try stderr.print("Extraction failed: {s}\n", .{@errorName(err)});
        }

        // Print additional context when the binding has some to offer.
        if (kreuzberg._last_error()) |context| {
            try stderr.print(" context: {s}\n", .{context});
        }
        return;
    };
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
33
docs/snippets/zig/api/error_handling_extract.md
Normal file
33
docs/snippets/zig/api/error_handling_extract.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Runs a synchronous extraction over in-memory `bytes` declared as `mime_type`,
/// passing an empty JSON object (`{}`) as the configuration.
/// Returns the result of `kreuzberg.extract_bytes_sync` unchanged, errors included.
fn extract_text(bytes: []const u8, mime_type: []const u8) ![]u8 {
    return kreuzberg.extract_bytes_sync(bytes, mime_type, "{}");
}
|
||||
|
||||
/// Reads `document.pdf`, extracts it through `extract_text`, and prints the size
/// of the JSON result. Both the file read and the extraction report failures on
/// stderr and return without propagating the error.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const stderr = std.io.getStdErr().writer();

    // Report a failed read explicitly instead of substituting an empty buffer:
    // otherwise a missing file (or OutOfMemory) would surface later as an
    // unrelated extraction error.
    const bytes = std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024) catch |err| {
        try stderr.print("Failed to read document.pdf: {s}\n", .{@errorName(err)});
        return;
    };
    defer allocator.free(bytes);

    const result_json = extract_text(bytes, "application/pdf") catch |err| {
        switch (err) {
            error.UnsupportedFormat => try stderr.print("Format not supported\n", .{}),
            error.Ocr => try stderr.print("OCR failed\n", .{}),
            error.Validation => try stderr.print("Invalid input or configuration\n", .{}),
            else => try stderr.print("Error: {s}\n", .{@errorName(err)}),
        }
        return;
    };
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(result_json);

    const stdout = std.io.getStdOut().writer();
    try stdout.print("Extracted {d} bytes of JSON\n", .{result_json.len});
}
|
||||
```
|
||||
25
docs/snippets/zig/api/extract_bytes_async.md
Normal file
25
docs/snippets/zig/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,25 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Note: the Zig binding is sync-only. There is no `extract_bytes` async variant —
|
||||
// the FFI surface exposes blocking entry points that internally drive the global
|
||||
// Tokio runtime. Use `extract_bytes_sync` from any thread.
|
||||
/// Reads `document.pdf` into memory, extracts it with the blocking
/// `extract_bytes_sync` entry point, and prints the JSON result.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Read the whole document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024);
    defer allocator.free(pdf_bytes);

    // An empty JSON object is passed as the configuration argument.
    const output_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    // The returned buffer is released through the C allocator, not the GPA above.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
20
docs/snippets/zig/api/extract_bytes_sync.md
Normal file
20
docs/snippets/zig/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Reads `document.pdf` into memory, extracts it with `extract_bytes_sync`,
/// and prints the JSON result.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Read the whole document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024);
    defer allocator.free(pdf_bytes);

    // An empty JSON object is passed as the configuration argument.
    const output_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    // The returned buffer is released through the C allocator, not the GPA above.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
18
docs/snippets/zig/api/extract_file_async.md
Normal file
18
docs/snippets/zig/api/extract_file_async.md
Normal file
@@ -0,0 +1,18 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Note: the Zig binding is sync-only. There is no `extract_file` async variant —
|
||||
// the FFI surface exposes blocking entry points that internally drive the global
|
||||
// Tokio runtime. Use `extract_file_sync` from any thread.
|
||||
/// Extracts `document.pdf` with the blocking `extract_file_sync` entry point
/// and prints the JSON result.
pub fn main() !void {
    // The second argument is passed as null; the configuration is an empty JSON object.
    const output_json = try kreuzberg.extract_file_sync("document.pdf", null, "{}");
    // The returned buffer is released through the C allocator when main exits.
    defer std.heap.c_allocator.free(output_json);

    try std.io.getStdOut().writer().print("{s}\n", .{output_json});
}
|
||||
```
|
||||
20
docs/snippets/zig/api/extract_file_sync.md
Normal file
20
docs/snippets/zig/api/extract_file_sync.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with `extract_file_sync`, copies the JSON result into
/// memory owned by the program's own allocator, and prints that copy.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // The second argument is passed as null; the configuration is an empty JSON object.
    const ffi_json = try kreuzberg.extract_file_sync("document.pdf", null, "{}");
    // The buffer returned by the binding is released through the C allocator.
    defer std.heap.c_allocator.free(ffi_json);

    // Duplicate the result so the copy is owned (and later freed) by `allocator`.
    const local_copy = try allocator.dupe(u8, ffi_json);
    defer allocator.free(local_copy);

    try std.io.getStdOut().writer().print("{s}\n", .{local_copy});
}
|
||||
```
|
||||
Reference in New Issue
Block a user