This commit is contained in:
58
docs/snippets/zig/config/advanced_config.md
Normal file
58
docs/snippets/zig/config/advanced_config.md
Normal file
@@ -0,0 +1,58 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with a JSON config that combines caching, quality
/// processing, OCR, chunking with embeddings, language detection, keyword
/// extraction, token reduction and post-processing, then prints the result
/// JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "use_cache": true,
        \\ "enable_quality_processing": true,
        \\ "ocr": {
        \\ "backend": "tesseract",
        \\ "language": "eng"
        \\ },
        \\ "chunking": {
        \\ "max_characters": 1000,
        \\ "overlap": 200,
        \\ "embedding": {
        \\ "model": {"type": "preset", "name": "balanced"},
        \\ "batch_size": 32,
        \\ "normalize": true
        \\ }
        \\ },
        \\ "language_detection": {
        \\ "enabled": true,
        \\ "min_confidence": 0.8,
        \\ "detect_multiple": false
        \\ },
        \\ "keywords": {
        \\ "algorithm": "yake",
        \\ "max_keywords": 10,
        \\ "min_score": 0.1,
        \\ "ngram_range": [1, 3],
        \\ "language": "en"
        \\ },
        \\ "token_reduction": {
        \\ "mode": "moderate",
        \\ "preserve_important_words": true
        \\ },
        \\ "postprocessor": {
        \\ "enabled": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
30
docs/snippets/zig/config/chunking_config.md
Normal file
30
docs/snippets/zig/config/chunking_config.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with a markdown chunker configuration and prints
/// the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "chunking": {
        \\ "max_characters": 1000,
        \\ "overlap": 200,
        \\ "chunker_type": "markdown",
        \\ "prepend_heading_context": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
26
docs/snippets/zig/config/config_basic.md
Normal file
26
docs/snippets/zig/config/config_basic.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with caching and quality processing switched on
/// and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "use_cache": true,
        \\ "enable_quality_processing": true
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
25
docs/snippets/zig/config/config_discover.md
Normal file
25
docs/snippets/zig/config/config_discover.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Reads `kreuzberg.json` from the current working directory, falling back to
/// an empty JSON object when the file does not exist, then extracts
/// `document.pdf` with that config and prints the result JSON to stdout.
pub fn main() !void {
    var debug_allocator = std.heap.GeneralPurposeAllocator(.{}){};
    defer {
        _ = debug_allocator.deinit();
    }
    const gpa = debug_allocator.allocator();

    // The Zig binding takes its configuration as a JSON string, so a config
    // file found on disk is read into memory and forwarded as-is.
    const max_config_bytes = 1 << 20;
    const config_text = std.fs.cwd().readFileAlloc(gpa, "kreuzberg.json", max_config_bytes) catch |read_err| blk: {
        // Only a missing file is tolerated; every other failure propagates.
        if (read_err != error.FileNotFound) return read_err;
        break :blk try gpa.dupe(u8, "{}");
    };
    defer gpa.free(config_text);

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_text);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    try std.io.getStdOut().writer().print("{s}\n", .{result_json});
}
|
||||
```
|
||||
28
docs/snippets/zig/config/config_ocr.md
Normal file
28
docs/snippets/zig/config/config_ocr.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `scanned.pdf` with the Tesseract OCR backend set to English and
/// prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "ocr": {
        \\ "backend": "tesseract",
        \\ "language": "eng"
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("scanned.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
39
docs/snippets/zig/config/config_programmatic.md
Normal file
39
docs/snippets/zig/config/config_programmatic.md
Normal file
@@ -0,0 +1,39 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Assembles the extraction config as a `std.json` value tree, serializes it
/// to a JSON string, extracts `document.pdf` with it and prints the result
/// JSON to stdout.
pub fn main() !void {
    var debug_allocator = std.heap.GeneralPurposeAllocator(.{}){};
    defer {
        _ = debug_allocator.deinit();
    }

    // Every allocation made while building the config comes from this arena
    // and is released in one go when main exits.
    var config_arena = std.heap.ArenaAllocator.init(debug_allocator.allocator());
    defer config_arena.deinit();
    const scratch = config_arena.allocator();

    // Nested sections are populated first, then attached to the top level.
    var ocr_section = std.json.ObjectMap.init(scratch);
    try ocr_section.put("backend", .{ .string = "tesseract" });
    try ocr_section.put("language", .{ .string = "eng+deu" });

    var chunking_section = std.json.ObjectMap.init(scratch);
    try chunking_section.put("max_characters", .{ .integer = 1000 });
    try chunking_section.put("overlap", .{ .integer = 200 });

    var top_level = std.json.ObjectMap.init(scratch);
    try top_level.put("use_cache", .{ .bool = true });
    try top_level.put("enable_quality_processing", .{ .bool = true });
    try top_level.put("ocr", .{ .object = ocr_section });
    try top_level.put("chunking", .{ .object = chunking_section });

    // Serialize the tree into the JSON string the binding expects.
    var serialized = std.ArrayList(u8).init(scratch);
    try std.json.stringify(std.json.Value{ .object = top_level }, .{}, serialized.writer());

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, serialized.items);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    try std.io.getStdOut().writer().print("{s}\n", .{result_json});
}
|
||||
```
|
||||
25
docs/snippets/zig/config/document_structure_config.md
Normal file
25
docs/snippets/zig/config/document_structure_config.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with `include_document_structure` switched on and
/// prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "include_document_structure": true
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
25
docs/snippets/zig/config/element_based_output.md
Normal file
25
docs/snippets/zig/config/element_based_output.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with the `element_based` result format and prints
/// the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "result_format": "element_based"
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
34
docs/snippets/zig/config/embedding_config.md
Normal file
34
docs/snippets/zig/config/embedding_config.md
Normal file
@@ -0,0 +1,34 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with chunking plus an embedding configuration
/// (preset model `balanced`) and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "chunking": {
        \\ "max_characters": 1000,
        \\ "overlap": 200,
        \\ "embedding": {
        \\ "model": {"type": "preset", "name": "balanced"},
        \\ "batch_size": 16,
        \\ "normalize": true,
        \\ "show_download_progress": true
        \\ }
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
29
docs/snippets/zig/config/html_output.md
Normal file
29
docs/snippets/zig/config/html_output.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with HTML output (theme `github`, embedded CSS)
/// and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "output_format": "html",
        \\ "html_output": {
        \\ "theme": "github",
        \\ "embed_css": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
31
docs/snippets/zig/config/keyword_extraction_config.md
Normal file
31
docs/snippets/zig/config/keyword_extraction_config.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with a YAKE keyword-extraction configuration and
/// prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "keywords": {
        \\ "algorithm": "yake",
        \\ "max_keywords": 10,
        \\ "min_score": 0.1,
        \\ "ngram_range": [1, 3],
        \\ "language": "en"
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
29
docs/snippets/zig/config/language_detection_config.md
Normal file
29
docs/snippets/zig/config/language_detection_config.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with language detection enabled (multiple
/// languages, minimum confidence 0.8) and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "language_detection": {
        \\ "enabled": true,
        \\ "min_confidence": 0.8,
        \\ "detect_multiple": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
32
docs/snippets/zig/config/ocr_dpi_config.md
Normal file
32
docs/snippets/zig/config/ocr_dpi_config.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with image extraction and DPI settings configured
/// under the `images` key and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "images": {
        \\ "extract_images": true,
        \\ "target_dpi": 300,
        \\ "max_image_dimension": 4096,
        \\ "auto_adjust_dpi": true,
        \\ "min_dpi": 150,
        \\ "max_dpi": 600
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
30
docs/snippets/zig/config/pdf_config.md
Normal file
30
docs/snippets/zig/config/pdf_config.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `encrypted.pdf` with PDF options (password list, image, metadata
/// and annotation extraction) and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "pdf_options": {
        \\ "extract_images": true,
        \\ "passwords": ["password123"],
        \\ "extract_metadata": true,
        \\ "extract_annotations": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("encrypted.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
32
docs/snippets/zig/config/pdf_hierarchy_config.md
Normal file
32
docs/snippets/zig/config/pdf_hierarchy_config.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with the PDF hierarchy options enabled and prints
/// the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "pdf_options": {
        \\ "hierarchy": {
        \\ "enabled": true,
        \\ "k_clusters": 6,
        \\ "include_bbox": true,
        \\ "ocr_coverage_threshold": 0.5
        \\ }
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
28
docs/snippets/zig/config/postprocessor_config.md
Normal file
28
docs/snippets/zig/config/postprocessor_config.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with the postprocessor enabled for the listed
/// processors and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "postprocessor": {
        \\ "enabled": true,
        \\ "enabled_processors": ["whitespace_normalizer", "unicode_normalizer"]
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
26
docs/snippets/zig/config/quality_processing_config.md
Normal file
26
docs/snippets/zig/config/quality_processing_config.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with quality processing and caching switched on
/// and prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "enable_quality_processing": true,
        \\ "use_cache": true
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
33
docs/snippets/zig/config/tesseract_config.md
Normal file
33
docs/snippets/zig/config/tesseract_config.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `scanned.pdf` with the Tesseract OCR backend, languages
/// `eng+deu`, and explicit `psm`/`oem` settings, then prints the result JSON
/// to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "ocr": {
        \\ "backend": "tesseract",
        \\ "language": "eng+deu",
        \\ "tesseract_config": {
        \\ "language": "eng+deu",
        \\ "psm": 6,
        \\ "oem": 3
        \\ }
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("scanned.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
28
docs/snippets/zig/config/token_reduction_config.md
Normal file
28
docs/snippets/zig/config/token_reduction_config.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```zig title="Zig"
|
||||
const std = @import("std");
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
/// Extracts `document.pdf` with token reduction in `moderate` mode and
/// prints the result JSON to stdout.
pub fn main() !void {
    // The configuration is handed to the binding as a JSON string.
    const config_json =
        \\{
        \\ "token_reduction": {
        \\ "mode": "moderate",
        \\ "preserve_important_words": true
        \\ }
        \\}
    ;

    const result_json = try kreuzberg.extract_file_sync("document.pdf", null, config_json);
    // Release the result buffer when main exits.
    defer std.heap.c_allocator.free(result_json);

    // Write the JSON result to stdout followed by a newline.
    const stdout = std.io.getStdOut().writer();
    try stdout.print("{s}\n", .{result_json});
}
|
||||
```
|
||||
Reference in New Issue
Block a user