This commit is contained in:
25
docs/snippets/c/config/advanced_config.md
Normal file
25
docs/snippets/c/config/advanced_config.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
struct ConfigBuilder *builder = kreuzberg_config_builder_new();
|
||||
kreuzberg_config_builder_set_use_cache(builder, 1);
|
||||
kreuzberg_config_builder_set_include_document_structure(builder, 1);
|
||||
kreuzberg_config_builder_set_ocr(builder,
|
||||
"{\"tesseract\":{\"language\":\"eng\"}}");
|
||||
|
||||
ExtractionConfig *config = kreuzberg_config_builder_build(builder);
|
||||
|
||||
struct CExtractionResult *result =
|
||||
kreuzberg_extract_file_sync_with_config("scan.pdf",
|
||||
kreuzberg_config_to_json(config));
|
||||
if (result && result->success) {
|
||||
printf("%s\n", result->content);
|
||||
}
|
||||
|
||||
kreuzberg_free_result(result);
|
||||
kreuzberg_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
43
docs/snippets/c/config/chunking_config.md
Normal file
43
docs/snippets/c/config/chunking_config.md
Normal file
@@ -0,0 +1,43 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"chunking\": {"
|
||||
"\"chunker_type\": \"character\","
|
||||
"\"max_characters\": 1000,"
|
||||
"\"overlap\": 200,"
|
||||
"\"trim\": true"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
38
docs/snippets/c/config/config_basic.md
Normal file
38
docs/snippets/c/config/config_basic.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"use_cache\": true,"
|
||||
"\"enable_quality_processing\": true"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
62
docs/snippets/c/config/config_discover.md
Normal file
62
docs/snippets/c/config/config_discover.md
Normal file
@@ -0,0 +1,62 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* The C FFI does not expose config-file auto-discovery directly. Load the
|
||||
* file contents in your application and pass the JSON to
|
||||
* kreuzberg_extraction_config_from_json. For TOML/YAML, convert in your
|
||||
* application before calling the FFI. */
|
||||
static char *read_text_file(const char *path) {
|
||||
FILE *fp = fopen(path, "rb");
|
||||
if (!fp) {
|
||||
return NULL;
|
||||
}
|
||||
fseek(fp, 0, SEEK_END);
|
||||
long size = ftell(fp);
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
char *buf = (char *)malloc((size_t)size + 1);
|
||||
if (!buf) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
fread(buf, 1, (size_t)size, fp);
|
||||
buf[size] = '\0';
|
||||
fclose(fp);
|
||||
return buf;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
char *json = read_text_file("kreuzberg.json");
|
||||
KREUZBERGExtractionConfig *config = json
|
||||
? kreuzberg_extraction_config_from_json(json)
|
||||
: kreuzberg_extraction_config_default();
|
||||
free(json);
|
||||
|
||||
if (!config) {
|
||||
fprintf(stderr, "config load failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/config/config_ocr.md
Normal file
40
docs/snippets/c/config/config_ocr.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"tesseract\","
|
||||
"\"language\": \"eng\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
48
docs/snippets/c/config/config_programmatic.md
Normal file
48
docs/snippets/c/config/config_programmatic.md
Normal file
@@ -0,0 +1,48 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"use_cache\": true,"
|
||||
"\"enable_quality_processing\": true,"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"tesseract\","
|
||||
"\"language\": \"eng+deu\","
|
||||
"\"tesseract_config\": {\"psm\": 6}"
|
||||
"},"
|
||||
"\"chunking\": {"
|
||||
"\"chunker_type\": \"character\","
|
||||
"\"max_characters\": 1000,"
|
||||
"\"overlap\": 200"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
37
docs/snippets/c/config/document_structure_config.md
Normal file
37
docs/snippets/c/config/document_structure_config.md
Normal file
@@ -0,0 +1,37 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"include_document_structure\": true"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
37
docs/snippets/c/config/element_based_output.md
Normal file
37
docs/snippets/c/config/element_based_output.md
Normal file
@@ -0,0 +1,37 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"result_format\": \"element_based\""
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
47
docs/snippets/c/config/embedding_config.md
Normal file
47
docs/snippets/c/config/embedding_config.md
Normal file
@@ -0,0 +1,47 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"chunking\": {"
|
||||
"\"chunker_type\": \"character\","
|
||||
"\"max_characters\": 1000,"
|
||||
"\"overlap\": 200,"
|
||||
"\"embedding\": {"
|
||||
"\"model\": {\"preset\": {\"name\": \"balanced\"}},"
|
||||
"\"batch_size\": 16,"
|
||||
"\"normalize\": true,"
|
||||
"\"show_download_progress\": true"
|
||||
"}"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/config/html_output.md
Normal file
40
docs/snippets/c/config/html_output.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"output_format\": \"html\","
|
||||
"\"html_output\": {"
|
||||
"\"theme\": \"github\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
43
docs/snippets/c/config/keyword_extraction_config.md
Normal file
43
docs/snippets/c/config/keyword_extraction_config.md
Normal file
@@ -0,0 +1,43 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"keywords\": {"
|
||||
"\"algorithm\": \"yake\","
|
||||
"\"max_keywords\": 10,"
|
||||
"\"min_score\": 0.1,"
|
||||
"\"ngram_range\": [1, 3],"
|
||||
"\"language\": \"en\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
41
docs/snippets/c/config/language_detection_config.md
Normal file
41
docs/snippets/c/config/language_detection_config.md
Normal file
@@ -0,0 +1,41 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"language_detection\": {"
|
||||
"\"enabled\": true,"
|
||||
"\"min_confidence\": 0.8,"
|
||||
"\"detect_multiple\": true"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
44
docs/snippets/c/config/ocr_dpi_config.md
Normal file
44
docs/snippets/c/config/ocr_dpi_config.md
Normal file
@@ -0,0 +1,44 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"images\": {"
|
||||
"\"extract_images\": true,"
|
||||
"\"target_dpi\": 300,"
|
||||
"\"max_image_dimension\": 4096,"
|
||||
"\"auto_adjust_dpi\": true,"
|
||||
"\"min_dpi\": 150,"
|
||||
"\"max_dpi\": 600"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
41
docs/snippets/c/config/pdf_config.md
Normal file
41
docs/snippets/c/config/pdf_config.md
Normal file
@@ -0,0 +1,41 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"pdf_options\": {"
|
||||
"\"extract_images\": true,"
|
||||
"\"passwords\": [\"password123\"],"
|
||||
"\"extract_metadata\": true"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("encrypted.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
45
docs/snippets/c/config/pdf_hierarchy_config.md
Normal file
45
docs/snippets/c/config/pdf_hierarchy_config.md
Normal file
@@ -0,0 +1,45 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"pdf_options\": {"
|
||||
"\"hierarchy\": {"
|
||||
"\"enabled\": true,"
|
||||
"\"detection_threshold\": 0.75,"
|
||||
"\"ocr_coverage_threshold\": 0.8,"
|
||||
"\"min_level\": 1,"
|
||||
"\"max_level\": 5"
|
||||
"}"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/config/postprocessor_config.md
Normal file
40
docs/snippets/c/config/postprocessor_config.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"postprocessor\": {"
|
||||
"\"enabled\": true,"
|
||||
"\"enabled_processors\": [\"whitespace_normalizer\", \"unicode_normalizer\"]"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
38
docs/snippets/c/config/quality_processing_config.md
Normal file
38
docs/snippets/c/config/quality_processing_config.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"enable_quality_processing\": true,"
|
||||
"\"use_cache\": true"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
44
docs/snippets/c/config/tesseract_config.md
Normal file
44
docs/snippets/c/config/tesseract_config.md
Normal file
@@ -0,0 +1,44 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"tesseract\","
|
||||
"\"language\": \"eng+deu\","
|
||||
"\"tesseract_config\": {"
|
||||
"\"psm\": 6,"
|
||||
"\"oem\": 3"
|
||||
"}"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/config/token_reduction_config.md
Normal file
40
docs/snippets/c/config/token_reduction_config.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"token_reduction\": {"
|
||||
"\"mode\": \"moderate\","
|
||||
"\"preserve_important_words\": true"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user