This commit is contained in:
42
docs/snippets/c/ocr/cloud_ocr_backend.md
Normal file
42
docs/snippets/c/ocr/cloud_ocr_backend.md
Normal file
@@ -0,0 +1,42 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
/* Cloud OCR backends are registered as custom plugins via the Rust core. */
|
||||
/* Select a registered cloud backend by name through the OCR config. */
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"cloud-ocr\","
|
||||
"\"language\": \"eng\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
39
docs/snippets/c/ocr/image_extraction.md
Normal file
39
docs/snippets/c/ocr/image_extraction.md
Normal file
@@ -0,0 +1,39 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"images\": {"
|
||||
"\"extract_images\": true"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
44
docs/snippets/c/ocr/image_preprocessing.md
Normal file
44
docs/snippets/c/ocr/image_preprocessing.md
Normal file
@@ -0,0 +1,44 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"images\": {"
|
||||
"\"extract_images\": true,"
|
||||
"\"target_dpi\": 300,"
|
||||
"\"max_image_dimension\": 4096,"
|
||||
"\"auto_adjust_dpi\": true,"
|
||||
"\"min_dpi\": 150,"
|
||||
"\"max_dpi\": 600"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/ocr/ocr_easyocr.md
Normal file
40
docs/snippets/c/ocr/ocr_easyocr.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"easyocr\","
|
||||
"\"language\": \"en\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
43
docs/snippets/c/ocr/ocr_elements.md
Normal file
43
docs/snippets/c/ocr/ocr_elements.md
Normal file
@@ -0,0 +1,43 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"paddleocr\","
|
||||
"\"language\": \"en\","
|
||||
"\"element_config\": {"
|
||||
"\"include_elements\": true"
|
||||
"}"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
26
docs/snippets/c/ocr/ocr_extraction.md
Normal file
26
docs/snippets/c/ocr/ocr_extraction.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
struct ConfigBuilder *builder = kreuzberg_config_builder_new();
|
||||
kreuzberg_config_builder_set_ocr(builder,
|
||||
"{\"tesseract\":{\"language\":\"eng\"}}");
|
||||
ExtractionConfig *config = kreuzberg_config_builder_build(builder);
|
||||
|
||||
char *config_json = kreuzberg_config_to_json(config);
|
||||
struct CExtractionResult *result =
|
||||
kreuzberg_extract_file_sync_with_config("scanned.png", config_json);
|
||||
|
||||
if (result && result->success) {
|
||||
printf("OCR text: %s\n", result->content);
|
||||
} else {
|
||||
fprintf(stderr, "OCR error: %s\n", kreuzberg_get_error_details().message);
|
||||
}
|
||||
|
||||
kreuzberg_free_result(result);
|
||||
kreuzberg_free_string(config_json);
|
||||
kreuzberg_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
41
docs/snippets/c/ocr/ocr_force_all_pages.md
Normal file
41
docs/snippets/c/ocr/ocr_force_all_pages.md
Normal file
@@ -0,0 +1,41 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"force_ocr\": true,"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"tesseract\","
|
||||
"\"language\": \"eng\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/ocr/ocr_multi_language.md
Normal file
40
docs/snippets/c/ocr/ocr_multi_language.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"tesseract\","
|
||||
"\"language\": \"eng+deu+fra\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("multilingual.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
40
docs/snippets/c/ocr/ocr_paddleocr.md
Normal file
40
docs/snippets/c/ocr/ocr_paddleocr.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"ocr\": {"
|
||||
"\"backend\": \"paddleocr\","
|
||||
"\"language\": \"en\""
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user