This commit is contained in:
27
docs/snippets/go/api/batch_extract_bytes_sync.md
Normal file
27
docs/snippets/go/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
doc1, _ := os.ReadFile("doc1.pdf")
|
||||
doc2, _ := os.ReadFile("doc2.docx")
|
||||
|
||||
items := []kreuzberg.BatchBytesItem{
|
||||
{Content: doc1, MimeType: "application/pdf"},
|
||||
{Content: doc2, MimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document"},
|
||||
}
|
||||
|
||||
results, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("batch extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Processed", len(results), "documents")
|
||||
}
|
||||
```
|
||||
26
docs/snippets/go/api/batch_extract_files_sync.md
Normal file
26
docs/snippets/go/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
items := []kreuzberg.BatchFileItem{
|
||||
{Path: "doc1.pdf"},
|
||||
{Path: "doc2.docx"},
|
||||
{Path: "doc3.pptx"},
|
||||
}
|
||||
|
||||
results, err := kreuzberg.BatchExtractFilesSync(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("batch extraction failed: %v", err)
|
||||
}
|
||||
|
||||
for i, result := range results {
|
||||
println("Doc", i, "content length:", len(result.Content))
|
||||
}
|
||||
}
|
||||
```
|
||||
42
docs/snippets/go/api/client_chunk_text.md
Normal file
42
docs/snippets/go/api/client_chunk_text.md
Normal file
@@ -0,0 +1,42 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// main posts a chunking request as JSON to http://localhost:8000/chunk and
// prints the content of every chunk found in the response.
func main() {
	client := &http.Client{}

	payload := map[string]interface{}{
		"text":         "Your long text content here...",
		"chunker_type": "text",
		"config": map[string]interface{}{
			"max_characters": 1000,
			"overlap":        50,
			"trim":           true,
		},
	}

	data, err := json.Marshal(payload)
	if err != nil {
		log.Fatalf("failed to encode request: %v", err)
	}

	resp, err := client.Post("http://localhost:8000/chunk", "application/json", bytes.NewBuffer(data))
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()

	// Report a failed request with whatever the server sent back instead of
	// trying to interpret an error payload as a chunk list.
	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used for the diagnostic message.
		msg, _ := io.ReadAll(resp.Body)
		log.Fatalf("unexpected status %s: %s", resp.Status, msg)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		log.Fatalf("failed to decode response: %v", err)
	}

	// Checked assertions: an unexpected response shape is reported, not a panic.
	chunks, ok := result["chunks"].([]interface{})
	if !ok {
		log.Fatal("response does not contain a \"chunks\" array")
	}
	log.Printf("Created %d chunks", len(chunks))

	for _, chunk := range chunks {
		c, ok := chunk.(map[string]interface{})
		if !ok {
			continue
		}
		if content, ok := c["content"].(string); ok {
			println("Chunk content:", content)
		}
	}
}
|
||||
```
|
||||
34
docs/snippets/go/api/client_extract_single_file.md
Normal file
34
docs/snippets/go/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,34 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"log"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
// main uploads document.pdf as a multipart form (field "files") to
// http://localhost:8000/extract and copies the response body to stdout.
func main() {
	file, err := os.Open("document.pdf")
	if err != nil {
		log.Fatalf("failed to open file: %v", err)
	}
	defer file.Close()

	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)
	part, err := writer.CreateFormFile("files", "document.pdf")
	if err != nil {
		log.Fatalf("failed to create form file: %v", err)
	}
	if _, err := io.Copy(part, file); err != nil {
		log.Fatalf("failed to copy file into request: %v", err)
	}
	// Close writes the trailing multipart boundary; without it the body is incomplete.
	if err := writer.Close(); err != nil {
		log.Fatalf("failed to finalize multipart body: %v", err)
	}

	resp, err := http.Post("http://localhost:8000/extract", writer.FormDataContentType(), body)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()

	// The body is printed for every status so that server error details stay visible.
	if _, err := io.Copy(os.Stdout, resp.Body); err != nil {
		log.Fatalf("failed to read response: %v", err)
	}
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("server returned %s", resp.Status)
	}
}
|
||||
```
|
||||
35
docs/snippets/go/api/combining_all_features.md
Normal file
35
docs/snippets/go/api/combining_all_features.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
trueVal := true
|
||||
maxChars := uint(1000)
|
||||
overlap := uint(200)
|
||||
config := kreuzberg.ExtractionConfig{
|
||||
UseCache: &trueVal,
|
||||
EnableQualityProcessing: &trueVal,
|
||||
Ocr: &kreuzberg.OcrConfig{
|
||||
Backend: "tesseract",
|
||||
Language: "eng",
|
||||
},
|
||||
Chunking: &kreuzberg.ChunkingConfig{
|
||||
MaxCharacters: &maxChars,
|
||||
Overlap: &overlap,
|
||||
},
|
||||
}
|
||||
|
||||
result, err := kreuzberg.ExtractFileSync("document.pdf", nil, config)
|
||||
if err != nil {
|
||||
log.Fatalf("extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Content length:", len(result.Content))
|
||||
println("Chunks:", len(result.Chunks))
|
||||
}
|
||||
```
|
||||
26
docs/snippets/go/api/error_handling.md
Normal file
26
docs/snippets/go/api/error_handling.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
result, err := kreuzberg.ExtractFileSync("missing.pdf", nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
if errors.Is(err, kreuzberg.ErrIo) {
|
||||
log.Printf("file not found: %v", err)
|
||||
} else if errors.Is(err, kreuzberg.ErrUnsupportedFormat) {
|
||||
log.Printf("unsupported format: %v", err)
|
||||
} else {
|
||||
log.Printf("extraction error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
println("Content:", result.Content)
|
||||
}
|
||||
```
|
||||
43
docs/snippets/go/api/error_handling_extract.md
Normal file
43
docs/snippets/go/api/error_handling_extract.md
Normal file
@@ -0,0 +1,43 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
// main uploads document.pdf as a multipart form (field "files") to
// http://localhost:8000/extract. A non-200 response is decoded as an error
// object and reported; otherwise the "content" field of the JSON response is
// printed.
func main() {
	file, err := os.Open("document.pdf")
	if err != nil {
		log.Fatalf("failed to open file: %v", err)
	}
	defer file.Close()

	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)
	part, err := writer.CreateFormFile("files", "document.pdf")
	if err != nil {
		log.Fatalf("failed to create form file: %v", err)
	}
	if _, err := io.Copy(part, file); err != nil {
		log.Fatalf("failed to copy file into request: %v", err)
	}
	// Close writes the trailing multipart boundary; without it the body is incomplete.
	if err := writer.Close(); err != nil {
		log.Fatalf("failed to finalize multipart body: %v", err)
	}

	resp, err := http.Post("http://localhost:8000/extract", writer.FormDataContentType(), body)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Only the two reported fields are decoded, so extra or non-string
		// fields in the error payload do not break decoding.
		var errResp struct {
			ErrorType string `json:"error_type"`
			Message   string `json:"message"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&errResp); err != nil {
			log.Fatalf("error: status %s (could not decode error body: %v)", resp.Status, err)
		}
		log.Fatalf("error: %s: %s", errResp.ErrorType, errResp.Message)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		log.Fatalf("failed to decode response: %v", err)
	}

	// Checked assertion: a missing or non-string field is reported, not a panic.
	content, ok := result["content"].(string)
	if !ok {
		log.Fatal("response does not contain a string \"content\" field")
	}
	println("Success:", content)
}
|
||||
```
|
||||
24
docs/snippets/go/api/extract_bytes_async.md
Normal file
24
docs/snippets/go/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
content, err := os.ReadFile("document.pdf")
|
||||
if err != nil {
|
||||
log.Fatalf("failed to read file: %v", err)
|
||||
}
|
||||
|
||||
result, err := kreuzberg.ExtractBytes(content, "application/pdf", kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Content:", result.Content)
|
||||
}
|
||||
```
|
||||
24
docs/snippets/go/api/extract_bytes_sync.md
Normal file
24
docs/snippets/go/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
content, err := os.ReadFile("document.pdf")
|
||||
if err != nil {
|
||||
log.Fatalf("failed to read file: %v", err)
|
||||
}
|
||||
|
||||
result, err := kreuzberg.ExtractBytesSync(content, "application/pdf", kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Content:", result.Content)
|
||||
}
|
||||
```
|
||||
19
docs/snippets/go/api/extract_file_async.md
Normal file
19
docs/snippets/go/api/extract_file_async.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
result, err := kreuzberg.ExtractFile("document.pdf", nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Content:", result.Content)
|
||||
println("MIME type:", result.MimeType)
|
||||
}
|
||||
```
|
||||
18
docs/snippets/go/api/extract_file_sync.md
Normal file
18
docs/snippets/go/api/extract_file_sync.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```go title="Go"
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func main() {
|
||||
result, err := kreuzberg.ExtractFileSync("document.pdf", nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
log.Fatalf("extraction failed: %v", err)
|
||||
}
|
||||
|
||||
println("Content:", result.Content)
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user