Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
```csharp title="C#"
using Kreuzberg;
// Read both documents into memory first; each batch item pairs the raw bytes
// with the MIME type that describes them.
var pdfBytes = await File.ReadAllBytesAsync("doc1.pdf");
var textBytes = await File.ReadAllBytesAsync("doc2.txt");

// Neither item supplies its own config (Config = null); the ExtractionConfig
// below is handed to the batch call alongside the items.
var pdfItem = new BatchBytesItem { Content = pdfBytes, MimeType = "application/pdf", Config = null };
var textItem = new BatchBytesItem { Content = textBytes, MimeType = "text/plain", Config = null };
var batch = new List<BatchBytesItem> { pdfItem, textItem };

var sharedConfig = new ExtractionConfig { OutputFormat = OutputFormat.Text };
var extractions = KreuzbergLib.BatchExtractBytesSync(batch, sharedConfig);

// Report the size of the extracted content for every returned result.
foreach (var extraction in extractions)
{
    var contentLength = extraction.Content.Length;
    Console.WriteLine($"Content length: {contentLength}");
}
```

View File

@@ -0,0 +1,21 @@
```csharp title="C#"
using Kreuzberg;
// First file: no per-file configuration is supplied (Config = null).
var plainItem = new BatchFileItem { Path = "document1.pdf", Config = null };

// Second file: carries its own FileExtractionConfig with ForceOcr switched on.
var ocrItem = new BatchFileItem
{
    Path = "document2.pdf",
    Config = new FileExtractionConfig { ForceOcr = true },
};

var batch = new List<BatchFileItem> { plainItem, ocrItem };
var sharedConfig = new ExtractionConfig { OutputFormat = OutputFormat.Text };
var extractions = KreuzbergLib.BatchExtractFilesSync(batch, sharedConfig);

// Report the size of the extracted content for every returned result.
foreach (var extraction in extractions)
{
    var contentLength = extraction.Content.Length;
    Console.WriteLine($"Content length: {contentLength}");
}
```

View File

@@ -0,0 +1,45 @@
```csharp title="C#"
using System.Net.Http;
using System.Net.Http.Json;
using System.Text.Json.Serialization;
/// <summary>
/// JSON request body for the <c>/chunk</c> endpoint. Every property is serialized
/// under the snake_case name given by its <c>JsonPropertyName</c> attribute.
/// </summary>
/// <param name="Text">Serialized as <c>text</c>; the only parameter without a default.</param>
/// <param name="MaxCharacters">Serialized as <c>max_characters</c>; defaults to <c>null</c>. Presumably the per-chunk size limit (TODO confirm against the API).</param>
/// <param name="Overlap">Serialized as <c>overlap</c>; defaults to <c>null</c>. Presumably the overlap between adjacent chunks (TODO confirm against the API).</param>
/// <param name="ChunkerType">Serialized as <c>chunker_type</c>; defaults to <c>null</c>.</param>
public record ChunkRequest(
[property: JsonPropertyName("text")] string Text,
[property: JsonPropertyName("max_characters")] int? MaxCharacters = null,
[property: JsonPropertyName("overlap")] int? Overlap = null,
[property: JsonPropertyName("chunker_type")] string? ChunkerType = null
);
/// <summary>
/// Shape the <c>/chunk</c> response body is deserialized into.
/// </summary>
/// <param name="Chunks">Bound to the JSON property <c>chunks</c>.</param>
/// <param name="ChunkCount">Bound to the JSON property <c>chunk_count</c>.</param>
public record ChunkResponse(
[property: JsonPropertyName("chunks")] List<ChunkItem> Chunks,
[property: JsonPropertyName("chunk_count")] int ChunkCount
);
/// <summary>
/// A single entry of <c>ChunkResponse.Chunks</c>.
/// </summary>
/// <param name="Content">Bound to the JSON property <c>content</c>.</param>
/// <param name="ChunkIndex">Bound to the JSON property <c>chunk_index</c>.</param>
public record ChunkItem(
[property: JsonPropertyName("content")] string Content,
[property: JsonPropertyName("chunk_index")] int ChunkIndex
);
class Program
{
    /// <summary>
    /// Posts a <c>ChunkRequest</c> as JSON to the local <c>/chunk</c> endpoint and prints
    /// the chunk count followed by a preview (at most 50 characters) of every chunk.
    /// On a non-success status code the status is printed and nothing is deserialized.
    /// </summary>
    static async Task Main()
    {
        // HttpClient and HttpResponseMessage are IDisposable; using declarations
        // release the connection and response buffers when Main exits.
        using var client = new HttpClient();
        var request = new ChunkRequest(
            Text: "Your long text content here...",
            MaxCharacters: 1000,
            Overlap: 50,
            ChunkerType: "text"
        );

        using var response = await client.PostAsJsonAsync("http://localhost:8000/chunk", request);
        if (!response.IsSuccessStatusCode)
        {
            // An error body is not a ChunkResponse; report the status instead of
            // letting deserialization fail or yield an empty result.
            Console.WriteLine($"Error: {response.StatusCode}");
            return;
        }

        var result = await response.Content.ReadFromJsonAsync<ChunkResponse>();
        Console.WriteLine($"Created {result?.ChunkCount} chunks");

        // A null result or a null Chunks list is treated as "no chunks".
        foreach (var chunk in result?.Chunks ?? [])
        {
            // Clamp the preview so chunks shorter than 50 characters do not throw.
            var previewLength = Math.Min(50, chunk.Content.Length);
            Console.WriteLine($"Chunk {chunk.ChunkIndex}: {chunk.Content[..previewLength]}...");
        }
    }
}
```

View File

@@ -0,0 +1,25 @@
```csharp title="C#"
using System.Net.Http;
using System.Net.Http.Json;
// The client, the file stream, the multipart body and the response are all
// IDisposable. Using declarations dispose them in reverse order when the script
// ends, so nothing is left open and no nesting is needed.
using var client = new HttpClient();
using var fileStream = File.OpenRead("document.pdf");
using var content = new MultipartFormDataContent();

// "files" is the form field name; "document.pdf" is the file name sent with the part.
content.Add(new StreamContent(fileStream), "files", "document.pdf");

using var response = await client.PostAsync("http://localhost:8000/extract", content);
if (response.IsSuccessStatusCode)
{
    // Print the raw response body as returned by the server.
    var json = await response.Content.ReadAsStringAsync();
    Console.WriteLine(json);
}
else
{
    Console.WriteLine($"Error: {response.StatusCode}");
}
```

View File

@@ -0,0 +1,44 @@
```csharp title="C#"
using Kreuzberg;
// Each feature area is configured in its own object, then assembled into the
// ExtractionConfig passed to the extraction call.
var ocrSettings = new OcrConfig
{
    Enabled = true,
    Backend = OcrBackendType.Tesseract,
    Languages = ["eng"],
};

var imageSettings = new ImageExtractionConfig
{
    Enabled = true,
    MinImageHeight = 100,
    MinImageWidth = 100,
};

var chunkingSettings = new ChunkingConfig
{
    Enabled = true,
    ChunkerType = ChunkerType.Text,
    MaxCharacters = 2000,
    Overlap = 100,
};

var languageSettings = new LanguageDetectionConfig { Enabled = true };

var config = new ExtractionConfig
{
    OutputFormat = OutputFormat.Markdown,
    UseCache = true,
    Ocr = ocrSettings,
    ImageExtraction = imageSettings,
    Chunking = chunkingSettings,
    LanguageDetection = languageSettings,
};

try
{
    // The second argument is passed as null, exactly as in the other ExtractFile calls.
    var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, config);
    Console.WriteLine($"Content: {extraction.Content}");
    // Metadata may be null, hence the null-conditional access.
    Console.WriteLine($"Language: {extraction.Metadata?.LanguageDetection}");
    Console.WriteLine($"Format: {extraction.OutputFormat}");
}
catch (KreuzbergException extractionError)
{
    Console.WriteLine($"Extraction error: {extractionError.Message}");
}
```

View File

@@ -0,0 +1,18 @@
```csharp title="C#"
using Kreuzberg;
// Path passed to the extractor; the name suggests it is meant not to exist so
// that the catch blocks below are exercised.
const string missingPath = "nonexistent.pdf";

try
{
    var extraction = KreuzbergLib.ExtractFileSync(missingPath, null, null);
    Console.WriteLine(extraction.Content);
}
catch (KreuzbergException kreuzbergError)
{
    // Library errors expose a code in addition to the message.
    Console.WriteLine($"Error Code: {kreuzbergError.Code}");
    Console.WriteLine($"Error Message: {kreuzbergError.Message}");
}
catch (Exception unexpected)
{
    // Anything that is not a KreuzbergException ends up here.
    Console.WriteLine($"Unexpected error: {unexpected.Message}");
}
```

View File

@@ -0,0 +1,22 @@
```csharp title="C#"
using Kreuzberg;
try
{
    var payload = File.ReadAllBytes("document.unsupported");
    var extraction = KreuzbergLib.ExtractBytesSync(payload, "application/x-custom", null);
    Console.WriteLine(extraction.Content);
}
catch (KreuzbergException failure)
{
    // Map the error code to its message: codes 1 and 2 get fixed texts,
    // every other code falls through to the generic line.
    var report = failure.Code switch
    {
        1 => "Validation error: Invalid MIME type",
        2 => "Format error: MIME type not supported",
        _ => $"Extraction failed with error {failure.Code}: {failure.Message}",
    };
    Console.WriteLine(report);
}
```

View File

@@ -0,0 +1,10 @@
```csharp title="C#"
using Kreuzberg;
// Load the PDF without blocking, then extract it from memory.
var pdfBytes = await File.ReadAllBytesAsync("document.pdf");
var textConfig = new ExtractionConfig { OutputFormat = OutputFormat.Text };

// The MIME type is passed explicitly alongside the raw bytes.
var extraction = await KreuzbergLib.ExtractBytes(pdfBytes, "application/pdf", textConfig);

Console.WriteLine(extraction.Content);
Console.WriteLine($"MIME Type: {extraction.MimeType}");
```

View File

@@ -0,0 +1,10 @@
```csharp title="C#"
using Kreuzberg;
// Synchronous variant: read the whole PDF into memory, then extract from the bytes.
var pdfBytes = File.ReadAllBytes("document.pdf");
var textConfig = new ExtractionConfig { OutputFormat = OutputFormat.Text };

// The MIME type is passed explicitly alongside the raw bytes.
var extraction = KreuzbergLib.ExtractBytesSync(pdfBytes, "application/pdf", textConfig);

Console.WriteLine(extraction.Content);
Console.WriteLine($"MIME Type: {extraction.MimeType}");
```

View File

@@ -0,0 +1,9 @@
```csharp title="C#"
using Kreuzberg;
var config = new ExtractionConfig { OutputFormat = OutputFormat.Text };
var result = await KreuzbergLib.ExtractFile("document.pdf", null, config);
Console.WriteLine(result.Content);
Console.WriteLine($"MIME Type: {result.MimeType}");
```

View File

@@ -0,0 +1,9 @@
```csharp title="C#"
using Kreuzberg;
var config = new ExtractionConfig { OutputFormat = OutputFormat.Text };
var result = KreuzbergLib.ExtractFileSync("document.pdf", null, config);
Console.WriteLine(result.Content);
Console.WriteLine($"MIME Type: {result.MimeType}");
```