Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/csharp/advanced/ChunkPageMapping.cs
+++ b/docs/snippets/csharp/advanced/ChunkPageMapping.cs
@@ -0,0 +1,24 @@
+using Kreuzberg;
+
+// Chunking plus page tracking, so every chunk's metadata can carry the pages it spans.
+var config = new ExtractionConfig
+{
+    Chunking = new ChunkingConfig { MaxCharacters = 500, Overlap = 50 },
+    Pages = new PageConfig { ExtractPages = true }
+};
+var result = KreuzbergLib.ExtractFileSync("document.pdf", config);
+
+if (result.Chunks != null)
+{
+    foreach (var chunk in result.Chunks)
+    {
+        // Both page numbers are nullable; report only chunks where both are known.
+        if (chunk.Metadata.FirstPage is { } first && chunk.Metadata.LastPage is { } last)
+        {
+            var pageRange = first == last ? $"Page {first}" : $"Pages {first}-{last}";
+            // Clamp the preview: a fixed [..50] slice throws on chunks shorter than 50 characters.
+            var preview = chunk.Content[..Math.Min(50, chunk.Content.Length)];
+            Console.WriteLine($"Chunk: {preview}... ({pageRange})");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/async_extraction.cs
+++ b/docs/snippets/csharp/advanced/async_extraction.cs
@@ -0,0 +1,33 @@
+using Kreuzberg;
+
+class Program
+{
+    // Extracts one document, then three more concurrently, reporting sizes on the console.
+    static async Task Main()
+    {
+        try
+        {
+            // Single awaited extraction.
+            var single = await KreuzbergLib.ExtractFileAsync("document.pdf");
+            Console.WriteLine($"Content length: {single.Content.Length}");
+            Console.WriteLine($"MIME type: {single.MimeType}");
+
+            // Start all three extractions before awaiting any of them.
+            var first = KreuzbergLib.ExtractFileAsync("file1.pdf");
+            var second = KreuzbergLib.ExtractFileAsync("file2.pdf");
+            var third = KreuzbergLib.ExtractFileAsync("file3.pdf");
+            var extracted = await Task.WhenAll(first, second, third);
+
+            foreach (var document in extracted)
+            {
+                var length = document.Content.Length;
+                Console.WriteLine($"Extracted {length} characters");
+            }
+        }
+        catch (KreuzbergException failure)
+        {
+            var reason = failure.Message;
+            Console.WriteLine($"Extraction failed: {reason}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/batch_processing.cs
+++ b/docs/snippets/csharp/advanced/batch_processing.cs
@@ -0,0 +1,46 @@
+using Kreuzberg;
+using System.Collections.Generic;
+
+class Program
+{
+    // Demonstrates two ways to process a batch of files with one shared configuration:
+    // one file at a time, and all files concurrently via Task.WhenAll.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            UseCache = true,
+            EnableQualityProcessing = true
+        };
+
+        var filePaths = new[]
+        {
+            "document1.pdf",
+            "document2.pdf",
+            "document3.pdf"
+        };
+
+        try
+        {
+            // Approach 1: sequential. Each extraction finishes before the next begins,
+            // so progress can be reported per file.
+            foreach (var filePath in filePaths)
+            {
+                var result = await KreuzbergLib.ExtractFileAsync(filePath, config);
+                Console.WriteLine($"Processed {filePath}: {result.Content.Length} chars");
+            }
+
+            // Approach 2: concurrent. ToArray() starts every extraction immediately;
+            // WhenAll then yields the results in the same order as filePaths.
+            var tasks = filePaths.Select(path => KreuzbergLib.ExtractFileAsync(path, config)).ToArray();
+            var results = await Task.WhenAll(tasks);
+
+            var totalChars = results.Sum(r => r.Content.Length);
+            Console.WriteLine($"Total extracted: {totalChars} characters");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Batch processing error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/chunk_page_mapping.md
+++ b/docs/snippets/csharp/advanced/chunk_page_mapping.md
@@ -0,0 +1,52 @@
+```csharp title="C#"
+using Kreuzberg;
+
+class Program
+{
+    // Prints a preview (up to 50 characters) of each chunk together with the page range it covers.
+    static async Task Main()
+    {
+        var chunking = new ChunkingConfig { MaxCharacters = 500, Overlap = 50 };
+        var pages = new PageConfig { ExtractPages = true };
+        var config = new ExtractionConfig
+        {
+            Chunking = chunking,
+            Pages = pages
+        };
+
+        try
+        {
+            var result = await KreuzbergLib
+                .ExtractFileAsync("document.pdf", config)
+                .ConfigureAwait(false);
+
+            if (result.Chunks == null)
+            {
+                return;
+            }
+
+            foreach (var chunk in result.Chunks)
+            {
+                var meta = chunk.Metadata;
+                if (!meta.FirstPage.HasValue || !meta.LastPage.HasValue)
+                {
+                    // Page information is incomplete for this chunk; nothing to print.
+                    continue;
+                }
+
+                var first = meta.FirstPage.Value;
+                var last = meta.LastPage.Value;
+                var pageRange = first == last ? $"Page {first}" : $"Pages {first}-{last}";
+
+                // Never slice past the end of a chunk shorter than 50 characters.
+                var previewLength = Math.Min(50, chunk.Content.Length);
+                Console.WriteLine($"Chunk: {chunk.Content[..previewLength]}... ({pageRange})");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/chunking_config.cs
+++ b/docs/snippets/csharp/advanced/chunking_config.cs
@@ -0,0 +1,75 @@
+using Kreuzberg;
+
+class Program
+{
+    // Chunks a PDF and requests an embedding for every chunk.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 1000,
+                MaxOverlap = 200,
+                Embedding = new EmbeddingConfig
+                {
+                    Model = EmbeddingModelType.Preset("all-minilm-l6-v2"),
+                    Normalize = true,
+                    BatchSize = 32
+                }
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config)
+                .ConfigureAwait(false);
+
+            // result.Chunks may be null, so guard before counting or iterating.
+            Console.WriteLine($"Chunks: {result.Chunks?.Count ?? 0}");
+            if (result.Chunks != null)
+            {
+                foreach (var chunk in result.Chunks)
+                {
+                    Console.WriteLine($"Content length: {chunk.Content.Length}");
+                    if (chunk.Embedding != null)
+                    {
+                        Console.WriteLine($"Embedding dimensions: {chunk.Embedding.Length}");
+                    }
+                }
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+
+    // Chunks a Markdown file with PrependHeadingContext enabled and prints the start of each chunk.
+    static async Task PrependHeadingContextExample()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig { MaxChars = 500, MaxOverlap = 50, PrependHeadingContext = true }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.md", config)
+                .ConfigureAwait(false);
+
+            if (result.Chunks != null)
+            {
+                foreach (var chunk in result.Chunks)
+                {
+                    // Each chunk's content is prefixed with its heading breadcrumb
+                    Console.WriteLine(chunk.Content[..Math.Min(100, chunk.Content.Length)]);
+                }
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/chunking_config.md
+++ b/docs/snippets/csharp/advanced/chunking_config.md
@@ -0,0 +1,132 @@
+```csharp title="C#"
+using Kreuzberg;
+
+class Program
+{
+    // Chunks a PDF and requests an embedding for every chunk.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 1000,
+                MaxOverlap = 200,
+                Embedding = new EmbeddingConfig
+                {
+                    Model = EmbeddingModelType.Preset("all-minilm-l6-v2"),
+                    Normalize = true,
+                    BatchSize = 32
+                }
+            }
+        };
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config).ConfigureAwait(false);
+            // result.Chunks may be null, so guard before counting or iterating.
+            Console.WriteLine($"Chunks: {result.Chunks?.Count ?? 0}");
+            if (result.Chunks != null)
+            {
+                foreach (var chunk in result.Chunks)
+                {
+                    Console.WriteLine($"Content length: {chunk.Content.Length}");
+                    if (chunk.Embedding != null)
+                    {
+                        Console.WriteLine($"Embedding dimensions: {chunk.Embedding.Length}");
+                    }
+                }
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
+```
+
+```csharp title="C# - Markdown with Heading Context"
+using Kreuzberg;
+
+class Program
+{
+    // Chunks a Markdown file using tokenizer-based sizing and prints each chunk's headings.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 500,
+                MaxOverlap = 50,
+                Sizing = new ChunkSizingConfig { Type = "tokenizer", Model = "Xenova/gpt-4o" }
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.md", config)
+                .ConfigureAwait(false);
+
+            // result.Chunks may be null, so guard before iterating.
+            if (result.Chunks != null)
+            {
+                foreach (var chunk in result.Chunks)
+                {
+                    // Chunks without heading metadata are skipped.
+                    if (chunk.HeadingContext?.Headings != null)
+                    {
+                        Console.WriteLine("Headings:");
+                        foreach (var heading in chunk.HeadingContext.Headings)
+                        {
+                            Console.WriteLine($"  Level {heading.Level}: {heading.Text}");
+                        }
+                    }
+                }
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
+```
+
+```csharp title="C# - Prepend Heading Context"
+using Kreuzberg;
+
+class Program
+{
+    // Chunks a Markdown file with PrependHeadingContext enabled and prints the start
+    // of each resulting chunk.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig { MaxChars = 500, MaxOverlap = 50, PrependHeadingContext = true }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.md", config)
+                .ConfigureAwait(false);
+
+            // result.Chunks may be null, so guard before iterating.
+            if (result.Chunks != null)
+            {
+                foreach (var chunk in result.Chunks)
+                {
+                    // Each chunk's content is prefixed with its heading breadcrumb
+                    // Print at most the first 100 characters of the content.
+                    Console.WriteLine(chunk.Content[..Math.Min(100, chunk.Content.Length)]);
+                }
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/chunking_rag.cs
+++ b/docs/snippets/csharp/advanced/chunking_rag.cs
@@ -0,0 +1,83 @@
+using Kreuzberg;
+using System.Collections.Generic;
+using System.Linq;
+
+class RagPipelineExample
+{
+    // Extracts and embeds a paper's chunks, loads them into an in-memory store, and prints the top matches.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 500,
+                MaxOverlap = 50,
+                Embedding = new EmbeddingConfig
+                {
+                    Model = EmbeddingModelType.Preset("all-mpnet-base-v2"),
+                    Normalize = true,
+                    BatchSize = 16
+                }
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("research_paper.pdf", config)
+                .ConfigureAwait(false);
+
+            // result.Chunks may be null; hand the store an empty list instead.
+            var vectorStore = await BuildVectorStoreAsync(result.Chunks ?? new List<Chunk>())
+                .ConfigureAwait(false);
+
+            var query = "machine learning optimization";
+            var relevantChunks = await SearchAsync(vectorStore, query).ConfigureAwait(false);
+
+            Console.WriteLine($"Found {relevantChunks.Count} relevant chunks");
+            foreach (var chunk in relevantChunks.Take(3))
+            {
+                // Clamp the preview: a fixed [..80] slice throws on content shorter than 80 characters.
+                var preview = chunk.Content[..Math.Min(80, chunk.Content.Length)];
+                Console.WriteLine($"Content: {preview}...");
+                Console.WriteLine($"Similarity: {chunk.Similarity:F3}\n");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+
+    // Copies each chunk's content and embedding into a VectorEntry; chunks without an
+    // embedding get an empty vector. Similarity starts at 0.
+    static async Task<List<VectorEntry>> BuildVectorStoreAsync(
+        IEnumerable<Chunk> chunks)
+    {
+        return await Task.Run(() =>
+            chunks.Select(c => new VectorEntry
+            {
+                Content = c.Content,
+                Embedding = c.Embedding?.ToArray() ?? Array.Empty<float>(),
+                Similarity = 0f
+            }).ToList()
+        ).ConfigureAwait(false);
+    }
+
+    // Placeholder search: orders the store by its stored Similarity values.
+    // NOTE: query is not used yet, so no real query-to-chunk scoring happens here.
+    static async Task<List<VectorEntry>> SearchAsync(List<VectorEntry> store, string query)
+    {
+        return await Task.Run(() =>
+            store.OrderByDescending(e => e.Similarity).ToList()
+        ).ConfigureAwait(false);
+    }
+
+    // One stored chunk: its text, its embedding vector, and a similarity score.
+    class VectorEntry
+    {
+        public string Content { get; set; } = string.Empty;
+        public float[] Embedding { get; set; } = Array.Empty<float>();
+        public float Similarity { get; set; }
+    }
+}
--- a/docs/snippets/csharp/advanced/chunking_rag.md
+++ b/docs/snippets/csharp/advanced/chunking_rag.md
@@ -0,0 +1,85 @@
+```csharp title="C#"
+using Kreuzberg;
+using System.Collections.Generic;
+using System.Linq;
+
+class RagPipelineExample
+{
+    // Extracts and embeds a paper's chunks, loads them into an in-memory store, and prints the top matches.
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 500,
+                MaxOverlap = 50,
+                Embedding = new EmbeddingConfig
+                {
+                    Model = EmbeddingModelType.Preset("all-mpnet-base-v2"),
+                    Normalize = true,
+                    BatchSize = 16
+                }
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("research_paper.pdf", config)
+                .ConfigureAwait(false);
+
+            // result.Chunks may be null; hand the store an empty list instead.
+            var vectorStore = await BuildVectorStoreAsync(result.Chunks ?? new List<Chunk>())
+                .ConfigureAwait(false);
+
+            var query = "machine learning optimization";
+            var relevantChunks = await SearchAsync(vectorStore, query).ConfigureAwait(false);
+
+            Console.WriteLine($"Found {relevantChunks.Count} relevant chunks");
+            foreach (var chunk in relevantChunks.Take(3))
+            {
+                // Clamp the preview: a fixed [..80] slice throws on content shorter than 80 characters.
+                var preview = chunk.Content[..Math.Min(80, chunk.Content.Length)];
+                Console.WriteLine($"Content: {preview}...");
+                Console.WriteLine($"Similarity: {chunk.Similarity:F3}\n");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+
+    // Copies each chunk's content and embedding into a VectorEntry; chunks without an
+    // embedding get an empty vector. Similarity starts at 0.
+    static async Task<List<VectorEntry>> BuildVectorStoreAsync(
+        IEnumerable<Chunk> chunks)
+    {
+        return await Task.Run(() =>
+            chunks.Select(c => new VectorEntry
+            {
+                Content = c.Content,
+                Embedding = c.Embedding?.ToArray() ?? Array.Empty<float>(),
+                Similarity = 0f
+            }).ToList()
+        ).ConfigureAwait(false);
+    }
+
+    // Placeholder search: orders the store by its stored Similarity values.
+    // NOTE: query is not used yet, so no real query-to-chunk scoring happens here.
+    static async Task<List<VectorEntry>> SearchAsync(List<VectorEntry> store, string query)
+    {
+        return await Task.Run(() =>
+            store.OrderByDescending(e => e.Similarity).ToList()
+        ).ConfigureAwait(false);
+    }
+
+    // One stored chunk: its text, its embedding vector, and a similarity score.
+    class VectorEntry
+    {
+        public string Content { get; set; } = string.Empty;
+        public float[] Embedding { get; set; } = Array.Empty<float>();
+        public float Similarity { get; set; }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/combining_all_features.md
+++ b/docs/snippets/csharp/advanced/combining_all_features.md
@@ -0,0 +1,72 @@
+```csharp title="C#"
+using System;
+using System.Threading.Tasks;
+using Kreuzberg;
+
+// Runs one extraction with language detection, token reduction, chunking with embeddings,
+// and keyword extraction enabled, then prints a summary of each part of the result.
+async Task RunRagPipeline()
+{
+    var config = new ExtractionConfig
+    {
+        EnableQualityProcessing = true,
+        LanguageDetection = new LanguageDetectionConfig
+        {
+            Enabled = true,
+            DetectMultiple = true,
+            MinConfidence = 0.8,
+        },
+        TokenReduction = new TokenReductionConfig
+        {
+            Mode = "moderate",
+            PreserveImportantWords = true,
+        },
+
+        Chunking = new ChunkingConfig
+        {
+            MaxChars = 512,
+            MaxOverlap = 50,
+            // Typed embedding configuration selecting the "balanced" preset.
+            Embedding = new EmbeddingConfig
+            {
+                Model = EmbeddingModelType.Preset("balanced"),
+            },
+            Enabled = true,
+        },
+        Keywords = new KeywordConfig
+        {
+            Algorithm = "yake",
+            MaxKeywords = 10,
+        },
+    };
+
+    var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+
+    Console.WriteLine($"Content length: {result.Content.Length} characters");
+
+    if (result.DetectedLanguages?.Count > 0)
+    {
+        Console.WriteLine($"Languages: {string.Join(", ", result.DetectedLanguages)}");
+    }
+
+    if (result.Chunks?.Count > 0)
+    {
+        Console.WriteLine($"Total chunks: {result.Chunks.Count}");
+        var firstChunk = result.Chunks[0];
+        Console.WriteLine($"First chunk tokens: {firstChunk.Metadata.TokenCount}");
+        if (firstChunk.Embedding?.Length > 0)
+        {
+            Console.WriteLine($"Embedding dimensions: {firstChunk.Embedding.Length}");
+        }
+    }
+
+    Console.WriteLine($"Quality score: {result.QualityScore}");
+
+    if (result.ExtractedKeywords?.Count > 0)
+    {
+        Console.WriteLine($"Keywords: {string.Join(", ", result.ExtractedKeywords)}");
+    }
+}
+
+await RunRagPipeline();
+```
--- a/docs/snippets/csharp/advanced/custom_cache.cs
+++ b/docs/snippets/csharp/advanced/custom_cache.cs
@@ -0,0 +1,63 @@
+using Kreuzberg;
+using System.Collections.Generic;
+
+// In-memory cache in front of KreuzbergLib.ExtractFileAsync, keyed by file path and configuration.
+class CustomCacheBackend
+{
+    private readonly Dictionary<string, ExtractionResult> _cache = new();
+
+    // Returns the cached result for (filePath, config); on a miss, extracts, stores and returns it.
+    public async Task<ExtractionResult> GetOrExtractAsync(string filePath, ExtractionConfig config)
+    {
+        var cacheKey = GenerateCacheKey(filePath, config);
+        if (_cache.TryGetValue(cacheKey, out var cachedResult))
+        {
+            Console.WriteLine("Using cached result");
+            return cachedResult;
+        }
+        var result = await KreuzbergLib.ExtractFileAsync(filePath, config);
+        _cache[cacheKey] = result;
+        Console.WriteLine("Result cached");
+        return result;
+    }
+
+    // Keys on the serialized configuration rather than config.ToString().GetHashCode(): ToString()
+    // is just the type name unless overridden, and different configurations can share a hash code.
+    // NOTE(review): assumes ExtractionConfig serializes with System.Text.Json — confirm.
+    private static string GenerateCacheKey(string filePath, ExtractionConfig config)
+    {
+        var configJson = System.Text.Json.JsonSerializer.Serialize(config);
+        return $"{filePath}:{configJson}";
+    }
+
+    // Drops every cached result.
+    public void ClearCache()
+    {
+        _cache.Clear();
+        Console.WriteLine("Cache cleared");
+    }
+}
+
+class Program
+{
+    // Requests the same file twice through CustomCacheBackend, then empties the cache.
+    static async Task Main()
+    {
+        var cache = new CustomCacheBackend();
+        var config = new ExtractionConfig { UseCache = true };
+        try
+        {
+            // Two identical requests: same path, same configuration object.
+            for (var pass = 1; pass <= 2; pass++)
+            {
+                var extracted = await cache.GetOrExtractAsync("document.pdf", config);
+                Console.WriteLine($"Result {pass}: {extracted.Content.Length} chars");
+            }
+            cache.ClearCache();
+        }
+        catch (KreuzbergException error)
+        {
+            Console.WriteLine($"Error: {error.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/custom_extractor.cs
+++ b/docs/snippets/csharp/advanced/custom_extractor.cs
@@ -0,0 +1,68 @@
+using Kreuzberg;
+using System.Text.Json;
+
+// NOTE: IDocumentExtractor interface is not available in C# bindings
+
+class CustomJsonProcessor
+{
+    // Decodes UTF-8 JSON and returns a result whose Content holds every string value, one per line.
+    // Throws KreuzbergParsingException when the input is not valid JSON.
+    public static ExtractionResult ProcessJson(byte[] content, string mimeType)
+    {
+        try
+        {
+            var jsonContent = System.Text.Encoding.UTF8.GetString(content);
+
+            // JsonDocument is IDisposable and rents pooled buffers; release it once the text is extracted.
+            using var document = JsonDocument.Parse(jsonContent);
+            var text = ExtractText(document.RootElement);
+
+            return new ExtractionResult
+            {
+                Content = text,
+                MimeType = mimeType,
+                Metadata = new Metadata(),
+                Tables = new List<Table>(),
+                Success = true
+            };
+        }
+        catch (JsonException ex)
+        {
+            throw new KreuzbergParsingException($"Failed to parse JSON: {ex.Message}");
+        }
+    }
+
+    // Recursively concatenates string values (each followed by a newline); property names and
+    // non-string values contribute nothing.
+    private static string ExtractText(JsonElement element)
+    {
+        return element.ValueKind switch
+        {
+            JsonValueKind.String => element.GetString() + "\n",
+            JsonValueKind.Array => string.Concat(element.EnumerateArray().Select(ExtractText)),
+            JsonValueKind.Object => string.Concat(element.EnumerateObject().Select(p => ExtractText(p.Value))),
+            _ => ""
+        };
+    }
+}
+
+class Program
+{
+    // Feeds a small in-memory JSON document through CustomJsonProcessor and prints the text.
+    static void Main()
+    {
+        const string sampleJson = @"{""name"": ""John"", ""age"": 30}";
+        var payload = System.Text.Encoding.UTF8.GetBytes(sampleJson);
+
+        try
+        {
+            var extracted = CustomJsonProcessor.ProcessJson(payload, "application/json");
+            var text = extracted.Content;
+            Console.WriteLine($"Extracted: {text}");
+        }
+        catch (KreuzbergException failure)
+        {
+            Console.WriteLine($"Error: {failure.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/custom_ocr_backend.cs
+++ b/docs/snippets/csharp/advanced/custom_ocr_backend.cs
@@ -0,0 +1,84 @@
+using Kreuzberg;
+using System.Net.Http;
+using System.Text.Json;
+
+// OCR backend that posts image bytes to a remote HTTP endpoint and returns the response body.
+// IDisposable is declared explicitly because the backend is consumed with a using declaration.
+class CloudOcrBackend : IOcrBackend, IDisposable
+{
+    private readonly string _apiKey;
+    private readonly HttpClient _httpClient;
+
+    public CloudOcrBackend(string apiKey)
+    {
+        _apiKey = apiKey;
+        _httpClient = new HttpClient();
+    }
+
+    public string Name => "cloud-ocr";
+
+    // Uploads the image and returns the service's response body unchanged.
+    // Throws KreuzbergOcrException when the HTTP request fails.
+    public string Process(ReadOnlySpan<byte> imageBytes, OcrConfig? config)
+    {
+        // A ref struct such as ReadOnlySpan<byte> cannot be captured by a lambda: copy it first.
+        var bytes = imageBytes.ToArray();
+
+        // Process is synchronous, so the async HTTP round trip is run and blocked on here.
+        return Task.Run(async () =>
+        {
+            try
+            {
+                using var content = new MultipartFormDataContent();
+                content.Add(new ByteArrayContent(bytes), "image");
+
+                using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.example.com/ocr")
+                {
+                    Content = content,
+                    Headers = { { "Authorization", $"Bearer {_apiKey}" } }
+                };
+
+                using var response = await _httpClient.SendAsync(request);
+                response.EnsureSuccessStatusCode();
+                return await response.Content.ReadAsStringAsync();
+            }
+            catch (HttpRequestException ex)
+            {
+                throw new KreuzbergOcrException($"Cloud OCR service error: {ex.Message}");
+            }
+        }).GetAwaiter().GetResult();
+    }
+
+    // Releases the HttpClient owned by this backend.
+    public void Dispose()
+    {
+        _httpClient?.Dispose();
+    }
+}
+
+class Program
+{
+    // Registers the cloud OCR backend, then runs an extraction whose OCR config names
+    // "cloud-ocr" as its backend.
+    static void Main()
+    {
+        using var backend = new CloudOcrBackend("your-api-key");
+        KreuzbergLib.RegisterOcrBackend(backend);
+
+        try
+        {
+            var config = new ExtractionConfig
+            {
+                Ocr = new OcrConfig { Backend = "cloud-ocr" }
+            };
+
+            var extracted = KreuzbergLib.ExtractFileSync("document.pdf", config);
+            var text = extracted.Content;
+            Console.WriteLine($"OCR text: {text}");
+        }
+        catch (KreuzbergException failure)
+        {
+            var reason = failure.Message;
+            Console.WriteLine($"Error: {reason}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/custom_postprocessor.cs
+++ b/docs/snippets/csharp/advanced/custom_postprocessor.cs
@@ -0,0 +1,80 @@
+using Kreuzberg;
+
+// Post-processor that stores the whitespace-delimited word count under "word_count".
+class WordCountPostProcessor : IPostProcessor
+{
+    private static readonly char[] Separators = { ' ', '\n', '\r', '\t' };
+
+    public string Name => "word-count";
+    public int Priority => 10;
+
+    // Counts the non-empty tokens of result.Content and records the total in the metadata.
+    public ExtractionResult Process(ExtractionResult result)
+    {
+        var words = result.Content.Split(Separators, StringSplitOptions.RemoveEmptyEntries);
+        var wordCount = words.Length;
+
+        // Create the Additional dictionary on first use.
+        result.Metadata.Additional ??= new Dictionary<string, System.Text.Json.Nodes.JsonNode?>();
+        result.Metadata.Additional["word_count"] = System.Text.Json.Nodes.JsonValue.Create(wordCount);
+
+        return result;
+    }
+}
+
+// Post-processor that records a sentiment label under the "sentiment" metadata key.
+class SentimentPostProcessor : IPostProcessor
+{
+    public string Name => "sentiment-analyzer";
+    public int Priority => 5;
+
+    // Labels result.Content and writes the label into result.Metadata.Additional.
+    public ExtractionResult Process(ExtractionResult result)
+    {
+        var label = AnalyzeSentiment(result.Content);
+
+        result.Metadata.Additional ??= new Dictionary<string, System.Text.Json.Nodes.JsonNode?>();
+        result.Metadata.Additional["sentiment"] = System.Text.Json.Nodes.JsonValue.Create(label);
+
+        return result;
+    }
+
+    // Placeholder analysis: "neutral" for any non-empty text, "unknown" for empty text.
+    private string AnalyzeSentiment(string text)
+    {
+        return text.Length == 0 ? "unknown" : "neutral";
+    }
+}
+
+class Program
+{
+    // Registers both post-processors, extracts a document and prints the values they added.
+    static void Main()
+    {
+        var wordCounter = new WordCountPostProcessor();
+        var sentimentAnalyzer = new SentimentPostProcessor();
+        KreuzbergLib.RegisterPostProcessor(wordCounter);
+        KreuzbergLib.RegisterPostProcessor(sentimentAnalyzer);
+
+        try
+        {
+            var extra = KreuzbergLib.ExtractFileSync("document.pdf").Metadata.Additional;
+            if (extra == null)
+            {
+                return;
+            }
+            if (extra.TryGetValue("word_count", out var words))
+            {
+                Console.WriteLine($"Word count: {words}");
+            }
+            if (extra.TryGetValue("sentiment", out var mood))
+            {
+                Console.WriteLine($"Sentiment: {mood}");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/custom_validator.cs
+++ b/docs/snippets/csharp/advanced/custom_validator.cs
@@ -0,0 +1,82 @@
+using Kreuzberg;
+
+// Validator that rejects results whose content is shorter than a configured minimum.
+class MinLengthValidator : IValidator
+{
+    private readonly int _minLength;
+
+    public MinLengthValidator(int minLength) => _minLength = minLength;
+
+    public string Name => "min-length";
+    public int Priority => 10;
+
+    // Throws KreuzbergValidationException when result.Content has fewer than _minLength characters.
+    public void Validate(ExtractionResult result)
+    {
+        var actualLength = result.Content.Length;
+        if (actualLength >= _minLength)
+        {
+            return;
+        }
+
+        throw new KreuzbergValidationException($"Content too short: {actualLength} < {_minLength}");
+    }
+}
+
+// Validator that rejects results whose quality score is below a configured threshold.
+class QualityScoreValidator : IValidator
+{
+    private readonly double _minScore;
+
+    public QualityScoreValidator(double minScore) => _minScore = minScore;
+
+    public string Name => "quality-score";
+    public int Priority => 5;
+
+    // Throws KreuzbergValidationException when result.QualityScore compares below _minScore.
+    public void Validate(ExtractionResult result)
+    {
+        var score = result.QualityScore;
+        // Keep the "<" form: a score that does not compare below the threshold passes.
+        var tooLow = score < _minScore;
+
+        if (tooLow)
+        {
+            var message = $"Quality score too low: {score:F2} < {_minScore:F2}";
+            throw new KreuzbergValidationException(message);
+        }
+    }
+}
+
+class Program
+{
+    // Registers both validators, runs an extraction and reports whether it completed without error.
+    static void Main()
+    {
+        var lengthCheck = new MinLengthValidator(minLength: 50);
+        var qualityCheck = new QualityScoreValidator(minScore: 0.7);
+
+        KreuzbergLib.RegisterValidator(lengthCheck);
+        KreuzbergLib.RegisterValidator(qualityCheck);
+
+        try
+        {
+            var config = new ExtractionConfig { EnableQualityProcessing = true };
+            var extracted = KreuzbergLib.ExtractFileSync("document.pdf", config);
+
+            // No exception was thrown by the extraction call.
+            Console.WriteLine("Validation passed");
+            var length = extracted.Content.Length;
+            Console.WriteLine($"Content length: {length}");
+        }
+        // The validation exception is caught before the general KreuzbergException handler.
+        catch (KreuzbergValidationException invalid)
+        {
+            Console.WriteLine($"Validation failed: {invalid.Message}");
+        }
+        catch (KreuzbergException failure)
+        {
+            Console.WriteLine($"Error: {failure.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/embedding_config.md
+++ b/docs/snippets/csharp/advanced/embedding_config.md
@@ -0,0 +1,18 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Embedding settings: the "all-mpnet-base-v2" preset, batch size 16, normalization on,
+// and download progress shown.
+var embedding = new EmbeddingConfig
+{
+    Model = EmbeddingModelType.Preset("all-mpnet-base-v2"),
+    BatchSize = 16,
+    Normalize = true,
+    ShowDownloadProgress = true
+};
+
+// Chunking with MaxChars = 1000 and the embedding settings above attached.
+var chunking = new ChunkingConfig { MaxChars = 1000, Embedding = embedding };
+
+var config = new ExtractionConfig { Chunking = chunking };
+```
--- a/docs/snippets/csharp/advanced/embedding_with_chunking.md
+++ b/docs/snippets/csharp/advanced/embedding_with_chunking.md
@@ -0,0 +1,49 @@
+```csharp title="C#"
+using Kreuzberg;
+using System;
+using System.Collections.Generic;
+using System.Threading.Tasks;
+
+// Chunking with embeddings configured through the "balanced" preset.
+var config = new ExtractionConfig
+{
+    Chunking = new ChunkingConfig
+    {
+        MaxChars = 512,
+        MaxOverlap = 50,
+        Embedding = new EmbeddingConfig
+        {
+            Model = EmbeddingModelType.Preset("balanced"),
+            Normalize = true,
+            BatchSize = 32,
+            ShowDownloadProgress = false
+        }
+    }
+};
+
+// Kreuzberg is the namespace imported above; the extraction call goes through KreuzbergLib.
+var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+var chunks = result.Chunks ?? new List<Chunk>();
+foreach (var (index, chunk) in chunks.WithIndex())
+{
+    var chunkId = $"doc_chunk_{index}";
+    Console.WriteLine($"Chunk {chunkId}: {chunk.Content[..Math.Min(50, chunk.Content.Length)]}");
+    if (chunk.Embedding != null)
+    {
+        Console.WriteLine($"  Embedding dimensions: {chunk.Embedding.Length}");
+    }
+}
+
+internal static class EnumerableExtensions
+{
+    // Lazily pairs every element of the sequence with its zero-based position.
+    public static IEnumerable<(int Index, T Item)> WithIndex<T>(this IEnumerable<T> items)
+    {
+        var position = -1;
+        foreach (var element in items)
+        {
+            yield return (++position, element);
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/error_handling.cs
+++ b/docs/snippets/csharp/advanced/error_handling.cs
@@ -0,0 +1,72 @@
+using Kreuzberg;
+
+class Program // Three error-handling examples: typed catches, byte extraction, and a missing file.
+{
+    static async Task Main()
+    {
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.pdf");
+            Console.WriteLine($"Extracted {result.Content.Length} characters");
+        }
+        catch (KreuzbergParsingException ex) // specific exception types are listed before the general handler
+        {
+            Console.WriteLine($"Failed to parse document: {ex.Message}");
+        }
+        catch (KreuzbergOcrException ex)
+        {
+            Console.WriteLine($"OCR processing failed: {ex.Message}");
+        }
+        catch (KreuzbergMissingDependencyException ex)
+        {
+            Console.WriteLine($"Missing dependency: {ex.Message}");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction error: {ex.Message}");
+        }
+
+        try
+        {
+            var config = new ExtractionConfig();
+            var pdfBytes = new byte[] { 0x25, 0x50, 0x44, 0x46 }; // ASCII "%PDF": header bytes only, not a full document
+
+            var result = await KreuzbergLib.ExtractBytesAsync(
+                pdfBytes,
+                "application/pdf",
+                config
+            );
+            // Show at most the first 100 characters of the extracted content.
+            var preview = result.Content.Length > 100
+                ? result.Content[..100] + "..."
+                : result.Content;
+
+            Console.WriteLine($"Extracted: {preview}");
+        }
+        catch (KreuzbergValidationException ex)
+        {
+            Console.WriteLine($"Invalid configuration: {ex.Message}");
+        }
+        catch (KreuzbergOcrException ex)
+        {
+            Console.WriteLine($"OCR failed: {ex.Message}");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction failed: {ex.Message}");
+        }
+
+        try
+        {
+            _ = await KreuzbergLib.ExtractFileAsync("nonexistent.pdf"); // result is irrelevant; only the failure path is shown
+        }
+        catch (KreuzbergIOException)
+        {
+            Console.WriteLine("File not found");
+        }
+        catch (Exception ex)
+        {
+            Console.WriteLine($"Unexpected error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/extract_from_bytes.cs
+++ b/docs/snippets/csharp/advanced/extract_from_bytes.cs
@@ -0,0 +1,66 @@
+using Kreuzberg;
+
+class Program // Extracts from in-memory bytes: default config, custom config, an image, and several files.
+{
+    static async Task Main()
+    {
+        try
+        {
+            var pdfBytes = await File.ReadAllBytesAsync("document.pdf");
+
+            var result = await KreuzbergLib.ExtractBytesAsync(
+                pdfBytes,
+                "application/pdf"
+            );
+
+            Console.WriteLine($"Content: {result.Content}");
+            Console.WriteLine($"MIME type: {result.MimeType}");
+            // Same bytes again, this time with an explicit configuration.
+            var config = new ExtractionConfig
+            {
+                UseCache = true,
+                EnableQualityProcessing = true
+            };
+
+            var result2 = await KreuzbergLib.ExtractBytesAsync(
+                pdfBytes,
+                "application/pdf",
+                config
+            );
+
+            Console.WriteLine($"Configured extraction: {result2.Content.Length} chars");
+            // Load real image data; an empty buffer cannot be decoded as a JPEG.
+            var imageBytes = await File.ReadAllBytesAsync("image.jpg");
+
+            var imageResult = await KreuzbergLib.ExtractBytesAsync(
+                imageBytes,
+                "image/jpeg"
+            );
+
+            Console.WriteLine($"Image text: {imageResult.Content}");
+            // Map each logical name to its (bytes, MIME type) pair.
+            var multipleFiles = new Dictionary<string, (byte[], string)>
+            {
+                { "file1", (await File.ReadAllBytesAsync("file1.pdf"), "application/pdf") },
+                { "file2", (await File.ReadAllBytesAsync("file2.pdf"), "application/pdf") }
+            };
+
+            foreach (var (name, (bytes, mimeType)) in multipleFiles)
+            {
+                var extractResult = await KreuzbergLib.ExtractBytesAsync(
+                    bytes,
+                    mimeType
+                );
+                Console.WriteLine($"{name}: {extractResult.Content.Length} chars");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction error: {ex.Message}");
+        }
+        catch (IOException ex)
+        {
+            Console.WriteLine($"File I/O error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/extract_from_url.cs
+++ b/docs/snippets/csharp/advanced/extract_from_url.cs
@@ -0,0 +1,73 @@
+using Kreuzberg;
+using System.Net.Http;
+
+class Program // Downloads documents over HTTP and extracts them from the downloaded bytes.
+{
+    static async Task Main()
+    {
+        using var httpClient = new HttpClient();
+
+        try
+        {
+            var url = "https://example.com/document.pdf";
+            var documentBytes = await httpClient.GetByteArrayAsync(url);
+
+            var result = await KreuzbergLib.ExtractBytesAsync(
+                documentBytes,
+                "application/pdf"
+            );
+
+            Console.WriteLine($"Extracted from URL: {result.Content.Length} chars");
+            // Extract the same bytes again with quality processing enabled.
+            var config = new ExtractionConfig
+            {
+                EnableQualityProcessing = true
+            };
+
+            var result2 = await KreuzbergLib.ExtractBytesAsync(
+                documentBytes,
+                "application/pdf",
+                config
+            );
+
+            Console.WriteLine($"Quality score: {result2.QualityScore}");
+
+            var urls = new[]
+            {
+                "https://example.com/doc1.pdf",
+                "https://example.com/doc2.pdf",
+                "https://example.com/doc3.pdf"
+            };
+            // One task per URL; each task reports its own failure and yields null instead of faulting the batch.
+            var downloadTasks = urls.Select(async u =>
+            {
+                try
+                {
+                    var bytes = await httpClient.GetByteArrayAsync(u);
+                    return await KreuzbergLib.ExtractBytesAsync(
+                        bytes,
+                        "application/pdf"
+                    );
+                }
+                catch (Exception ex) when (ex is HttpRequestException or KreuzbergException)
+                {
+                    Console.WriteLine($"Failed to process {u}: {ex.Message}");
+                    return null;
+                }
+            });
+
+            var results = await Task.WhenAll(downloadTasks);
+            // Null entries mark URLs whose download or extraction failed.
+            var successCount = results.Count(r => r != null);
+            Console.WriteLine($"Successfully processed {successCount} documents");
+        }
+        catch (HttpRequestException ex)
+        {
+            Console.WriteLine($"HTTP error: {ex.Message}");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/extract_with_config.cs
+++ b/docs/snippets/csharp/advanced/extract_with_config.cs
@@ -0,0 +1,98 @@
+using Kreuzberg;
+
+class Program // Builds a configuration touching every section, runs one extraction and prints a summary.
+{
+    static async Task Main()
+    {
+        try
+        {
+            var config = new ExtractionConfig
+            {
+                UseCache = true,
+                EnableQualityProcessing = true,
+                ForceOcr = false,
+
+                Ocr = new OcrConfig
+                {
+                    Backend = "tesseract",
+                    Language = "eng+fra", // two language codes joined with '+'
+                    TesseractConfig = new TesseractConfig
+                    {
+                        Psm = 3,
+                        Oem = 3,
+                        MinConfidence = 0.8,
+                        Preprocessing = new ImagePreprocessingConfig
+                        {
+                            TargetDpi = 300,
+                            Denoise = true,
+                            Deskew = true,
+                            ContrastEnhance = true
+                        },
+                        EnableTableDetection = true
+                    }
+                },
+
+                PdfOptions = new PdfConfig
+                {
+                    ExtractImages = true,
+                    ExtractMetadata = true
+                },
+
+                Images = new ImageExtractionConfig
+                {
+                    ExtractImages = true,
+                    TargetDpi = 150,
+                    MaxImageDimension = 4096
+                },
+
+                Chunking = new ChunkingConfig
+                {
+                    MaxChars = 1000,
+                    MaxOverlap = 200,
+                    Preset = "default"
+                },
+
+                TokenReduction = new TokenReductionConfig
+                {
+                    Mode = "moderate",
+                    PreserveImportantWords = true
+                },
+
+                LanguageDetection = new LanguageDetectionConfig
+                {
+                    Enabled = true,
+                    MinConfidence = 0.8,
+                    DetectMultiple = false
+                },
+
+                Postprocessor = new PostProcessorConfig
+                {
+                    Enabled = true
+                }
+            };
+
+            var result = await KreuzbergLib.ExtractFileAsync(
+                "document.pdf",
+                config
+            );
+
+            Console.WriteLine($"Content length: {result.Content.Length}");
+            Console.WriteLine($"MIME type: {result.MimeType}");
+            Console.WriteLine($"Format type: {result.Metadata.FormatType}");
+            // Use the collections' own Count rather than LINQ Any(); the file imports no System.Linq.
+            if (result.Tables.Count > 0)
+            {
+                Console.WriteLine($"Found {result.Tables.Count} tables");
+            }
+            // Chunks may be null; the property pattern covers the null and the empty case in one test.
+            if (result.Chunks is { Count: > 0 } chunks)
+            {
+                Console.WriteLine($"Created {chunks.Count} chunks");
+            }
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction error: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/keyword_extraction_config.md
+++ b/docs/snippets/csharp/advanced/keyword_extraction_config.md
@@ -0,0 +1,15 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig // only the Keywords section is configured here
+{
+    Keywords = new KeywordConfig
+    {
+        Algorithm = KeywordAlgorithm.Yake, // keyword algorithm selection
+        MaxKeywords = 10, // presumably an upper bound on returned keywords — confirm
+        MinScore = 0.3, // presumably a score cutoff; scale and direction are not visible here — confirm
+        NgramRange = (1, 3), // tuple; presumably (min, max) n-gram length — confirm
+        Language = "en"
+    }
+};
+```
--- a/docs/snippets/csharp/advanced/keyword_extraction_example.md
+++ b/docs/snippets/csharp/advanced/keyword_extraction_example.md
@@ -0,0 +1,30 @@
+```csharp title="C#"
+using Kreuzberg;
+using System.Collections.Generic;
+
+var config = new ExtractionConfig
+{
+    Keywords = new KeywordConfig
+    {
+        Algorithm = KeywordAlgorithm.Yake,
+        MaxKeywords = 10,
+        MinScore = 0.3
+    }
+};
+
+var result = await KreuzbergLib.ExtractFileAsync(
+    "research_paper.pdf",
+    config
+);
+// Single lookup plus a type test: nothing is printed when the key is missing or holds another shape.
+if (result.Metadata.TryGetValue("keywords", out var raw)
+    && raw is List<Dictionary<string, object>> keywords)
+{
+    foreach (var kw in keywords)
+    {
+        var text = (string)kw["text"];
+        var score = Convert.ToDouble(kw["score"]); // accepts any boxed numeric type, unlike a (double) unboxing cast
+        Console.WriteLine($"{text}: {score:F3}");
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/language_detection_config.cs
+++ b/docs/snippets/csharp/advanced/language_detection_config.cs
@@ -0,0 +1,37 @@
+using Kreuzberg;
+
+class Program // Single-language detection: prints the first detected language, if any.
+{
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            LanguageDetection = new LanguageDetectionConfig
+            {
+                Enabled = true,
+                MinConfidence = 0.8, // double literal, as in the other snippets; assumes MinConfidence is a double — confirm
+                DetectMultiple = false
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+            // DetectedLanguages may be null; the lifted comparison is false in that case.
+            if (result.DetectedLanguages?.Count > 0)
+            {
+                Console.WriteLine($"Detected Language: {result.DetectedLanguages[0]}");
+            }
+            else
+            {
+                Console.WriteLine("No language detected");
+            }
+
+            Console.WriteLine($"Content length: {result.Content.Length} characters");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction failed: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/language_detection_config.md
+++ b/docs/snippets/csharp/advanced/language_detection_config.md
@@ -0,0 +1,39 @@
+```csharp title="C#"
+using Kreuzberg;
+
+class Program // Single-language detection: prints the first detected language, if any.
+{
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            LanguageDetection = new LanguageDetectionConfig
+            {
+                Enabled = true,
+                MinConfidence = 0.8, // double literal, as in the other snippets; assumes MinConfidence is a double — confirm
+                DetectMultiple = false
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+            // DetectedLanguages may be null; the lifted comparison is false in that case.
+            if (result.DetectedLanguages?.Count > 0)
+            {
+                Console.WriteLine($"Detected Language: {result.DetectedLanguages[0]}");
+            }
+            else
+            {
+                Console.WriteLine("No language detected");
+            }
+
+            Console.WriteLine($"Content length: {result.Content.Length} characters");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Extraction failed: {ex.Message}");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/language_detection_multilingual.cs
+++ b/docs/snippets/csharp/advanced/language_detection_multilingual.cs
@@ -0,0 +1,40 @@
+using Kreuzberg;
+
+class Program // Multi-language detection: prints every detected language.
+{
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            LanguageDetection = new LanguageDetectionConfig
+            {
+                Enabled = true,
+                MinConfidence = 0.8, // double literal, as in the other snippets; assumes MinConfidence is a double — confirm
+                DetectMultiple = true
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("multilingual_document.pdf", config);
+            // DetectedLanguages may be null; fall back to an empty list so Count and Join are safe.
+            var languages = result.DetectedLanguages ?? new List<string>();
+
+            if (languages.Count > 0)
+            {
+                Console.WriteLine($"Detected {languages.Count} language(s): {string.Join(", ", languages)}");
+            }
+            else
+            {
+                Console.WriteLine("No languages detected");
+            }
+
+            Console.WriteLine($"Total content: {result.Content.Length} characters");
+            Console.WriteLine($"MIME type: {result.MimeType}");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Processing failed: {ex.Message}");
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/language_detection_multilingual.md
+++ b/docs/snippets/csharp/advanced/language_detection_multilingual.md
@@ -0,0 +1,42 @@
+```csharp title="C#"
+using Kreuzberg;
+
+class Program // Multi-language detection: prints every detected language.
+{
+    static async Task Main()
+    {
+        var config = new ExtractionConfig
+        {
+            LanguageDetection = new LanguageDetectionConfig
+            {
+                Enabled = true,
+                MinConfidence = 0.8, // double literal, as in the other snippets; assumes MinConfidence is a double — confirm
+                DetectMultiple = true
+            }
+        };
+
+        try
+        {
+            var result = await KreuzbergLib.ExtractFileAsync("multilingual_document.pdf", config);
+            // DetectedLanguages may be null; fall back to an empty list so Count and Join are safe.
+            var languages = result.DetectedLanguages ?? new List<string>();
+
+            if (languages.Count > 0)
+            {
+                Console.WriteLine($"Detected {languages.Count} language(s): {string.Join(", ", languages)}");
+            }
+            else
+            {
+                Console.WriteLine("No languages detected");
+            }
+
+            Console.WriteLine($"Total content: {result.Content.Length} characters");
+            Console.WriteLine($"MIME type: {result.MimeType}");
+        }
+        catch (KreuzbergException ex)
+        {
+            Console.WriteLine($"Processing failed: {ex.Message}");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/advanced/plugin_registry.cs
+++ b/docs/snippets/csharp/advanced/plugin_registry.cs
@@ -0,0 +1,65 @@
+using Kreuzberg;
+using System.Collections.Generic;
+
+/// <summary>
+/// Plugin registry walkthrough: lists each registry, then registers and removes a custom post-processor.
+/// </summary>
+class Program
+{
+    // Entry point; any KreuzbergException raised along the way is reported on the console.
+    static void Main()
+    {
+        try
+        {
+            // Each registry is queried and printed in turn; only the first heading
+            // has no leading blank line.
+            PrintNames("Registered Document Extractors:", KreuzbergLib.ListDocumentExtractors());
+            PrintNames("\nRegistered OCR Backends:", KreuzbergLib.ListOcrBackends());
+            PrintNames("\nRegistered Post-Processors:", KreuzbergLib.ListPostProcessors());
+            PrintNames("\nRegistered Validators:", KreuzbergLib.ListValidators());
+
+            // Register a custom post-processor, then remove it again by name.
+            var plugin = new CustomPostProcessor();
+            KreuzbergLib.RegisterPostProcessor(plugin);
+            Console.WriteLine($"\nRegistered custom post-processor: {plugin.Name}");
+
+            KreuzbergLib.UnregisterPostProcessor(plugin.Name);
+            Console.WriteLine($"Unregistered post-processor: {plugin.Name}");
+
+            // Clear the validator registry, as the message below reports.
+            KreuzbergLib.ClearValidators();
+            Console.WriteLine("All validators cleared");
+        }
+        catch (KreuzbergException failure)
+        {
+            Console.WriteLine($"Plugin registry error: {failure.Message}");
+        }
+    }
+
+    /// <summary>
+    /// Writes a heading followed by one indented "  - name" line per entry.
+    /// </summary>
+    /// <param name="heading">Line printed before the entries.</param>
+    /// <param name="names">Entries to print, in enumeration order.</param>
+    static void PrintNames<T>(string heading, IEnumerable<T> names)
+    {
+        Console.WriteLine(heading);
+        foreach (var name in names)
+        {
+            Console.WriteLine($"  - {name}");
+        }
+    }
+}
+
+
+class CustomPostProcessor : IPostProcessor // example plugin registered and unregistered by Program.Main
+{
+    public string Name => "custom-processor"; // the name Program.Main passes to UnregisterPostProcessor
+    public int Priority => 50; // ordering semantics are defined by IPostProcessor — not visible here
+    // Upper-cases the content in place (culture-invariant) and returns the same result instance.
+    public ExtractionResult Process(ExtractionResult result)
+    {
+        result.Content = result.Content.ToUpperInvariant();
+        return result;
+    }
+}
--- a/docs/snippets/csharp/advanced/quality_processing_config.md
+++ b/docs/snippets/csharp/advanced/quality_processing_config.md
@@ -0,0 +1,17 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Quality-processing example:
+//   1. build a config with EnableQualityProcessing switched on,
+//   2. extract "document.pdf" asynchronously with that config,
+//   3. print the QualityScore reported on the result.
+
+var settings = new ExtractionConfig { EnableQualityProcessing = true };
+
+const string inputPath = "document.pdf";
+var extraction = await KreuzbergLib.ExtractFileAsync(inputPath, settings);
+
+// The score is formatted with two decimal places.
+// It is read straight from the extraction result.
+Console.WriteLine($"Quality score: {extraction.QualityScore:F2}");
+```
--- a/docs/snippets/csharp/advanced/quality_processing_example.md
+++ b/docs/snippets/csharp/advanced/quality_processing_example.md
@@ -0,0 +1,29 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig
+{
+    EnableQualityProcessing = true
+};
+// Synchronous entry point: the other snippets await ExtractFile, so the blocking call is ExtractFileSync.
+var result = KreuzbergLib.ExtractFileSync(
+    "scanned_document.pdf",
+    config
+);
+
+var qualityScore = result.QualityScore;
+// Scores below 0.5 are treated as low quality by this example.
+if (qualityScore < 0.5)
+{
+    Console.WriteLine(
+        $"Warning: Low quality extraction ({qualityScore:F2})"
+    );
+    Console.WriteLine(
+        "Consider re-scanning with higher DPI or adjusting OCR settings"
+    );
+}
+else
+{
+    Console.WriteLine($"Quality score: {qualityScore:F2}");
+}
+```
--- a/docs/snippets/csharp/advanced/streaming.cs
+++ b/docs/snippets/csharp/advanced/streaming.cs
@@ -0,0 +1,108 @@
+using Kreuzberg;
+using System.IO;
+
+class Program // Processes extracted text piece by piece, either as fixed-size slices or as an async stream.
+{
+    static async Task Main()
+    {
+        try
+        {
+            var filePath = "large_document.pdf";
+
+            await ProcessLargeFileAsync(filePath);
+        }
+        catch (Exception ex)
+        {
+            Console.WriteLine($"Error: {ex.Message}");
+        }
+    }
+    // Extracts the whole file first, then slices the content into 1000-character pieces and processes them in order.
+    static async Task ProcessLargeFileAsync(string filePath)
+    {
+        var config = new ExtractionConfig
+        {
+            EnableQualityProcessing = true
+        };
+
+        var result = await KreuzbergLib.ExtractFileAsync(filePath, config);
+
+        var contentChunks = ChunkContent(result.Content, chunkSize: 1000);
+
+        Console.WriteLine($"Processing {contentChunks.Count} chunks");
+
+        foreach (var (index, chunk) in contentChunks.Select((c, i) => (i, c)))
+        {
+            Console.WriteLine($"Chunk {index}: {chunk.Length} characters");
+            await ProcessChunkAsync(chunk);
+        }
+    }
+    // Prints the number of whitespace-separated words in the chunk, then waits 10 ms.
+    static async Task ProcessChunkAsync(string chunk)
+    {
+        var wordCount = chunk.Split(
+            new[] { ' ', '\n', '\r' },
+            StringSplitOptions.RemoveEmptyEntries
+        ).Length;
+
+        Console.WriteLine($"  Words: {wordCount}");
+
+        await Task.Delay(10); // presumably a stand-in for real asynchronous work
+    }
+    // Splits content into consecutive slices of at most chunkSize characters; throws when chunkSize is not positive.
+    static List<string> ChunkContent(string content, int chunkSize)
+    {
+        var chunks = new List<string>();
+        if (chunkSize <= 0) throw new ArgumentOutOfRangeException(nameof(chunkSize)); // a zero step would never advance the loop
+        for (int i = 0; i < content.Length; i += chunkSize)
+        {
+            var chunk = content.Substring(
+                i,
+                Math.Min(chunkSize, content.Length - i)
+            );
+            chunks.Add(chunk);
+        }
+
+        return chunks;
+    }
+    // Yields the result's own chunks when present, otherwise 512-character slices of the content.
+    static async IAsyncEnumerable<string> StreamExtractedChunksAsync(
+        string filePath)
+    {
+        var result = await KreuzbergLib.ExtractFileAsync(filePath);
+
+        if (result.Chunks?.Any() == true)
+        {
+            foreach (var chunk in result.Chunks)
+            {
+                yield return chunk.Content;
+                await Task.Yield();
+            }
+        }
+        else
+        {
+            var content = result.Content;
+            const int chunkSize = 512;
+
+            for (int i = 0; i < content.Length; i += chunkSize)
+            {
+                var chunk = content.Substring(
+                    i,
+                    Math.Min(chunkSize, content.Length - i)
+                );
+                yield return chunk;
+                await Task.Yield();
+            }
+        }
+    }
+    // Consumes the async stream and prints a preview of at most 50 characters per chunk.
+    static async Task StreamProcessingExample()
+    {
+        var streamEnumerator = StreamExtractedChunksAsync("document.pdf");
+
+        int index = 0;
+        await foreach (var chunk in streamEnumerator)
+        {
+            Console.WriteLine($"Chunk {index++}: {chunk[..Math.Min(50, chunk.Length)]}..."); // clamp: the last slice can be shorter than 50
+        }
+    }
+}
--- a/docs/snippets/csharp/advanced/token_reduction_config.md
+++ b/docs/snippets/csharp/advanced/token_reduction_config.md
@@ -0,0 +1,14 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig // only the TokenReduction section is configured here
+{
+    TokenReduction = new TokenReductionConfig
+    {
+        Mode = "moderate",              // "off", "moderate", or "aggressive"
+        PreserveMarkdown = true,        // presumably keeps Markdown syntax intact during reduction — confirm
+        PreserveCode = true,            // presumably keeps code blocks intact during reduction — confirm
+        LanguageHint = "eng"            // NOTE(review): the keyword snippet uses "en" for Language — confirm the expected code format
+    }
+};
+```
--- a/docs/snippets/csharp/advanced/token_reduction_example.md
+++ b/docs/snippets/csharp/advanced/token_reduction_example.md
@@ -0,0 +1,32 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig
+{
+    TokenReduction = new TokenReductionConfig
+    {
+        Mode = "moderate",
+        PreserveMarkdown = true
+    }
+};
+
+var result = await KreuzbergLib.ExtractFileAsync(
+    "verbose_document.pdf",
+    config
+);
+// One lookup per key; Convert accepts any boxed numeric type, unlike a direct unboxing cast.
+var original = result.Metadata.TryGetValue("original_token_count", out var rawOriginal)
+    ? Convert.ToInt32(rawOriginal)
+    : 0;
+
+var reduced = result.Metadata.TryGetValue("token_count", out var rawReduced)
+    ? Convert.ToInt32(rawReduced)
+    : 0;
+
+var ratio = result.Metadata.TryGetValue("token_reduction_ratio", out var rawRatio)
+    ? Convert.ToDouble(rawRatio)
+    : 0.0;
+// Missing keys fall back to zero, so the report below always prints.
+Console.WriteLine($"Reduced from {original} to {reduced} tokens");
+Console.WriteLine($"Reduction: {ratio * 100:F1}%");
+```
--- a/docs/snippets/csharp/advanced/vector_database_integration.md
+++ b/docs/snippets/csharp/advanced/vector_database_integration.md
@@ -0,0 +1,74 @@
+```csharp title="C#"
+using Kreuzberg;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading.Tasks;
+
+public class VectorDatabaseIntegration // Turns a document's chunks into records ready for a vector store.
+{
+    public class VectorRecord // one chunk: id, embedding, text and string metadata
+    {
+        public string Id { get; set; }
+        public float[] Embedding { get; set; }
+        public string Content { get; set; }
+        public Dictionary<string, string> Metadata { get; set; }
+    }
+    // Extracts documentPath with chunking and embeddings, builds one record per chunk, stores and returns them.
+    public async Task<List<VectorRecord>> ExtractAndVectorize(
+        string documentPath,
+        string documentId)
+    {
+        var config = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 512,
+                MaxOverlap = 50,
+                Embedding = new EmbeddingConfig
+                {
+                    Model = EmbeddingModelType.Preset("balanced"),
+                    Normalize = true,
+                    BatchSize = 32
+                }
+            }
+        };
+        // Extraction goes through the static KreuzbergLib class; `Kreuzberg` alone is the imported namespace.
+        var result = await KreuzbergLib.ExtractFileAsync(documentPath, config);
+        var chunks = result.Chunks ?? new List<Chunk>();
+        // Record ids combine the document id with the chunk's zero-based position.
+        var vectorRecords = chunks
+            .Select((chunk, index) => new VectorRecord
+            {
+                Id = $"{documentId}_chunk_{index}",
+                Content = chunk.Content,
+                Embedding = chunk.Embedding,
+                Metadata = new Dictionary<string, string>
+                {
+                    { "document_id", documentId },
+                    { "chunk_index", index.ToString() },
+                    { "content_length", chunk.Content.Length.ToString() }
+                }
+            })
+            .ToList();
+
+        await StoreInVectorDatabase(vectorRecords);
+        return vectorRecords;
+    }
+    // Placeholder store: only logs records that carry a non-empty embedding; the others are skipped.
+    private async Task StoreInVectorDatabase(List<VectorRecord> records)
+    {
+        foreach (var record in records)
+        {
+            if (record.Embedding != null && record.Embedding.Length > 0)
+            {
+                Console.WriteLine(
+                    $"Storing {record.Id}: {record.Content.Length} chars, " +
+                    $"{record.Embedding.Length} dims");
+            }
+        }
+
+        await Task.CompletedTask;
+    }
+}
+```
--- a/docs/snippets/csharp/advanced_config.md
+++ b/docs/snippets/csharp/advanced_config.md
@@ -0,0 +1,29 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig
+{
+    Ocr = new OcrConfig { Backend = "tesseract", Language = "eng+deu" },
+    Chunking = new ChunkingConfig { MaxChars = 1000, MaxOverlap = 100 },
+    TokenReduction = new TokenReductionConfig { Enabled = true }, // NOTE(review): other snippets set Mode instead of Enabled — confirm
+    LanguageDetection = new LanguageDetectionConfig
+    {
+        Enabled = true,
+        DetectMultiple = true
+    },
+    UseCache = true,
+    EnableQualityProcessing = true
+};
+
+var result = KreuzbergLib.ExtractFileSync("document.pdf", config);
+// Chunks may be null; iterate an empty list in that case. Each preview is capped at 100 characters.
+foreach (var chunk in result.Chunks ?? new List<Chunk>())
+{
+    Console.WriteLine($"Chunk: {chunk.Content[..Math.Min(100, chunk.Content.Length)]}");
+}
+
+if (result.DetectedLanguages?.Count > 0)
+{
+    Console.WriteLine($"Languages: {string.Join(", ", result.DetectedLanguages)}");
+}
+```
--- a/docs/snippets/csharp/api/batch_extract_bytes_sync.md
+++ b/docs/snippets/csharp/api/batch_extract_bytes_sync.md
@@ -0,0 +1,17 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var items = new List<BatchBytesItem>
+{
+    new() { Content = File.ReadAllBytes("doc1.pdf"), MimeType = "application/pdf", Config = null },
+    new() { Content = File.ReadAllBytes("doc2.txt"), MimeType = "text/plain", Config = null }
+};
+// Files are read synchronously too, so this synchronous example needs no async context.
+var config = new ExtractionConfig { OutputFormat = OutputFormat.Text };
+var results = KreuzbergLib.BatchExtractBytesSync(items, config);
+
+foreach (var result in results)
+{
+    Console.WriteLine($"Content length: {result.Content.Length}");
+}
+```
--- a/docs/snippets/csharp/api/batch_extract_files_sync.md
+++ b/docs/snippets/csharp/api/batch_extract_files_sync.md
@@ -0,0 +1,21 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Two files in one batch: the first item has no per-item Config, the second
+// carries its own FileExtractionConfig with ForceOcr switched on.
+var plainItem = new BatchFileItem { Path = "document1.pdf", Config = null };
+var ocrOverride = new FileExtractionConfig { ForceOcr = true };
+var ocrItem = new BatchFileItem { Path = "document2.pdf", Config = ocrOverride };
+var batch = new List<BatchFileItem> { plainItem, ocrItem };
+
+var sharedConfig = new ExtractionConfig
+{
+    OutputFormat = OutputFormat.Text
+};
+
+// Report the extracted content length of each result, in the order returned.
+foreach (var extraction in KreuzbergLib.BatchExtractFilesSync(batch, sharedConfig))
+{
+    Console.WriteLine($"Content length: {extraction.Content.Length}");
+}
+```
--- a/docs/snippets/csharp/api/client_chunk_text.md
+++ b/docs/snippets/csharp/api/client_chunk_text.md
@@ -0,0 +1,45 @@
+```csharp title="C#"
+using System.Net.Http;
+using System.Net.Http.Json;
+using System.Text.Json.Serialization;
+
+public record ChunkRequest( // JSON request body that Program.Main posts to the /chunk endpoint
+    [property: JsonPropertyName("text")] string Text, // serialized as "text"
+    [property: JsonPropertyName("max_characters")] int? MaxCharacters = null, // optional; null by default
+    [property: JsonPropertyName("overlap")] int? Overlap = null, // optional; null by default
+    [property: JsonPropertyName("chunker_type")] string? ChunkerType = null // optional; Program.Main passes "text"
+);
+
+public record ChunkResponse( // JSON response body that Program.Main reads back from the /chunk endpoint
+    [property: JsonPropertyName("chunks")] List<ChunkItem> Chunks, // deserialized from "chunks"
+    [property: JsonPropertyName("chunk_count")] int ChunkCount // deserialized from "chunk_count"
+);
+
+public record ChunkItem( // one element of ChunkResponse.Chunks
+    [property: JsonPropertyName("content")] string Content, // deserialized from "content"
+    [property: JsonPropertyName("chunk_index")] int ChunkIndex // deserialized from "chunk_index"
+);
+
+class Program // Posts text to the /chunk endpoint and prints a short preview of every returned chunk.
+{
+    static async Task Main()
+    {
+        using var client = new HttpClient(); // disposed when Main exits
+        var request = new ChunkRequest(
+            Text: "Your long text content here...",
+            MaxCharacters: 1000,
+            Overlap: 50,
+            ChunkerType: "text"
+        );
+        // Fail fast on a non-success status instead of deserializing an error body as a ChunkResponse.
+        using var response = await client.PostAsJsonAsync("http://localhost:8000/chunk", request);
+        response.EnsureSuccessStatusCode();
+        var result = await response.Content.ReadFromJsonAsync<ChunkResponse>();
+        Console.WriteLine($"Created {result?.ChunkCount} chunks");
+        foreach (var chunk in result?.Chunks ?? [])
+        {
+            Console.WriteLine($"Chunk {chunk.ChunkIndex}: {chunk.Content[..Math.Min(50, chunk.Content.Length)]}...");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/api/client_extract_single_file.md
+++ b/docs/snippets/csharp/api/client_extract_single_file.md
@@ -0,0 +1,25 @@
+```csharp title="C#"
+using System.Net.Http;
+using System.Net.Http.Json;
+
+// Upload document.pdf as multipart/form-data to the local /extract endpoint
+// and print either the JSON response body or the failing status code.
+using var client = new HttpClient();
+
+// Using declarations dispose in reverse order when the snippet ends: response, form, stream, client.
+using var fileStream = File.OpenRead("document.pdf");
+using var content = new MultipartFormDataContent();
+content.Add(new StreamContent(fileStream), "files", "document.pdf");
+
+using var response = await client.PostAsync("http://localhost:8000/extract", content);
+
+if (response.IsSuccessStatusCode)
+{
+    var json = await response.Content.ReadAsStringAsync();
+    Console.WriteLine(json);
+}
+else
+{
+    Console.WriteLine($"Error: {response.StatusCode}");
+}
+```
--- a/docs/snippets/csharp/api/combining_all_features.md
+++ b/docs/snippets/csharp/api/combining_all_features.md
@@ -0,0 +1,44 @@
+```csharp title="C#"
+using Kreuzberg;
+
+var config = new ExtractionConfig // enables OCR, image extraction, chunking and language detection together
+{
+    OutputFormat = OutputFormat.Markdown,
+    UseCache = true,
+    Ocr = new OcrConfig
+    {
+        Enabled = true,
+        Backend = OcrBackendType.Tesseract, // NOTE(review): other snippets pass the string "tesseract" — confirm which form the API takes
+        Languages = ["eng"]
+    },
+    ImageExtraction = new ImageExtractionConfig // NOTE(review): other snippets name this property Images — confirm
+    {
+        Enabled = true,
+        MinImageHeight = 100,
+        MinImageWidth = 100
+    },
+    Chunking = new ChunkingConfig
+    {
+        Enabled = true,
+        ChunkerType = ChunkerType.Text,
+        MaxCharacters = 2000, // NOTE(review): other snippets use MaxChars / MaxOverlap — confirm
+        Overlap = 100
+    },
+    LanguageDetection = new LanguageDetectionConfig
+    {
+        Enabled = true
+    }
+};
+
+try
+{
+    var result = await KreuzbergLib.ExtractFile("document.pdf", null, config); // second argument is passed as null here
+    Console.WriteLine($"Content: {result.Content}");
+    Console.WriteLine($"Language: {result.Metadata?.LanguageDetection}"); // Metadata is treated as nullable
+    Console.WriteLine($"Format: {result.OutputFormat}");
+}
+catch (KreuzbergException ex)
+{
+    Console.WriteLine($"Extraction error: {ex.Message}");
+}
+```
--- a/docs/snippets/csharp/api/error_handling.md
+++ b/docs/snippets/csharp/api/error_handling.md
@@ -0,0 +1,18 @@
+```csharp title="C#"
+using Kreuzberg;
+
+try
+{
+    Console.WriteLine(KreuzbergLib.ExtractFileSync("nonexistent.pdf", null, null).Content);
+}
+catch (Exception error)
+{
+    // Library failures expose a Code next to the message; everything else is
+    // reported as an unexpected error.
+    if (error is KreuzbergException failure)
+    {
+        Console.WriteLine($"Error Code: {failure.Code}");
+        Console.WriteLine($"Error Message: {failure.Message}");
+    }
+    else
+    {
+        Console.WriteLine($"Unexpected error: {error.Message}");
+    }
+}
+```
--- a/docs/snippets/csharp/api/error_handling_extract.md
+++ b/docs/snippets/csharp/api/error_handling_extract.md
@@ -0,0 +1,22 @@
+```csharp title="C#"
+using Kreuzberg;
+
+try
+{
+    var payload = File.ReadAllBytes("document.unsupported");
+    var extraction = KreuzbergLib.ExtractBytesSync(payload, "application/x-custom", null);
+    Console.WriteLine(extraction.Content);
+}
+catch (KreuzbergException failure)
+{
+    // Map the numeric error code to a message; unknown codes fall through to
+    // the generic report that includes both the code and the library message.
+    var message = failure.Code switch
+    {
+        1 => "Validation error: Invalid MIME type",
+        2 => "Format error: MIME type not supported",
+        _ => $"Extraction failed with error {failure.Code}: {failure.Message}"
+    };
+
+    Console.WriteLine(message);
+}
+```
--- a/docs/snippets/csharp/api/extract_bytes_async.md
+++ b/docs/snippets/csharp/api/extract_bytes_async.md
@@ -0,0 +1,10 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Load the whole document into memory, then extract from the in-memory bytes.
+byte[] pdfBytes = await File.ReadAllBytesAsync("document.pdf");
+
+var extraction = await KreuzbergLib.ExtractBytes(
+    pdfBytes,
+    "application/pdf",
+    new ExtractionConfig { OutputFormat = OutputFormat.Text });
+
+Console.WriteLine(extraction.Content);
+Console.WriteLine($"MIME Type: {extraction.MimeType}");
+```
--- a/docs/snippets/csharp/api/extract_bytes_sync.md
+++ b/docs/snippets/csharp/api/extract_bytes_sync.md
@@ -0,0 +1,10 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Synchronous variant: read the file, then extract from the bytes with an explicit MIME type.
+byte[] pdfBytes = File.ReadAllBytes("document.pdf");
+ExtractionConfig options = new() { OutputFormat = OutputFormat.Text };
+
+var extraction = KreuzbergLib.ExtractBytesSync(pdfBytes, "application/pdf", options);
+
+Console.WriteLine(extraction.Content);
+Console.WriteLine($"MIME Type: {extraction.MimeType}");
+```
--- a/docs/snippets/csharp/api/extract_file_async.md
+++ b/docs/snippets/csharp/api/extract_file_async.md
@@ -0,0 +1,9 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Extract a file by path with OutputFormat.Text; the second argument is passed as null.
+ExtractionConfig options = new() { OutputFormat = OutputFormat.Text };
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, options);
+
+Console.WriteLine(extraction.Content);
+Console.WriteLine($"MIME Type: {extraction.MimeType}");
+```
--- a/docs/snippets/csharp/api/extract_file_sync.md
+++ b/docs/snippets/csharp/api/extract_file_sync.md
@@ -0,0 +1,9 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Synchronous extraction by path with OutputFormat.Text; the second argument is passed as null.
+ExtractionConfig options = new() { OutputFormat = OutputFormat.Text };
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", null, options);
+
+Console.WriteLine(extraction.Content);
+Console.WriteLine($"MIME Type: {extraction.MimeType}");
+```
--- a/docs/snippets/csharp/batch_extract_bytes_sync.md
+++ b/docs/snippets/csharp/batch_extract_bytes_sync.md
@@ -0,0 +1,13 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Pair each file's raw bytes with the MIME type it should be treated as.
+BytesWithMime[] documents =
+[
+    new(await File.ReadAllBytesAsync("doc1.pdf"), "application/pdf"),
+    new(await File.ReadAllBytesAsync("doc2.docx"), "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
+];
+
+// One call handles the whole batch, using a default ExtractionConfig.
+var extractions = KreuzbergLib.BatchExtractBytesSync(documents, new ExtractionConfig());
+
+Console.WriteLine($"Processed {extractions.Count} documents");
+```
--- a/docs/snippets/csharp/batch_extract_files_sync.md
+++ b/docs/snippets/csharp/batch_extract_files_sync.md
@@ -0,0 +1,11 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Extract several files in one call, using a default ExtractionConfig.
+string[] paths = ["doc1.pdf", "doc2.docx", "doc3.pptx"];
+var extractions = KreuzbergLib.BatchExtractFilesSync(paths, new ExtractionConfig());
+
+foreach (var extraction in extractions)
+{
+    var length = extraction.Content.Length;
+    Console.WriteLine($"Content length: {length}");
+}
+```
--- a/docs/snippets/csharp/benchmarking/simple_benchmark.cs
+++ b/docs/snippets/csharp/benchmarking/simple_benchmark.cs
@@ -0,0 +1,102 @@
+```csharp title="simple_benchmark.cs"
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Running;
+using Kreuzberg;
+using System;
+using System.Diagnostics;
+using System.Threading.Tasks;
+
+/// <summary>
+/// BenchmarkDotNet suite comparing synchronous, asynchronous, OCR-forced and
+/// cache-enabled extraction of the same document.
+/// </summary>
+[MemoryDiagnoser]
+// SimpleJobAttribute names this parameter iterationCount; targetCount is the
+// legacy name and does not compile against current BenchmarkDotNet releases.
+[SimpleJob(warmupCount: 3, iterationCount: 5)]
+public class KreuzbergBenchmark
+{
+    // Both fields are assigned for real in Setup(); the initializers only keep
+    // them non-null for nullable reference type analysis.
+    private string _testFilePath = string.Empty;
+    private ExtractionConfig _config = new();
+
+    /// <summary>Prepares the input path and the baseline configuration (cache disabled).</summary>
+    [GlobalSetup]
+    public void Setup()
+    {
+        _testFilePath = "document.pdf";
+        _config = new ExtractionConfig
+        {
+            // Cache is off so each measured call goes through extraction again.
+            UseCache = false,
+            EnableQualityProcessing = true,
+        };
+    }
+
+    /// <summary>Measures the synchronous extraction entry point.</summary>
+    [Benchmark]
+    public void ExtractFileSync()
+    {
+        var result = KreuzbergLib.ExtractFileSync(_testFilePath, _config);
+        _ = result.Content.Length;
+    }
+
+    /// <summary>Measures the asynchronous extraction entry point with the baseline configuration.</summary>
+    [Benchmark]
+    public async Task ExtractFileAsync()
+    {
+        var result = await KreuzbergLib.ExtractFileAsync(_testFilePath, _config);
+        _ = result.Content.Length;
+    }
+
+    /// <summary>Measures extraction with ForceOcr enabled and the "tesseract" backend.</summary>
+    [Benchmark]
+    public async Task ExtractWithOcr()
+    {
+        var ocrConfig = new ExtractionConfig
+        {
+            ForceOcr = true,
+            Ocr = new OcrConfig
+            {
+                Backend = "tesseract",
+                Language = "eng",
+            }
+        };
+
+        var result = await KreuzbergLib.ExtractFileAsync(_testFilePath, ocrConfig);
+        _ = result.Content.Length;
+    }
+
+    /// <summary>Measures extraction with UseCache enabled.</summary>
+    [Benchmark]
+    public async Task ExtractWithCache()
+    {
+        var cacheConfig = new ExtractionConfig
+        {
+            UseCache = true,
+            EnableQualityProcessing = true,
+        };
+
+        var result = await KreuzbergLib.ExtractFileAsync(_testFilePath, cacheConfig);
+        _ = result.Content.Length;
+    }
+}
+
+/// <summary>
+/// Stopwatch-based timing of ten sequential and ten concurrent extractions,
+/// followed by a run of the BenchmarkDotNet suite.
+/// </summary>
+public class ManualBenchmark
+{
+    public static async Task Main(string[] args)
+    {
+        var filePath = "document.pdf";
+        var config = new ExtractionConfig();
+
+        // One untimed call before any measurement starts.
+        await KreuzbergLib.ExtractFileAsync(filePath, config);
+
+        // Sequential, synchronous runs.
+        var stopwatch = Stopwatch.StartNew();
+        for (var run = 0; run < 10; run++)
+        {
+            KreuzbergLib.ExtractFileSync(filePath, config);
+        }
+        stopwatch.Stop();
+        Console.WriteLine($"Sync extraction (10 runs): {stopwatch.ElapsedMilliseconds}ms avg {stopwatch.ElapsedMilliseconds / 10f}ms");
+
+        // Concurrent runs: start every task first, then await them together.
+        stopwatch.Restart();
+        var pending = new Task[10];
+        for (var run = 0; run < pending.Length; run++)
+        {
+            pending[run] = KreuzbergLib.ExtractFileAsync(filePath, config);
+        }
+        await Task.WhenAll(pending);
+        stopwatch.Stop();
+        Console.WriteLine($"Async extraction (10 parallel runs): {stopwatch.ElapsedMilliseconds}ms");
+
+        // The returned summary is not inspected here.
+        BenchmarkRunner.Run<KreuzbergBenchmark>();
+    }
+}
+```
--- a/docs/snippets/csharp/cache/disk_cache.cs
+++ b/docs/snippets/csharp/cache/disk_cache.cs
@@ -0,0 +1,42 @@
+```csharp title="disk_cache.cs"
+using Kreuzberg;
+using System;
+using System.IO;
+using System.Threading.Tasks;
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    CacheConfig = new CacheConfig
+    {
+        CachePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "kreuzberg_cache"),
+        MaxCacheSize = 1024 * 1024 * 500, // 524,288,000 — presumably bytes (500 MiB); confirm the unit
+        CacheTtlSeconds = 86400 * 7,      // 7 days expressed in seconds
+        EnableCompression = true
+    }
+};
+
+Console.WriteLine("First extraction (will be cached)...");
+var result1 = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+Console.WriteLine($"  - Content length: {result1.Content.Length}");
+Console.WriteLine($"  - Cached: {result1.Metadata.WasCached}");
+
+Console.WriteLine("\nSecond extraction (from cache)...");
+var result2 = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+Console.WriteLine($"  - Content length: {result2.Content.Length}");
+Console.WriteLine($"  - Cached: {result2.Metadata.WasCached}");
+
+Console.WriteLine($"\nResults are identical: {result1.Content == result2.Content}");
+
+// Read the statistics while the cache still holds the entries created above;
+// querying them after the clear calls below would only describe an emptied cache.
+var cacheStats = await KreuzbergLib.GetCacheStatsAsync();
+Console.WriteLine("\nCache Statistics:");
+Console.WriteLine($"  - Total entries: {cacheStats.TotalEntries}");
+Console.WriteLine($"  - Cache size: {cacheStats.CacheSizeBytes / 1024 / 1024} MB");
+Console.WriteLine($"  - Hit rate: {cacheStats.HitRate:P}");
+
+// Clear the entry for a single document first, then everything.
+await KreuzbergLib.ClearCacheAsync("document.pdf");
+Console.WriteLine("\nCache cleared for document.pdf");
+
+await KreuzbergLib.ClearAllCacheAsync();
+Console.WriteLine("All cache cleared");
+```
--- a/docs/snippets/csharp/clear_plugins.md
+++ b/docs/snippets/csharp/clear_plugins.md
@@ -0,0 +1,10 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Call each of the four Clear* methods on KreuzbergLib, one per plugin kind
+// (post-processors, validators, OCR backends, document extractors).
+// NOTE(review): presumably each call removes every registered plugin of that
+// kind — confirm against the library documentation.
+KreuzbergLib.ClearPostProcessors();
+KreuzbergLib.ClearValidators();
+KreuzbergLib.ClearOcrBackends();
+KreuzbergLib.ClearDocumentExtractors();
+
+Console.WriteLine("All plugins cleared");
+```
--- a/docs/snippets/csharp/cli/basic_cli.cs
+++ b/docs/snippets/csharp/cli/basic_cli.cs
@@ -0,0 +1,46 @@
+```csharp title="basic_cli.cs"
+using System;
+using System.CommandLine;
+using System.CommandLine.Invocation;
+using System.Threading.Tasks;
+using Kreuzberg;
+
+var rootCommand = new RootCommand("Kreuzberg document extraction CLI");
+
+// Command shape: extract-file <path> [-f|--format <format>]
+var pathArgument = new Argument<string>("path", "Path to the document file");
+var formatOption = new Option<string>(
+    new[] { "-f", "--format" },
+    getDefaultValue: () => "text",
+    "Output format (text, json)"
+);
+
+var extractFileCommand = new Command("extract-file", "Extract text from a document file");
+extractFileCommand.AddArgument(pathArgument);
+extractFileCommand.AddOption(formatOption);
+rootCommand.AddCommand(extractFileCommand);
+
+extractFileCommand.SetHandler(async (documentPath, format) =>
+{
+    try
+    {
+        var extraction = await KreuzbergLib.ExtractFileAsync(documentPath);
+
+        // "json" serializes the whole result; any other value prints only the content.
+        var output = format == "json"
+            ? System.Text.Json.JsonSerializer.Serialize(extraction)
+            : extraction.Content;
+
+        Console.WriteLine(output);
+    }
+    catch (Exception error)
+    {
+        // Report on stderr and terminate the process with a non-zero exit code.
+        Console.Error.WriteLine($"Error: {error.Message}");
+        Environment.Exit(1);
+    }
+}, pathArgument, formatOption);
+
+return await rootCommand.InvokeAsync(args);
+```
--- a/docs/snippets/csharp/cli/cli_with_config.cs
+++ b/docs/snippets/csharp/cli/cli_with_config.cs
@@ -0,0 +1,75 @@
+```csharp title="cli_with_config.cs"
+using System;
+using System.CommandLine;
+using System.Text.Json;
+using System.Threading.Tasks;
+using Kreuzberg;
+
+var rootCommand = new RootCommand("Kreuzberg with configuration");
+
+var extractCommand = new Command("extract", "Extract with custom configuration");
+var filePath = new Argument<string>("path", "Document file path");
+var configPath = new Option<string>(
+    new[] { "-c", "--config" },
+    "Path to JSON configuration file"
+);
+var forceOcr = new Option<bool>(
+    new[] { "--force-ocr" },
+    "Force OCR processing"
+);
+var useCache = new Option<bool>(
+    new[] { "--use-cache" },
+    getDefaultValue: () => true,
+    "Use caching (default: true)"
+);
+
+extractCommand.AddArgument(filePath);
+extractCommand.AddOption(configPath);
+extractCommand.AddOption(forceOcr);
+extractCommand.AddOption(useCache);
+
+extractCommand.SetHandler(async (path, config, ocr, cache) =>
+{
+    try
+    {
+        ExtractionConfig extractionConfig;
+
+        if (!string.IsNullOrEmpty(config))
+        {
+            // A configuration file takes precedence: --force-ocr and --use-cache
+            // are not applied on top of it.
+            var json = await System.IO.File.ReadAllTextAsync(config);
+
+            // Deserialize returns null for a JSON "null" document; surface that as a
+            // clear error instead of a NullReferenceException further down.
+            extractionConfig = JsonSerializer.Deserialize<ExtractionConfig>(json)
+                ?? throw new InvalidOperationException(
+                    $"Configuration file '{config}' does not contain a configuration object.");
+        }
+        else
+        {
+            extractionConfig = new ExtractionConfig
+            {
+                UseCache = cache,
+                ForceOcr = ocr,
+            };
+        }
+
+        Console.WriteLine("Extracting with configuration:");
+        Console.WriteLine($"  - File: {path}");
+        Console.WriteLine($"  - Force OCR: {extractionConfig.ForceOcr}");
+        Console.WriteLine($"  - Use Cache: {extractionConfig.UseCache}");
+
+        var result = await KreuzbergLib.ExtractFileAsync(path, extractionConfig);
+
+        // string.Join throws on a null sequence, so a missing language list is
+        // printed as an empty value instead.
+        var languages = result.DetectedLanguages is null
+            ? string.Empty
+            : string.Join(", ", result.DetectedLanguages);
+
+        Console.WriteLine("\nExtraction complete:");
+        Console.WriteLine($"  - Content length: {result.Content.Length}");
+        Console.WriteLine($"  - Format: {result.Metadata.FormatType}");
+        Console.WriteLine($"  - Languages: {languages}");
+
+        Console.WriteLine($"\n{result.Content}");
+    }
+    catch (Exception ex)
+    {
+        // Report on stderr and terminate the process with a non-zero exit code.
+        Console.Error.WriteLine($"Error: {ex.Message}");
+        Environment.Exit(1);
+    }
+}, filePath, configPath, forceOcr, useCache);
+
+rootCommand.AddCommand(extractCommand);
+
+return await rootCommand.InvokeAsync(args);
+```
--- a/docs/snippets/csharp/client_chunk_text.md
+++ b/docs/snippets/csharp/client_chunk_text.md
@@ -0,0 +1,68 @@
+```csharp title="C#"
+using System.Net.Http.Json;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+// Request models
+
+/// <summary>
+/// JSON body posted to the chunk endpoint. Serialized with the snake_case
+/// property names "text", "chunker_type" and "config".
+/// </summary>
+public record ChunkRequest(
+    [property: JsonPropertyName("text")] string Text,
+    [property: JsonPropertyName("chunker_type")] string? ChunkerType = null,
+    [property: JsonPropertyName("config")] ChunkConfig? Config = null
+);
+
+/// <summary>
+/// Optional chunking settings nested under "config" in <see cref="ChunkRequest"/>.
+/// Every member is nullable and defaults to null.
+/// </summary>
+public record ChunkConfig(
+    [property: JsonPropertyName("max_characters")] int? MaxCharacters = null,
+    [property: JsonPropertyName("overlap")] int? Overlap = null,
+    [property: JsonPropertyName("trim")] bool? Trim = null
+);
+
+// Response models
+
+/// <summary>
+/// JSON body read back from the chunk endpoint: the chunk list plus the
+/// "chunk_count", "input_size_bytes" and "chunker_type" fields.
+/// </summary>
+public record ChunkResponse(
+    [property: JsonPropertyName("chunks")] List<ChunkItem> Chunks,
+    [property: JsonPropertyName("chunk_count")] int ChunkCount,
+    [property: JsonPropertyName("input_size_bytes")] int InputSizeBytes,
+    [property: JsonPropertyName("chunker_type")] string ChunkerType
+);
+
+/// <summary>
+/// One entry of <see cref="ChunkResponse.Chunks"/>: the chunk content, its
+/// "byte_start"/"byte_end" values, its index and the total chunk count, and
+/// nullable first/last page numbers.
+/// </summary>
+public record ChunkItem(
+    [property: JsonPropertyName("content")] string Content,
+    [property: JsonPropertyName("byte_start")] int ByteStart,
+    [property: JsonPropertyName("byte_end")] int ByteEnd,
+    [property: JsonPropertyName("chunk_index")] int ChunkIndex,
+    [property: JsonPropertyName("total_chunks")] int TotalChunks,
+    [property: JsonPropertyName("first_page")] int? FirstPage,
+    [property: JsonPropertyName("last_page")] int? LastPage
+);
+
+class Program
+{
+    /// <summary>
+    /// Posts a <see cref="ChunkRequest"/> to the local /chunk endpoint and prints
+    /// a preview of up to 50 characters for every returned chunk.
+    /// </summary>
+    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
+    static async Task Main()
+    {
+        using var client = new HttpClient();
+
+        var request = new ChunkRequest(
+            Text: "Your long text content here...",
+            ChunkerType: "text",
+            Config: new ChunkConfig(
+                MaxCharacters: 1000,
+                Overlap: 50,
+                Trim: true
+            )
+        );
+
+        using var response = await client.PostAsJsonAsync(
+            "http://localhost:8000/chunk",
+            request
+        );
+
+        // Fail on 4xx/5xx before trying to read the body as a ChunkResponse;
+        // an error payload would not match that shape.
+        response.EnsureSuccessStatusCode();
+
+        var result = await response.Content.ReadFromJsonAsync<ChunkResponse>();
+
+        Console.WriteLine($"Created {result?.ChunkCount} chunks");
+        foreach (var chunk in result?.Chunks ?? [])
+        {
+            // Clamp the slice so content shorter than 50 characters does not throw.
+            var preview = chunk.Content[..Math.Min(50, chunk.Content.Length)];
+            Console.WriteLine($"Chunk {chunk.ChunkIndex}: {preview}...");
+        }
+    }
+}
+```
--- a/docs/snippets/csharp/client_extract_single_file.md
+++ b/docs/snippets/csharp/client_extract_single_file.md
@@ -0,0 +1,20 @@
+```csharp title="C#"
+using System;
+using System.IO;
+using System.Net.Http;
+
+// Dispose the client, the request content and the response when the script ends.
+using var client = new HttpClient();
+
+// Stream the PDF from disk as the "files" part of a multipart/form-data body.
+using var fileStream = File.OpenRead("document.pdf");
+using var content = new MultipartFormDataContent();
+content.Add(new StreamContent(fileStream), "files", "document.pdf");
+
+using var response = await client.PostAsync("http://localhost:8000/extract", content);
+
+// The body is printed as-is, whatever the status code was.
+var json = await response.Content.ReadAsStringAsync();
+
+Console.WriteLine(json);
+```
--- a/docs/snippets/csharp/cloud_ocr_backend.md
+++ b/docs/snippets/csharp/cloud_ocr_backend.md
@@ -0,0 +1,56 @@
+```csharp title="C#"
+using Kreuzberg;
+using System.Net.Http;
+using System.Text.Json;
+
+/// <summary>
+/// Example <see cref="IOcrBackend"/> that forwards images to a remote HTTP OCR
+/// service and returns the recognized text.
+/// </summary>
+public class CloudOcrBackend : IOcrBackend
+{
+    private const string DefaultLanguage = "eng";
+
+    // Shared by all calls: creating an HttpClient per request can exhaust sockets under load.
+    private static readonly HttpClient s_client = new();
+
+    private readonly string _apiKey;
+    private readonly List<string> _langs = new() { "eng", "deu", "fra" };
+
+    /// <param name="apiKey">Credential sent with every OCR request.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="apiKey"/> is null.</exception>
+    public CloudOcrBackend(string apiKey)
+    {
+        _apiKey = apiKey ?? throw new ArgumentNullException(nameof(apiKey));
+    }
+
+    public string Name() => "cloud-ocr";
+    public string Version() => "1.0.0";
+    public List<string> SupportedLanguages() => _langs;
+
+    /// <summary>
+    /// Uploads <paramref name="imageBytes"/> as multipart form data and returns the
+    /// "text" property of the JSON response under the "content" key.
+    /// </summary>
+    /// <param name="imageBytes">Encoded image to recognize.</param>
+    /// <param name="config">Options; the "language" entry is read when present, otherwise "eng" is used.</param>
+    /// <returns>A dictionary with "content" and "mime_type" ("text/plain") entries.</returns>
+    /// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
+    public Dictionary<string, object> ProcessImage(byte[] imageBytes, Dictionary<string, object> config)
+    {
+        // Single lookup instead of ContainsKey + indexer; a null value falls back to the default.
+        var lang = config.TryGetValue("language", out var requested)
+            ? requested?.ToString() ?? DefaultLanguage
+            : DefaultLanguage;
+
+        using var form = new MultipartFormDataContent();
+        form.Add(new ByteArrayContent(imageBytes), "image");
+        form.Add(new StringContent(lang), "language");
+
+        using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.example.com/ocr")
+        {
+            Content = form
+        };
+
+        // NOTE(review): the Bearer scheme is an assumption for this placeholder
+        // endpoint — use whatever scheme the real provider documents.
+        request.Headers.Authorization =
+            new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", _apiKey);
+
+        // This method is synchronous, so use the synchronous HttpClient API rather
+        // than blocking on Task.Result.
+        using var response = s_client.Send(request);
+        response.EnsureSuccessStatusCode();
+
+        using var body = response.Content.ReadAsStream();
+        using var doc = JsonDocument.Parse(body);
+        var text = doc.RootElement.GetProperty("text").GetString();
+
+        return new Dictionary<string, object>
+        {
+            // A JSON null for "text" becomes an empty string, not a null dictionary value.
+            { "content", text ?? string.Empty },
+            { "mime_type", "text/plain" }
+        };
+    }
+
+    public void Initialize() { }
+    public void Shutdown() { }
+}
+
+class Program
+{
+    // Creates the example backend and hands it to KreuzbergLib.RegisterOcrBackend.
+    static void Main() =>
+        KreuzbergLib.RegisterOcrBackend(new CloudOcrBackend(apiKey: "your-api-key"));
+}
+```
--- a/docs/snippets/csharp/complete_example.md
+++ b/docs/snippets/csharp/complete_example.md
@@ -0,0 +1,28 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// OCR: "tesseract" backend, "eng+fra" language string, Psm set to 3.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+fra",
+    TesseractConfig = new TesseractConfig { Psm = 3 }
+};
+
+// Chunking with an embedding model selected through a preset name.
+var chunking = new ChunkingConfig
+{
+    MaxChars = 1000,
+    MaxOverlap = 200,
+    Embedding = new EmbeddingConfig
+    {
+        Model = EmbeddingModelType.Preset("all-MiniLM-L6-v2")
+    }
+};
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+    Ocr = ocr,
+    PdfOptions = new PdfConfig { ExtractImages = true },
+    Chunking = chunking
+};
+
+var extraction = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+
+// Print at most the first 100 characters of the content.
+var text = extraction.Content;
+var preview = text.Length <= 100 ? text : text[..100];
+Console.WriteLine($"Content: {preview}");
+```
--- a/docs/snippets/csharp/config/ElementBasedOutput.md
+++ b/docs/snippets/csharp/config/ElementBasedOutput.md
@@ -0,0 +1,48 @@
+```csharp title="Element-Based Output (C#)"
+using Kreuzberg;
+
+// Configure element-based output
+var config = new ExtractionConfig
+{
+    OutputFormat = OutputFormat.ElementBased
+};
+
+// Extract document. `Kreuzberg` is the namespace imported above, so the call
+// goes through the KreuzbergLib class.
+var result = KreuzbergLib.ExtractFileSync("document.pdf", config);
+
+// Guard against a result that carries no element list before enumerating it.
+if (result.Elements != null)
+{
+    // Access elements
+    foreach (var element in result.Elements)
+    {
+        Console.WriteLine($"Type: {element.ElementType}");
+
+        // Print at most the first 100 characters of the element text.
+        var text = element.Text.Length > 100
+            ? element.Text.Substring(0, 100)
+            : element.Text;
+        Console.WriteLine($"Text: {text}");
+
+        if (element.Metadata.PageNumber.HasValue)
+        {
+            Console.WriteLine($"Page: {element.Metadata.PageNumber}");
+        }
+
+        if (element.Metadata.Coordinates != null)
+        {
+            var coords = element.Metadata.Coordinates;
+            Console.WriteLine($"Coords: ({coords.Left}, {coords.Top}) - ({coords.Right}, {coords.Bottom})");
+        }
+
+        Console.WriteLine("---");
+    }
+
+    // Filter by element type
+    var titles = result.Elements
+        .Where(e => e.ElementType == "title");
+
+    foreach (var title in titles)
+    {
+        // "level" is looked up in the element's additional metadata; "unknown" is
+        // printed when that key is absent.
+        var level = title.Metadata.Additional.TryGetValue("level", out var levelValue)
+            ? levelValue.ToString()
+            : "unknown";
+        Console.WriteLine($"[{level}] {title.Text}");
+    }
+}
+```
--- a/docs/snippets/csharp/config/advanced_config.md
+++ b/docs/snippets/csharp/config/advanced_config.md
@@ -0,0 +1,41 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Feature-specific settings are built first, then combined below.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+deu"
+};
+
+var chunking = new ChunkingConfig
+{
+    MaxCharacters = 1000,
+    Overlap = 200
+};
+
+var languageDetection = new LanguageDetectionConfig
+{
+    Enabled = true,
+    DetectMultiple = true
+};
+
+var tokenReduction = new TokenReductionOptions
+{
+    Mode = "moderate"
+};
+
+var keywords = new KeywordConfig
+{
+    MaxKeywords = 10,
+    MinScore = 0.1f
+};
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+    Ocr = ocr,
+    Chunking = chunking,
+    LanguageDetection = languageDetection,
+    TokenReduction = tokenReduction,
+    Keywords = keywords
+};
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, config);
+Console.WriteLine(extraction.Content);
+
+// Print the language list only when it is present and non-empty.
+if (extraction.DetectedLanguages is { Count: > 0 } languages)
+{
+    Console.WriteLine($"Languages: {string.Join(", ", languages)}");
+}
+```
--- a/docs/snippets/csharp/config/basic.cs
+++ b/docs/snippets/csharp/config/basic.cs
@@ -0,0 +1,9 @@
+using Kreuzberg;
+
+// Minimal configuration: UseCache and EnableQualityProcessing both switched on.
+ExtractionConfig options = new()
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+};
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", options);
--- a/docs/snippets/csharp/config/chunking_config.md
+++ b/docs/snippets/csharp/config/chunking_config.md
@@ -0,0 +1,47 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Text chunker: MaxCharacters = 1000 with Overlap = 200.
+var chunking = new ChunkingConfig
+{
+    MaxCharacters = 1000,
+    Overlap = 200,
+    ChunkerType = ChunkerType.Text
+};
+
+var extraction = await KreuzbergLib.ExtractFile(
+    "document.pdf",
+    null,
+    new ExtractionConfig { Chunking = chunking });
+
+// Report on the chunks only when the result carries a chunk list.
+if (extraction.Chunks is { } chunks)
+{
+    Console.WriteLine($"Total chunks: {chunks.Count}");
+    foreach (var piece in chunks)
+    {
+        Console.WriteLine($"Chunk length: {piece.Content.Length}");
+    }
+}
+```
+
+```csharp title="C# - Markdown with Heading Context"
+using Kreuzberg;
+
+// Markdown chunker with PrependHeadingContext enabled.
+var chunking = new ChunkingConfig
+{
+    MaxCharacters = 500,
+    Overlap = 50,
+    ChunkerType = ChunkerType.Markdown,
+    PrependHeadingContext = true
+};
+
+var extraction = await KreuzbergLib.ExtractFile(
+    "document.md",
+    null,
+    new ExtractionConfig { Chunking = chunking });
+
+if (extraction.Chunks is { } chunks)
+{
+    foreach (var piece in chunks)
+    {
+        // Print at most the first 100 characters of each chunk.
+        var text = piece.Content;
+        var preview = text[..Math.Min(100, text.Length)];
+        Console.WriteLine($"Content: {preview}");
+    }
+}
+```
--- a/docs/snippets/csharp/config/config_basic.md
+++ b/docs/snippets/csharp/config/config_basic.md
@@ -0,0 +1,12 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// UseCache and EnableQualityProcessing both switched on; the second call argument is null.
+ExtractionConfig options = new()
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+};
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, options);
+Console.WriteLine(extraction.Content);
+```
--- a/docs/snippets/csharp/config/config_discover.md
+++ b/docs/snippets/csharp/config/config_discover.md
@@ -0,0 +1,8 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Use the configuration returned by Discover(); fall back to defaults when it returns null.
+var options = ExtractionConfig.Discover();
+options ??= new ExtractionConfig();
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, options);
+Console.WriteLine(extraction.Content);
+```
--- a/docs/snippets/csharp/config/config_ocr.md
+++ b/docs/snippets/csharp/config/config_ocr.md
@@ -0,0 +1,19 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// OCR with the "tesseract" backend and the "eng" language string.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng"
+};
+
+var extraction = await KreuzbergLib.ExtractFile(
+    "scanned.pdf",
+    null,
+    new ExtractionConfig { Ocr = ocr });
+
+Console.WriteLine($"Content length: {extraction.Content.Length}");
+
+// The table count is printed only when the result carries a table list.
+if (extraction.Tables is { } tables)
+{
+    Console.WriteLine($"Tables detected: {tables.Count}");
+}
+```
--- a/docs/snippets/csharp/config/config_programmatic.md
+++ b/docs/snippets/csharp/config/config_programmatic.md
@@ -0,0 +1,26 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// OCR settings, including a Tesseract-specific Psm value of 6.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+deu",
+    TesseractConfig = new TesseractConfig { Psm = 6 }
+};
+
+var chunking = new ChunkingConfig
+{
+    MaxCharacters = 1000,
+    Overlap = 200
+};
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+    Ocr = ocr,
+    Chunking = chunking
+};
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, config);
+Console.WriteLine($"Content length: {extraction.Content.Length}");
+```
--- a/docs/snippets/csharp/config/custom_mime_types.cs
+++ b/docs/snippets/csharp/config/custom_mime_types.cs
@@ -0,0 +1,14 @@
+using Kreuzberg;
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true
+};
+
+// The bytes have to come from somewhere: read the document into memory first.
+var fileBytes = File.ReadAllBytes("document.pdf");
+
+// Pass the MIME type explicitly alongside the raw bytes.
+// NOTE(review): argument shape chosen to match the other ExtractBytesSync
+// snippets (bytes, MIME type, config) — confirm against the binding.
+var result = KreuzbergLib.ExtractBytesSync(fileBytes, "application/pdf", config);
+
+var mimeType = result.MimeType;
--- a/docs/snippets/csharp/config/disable_cache.cs
+++ b/docs/snippets/csharp/config/disable_cache.cs
@@ -0,0 +1,8 @@
+using Kreuzberg;
+
+// Extract with UseCache switched off.
+ExtractionConfig options = new() { UseCache = false };
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", options);
--- a/docs/snippets/csharp/config/document_structure_config.md
+++ b/docs/snippets/csharp/config/document_structure_config.md
@@ -0,0 +1,18 @@
+```csharp title="Document Structure Config (C#)"
+using Kreuzberg;
+
+// Ask for the document structure to be included in the result.
+ExtractionConfig options = new() { IncludeDocumentStructure = true };
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", options);
+
+// Print the node type of every node when a document structure is present.
+if (extraction.Document is { } document)
+{
+    foreach (var node in document.Nodes)
+    {
+        Console.WriteLine($"[{node.Content.NodeType}]");
+    }
+}
+```
--- a/docs/snippets/csharp/config/element_based_output.md
+++ b/docs/snippets/csharp/config/element_based_output.md
@@ -0,0 +1,37 @@
+```csharp title="C#"
+using Kreuzberg;
+
+ExtractionConfig options = new() { ResultFormat = ResultFormat.ElementBased };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, options);
+
+if (extraction.Elements is { } elements)
+{
+    foreach (var element in elements)
+    {
+        // Print at most the first 100 characters of the element text.
+        var preview = element.Text[..Math.Min(100, element.Text.Length)];
+        var metadata = element.Metadata;
+
+        Console.WriteLine($"Type: {element.ElementType}");
+        Console.WriteLine($"Text: {preview}");
+
+        if (metadata.PageNumber.HasValue)
+        {
+            Console.WriteLine($"Page: {metadata.PageNumber}");
+        }
+
+        if (metadata.Coordinates != null)
+        {
+            Console.WriteLine($"Coords: ({metadata.Coordinates.X0}, {metadata.Coordinates.Y0})");
+        }
+
+        Console.WriteLine("---");
+    }
+
+    // Count the elements whose type is ElementType.Title.
+    var titleCount = elements.Count(e => e.ElementType == ElementType.Title);
+
+    Console.WriteLine($"Found {titleCount} titles");
+}
+```
--- a/docs/snippets/csharp/config/embedding_config.cs
+++ b/docs/snippets/csharp/config/embedding_config.cs
@@ -0,0 +1,106 @@
+using Kreuzberg.Config;
+
+/// <summary>
+/// Shows three ways to build an EmbeddingConfig and how to attach one to a ChunkingConfig.
+/// </summary>
+public class EmbeddingConfigExample
+{
+    public static void Main()
+    {
+        // --- Example 1: preset model (recommended) ---
+        // Presets are named configurations for common use cases:
+        //   "fast"         (384 dims)  quick prototyping, development, resource-constrained
+        //   "balanced"     (768 dims)  production, general-purpose RAG, English documents
+        //   "quality"      (1024 dims) complex documents, maximum accuracy
+        //   "multilingual" (768 dims)  international documents, 100+ languages
+        var presetEmbedding = new EmbeddingConfig
+        {
+            Model = new EmbeddingModelType.Preset { Name = "balanced" },
+            BatchSize = 32,
+            Normalize = true,
+            ShowDownloadProgress = true,
+            CacheDir = "~/.cache/kreuzberg/embeddings"
+        };
+
+        // --- Example 2: custom ONNX model (requires embeddings feature) ---
+        // Selects a specific ONNX embedding model from HuggingFace by id, with explicit dimensions.
+        // Popular ONNX-compatible models:
+        //   "BAAI/bge-small-en-v1.5" (384 dims)  fast, efficient
+        //   "BAAI/bge-base-en-v1.5"  (768 dims)  balanced quality/speed
+        //   "BAAI/bge-large-en-v1.5" (1024 dims) high quality, slower
+        //   "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" (768 dims) multilingual support
+        var customEmbedding = new EmbeddingConfig
+        {
+            Model = new EmbeddingModelType.Custom
+            {
+                ModelId = "BAAI/bge-small-en-v1.5",
+                Dimensions = 384
+            },
+            BatchSize = 32,
+            Normalize = true,
+            ShowDownloadProgress = true,
+            CacheDir = null  // Uses default: .kreuzberg/embeddings/
+        };
+
+        // --- Example 3: alternative custom ONNX model ---
+        // For advanced users who want a different ONNX embedding model.
+        var alternativeEmbedding = new EmbeddingConfig
+        {
+            Model = new EmbeddingModelType.Custom
+            {
+                ModelId = "sentence-transformers/all-mpnet-base-v2",
+                Dimensions = 768
+            },
+            BatchSize = 16,  // Larger model requires smaller batch size
+            Normalize = true,
+            ShowDownloadProgress = true,
+            CacheDir = "/var/cache/embeddings"
+        };
+
+        // --- Integration with ChunkingConfig ---
+        // Embeddings are attached through ChunkingConfig.Embedding, and the chunking
+        // configuration is in turn attached to the ExtractionConfig.
+        var extractionConfig = new ExtractionConfig
+        {
+            Chunking = new ChunkingConfig
+            {
+                MaxChars = 1024,
+                MaxOverlap = 100,
+                Preset = "balanced",
+                Embedding = new EmbeddingConfig
+                {
+                    Model = new EmbeddingModelType.Preset { Name = "balanced" },
+                    BatchSize = 32,
+                    Normalize = true
+                }
+            }
+        };
+    }
+}
+
+// Key parameter explanations:
+//
+// BatchSize: Number of texts to embed at once (32-128 typical)
+//   - Larger batches are faster but use more memory
+//   - Smaller batches for resource-constrained environments
+//
+// Normalize: Whether to normalize vectors (L2 norm)
+//   - true (recommended): Enables cosine similarity in vector DBs
+//   - false: Raw embedding values
+//
+// CacheDir: Where to store downloaded models
+//   - null: Uses .kreuzberg/embeddings/ in current directory
+//   - String path: Custom directory for model storage
+//
+// ShowDownloadProgress: Display download progress bar
+//   - Useful for monitoring large model downloads
--- a/docs/snippets/csharp/config/embedding_config.md
+++ b/docs/snippets/csharp/config/embedding_config.md
@@ -0,0 +1,25 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Embedding settings; CacheDir is left null here.
+var embedding = new EmbeddingConfig
+{
+    Normalize = true,
+    BatchSize = 16,
+    ShowDownloadProgress = true,
+    CacheDir = null
+};
+
+var config = new ExtractionConfig
+{
+    Chunking = new ChunkingConfig
+    {
+        MaxCharacters = 1000,
+        Overlap = 200,
+        Embedding = embedding
+    }
+};
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, config);
+
+// The chunk count is printed only when the result carries a chunk list.
+if (extraction.Chunks is { } chunks)
+{
+    Console.WriteLine($"Chunks with embeddings: {chunks.Count}");
+}
+```
--- a/docs/snippets/csharp/config/enable_cache.cs
+++ b/docs/snippets/csharp/config/enable_cache.cs
@@ -0,0 +1,8 @@
+using Kreuzberg;
+
+// Extract with UseCache switched on.
+ExtractionConfig options = new() { UseCache = true };
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", options);
--- a/docs/snippets/csharp/config/full_example.cs
+++ b/docs/snippets/csharp/config/full_example.cs
@@ -0,0 +1,60 @@
+using Kreuzberg;
+
+// OCR: Tesseract backend with image preprocessing and table detection enabled.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+fra",
+    TesseractConfig = new TesseractConfig
+    {
+        Psm = 3,
+        Oem = 3,
+        MinConfidence = 0.8,
+        Preprocessing = new ImagePreprocessingConfig
+        {
+            TargetDpi = 300,
+            Denoise = true,
+            Deskew = true,
+            ContrastEnhance = true
+        },
+        EnableTableDetection = true
+    }
+};
+
+var pdf = new PdfConfig
+{
+    ExtractImages = true,
+    ExtractMetadata = true
+};
+var images = new ImageExtractionConfig
+{
+    ExtractImages = true,
+    TargetDpi = 150,
+    MaxImageDimension = 4096
+};
+var chunking = new ChunkingConfig { MaxChars = 1000, MaxOverlap = 200 };
+var tokenReduction = new TokenReductionConfig { Mode = "moderate", PreserveImportantWords = true };
+var languageDetection = new LanguageDetectionConfig
+{
+    Enabled = true,
+    MinConfidence = 0.8,
+    DetectMultiple = false
+};
+var postprocessor = new PostProcessorConfig { Enabled = true };
+
+// Assemble the full configuration from the pieces built above.
+var fullConfig = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true,
+    ForceOcr = false,
+    Ocr = ocr,
+    PdfOptions = pdf,
+    Images = images,
+    Chunking = chunking,
+    TokenReduction = tokenReduction,
+    LanguageDetection = languageDetection,
+    Postprocessor = postprocessor
+};
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", fullConfig);
--- a/docs/snippets/csharp/config/hierarchy_config.cs
+++ b/docs/snippets/csharp/config/hierarchy_config.cs
@@ -0,0 +1,99 @@
+using Kreuzberg.Config;
+using Kreuzberg;
+
+/// <summary>Three <c>HierarchyConfig</c> setups for PDF extraction: default
+/// clustering, minimal clustering, and an OCR coverage threshold.</summary>
+public class HierarchyConfigExample
+{
+    public static void Main()
+    {
+        // Example 1: Basic hierarchy extraction
+        // Enabled with default KClusters=6 for standard H1-H6 heading hierarchy.
+        // Extract bounding box information for spatial layout awareness.
+        var hierarchyConfigBasic = new HierarchyConfig
+        {
+            Enabled = true,
+            KClusters = 6,  // Default: creates 6 font size clusters (H1-H6 structure)
+            IncludeBbox = true,  // Include bounding box coordinates
+            OcrCoverageThreshold = null  // No OCR coverage threshold
+        };
+
+        var pdfConfigBasic = new PdfConfig
+        {
+            Hierarchy = hierarchyConfigBasic
+        };
+
+        var extractionConfigBasic = new ExtractionConfig
+        {
+            PdfOptions = pdfConfigBasic
+        };
+
+        // Run extraction via the static API ("Kreuzberg" is a namespace, not a type):
+        // var result = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfigBasic);
+
+        // Example 2: Custom KClusters for minimal structure
+        // Use 3 clusters for simpler hierarchy with minimal structure.
+        // Useful when you only need major section divisions (Main, Subsection, Detail).
+        var hierarchyConfigMinimal = new HierarchyConfig
+        {
+            Enabled = true,
+            KClusters = 3,  // Minimal clustering: just 3 levels
+            IncludeBbox = true,
+            OcrCoverageThreshold = null
+        };
+
+        var pdfConfigMinimal = new PdfConfig
+        {
+            Hierarchy = hierarchyConfigMinimal
+        };
+
+        var extractionConfigMinimal = new ExtractionConfig
+        {
+            PdfOptions = pdfConfigMinimal
+        };
+
+        // Example 3: With OCR coverage threshold
+        // Trigger OCR if less than 50% of text has font data.
+        // Useful for documents with mixed digital and scanned content.
+        var hierarchyConfigOcr = new HierarchyConfig
+        {
+            Enabled = true,
+            KClusters = 6,
+            IncludeBbox = true,
+            OcrCoverageThreshold = 0.5f  // Trigger OCR if text coverage < 50%
+        };
+
+        var pdfConfigOcr = new PdfConfig
+        {
+            Hierarchy = hierarchyConfigOcr
+        };
+
+        var extractionConfigOcr = new ExtractionConfig
+        {
+            PdfOptions = pdfConfigOcr
+        };
+    }
+}
+
+// Field descriptions:
+//
+// Enabled: bool (default: true)
+//   - Enable or disable hierarchy extraction
+//   - When false, hierarchy structure is not analyzed
+//
+// KClusters: int (default: 6, valid: 1-7)
+//   - Number of font size clusters for hierarchy levels
+//   - 6 provides H1-H6 heading levels with body text
+//   - Higher values create more fine-grained hierarchy
+//   - Lower values create simpler structure
+//
+// IncludeBbox: bool (default: true)
+//   - Include bounding box coordinates in hierarchy blocks
+//   - Required for spatial layout awareness and document structure
+//   - Set to false only if space optimization is critical
+//
+// OcrCoverageThreshold: float? (default: null)
+//   - Range: 0.0 to 1.0
+//   - Triggers OCR when text block coverage falls below this fraction
+//   - Example: 0.5f means "run OCR if less than 50% of page has text data"
+//   - null means no OCR coverage-based triggering
--- a/docs/snippets/csharp/config/html_output.md
+++ b/docs/snippets/csharp/config/html_output.md
@@ -0,0 +1,17 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Styling options used when the output format is HTML.
+var htmlOptions = new HtmlOutputConfig
+{
+    Theme = HtmlTheme.GitHub,
+    EmbedCss = true,
+    ClassPrefix = "kb-"
+};
+
+var htmlConfig = new ExtractionConfig { OutputFormat = OutputFormat.Html, HtmlOutput = htmlOptions };
+
+// The extracted content is written to stdout as-is.
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, htmlConfig);
+Console.WriteLine(extraction.Content);
+```
--- a/docs/snippets/csharp/config/include_meta.cs
+++ b/docs/snippets/csharp/config/include_meta.cs
@@ -0,0 +1,19 @@
+using Kreuzberg;
+
+// Tesseract OCR with Language "eng", with UseCache switched on.
+var ocr = new OcrConfig { Backend = "tesseract", Language = "eng" };
+var extractionConfig = new ExtractionConfig
+{
+    UseCache = true,
+    Ocr = ocr
+};
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
+
+// Metadata may be missing; read its fields only when it is present.
+if (extraction.Metadata is not null)
+{
+    var metadata = extraction.Metadata;
+    var language = metadata.Language;
+    var format = metadata.FormatType;
+}
--- a/docs/snippets/csharp/config/keyword_config.cs
+++ b/docs/snippets/csharp/config/keyword_config.cs
@@ -0,0 +1,66 @@
+using Kreuzberg;
+using Kreuzberg.Keywords;
+
+// Example 1: Basic YAKE configuration
+// Uses YAKE algorithm with default parameters and English stopword filtering
+var basicYakeConfig = new ExtractionConfig
+{
+    Keywords = new KeywordConfig
+    {
+        Algorithm = KeywordAlgorithm.Yake,
+        MaxKeywords = 10,
+        MinScore = 0.0f,
+        NgramRange = (1, 3),
+        Language = "en",
+        YakeParams = null,
+        RakeParams = null,
+    }
+};
+// A result may carry no keyword list; a null list is printed as empty instead of throwing.
+var result = KreuzbergLib.ExtractFileSync("document.pdf", basicYakeConfig);
+Console.WriteLine($"Keywords: {(result.Keywords is null ? string.Empty : string.Join(", ", result.Keywords))}");
+
+// Example 2: Advanced YAKE with custom parameters
+// Fine-tunes YAKE with custom window size for co-occurrence analysis
+var advancedYakeConfig = new ExtractionConfig
+{
+    Keywords = new KeywordConfig
+    {
+        Algorithm = KeywordAlgorithm.Yake,
+        MaxKeywords = 15,
+        MinScore = 0.1f,
+        NgramRange = (1, 2),
+        Language = "en",
+        YakeParams = new YakeParams
+        {
+            WindowSize = 1,
+        },
+        RakeParams = null,
+    }
+};
+
+result = KreuzbergLib.ExtractFileSync("document.pdf", advancedYakeConfig);
+Console.WriteLine($"Keywords: {(result.Keywords is null ? string.Empty : string.Join(", ", result.Keywords))}");
+
+// Example 3: RAKE configuration
+// Uses RAKE algorithm for rapid keyword extraction with phrase constraints
+var rakeConfig = new ExtractionConfig
+{
+    Keywords = new KeywordConfig
+    {
+        Algorithm = KeywordAlgorithm.Rake,
+        MaxKeywords = 10,
+        MinScore = 5.0f,
+        NgramRange = (1, 3),
+        Language = "en",
+        YakeParams = null,
+        RakeParams = new RakeParams
+        {
+            MinWordLength = 1,
+            MaxWordsPerPhrase = 3,
+        },
+    }
+};
+
+result = KreuzbergLib.ExtractFileSync("document.pdf", rakeConfig);
+Console.WriteLine($"Keywords: {(result.Keywords is null ? string.Empty : string.Join(", ", result.Keywords))}");
--- a/docs/snippets/csharp/config/keyword_extraction_config.md
+++ b/docs/snippets/csharp/config/keyword_extraction_config.md
@@ -0,0 +1,21 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// YAKE keyword extraction settings, wrapped in an extraction configuration.
+var keywordOptions = new KeywordConfig
+{
+    Algorithm = KeywordAlgorithm.Yake,
+    MaxKeywords = 10,
+    MinScore = 0.1f,
+    NgramRange = [1, 3],
+    Language = "en"
+};
+var extractionConfig = new ExtractionConfig { Keywords = keywordOptions };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, extractionConfig);
+// Print the keywords as a comma-separated list when the list is present.
+if (extraction.Keywords is not null)
+{
+    Console.WriteLine($"Keywords: {string.Join(", ", extraction.Keywords)}");
+}
+```
--- a/docs/snippets/csharp/config/language_detection_config.md
+++ b/docs/snippets/csharp/config/language_detection_config.md
@@ -0,0 +1,20 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Language detection with DetectMultiple switched on.
+var detection = new LanguageDetectionConfig
+{
+    Enabled = true,
+    MinConfidence = 0.8,
+    DetectMultiple = true
+};
+var extractionConfig = new ExtractionConfig { LanguageDetection = detection };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, extractionConfig);
+Console.WriteLine($"Detected language: {extraction.Language}");
+
+if (extraction.DetectedLanguages is not null)
+{
+    Console.WriteLine($"All detected: {string.Join(", ", extraction.DetectedLanguages)}");
+}
+```
--- a/docs/snippets/csharp/config/ocr_dpi_config.md
+++ b/docs/snippets/csharp/config/ocr_dpi_config.md
@@ -0,0 +1,22 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Image extraction with explicit target, minimum and maximum DPI values.
+var imageOptions = new ImageExtractionConfig
+{
+    ExtractImages = true,
+    TargetDpi = 300,
+    MaxImageDimension = 4096,
+    AutoAdjustDpi = true,
+    MinDpi = 150,
+    MaxDpi = 600
+};
+var extractionConfig = new ExtractionConfig { Images = imageOptions };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, extractionConfig);
+
+if (extraction.Images is not null)
+{
+    Console.WriteLine($"Extracted images: {extraction.Images.Count}");
+}
+```
--- a/docs/snippets/csharp/config/ocr_lang.cs
+++ b/docs/snippets/csharp/config/ocr_lang.cs
@@ -0,0 +1,12 @@
+using Kreuzberg;
+
+// Tesseract OCR with the language string "eng+fra".
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+fra"
+};
+
+ExtractionConfig ocrConfig = new() { Ocr = ocr };
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", ocrConfig);
--- a/docs/snippets/csharp/config/parse_links.cs
+++ b/docs/snippets/csharp/config/parse_links.cs
@@ -0,0 +1,17 @@
+using Kreuzberg;
+
+// Extract an HTML document with UseCache switched on.
+ExtractionConfig cachedConfig = new() { UseCache = true };
+var extraction = KreuzbergLib.ExtractFileSync("document.html", cachedConfig);
+
+// Links are optional at several levels, so resolve the chain once up front.
+var links = extraction.Metadata?.Format.Text?.Links;
+if (links is not null)
+{
+    // Each entry is indexed as [0] = text, [1] = URL.
+    foreach (var entry in links)
+    {
+        var text = entry[0];
+        var url = entry[1];
+    }
+}
--- a/docs/snippets/csharp/config/parse_metadata.cs
+++ b/docs/snippets/csharp/config/parse_metadata.cs
@@ -0,0 +1,18 @@
+using Kreuzberg;
+
+// PDF options with ExtractMetadata switched on.
+var pdfOptions = new PdfConfig { ExtractMetadata = true };
+var extractionConfig = new ExtractionConfig
+{
+    PdfOptions = pdfOptions
+};
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
+// PDF-specific metadata is read only when it is present.
+var pdfMetadata = extraction.Metadata?.Format.Pdf;
+if (pdfMetadata is not null)
+{
+    var title = pdfMetadata.Title;
+    var author = pdfMetadata.Author;
+    var pageCount = pdfMetadata.PageCount;
+}
--- a/docs/snippets/csharp/config/pdf_config.md
+++ b/docs/snippets/csharp/config/pdf_config.md
@@ -0,0 +1,21 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// PDF options for an encrypted file: a password list is supplied up front.
+var pdfOptions = new PdfConfig
+{
+    ExtractImages = true,
+    ExtractMetadata = true,
+    ExtractAnnotations = false,
+    Passwords = new List<string> { "password123" }
+};
+var extractionConfig = new ExtractionConfig { PdfOptions = pdfOptions };
+
+var extraction = await KreuzbergLib.ExtractFile("encrypted.pdf", null, extractionConfig);
+
+if (extraction.Metadata is not null)
+{
+    Console.WriteLine($"Title: {extraction.Metadata.Title}");
+    Console.WriteLine($"Authors: {string.Join(", ", extraction.Metadata.Authors ?? new List<string>())}");
+}
+```
--- a/docs/snippets/csharp/config/pdf_hierarchy_config.md
+++ b/docs/snippets/csharp/config/pdf_hierarchy_config.md
@@ -0,0 +1,74 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Basic hierarchy configuration with properties
+var config = new ExtractionConfig
+{
+    PdfOptions = new PdfConfig
+    {
+        ExtractImages = true,
+        Hierarchy = new HierarchyConfig
+        {
+            Enabled = true,
+            KClusters = 6,
+            IncludeBbox = true,
+            OcrCoverageThreshold = 0.8f
+        }
+    }
+};
+
+var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config);
+Console.WriteLine($"Content length: {result.Content.Length}");
+
+// Advanced hierarchy detection with custom parameters
+var advancedConfig = new ExtractionConfig
+{
+    PdfOptions = new PdfConfig
+    {
+        ExtractImages = true,
+        Hierarchy = new HierarchyConfig
+        {
+            Enabled = true,
+            KClusters = 7,            // Upper end of the valid 1-7 range for the finest hierarchy
+            IncludeBbox = true,       // Include bounding box coordinates
+            OcrCoverageThreshold = 0.7f  // OCR is triggered when text coverage falls below 70%
+        }
+    }
+};
+// `result` is declared once above and reassigned for each later example.
+result = await KreuzbergLib.ExtractFileAsync("complex_document.pdf", advancedConfig);
+Console.WriteLine($"Advanced hierarchy detection completed: {result.Content.Length} chars");
+
+// Minimal configuration with only enabled flag
+var minimalConfig = new ExtractionConfig
+{
+    PdfOptions = new PdfConfig
+    {
+        Hierarchy = new HierarchyConfig
+        {
+            Enabled = true,
+            // Other properties use defaults:
+            // KClusters = 6
+            // IncludeBbox = true
+        }
+    }
+};
+
+result = await KreuzbergLib.ExtractFileAsync("document.pdf", minimalConfig);
+Console.WriteLine("Extraction with default hierarchy settings complete");
+
+// Disabling hierarchy detection
+var noHierarchyConfig = new ExtractionConfig
+{
+    PdfOptions = new PdfConfig
+    {
+        Hierarchy = new HierarchyConfig
+        {
+            Enabled = false
+        }
+    }
+};
+
+result = await KreuzbergLib.ExtractFileAsync("document.pdf", noHierarchyConfig);
+Console.WriteLine("Extraction without hierarchy detection complete");
+```
--- a/docs/snippets/csharp/config/postprocessor.cs
+++ b/docs/snippets/csharp/config/postprocessor.cs
@@ -0,0 +1,13 @@
+using Kreuzberg;
+
+// Enable post-processing and name the processors to run.
+var postprocessing = new PostProcessorConfig
+{
+    Enabled = true,
+    EnabledProcessors = new List<string> { "normalize_whitespace", "remove_diacritics" }
+};
+
+// UseCache is switched on alongside the post-processing settings.
+var extractionConfig = new ExtractionConfig { UseCache = true, Postprocessor = postprocessing };
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
--- a/docs/snippets/csharp/config/postprocessor_config.md
+++ b/docs/snippets/csharp/config/postprocessor_config.md
@@ -0,0 +1,20 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Post-processing with an explicit list of enabled processors.
+var enabledProcessors = new List<string> { "whitespace_normalizer", "unicode_normalizer" };
+var postprocessing = new PostProcessorConfig
+{
+    Enabled = true,
+    EnabledProcessors = enabledProcessors,
+    DisabledProcessors = null
+};
+var extractionConfig = new ExtractionConfig { Postprocessor = postprocessing };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, extractionConfig);
+
+// Preview at most the first 100 characters of the processed text.
+var text = extraction.Content;
+var preview = text[..Math.Min(100, text.Length)];
+Console.WriteLine($"Processed content: {preview}");
+```
--- a/docs/snippets/csharp/config/quality_processing_config.md
+++ b/docs/snippets/csharp/config/quality_processing_config.md
@@ -0,0 +1,13 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Quality processing and caching are both switched on through a single
+// object initializer.
+ExtractionConfig qualityConfig = new() { EnableQualityProcessing = true, UseCache = true };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, qualityConfig);
+
+// Report the quality score and the size of the extracted text.
+Console.WriteLine($"Quality score: {extraction.QualityScore}");
+Console.WriteLine($"Content length: {extraction.Content.Length}");
+```
--- a/docs/snippets/csharp/config/tesseract_config.md
+++ b/docs/snippets/csharp/config/tesseract_config.md
@@ -0,0 +1,22 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Tesseract engine settings, attached to the OCR configuration below.
+var tesseract = new TesseractConfig
+{
+    Psm = 6,
+    Oem = 3,
+    MinConfidence = 0.5,
+    Language = "eng"
+};
+
+var ocr = new OcrConfig { Backend = "tesseract", Language = "eng+deu", TesseractConfig = tesseract };
+var extractionConfig = new ExtractionConfig { Ocr = ocr };
+
+var extraction = await KreuzbergLib.ExtractFile("scanned.pdf", null, extractionConfig);
+
+// Preview at most the first 100 characters of the recognised text.
+var text = extraction.Content;
+var preview = text[..Math.Min(100, text.Length)];
+Console.WriteLine($"OCR text: {preview}");
+```
--- a/docs/snippets/csharp/config/token_reduction_config.md
+++ b/docs/snippets/csharp/config/token_reduction_config.md
@@ -0,0 +1,16 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Token reduction in "moderate" mode with PreserveImportantWords switched on.
+var reduction = new TokenReductionOptions
+{
+    Mode = "moderate",
+    PreserveImportantWords = true
+};
+var extractionConfig = new ExtractionConfig { TokenReduction = reduction };
+
+var extraction = await KreuzbergLib.ExtractFile("document.pdf", null, extractionConfig);
+var text = extraction.Content;
+Console.WriteLine($"Reduced content length: {text.Length}");
+Console.WriteLine($"Content: {text[..Math.Min(100, text.Length)]}");
+```
--- a/docs/snippets/csharp/config/validator.cs
+++ b/docs/snippets/csharp/config/validator.cs
@@ -0,0 +1,18 @@
+using Kreuzberg;
+
+// Extraction with UseCache and EnableQualityProcessing switched on.
+ExtractionConfig extractionConfig = new()
+{
+    UseCache = true,
+    EnableQualityProcessing = true
+};
+
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
+
+// Error details are read only for an unsuccessful result whose metadata
+// actually carries an error object.
+if (!extraction.Success && extraction.Metadata?.Error is { } error)
+{
+    var errorType = error.ErrorType;
+    var errorMessage = error.Message;
+}
--- a/docs/snippets/csharp/config/with_cache.cs
+++ b/docs/snippets/csharp/config/with_cache.cs
@@ -0,0 +1,13 @@
+using Kreuzberg;
+
+// Tesseract OCR with Language "eng", combined with UseCache below.
+var ocr = new OcrConfig { Backend = "tesseract", Language = "eng" };
+
+var cachedConfig = new ExtractionConfig
+{
+    UseCache = true,
+    Ocr = ocr
+};
+
+// Run the synchronous extraction with the configuration above.
+var extraction = KreuzbergLib.ExtractFileSync("document.pdf", cachedConfig);
--- a/docs/snippets/csharp/config/with_timeout.cs
+++ b/docs/snippets/csharp/config/with_timeout.cs
@@ -0,0 +1,10 @@
+using Kreuzberg;
+
+var config = new ExtractionConfig
+{
+    UseCache = true,
+    EnableQualityProcessing = true
+};
+// Cancel after 30 s; `using` disposes the token source (and its timer) at end of scope.
+using var cts = new System.Threading.CancellationTokenSource(TimeSpan.FromSeconds(30));
+var result = await KreuzbergLib.ExtractFileAsync("document.pdf", config, cts.Token);
--- a/docs/snippets/csharp/config_basic.md
+++ b/docs/snippets/csharp/config_basic.md
@@ -0,0 +1,12 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Switch on quality processing and caching, then extract asynchronously.
+ExtractionConfig basicConfig = new()
+{
+    EnableQualityProcessing = true,
+    UseCache = true
+};
+var extraction = await KreuzbergLib.ExtractFileAsync("document.pdf", basicConfig);
+Console.WriteLine(extraction.Content);
+```
--- a/docs/snippets/csharp/config_discover.md
+++ b/docs/snippets/csharp/config_discover.md
@@ -0,0 +1,9 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Extract with a default-constructed configuration and preview the text.
+var extraction = await KreuzbergLib.ExtractFileAsync("document.pdf", new ExtractionConfig());
+var text = extraction.Content;
+Console.WriteLine(text.Substring(0, Math.Min(100, text.Length)));
+Console.WriteLine($"Total length: {text.Length}");
+```
--- a/docs/snippets/csharp/config_ocr.md
+++ b/docs/snippets/csharp/config_ocr.md
@@ -0,0 +1,16 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Tesseract OCR with Language "eng+fra" and Psm 3.
+var ocr = new OcrConfig
+{
+    Backend = "tesseract",
+    Language = "eng+fra",
+    TesseractConfig = new TesseractConfig { Psm = 3 }
+};
+
+ExtractionConfig ocrConfig = new() { Ocr = ocr };
+
+var extraction = await KreuzbergLib.ExtractFileAsync("document.pdf", ocrConfig);
+Console.WriteLine(extraction.Content);
+```
--- a/docs/snippets/csharp/docker/usage.cs
+++ b/docs/snippets/csharp/docker/usage.cs
@@ -0,0 +1,96 @@
+```csharp title="usage.cs"
+using System;
+using System.Diagnostics;
+using System.IO;
+using System.Text.Json;
+using System.Threading.Tasks;
+
+// Start the container, extract one file through it, and always tear it down.
+DockerKreuzbergLib docker = new();
+try
+{
+    await docker.StartContainerAsync();
+    // Fixed 2-second pause before the first request.
+    await Task.Delay(TimeSpan.FromSeconds(2));
+    string extracted = await docker.ExtractFileAsync("document.pdf");
+    Console.WriteLine($"Extracted content:\n{extracted}");
+}
+finally
+{
+    await docker.StopContainerAsync();
+}
+
+class DockerKreuzbergLib
+{
+    private const string ContainerName = "kreuzberg-api";
+    private const string ContainerImage = "kreuzberg:latest";
+    private const int ApiPort = 8000;
+
+    // One shared client: creating an HttpClient per request can exhaust sockets.
+    private static readonly HttpClient SharedClient = new();
+
+    /// <summary>Starts the container detached; throws if <c>docker run</c> fails.</summary>
+    public async Task StartContainerAsync()
+    {
+        Console.WriteLine("Starting Kreuzberg Docker container...");
+
+        var exitCode = await RunDockerAsync($"run -d --name {ContainerName} -p {ApiPort}:8000 {ContainerImage}", discardOutput: true);
+        if (exitCode != 0)
+        {
+            throw new InvalidOperationException($"docker run failed with exit code {exitCode}");
+        }
+
+        Console.WriteLine($"Container started on http://localhost:{ApiPort}");
+    }
+
+    /// <summary>Uploads the file to the container's API and returns the extracted text.</summary>
+    public async Task<string> ExtractFileAsync(string filePath)
+    {
+        var fileBytes = await File.ReadAllBytesAsync(filePath);
+        using var form = new MultipartFormDataContent();
+        form.Add(new ByteArrayContent(fileBytes), "files", Path.GetFileName(filePath));
+
+        using var response = await SharedClient.PostAsync($"http://localhost:{ApiPort}/extract", form);
+        response.EnsureSuccessStatusCode();
+        using var document = JsonDocument.Parse(await response.Content.ReadAsStringAsync());
+        var root = document.RootElement;
+        // Presumably the endpoint answers with one result per uploaded file; a bare object is accepted too.
+        var first = root.ValueKind == JsonValueKind.Array ? root[0] : root;
+        return first.GetProperty("content").GetString() ?? string.Empty;
+    }
+
+    /// <summary>Stops the container and then removes it.</summary>
+    public async Task StopContainerAsync()
+    {
+        Console.WriteLine("Stopping Kreuzberg Docker container...");
+
+        // Best-effort teardown: exit codes are ignored so "rm" still runs if "stop" fails.
+        await RunDockerAsync($"stop {ContainerName}");
+        await RunDockerAsync($"rm {ContainerName}");
+
+        Console.WriteLine("Container stopped and removed");
+    }
+
+    // Runs "docker <arguments>", optionally swallowing its stdout, and returns the exit code.
+    private static async Task<int> RunDockerAsync(string arguments, bool discardOutput = false)
+    {
+        var startInfo = new ProcessStartInfo
+        {
+            FileName = "docker",
+            Arguments = arguments,
+            UseShellExecute = false,
+            RedirectStandardOutput = discardOutput,
+        };
+
+        using var process = Process.Start(startInfo)
+            ?? throw new InvalidOperationException("Failed to start the docker process");
+        if (discardOutput)
+        {
+            // Drain the pipe so docker can never block on a full stdout buffer.
+            await process.StandardOutput.ReadToEndAsync();
+        }
+        await process.WaitForExitAsync();
+        return process.ExitCode;
+    }
+}
+```
--- a/docs/snippets/csharp/error_handling.md
+++ b/docs/snippets/csharp/error_handling.md
@@ -0,0 +1,23 @@
+```csharp title="C#"
+using Kreuzberg;
+
+try
+{
+    // The content is printed only when extraction succeeds.
+    Console.WriteLine(KreuzbergLib.ExtractFileSync("missing.pdf").Content);
+}
+catch (KreuzbergValidationException validationError)
+{
+    Console.Error.WriteLine($"Validation error: {validationError.Message}");
+}
+catch (KreuzbergIOException ioError)
+{
+    Console.Error.WriteLine($"IO error: {ioError.Message}");
+    throw;
+}
+catch (KreuzbergException extractionError)
+{
+    Console.Error.WriteLine($"Extraction failed: {extractionError.Message}");
+    throw;
+}
+```
--- a/docs/snippets/csharp/error_handling_extract.md
+++ b/docs/snippets/csharp/error_handling_extract.md
@@ -0,0 +1,39 @@
+```csharp title="C#"
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Text.Json;
+
+using var client = new HttpClient();
+
+try
+{
+    // Using declarations dispose the stream, form and response when the try block exits.
+    using var fileStream = File.OpenRead("document.pdf");
+    using var form = new MultipartFormDataContent();
+    form.Add(new StreamContent(fileStream), "files", "document.pdf");
+
+    using var response = await client.PostAsync("http://localhost:8000/extract", form);
+    var body = await response.Content.ReadAsStringAsync();
+
+    if (!response.IsSuccessStatusCode)
+    {
+        // The error body is parsed as JSON and its "error_type" and "message" fields are read.
+        using var errorDoc = JsonDocument.Parse(body);
+        var errorType = errorDoc.RootElement.GetProperty("error_type").GetString();
+        var message = errorDoc.RootElement.GetProperty("message").GetString();
+        Console.WriteLine($"Error: {errorType}: {message}");
+        return;
+    }
+
+    Console.WriteLine($"Success: {body}");
+}
+catch (HttpRequestException e)
+{
+    Console.WriteLine($"Request failed: {e.Message}");
+}
+catch (JsonException e)
+{
+    Console.WriteLine($"Unexpected error response: {e.Message}");
+}
+```
--- a/docs/snippets/csharp/extract_bytes_async.md
+++ b/docs/snippets/csharp/extract_bytes_async.md
@@ -0,0 +1,9 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Read the PDF into memory and extract from the raw bytes, passing the MIME type.
+byte[] pdfBytes = await File.ReadAllBytesAsync("document.pdf");
+var extraction = await KreuzbergLib.ExtractBytesAsync(pdfBytes, "application/pdf");
+Console.WriteLine(extraction.Content);
+Console.WriteLine(extraction.MimeType);
+```
--- a/docs/snippets/csharp/extract_bytes_sync.md
+++ b/docs/snippets/csharp/extract_bytes_sync.md
@@ -0,0 +1,9 @@
+```csharp title="C#"
+using Kreuzberg;
+
+// Fully synchronous variant: the file is read and extracted without any await.
+var data = File.ReadAllBytes("document.pdf");
+var result = KreuzbergLib.ExtractBytesSync(data, "application/pdf");
+Console.WriteLine(result.Content);
+Console.WriteLine(result.MimeType);
+```
--- a/Show More
+++ b/Show More