Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
```csharp title="C#"
using Kreuzberg;
// Calls each of the four Clear* helpers in turn (document extractors, OCR backends,
// post-processors, validators), then prints a confirmation line.
KreuzbergLib.ClearDocumentExtractors();
KreuzbergLib.ClearOcrBackends();
KreuzbergLib.ClearPostProcessors();
KreuzbergLib.ClearValidators();
Console.WriteLine("All plugins cleared");
```

View File

@@ -0,0 +1,103 @@
using Kreuzberg;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
// NOTE: ICacheBackend interface is not available in C# bindings
/// <summary>
/// In-memory, time-limited cache placed in front of <c>KreuzbergLib.ExtractFileSync</c>.
/// </summary>
/// <remarks>
/// Entries are kept in a plain <see cref="Dictionary{TKey,TValue}"/>; this class performs no locking.
/// NOTE(review): cache keys embed <c>config.GetHashCode()</c>. If ExtractionConfig does not override
/// GetHashCode, two equal configs built separately will yield different keys — confirm.
/// </remarks>
class CustomCacheWrapper
{
    private readonly Dictionary<string, (ExtractionResult result, DateTime timestamp)> _cache;
    private readonly TimeSpan _cacheExpiration;

    /// <summary>Creates an empty cache.</summary>
    /// <param name="cacheExpiration">Maximum entry age; one hour when omitted.</param>
    public CustomCacheWrapper(TimeSpan? cacheExpiration = null)
    {
        _cacheExpiration = cacheExpiration ?? TimeSpan.FromHours(1);
        _cache = new Dictionary<string, (ExtractionResult, DateTime)>();
    }

    /// <summary>Returns the entry stored under <paramref name="key"/> if it is still fresh.</summary>
    /// <returns>The cached result, or <c>null</c> when the key is missing or the entry has expired.</returns>
    public ExtractionResult? Get(string key)
    {
        if (!_cache.TryGetValue(key, out var entry))
        {
            return null;
        }

        var age = DateTime.UtcNow - entry.timestamp;
        if (age < _cacheExpiration)
        {
            return entry.result;
        }

        // Expired: evict it so it is not examined again.
        _cache.Remove(key);
        return null;
    }

    /// <summary>Stores <paramref name="result"/> under <paramref name="key"/>, stamped with the current UTC time.</summary>
    public void Set(string key, ExtractionResult result) => _cache[key] = (result, DateTime.UtcNow);

    /// <summary>Removes the entry for <paramref name="key"/>, if any.</summary>
    public void Delete(string key) => _cache.Remove(key);

    /// <summary>Removes every entry.</summary>
    public void Clear() => _cache.Clear();

    /// <summary>
    /// Builds a cache key: the upper-case hex SHA-256 of "<paramref name="filePath"/>:&lt;config hash code&gt;",
    /// where the hash code is 0 for a <c>null</c> config.
    /// </summary>
    public string GenerateKey(string filePath, ExtractionConfig? config)
    {
        var configHash = config?.GetHashCode() ?? 0;
        var keyData = $"{filePath}:{configHash}";
        var keyBytes = Encoding.UTF8.GetBytes(keyData);

        using (var hasher = SHA256.Create())
        {
            return Convert.ToHexString(hasher.ComputeHash(keyBytes));
        }
    }

    /// <summary>
    /// Returns the cached extraction for the file/config pair, extracting and caching it on a miss.
    /// Prints which path was taken.
    /// </summary>
    public ExtractionResult GetOrExtract(string filePath, ExtractionConfig? config = null)
    {
        var cacheKey = GenerateKey(filePath, config);
        var cached = Get(cacheKey);
        if (cached == null)
        {
            var fresh = KreuzbergLib.ExtractFileSync(filePath, config);
            Set(cacheKey, fresh);
            Console.WriteLine("Extracted and cached");
            return fresh;
        }

        Console.WriteLine("Retrieved from cache");
        return cached;
    }
}
/// <summary>Demo entry point: runs the same file through the cache twice, then empties the cache.</summary>
class Program
{
    static void Main()
    {
        const string documentPath = "document.pdf";
        var extractionCache = new CustomCacheWrapper(TimeSpan.FromMinutes(30));

        try
        {
            var options = new ExtractionConfig { UseCache = true };

            var firstPass = extractionCache.GetOrExtract(documentPath, options);
            Console.WriteLine($"First extraction: {firstPass.Content.Length} chars");

            // Same path and same config instance, so the same key is generated.
            var secondPass = extractionCache.GetOrExtract(documentPath, options);
            Console.WriteLine($"Second extraction: {secondPass.Content.Length} chars");

            extractionCache.Clear();
            Console.WriteLine("Cache cleared");
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,70 @@
using Kreuzberg;
using System.Text.Json;
// NOTE: IDocumentExtractor interface is not available in C# bindings
/// <summary>
/// Stand-alone helper that turns a JSON payload into an <c>ExtractionResult</c> whose content is
/// every string value in the document, one per line.
/// </summary>
class CustomJsonProcessor
{
    /// <summary>Decodes <paramref name="content"/> as UTF-8 JSON and collects its string values.</summary>
    /// <param name="content">Raw JSON bytes.</param>
    /// <param name="mimeType">MIME type copied onto the returned result.</param>
    /// <returns>A result with the collected text, empty metadata and no tables.</returns>
    /// <exception cref="KreuzbergParsingException">The payload is not valid JSON.</exception>
    public static ExtractionResult ProcessJson(byte[] content, string mimeType)
    {
        try
        {
            var jsonContent = System.Text.Encoding.UTF8.GetString(content);

            // JsonDocument is IDisposable; release it as soon as the text has been copied out.
            // ExtractText builds its string eagerly, so nothing references the document afterwards.
            using var document = JsonDocument.Parse(jsonContent);
            var text = ExtractText(document.RootElement);

            return new ExtractionResult
            {
                Content = text,
                MimeType = mimeType,
                Metadata = new Metadata(),
                Tables = new List<Table>(),
                Success = true
            };
        }
        catch (JsonException ex)
        {
            throw new KreuzbergParsingException($"Failed to parse JSON: {ex.Message}");
        }
    }

    /// <summary>
    /// Recursively concatenates string values: a string yields itself plus a newline, arrays and
    /// objects yield the concatenation of their children, and every other value kind yields "".
    /// </summary>
    private static string ExtractText(JsonElement element)
    {
        return element.ValueKind switch
        {
            JsonValueKind.String => element.GetString() + "\n",
            JsonValueKind.Array => string.Concat(
                element.EnumerateArray().Select(ExtractText)
            ),
            JsonValueKind.Object => string.Concat(
                element.EnumerateObject()
                    .Select(p => ExtractText(p.Value))
            ),
            _ => ""
        };
    }
}
/// <summary>Demo entry point: serializes a small object to JSON and feeds it to <c>CustomJsonProcessor</c>.</summary>
class Program
{
    static void Main()
    {
        try
        {
            var payload = new { message = "Hello, world!", timestamp = DateTime.UtcNow };
            var serialized = JsonSerializer.Serialize(payload);
            var payloadBytes = System.Text.Encoding.UTF8.GetBytes(serialized);

            var extraction = CustomJsonProcessor.ProcessJson(payloadBytes, "application/json");

            Console.WriteLine($"Extracted: {extraction.Content}");
            Console.WriteLine($"MIME type: {extraction.MimeType}");
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,90 @@
using Kreuzberg;
using System.Net.Http;
using System.Text.Json;
/// <summary>
/// OCR backend that posts the image to an HTTP endpoint and returns the response body as text.
/// </summary>
/// <remarks>
/// Declares <see cref="IDisposable"/> explicitly so the instance can be used in a <c>using</c>
/// statement; it owns the <see cref="HttpClient"/> it creates.
/// </remarks>
class CloudOcrBackend : IOcrBackend, IDisposable
{
    private readonly string _apiKey;
    private readonly HttpClient _httpClient;
    private readonly string _apiEndpoint;

    /// <param name="apiKey">Sent as a bearer token in the Authorization header.</param>
    /// <param name="apiEndpoint">URL the image is posted to.</param>
    public CloudOcrBackend(string apiKey, string apiEndpoint = "https://api.example.com/ocr")
    {
        _apiKey = apiKey;
        _apiEndpoint = apiEndpoint;
        _httpClient = new HttpClient();
    }

    public string Name => "cloud-ocr-backend";

    /// <summary>Uploads <paramref name="imageBytes"/> and blocks until the service answers.</summary>
    /// <returns>The raw response body.</returns>
    /// <exception cref="KreuzbergOcrException">The HTTP request failed or returned a non-success status.</exception>
    public string Process(ReadOnlySpan<byte> imageBytes, OcrConfig? config)
    {
        // ReadOnlySpan<T> is a ref struct: it cannot be captured by a lambda or used in an async
        // method, so copy it to an array before leaving the synchronous part.
        var bytes = imageBytes.ToArray();

        // The method is synchronous, so the async upload runs on the thread pool
        // and this call blocks on its completion.
        return Task.Run(() => UploadAsync(bytes)).GetAwaiter().GetResult();
    }

    /// <summary>Posts the image as multipart form data under the field name "image".</summary>
    private async Task<string> UploadAsync(byte[] bytes)
    {
        try
        {
            using var content = new MultipartFormDataContent();
            content.Add(new ByteArrayContent(bytes), "image");

            using var request = new HttpRequestMessage(
                HttpMethod.Post,
                _apiEndpoint
            )
            {
                Content = content,
                Headers =
                {
                    { "Authorization", $"Bearer {_apiKey}" }
                }
            };

            using var response = await _httpClient.SendAsync(request);
            response.EnsureSuccessStatusCode();
            return await response.Content.ReadAsStringAsync();
        }
        catch (HttpRequestException ex)
        {
            throw new KreuzbergOcrException($"Cloud OCR service error: {ex.Message}");
        }
    }

    /// <summary>Disposes the owned <see cref="HttpClient"/>.</summary>
    public void Dispose()
    {
        _httpClient?.Dispose();
    }
}
/// <summary>Demo entry point: registers the cloud OCR backend and extracts a PDF with it selected.</summary>
class Program
{
    static void Main()
    {
        using var ocrBackend = new CloudOcrBackend(apiKey: "your-api-key-here");
        KreuzbergLib.RegisterOcrBackend(ocrBackend);

        try
        {
            // Select the backend by the same name CloudOcrBackend.Name reports.
            var ocrSettings = new OcrConfig
            {
                Backend = "cloud-ocr-backend"
            };
            var extractionSettings = new ExtractionConfig
            {
                Ocr = ocrSettings
            };

            var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionSettings);
            Console.WriteLine($"OCR text: {extraction.Content}");
        }
        catch (KreuzbergOcrException ex)
        {
            Console.WriteLine($"OCR error: {ex.Message}");
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,124 @@
using Kreuzberg;
using System.Text.RegularExpressions;
/// <summary>
/// Post-processor that counts whitespace-separated words and records the total under the
/// "word_count" key of <c>Metadata.Additional</c>.
/// </summary>
class WordCountPostProcessor : IPostProcessor
{
    // Characters treated as word boundaries.
    private static readonly char[] WordSeparators = { ' ', '\n', '\r', '\t' };

    public string Name => "word-count";

    public int Priority => 10;

    /// <summary>Adds the word count to the result's metadata; results with no content pass through untouched.</summary>
    /// <returns>The same <paramref name="result"/> instance.</returns>
    public ExtractionResult Process(ExtractionResult result)
    {
        if (!string.IsNullOrEmpty(result.Content))
        {
            var words = result.Content.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);

            // Create the dictionary on first use.
            result.Metadata.Additional ??= new Dictionary<string, System.Text.Json.Nodes.JsonNode?>();
            result.Metadata.Additional["word_count"] = System.Text.Json.Nodes.JsonValue.Create(words.Length);
        }

        return result;
    }
}
/// <summary>
/// Post-processor that collapses whitespace runs to single spaces, trims the ends, and then drops
/// every character that is not a word character, whitespace, or one of <c>. , ! ? -</c>.
/// </summary>
class CleanupPostProcessor : IPostProcessor
{
    public string Name => "text-cleanup";

    public int Priority => 5;

    /// <summary>Rewrites <c>result.Content</c> in place; results with no content pass through untouched.</summary>
    /// <returns>The same <paramref name="result"/> instance.</returns>
    public ExtractionResult Process(ExtractionResult result)
    {
        if (!string.IsNullOrEmpty(result.Content))
        {
            var collapsed = Regex.Replace(result.Content, @"\s+", " ");
            var trimmed = collapsed.Trim();
            result.Content = Regex.Replace(trimmed, @"[^\w\s\.\,\!\?\-]", "");
        }

        return result;
    }
}
/// <summary>
/// Post-processor that applies a simple English heuristic and stores the verdict under the
/// "detected_language" key of <c>Metadata.Additional</c>.
/// </summary>
class LanguageDetectionPostProcessor : IPostProcessor
{
    public string Name => "language-detection";

    public int Priority => 1;

    /// <summary>Tags the result with "en" or "unknown"; results with no content pass through untouched.</summary>
    /// <returns>The same <paramref name="result"/> instance.</returns>
    public ExtractionResult Process(ExtractionResult result)
    {
        if (!string.IsNullOrEmpty(result.Content))
        {
            var language = DetectLanguage(result.Content);
            result.Metadata.Additional ??= new Dictionary<string, System.Text.Json.Nodes.JsonNode?>();
            result.Metadata.Additional["detected_language"] = System.Text.Json.Nodes.JsonValue.Create(language);
        }

        return result;
    }

    /// <summary>
    /// Returns "en" when more than five of eight common English words occur as whole words in
    /// the lower-cased text, otherwise "unknown".
    /// </summary>
    private string DetectLanguage(string text)
    {
        var commonEnglishWords = new[] { "the", "is", "and", "to", "of", "a", "in", "that" };

        // NOTE(review): ToLower() uses the current culture; consider ToLowerInvariant() — confirm intent.
        var lowerText = text.ToLower();

        var matches = 0;
        foreach (var word in commonEnglishWords)
        {
            if (Regex.IsMatch(lowerText, $@"\b{word}\b"))
            {
                matches++;
            }
        }

        if (matches > 5)
        {
            return "en";
        }

        return "unknown";
    }
}
/// <summary>
/// Demo entry point: registers the three post-processors, extracts a PDF, and prints the
/// metadata values they add.
/// </summary>
class Program
{
    static void Main()
    {
        KreuzbergLib.RegisterPostProcessor(new WordCountPostProcessor());
        KreuzbergLib.RegisterPostProcessor(new CleanupPostProcessor());
        KreuzbergLib.RegisterPostProcessor(new LanguageDetectionPostProcessor());

        try
        {
            var extraction = KreuzbergLib.ExtractFileSync("document.pdf", new ExtractionConfig());
            Console.WriteLine($"Original content length: {extraction.Content.Length}");

            var extras = extraction.Metadata.Additional;
            if (extras != null)
            {
                if (extras.TryGetValue("word_count", out var wordCount))
                {
                    Console.WriteLine($"Word count: {wordCount}");
                }

                if (extras.TryGetValue("detected_language", out var language))
                {
                    Console.WriteLine($"Detected language: {language}");
                }
            }
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,103 @@
using Kreuzberg;
/// <summary>Validator that rejects results whose content is shorter than a configured length.</summary>
class MinLengthValidator : IValidator
{
    private readonly int _minLength;

    /// <param name="minLength">Smallest acceptable content length, in characters.</param>
    public MinLengthValidator(int minLength) => _minLength = minLength;

    public string Name => "min-length";

    public int Priority => 10;

    /// <exception cref="KreuzbergValidationException">The content is shorter than the minimum.</exception>
    public void Validate(ExtractionResult result)
    {
        var actualLength = result.Content.Length;
        if (actualLength >= _minLength)
        {
            return;
        }

        throw new KreuzbergValidationException(
            $"Content too short: {actualLength} < {_minLength}"
        );
    }
}
/// <summary>Validator that rejects results whose <c>QualityScore</c> is below a configured threshold.</summary>
class QualityScoreValidator : IValidator
{
    private readonly double _minScore;

    /// <param name="minScore">Lowest acceptable quality score.</param>
    public QualityScoreValidator(double minScore) => _minScore = minScore;

    public string Name => "quality-score";

    public int Priority => 5;

    /// <exception cref="KreuzbergValidationException">The score compares as less than the threshold.</exception>
    public void Validate(ExtractionResult result)
    {
        var observed = result.QualityScore;

        // Deliberately kept as "less than": any score for which this comparison is false passes.
        var belowThreshold = observed < _minScore;
        if (!belowThreshold)
        {
            return;
        }

        throw new KreuzbergValidationException(
            $"Quality score too low: {observed:F2} < {_minScore:F2}"
        );
    }
}
/// <summary>Validator that requires non-blank content of at least ten characters.</summary>
class ContentValidValidator : IValidator
{
    public string Name => "content-valid";

    public int Priority => 20;

    /// <exception cref="KreuzbergValidationException">
    /// The content is null, empty or whitespace, or is shorter than ten characters.
    /// </exception>
    public void Validate(ExtractionResult result)
    {
        var content = result.Content;

        // Blank check first, so the length check below never sees null.
        if (string.IsNullOrWhiteSpace(content))
        {
            throw new KreuzbergValidationException("Extracted content is empty or whitespace");
        }

        if (content.Length < 10)
        {
            throw new KreuzbergValidationException("Extracted content is too short (minimum 10 characters)");
        }
    }
}
/// <summary>Demo entry point: registers the three validators and extracts a PDF with quality processing enabled.</summary>
class Program
{
    static void Main()
    {
        // Registered in the same order as before: min-length, quality-score, content-valid.
        KreuzbergLib.RegisterValidator(new MinLengthValidator(minLength: 50));
        KreuzbergLib.RegisterValidator(new QualityScoreValidator(minScore: 0.7));
        KreuzbergLib.RegisterValidator(new ContentValidValidator());

        try
        {
            var settings = new ExtractionConfig
            {
                EnableQualityProcessing = true
            };

            var extraction = KreuzbergLib.ExtractFileSync("document.pdf", settings);

            Console.WriteLine("All validations passed");
            Console.WriteLine($"Content length: {extraction.Content.Length}");
        }
        catch (KreuzbergValidationException ex)
        {
            Console.WriteLine($"Validation failed: {ex.Message}");
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}

View File

@@ -0,0 +1,43 @@
```csharp title="C#"
using Kreuzberg;
using System.Collections.Generic;
// Create the custom embedding backend and hand it straight to the registry.
EmbeddingBackendRegistry.Register(new CustomEmbeddingBackend());
/// <summary>
/// Example embedding backend that derives a fixed-size vector from each text's length.
/// </summary>
public class CustomEmbeddingBackend : IEmbeddingBackend
{
    // Single source of truth for the vector size, so Dimensions() and Embed() cannot drift apart.
    private const int EmbeddingDimensions = 384;

    public string Name => "custom-embeddings";

    public string Version => "1.0.0";

    public void Initialize()
    {
        Console.WriteLine("Embedding backend initialized");
    }

    public void Shutdown()
    {
        Console.WriteLine("Embedding backend shut down");
    }

    /// <summary>Length of every vector produced by <see cref="Embed"/>.</summary>
    public ulong Dimensions()
    {
        return EmbeddingDimensions;
    }

    /// <summary>
    /// Produces one vector per input text. Component <c>i</c> is
    /// <c>(text.Length % (i + 1)) / (i + 1)</c>, so only the text's length affects the output.
    /// </summary>
    /// <param name="texts">Texts to embed.</param>
    /// <returns>One <see cref="EmbeddingDimensions"/>-element vector per text, in input order.</returns>
    public List<List<float>> Embed(List<string> texts)
    {
        var embeddings = new List<List<float>>(texts.Count);
        foreach (var text in texts)
        {
            var embedding = new List<float>(EmbeddingDimensions);
            for (int i = 0; i < EmbeddingDimensions; i++)
            {
                embedding.Add((float)(text.Length % (i + 1)) / (float)(i + 1));
            }
            embeddings.Add(embedding);
        }
        return embeddings;
    }
}
```

View File

@@ -0,0 +1,51 @@
```csharp title="C#"
using Kreuzberg;
// Create the JSON extractor and register it in one step.
KreuzbergLib.RegisterDocumentExtractor(new JsonDocumentExtractor());
/// <summary>
/// Example document extractor for JSON MIME types that returns the UTF-8 decoded payload
/// unchanged as the extraction content.
/// </summary>
public class JsonDocumentExtractor : IDocumentExtractor
{
    public string Name => "json-extractor";

    public string Version => "1.0.0";

    public void Initialize() => Console.WriteLine("JSON extractor initialized");

    public void Shutdown() => Console.WriteLine("JSON extractor shut down");

    /// <summary>Decodes <paramref name="content"/> as UTF-8 and wraps it in a result.</summary>
    /// <remarks><paramref name="config"/> is not consulted.</remarks>
    public ExtractionResult ExtractBytes(byte[] content, string mimeType, ExtractionConfig config)
    {
        return new ExtractionResult
        {
            Content = System.Text.Encoding.UTF8.GetString(content),
            MimeType = mimeType,
            DetectedLanguages = null
        };
    }

    /// <summary>Reads the whole file at <paramref name="path"/> and delegates to <see cref="ExtractBytes"/>.</summary>
    public ExtractionResult ExtractFile(string path, string mimeType, ExtractionConfig config)
    {
        var fileBytes = System.IO.File.ReadAllBytes(path);
        return ExtractBytes(fileBytes, mimeType, config);
    }

    /// <summary>MIME types this extractor reports: "application/json" and "text/json".</summary>
    public string[] SupportedMimeTypes() => new[] { "application/json", "text/json" };

    public int Priority() => 50;
}
```

View File

@@ -0,0 +1,15 @@
```csharp title="C#"
using Kreuzberg;
// Print the contents of each plugin registry as a comma-separated list.
var extractorNames = KreuzbergLib.ListDocumentExtractors();
Console.WriteLine($"Registered extractors: {string.Join(", ", extractorNames)}");

var ocrBackendNames = KreuzbergLib.ListOcrBackends();
Console.WriteLine($"Registered OCR backends: {string.Join(", ", ocrBackendNames)}");

var postProcessorNames = KreuzbergLib.ListPostProcessors();
Console.WriteLine($"Registered post-processors: {string.Join(", ", postProcessorNames)}");

var validatorNames = KreuzbergLib.ListValidators();
Console.WriteLine($"Registered validators: {string.Join(", ", validatorNames)}");
```

View File

@@ -0,0 +1,45 @@
```csharp title="C#"
using Kreuzberg;
// Create the minimum-length validator and register it in one step.
ValidatorRegistry.Register(new MinimumLengthValidator());
/// <summary>Validator that fails results whose content is shorter than ten characters.</summary>
public class MinimumLengthValidator : IValidator
{
    private const int MinimumLength = 10;

    public string Name => "min-length-validator";

    public string Version => "1.0.0";

    public void Initialize() =>
        Console.WriteLine($"Minimum length validator initialized (min: {MinimumLength})");

    public void Shutdown() =>
        Console.WriteLine("Minimum length validator shut down");

    /// <summary>Throws when the content length is below <see cref="MinimumLength"/>.</summary>
    /// <exception cref="KreuzbergException">Raised with code 1001 for content that is too short.</exception>
    public void Validate(ExtractionResult result, ExtractionConfig config)
    {
        var actualLength = result.Content.Length;
        if (actualLength >= MinimumLength)
        {
            return;
        }

        throw new KreuzbergException(
            $"Content length {actualLength} is below minimum {MinimumLength}",
            1001
        );
    }

    /// <summary>Returns <c>true</c> only when the result has non-empty content.</summary>
    public bool ShouldValidate(ExtractionResult result, ExtractionConfig config)
    {
        var hasContent = !string.IsNullOrEmpty(result.Content);
        return hasContent;
    }

    public int Priority() => 50;
}
```

View File

@@ -0,0 +1,58 @@
```csharp title="C#"
using Kreuzberg;
// Create the PDF metadata enricher and register it in one step.
PostProcessorRegistry.Register(new PdfMetadataEnricher());
/// <summary>
/// Post-processor for "application/pdf" results that fills in a missing author with "Unknown"
/// and counts how many PDF results it has handled.
/// </summary>
public class PdfMetadataEnricher : IPostProcessor
{
    private int _processedCount = 0;

    public string Name => "pdf-metadata-enricher";

    public string Version => "1.0.0";

    /// <summary>Logs start-up and resets the processed-document counter.</summary>
    public void Initialize()
    {
        Console.WriteLine("PDF metadata enricher initialized");
        _processedCount = 0;
    }

    /// <summary>Logs how many PDF results were processed.</summary>
    public void Shutdown()
    {
        Console.WriteLine($"PDF metadata enricher processed {_processedCount} documents");
    }

    /// <summary>
    /// For PDF results: bumps the counter, creates the metadata object if absent, and sets the
    /// author to "Unknown" when none is present. Other MIME types are left untouched.
    /// </summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        if (result.MimeType == "application/pdf")
        {
            _processedCount++;
            if (result.Metadata == null)
            {
                result.Metadata = new Metadata();
            }
            result.Metadata.Author = result.Metadata.Author ?? "Unknown";
        }
    }

    public ProcessingStage ProcessingStage()
    {
        // This method shares its name with the ProcessingStage type, so a bare
        // "ProcessingStage.Early" here would bind to the method group and fail to compile.
        // Qualifying with the namespace selects the type.
        return Kreuzberg.ProcessingStage.Early;
    }

    /// <summary>Returns <c>true</c> only for "application/pdf" results.</summary>
    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
    {
        return result.MimeType == "application/pdf";
    }

    public ulong EstimatedDurationMs(ExtractionResult result)
    {
        return 50;
    }

    public int Priority()
    {
        return 50;
    }
}
```

View File

@@ -0,0 +1,54 @@
```csharp title="C#"
using Kreuzberg;
/// <summary>
/// Post-processor that opts in only for "application/pdf" results via <see cref="ShouldProcess"/>.
/// </summary>
public class PdfOnlyProcessor : IPostProcessor
{
    public string Name => "pdf-only-processor";

    public string Version => "1.0.0";

    public void Initialize()
    {
    }

    public void Shutdown()
    {
    }

    /// <summary>Logs a message when handed a non-PDF result; does nothing for PDFs.</summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        if (result.MimeType != "application/pdf")
        {
            Console.WriteLine($"Skipping non-PDF: {result.MimeType}");
        }
    }

    public ProcessingStage ProcessingStage()
    {
        // This method shares its name with the ProcessingStage type, so a bare
        // "ProcessingStage.Middle" here would bind to the method group and fail to compile.
        // Qualifying with the namespace selects the type.
        return Kreuzberg.ProcessingStage.Middle;
    }

    /// <summary>Returns <c>true</c> only for "application/pdf" results.</summary>
    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
    {
        return result.MimeType == "application/pdf";
    }

    public ulong EstimatedDurationMs(ExtractionResult result)
    {
        return 10;
    }

    public int Priority()
    {
        return 50;
    }
}
/// <summary>Demo entry point: registers a <c>PdfOnlyProcessor</c>.</summary>
class Program
{
    static void Main()
    {
        PostProcessorRegistry.Register(new PdfOnlyProcessor());
    }
}
```

View File

@@ -0,0 +1,50 @@
```csharp title="C#"
using Kreuzberg;
// Create the custom text extractor and register it in one step.
KreuzbergLib.RegisterDocumentExtractor(new CustomTextExtractor());
/// <summary>
/// Example extractor for "text/plain" that returns the UTF-8 decoded input converted to upper case.
/// </summary>
public class CustomTextExtractor : IDocumentExtractor
{
    public string Name => "custom-text-extractor";

    public string Version => "1.0.0";

    public void Initialize() => Console.WriteLine("Custom text extractor initialized");

    public void Shutdown() => Console.WriteLine("Custom text extractor shut down");

    /// <summary>Decodes <paramref name="content"/> as UTF-8 and upper-cases it.</summary>
    /// <remarks><paramref name="config"/> is not consulted.</remarks>
    public ExtractionResult ExtractBytes(byte[] content, string mimeType, ExtractionConfig config)
    {
        var decoded = System.Text.Encoding.UTF8.GetString(content);

        // NOTE(review): ToUpper() follows the current culture — confirm that is intended.
        var upperCased = decoded.ToUpper();

        var extraction = new ExtractionResult
        {
            Content = upperCased,
            MimeType = mimeType,
            DetectedLanguages = null
        };
        return extraction;
    }

    /// <summary>Reads the whole file at <paramref name="path"/> and delegates to <see cref="ExtractBytes"/>.</summary>
    public ExtractionResult ExtractFile(string path, string mimeType, ExtractionConfig config)
    {
        var fileBytes = System.IO.File.ReadAllBytes(path);
        return ExtractBytes(fileBytes, mimeType, config);
    }

    public string[] SupportedMimeTypes() => new[] { "text/plain" };

    public int Priority() => 50;
}
```

View File

@@ -0,0 +1,52 @@
```csharp title="C#"
using Kreuzberg;
// Create the logging post-processor and register it in one step.
PostProcessorRegistry.Register(new LoggingPostProcessor());
/// <summary>
/// Post-processor that logs the MIME type and content length of every result and warns when the
/// content is empty. It does not modify the result.
/// </summary>
public class LoggingPostProcessor : IPostProcessor
{
    public string Name => "logging-processor";

    public string Version => "1.0.0";

    public void Initialize()
    {
        Console.WriteLine("Logging post-processor initialized");
    }

    public void Shutdown()
    {
        Console.WriteLine("Logging post-processor shut down");
    }

    /// <summary>Writes one log line per result, plus a warning for null or empty content.</summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        // Null-safe length: the warning below anticipates missing content, so the log line
        // must not dereference it first.
        var contentLength = result.Content?.Length ?? 0;
        Console.WriteLine($"Processing: {result.MimeType}, Content length: {contentLength}");
        if (string.IsNullOrEmpty(result.Content))
        {
            Console.WriteLine("Warning: Extracted content is empty");
        }
    }

    public ProcessingStage ProcessingStage()
    {
        // This method shares its name with the ProcessingStage type, so a bare
        // "ProcessingStage.Early" here would bind to the method group and fail to compile.
        // Qualifying with the namespace selects the type.
        return Kreuzberg.ProcessingStage.Early;
    }

    /// <summary>Always <c>true</c>: every result is logged.</summary>
    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
    {
        return true;
    }

    public ulong EstimatedDurationMs(ExtractionResult result)
    {
        return 1;
    }

    public int Priority()
    {
        return 10;
    }
}
```

View File

@@ -0,0 +1,59 @@
```csharp title="C#"
using Kreuzberg;
using Xunit;
/// <summary>xUnit tests covering registration and direct invocation of <c>TestValidator</c>.</summary>
public class CustomValidatorTests
{
    /// <summary>After registration, the validator's name shows up in the registered-validator list.</summary>
    [Fact]
    public void TestValidatorRegistration()
    {
        ValidatorRegistry.Register(new TestValidator());

        Assert.Contains("test-validator", KreuzbergLib.ListValidators());
    }

    /// <summary>
    /// Drives the validator through initialize → validate → shutdown with non-empty content;
    /// the test passes when <c>Validate</c> does not throw.
    /// </summary>
    [Fact]
    public void TestValidatorProcessing()
    {
        var extraction = new ExtractionResult
        {
            Content = "Test content with some length",
            MimeType = "text/plain"
        };
        var settings = new ExtractionConfig();
        var sut = new TestValidator();

        sut.Initialize();
        var applies = sut.ShouldValidate(extraction, settings);
        Assert.True(applies);
        sut.Validate(extraction, settings);
        sut.Shutdown();
    }
}
/// <summary>Minimal validator used by the tests: rejects null or empty content.</summary>
public class TestValidator : IValidator
{
    public string Name => "test-validator";

    public string Version => "1.0.0";

    public void Initialize()
    {
    }

    public void Shutdown()
    {
    }

    /// <exception cref="KreuzbergException">Raised with code 1000 when the content is null or empty.</exception>
    public void Validate(ExtractionResult result, ExtractionConfig config)
    {
        var hasContent = !string.IsNullOrEmpty(result.Content);
        if (hasContent)
        {
            return;
        }

        throw new KreuzbergException("Content cannot be empty", 1000);
    }

    /// <summary>Returns <c>true</c> only when the result has non-empty content.</summary>
    public bool ShouldValidate(ExtractionResult result, ExtractionConfig config) =>
        !string.IsNullOrEmpty(result.Content);

    public int Priority()
    {
        return 50;
    }
}
```

View File

@@ -0,0 +1,50 @@
```csharp title="C#"
using Kreuzberg;
// Create a validator that allows PDF and plain text, and register it in one step.
ValidatorRegistry.Register(new ContentTypeValidator("application/pdf", "text/plain"));
/// <summary>Validator that only accepts results whose MIME type is in a caller-supplied allow-list.</summary>
public class ContentTypeValidator : IValidator
{
    private readonly string[] _allowedMimeTypes;

    /// <param name="allowedMimeTypes">MIME types that pass validation.</param>
    public ContentTypeValidator(params string[] allowedMimeTypes) =>
        _allowedMimeTypes = allowedMimeTypes;

    public string Name => "content-type-validator";

    public string Version => "1.0.0";

    public void Initialize() =>
        Console.WriteLine($"Content type validator initialized with types: {string.Join(", ", _allowedMimeTypes)}");

    public void Shutdown() =>
        Console.WriteLine("Content type validator shut down");

    /// <summary>Throws when the result's MIME type is not in the allow-list.</summary>
    /// <exception cref="KreuzbergException">Raised with code 1002 for a disallowed MIME type.</exception>
    public void Validate(ExtractionResult result, ExtractionConfig config)
    {
        var isAllowed = _allowedMimeTypes.Contains(result.MimeType);
        if (isAllowed)
        {
            return;
        }

        throw new KreuzbergException(
            $"MIME type {result.MimeType} not allowed. Allowed types: {string.Join(", ", _allowedMimeTypes)}",
            1002
        );
    }

    /// <summary>Always <c>true</c>: every result is checked.</summary>
    public bool ShouldValidate(ExtractionResult result, ExtractionConfig config) => true;

    public int Priority() => 50;
}
```

View File

@@ -0,0 +1,62 @@
```csharp title="C#"
using Kreuzberg;
/// <summary>
/// Validator that computes a simple score from content length and metadata presence and fails
/// results scoring below 0.7.
/// </summary>
public class QualityScoreValidator : IValidator
{
    private const float MinimumQuality = 0.7f;

    public string Name => "quality-score-validator";

    public string Version => "1.0.0";

    public void Initialize() =>
        Console.WriteLine($"Quality score validator initialized (min score: {MinimumQuality})");

    public void Shutdown() =>
        Console.WriteLine("Quality score validator shut down");

    /// <summary>Throws when the computed score is below <see cref="MinimumQuality"/>.</summary>
    /// <exception cref="KreuzbergException">Raised with code 1003 for a low score.</exception>
    public void Validate(ExtractionResult result, ExtractionConfig config)
    {
        var computedScore = CalculateQualityScore(result);
        if (!(computedScore < MinimumQuality))
        {
            return;
        }

        throw new KreuzbergException(
            $"Quality score {computedScore:F2} below minimum {MinimumQuality}",
            1003
        );
    }

    /// <summary>Returns <c>true</c> only when the result has non-empty content.</summary>
    public bool ShouldValidate(ExtractionResult result, ExtractionConfig config) =>
        !string.IsNullOrEmpty(result.Content);

    public int Priority() => 50;

    /// <summary>
    /// Scores 0.8 for content longer than 100 characters (otherwise 0.5), plus 0.2 when
    /// metadata is present, capped at 1.0.
    /// </summary>
    private float CalculateQualityScore(ExtractionResult result)
    {
        var lengthPart = result.Content.Length > 100 ? 0.8f : 0.5f;
        var metadataPart = result.Metadata != null ? 0.2f : 0.0f;
        var total = lengthPart + metadataPart;
        return Math.Min(total, 1.0f);
    }
}
/// <summary>Demo entry point: registers a <c>QualityScoreValidator</c>.</summary>
class Program
{
    static void Main()
    {
        ValidatorRegistry.Register(new QualityScoreValidator());
    }
}
```

View File

@@ -0,0 +1,59 @@
```csharp title="C#"
using Kreuzberg;
using System.Collections.Concurrent;
// Create the stateful post-processor and register it in one step.
PostProcessorRegistry.Register(new StatefulPostProcessor());
/// <summary>
/// Post-processor that keeps state between calls: a call counter and a map from
/// "last_mime_&lt;n&gt;" to the MIME type seen on call n.
/// </summary>
public class StatefulPostProcessor : IPostProcessor
{
    private int _callCount = 0;
    private readonly ConcurrentDictionary<string, string> _cache = new();

    public string Name => "stateful-processor";

    public string Version => "1.0.0";

    /// <summary>Logs start-up and resets both the counter and the map.</summary>
    public void Initialize()
    {
        Console.WriteLine("Stateful processor initialized");
        _callCount = 0;
        _cache.Clear();
    }

    /// <summary>Logs the final call count and number of map entries.</summary>
    public void Shutdown()
    {
        Console.WriteLine($"Stateful processor called {_callCount} times");
        Console.WriteLine($"Cache contains {_cache.Count} entries");
    }

    /// <summary>Records the result's MIME type under a key numbered by this call and logs it.</summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        // Read the counter once so the key and the log line always show the same number.
        // NOTE(review): the increment itself is not atomic; if Process can run concurrently,
        // consider Interlocked.Increment — confirm the host's threading model.
        var callNumber = ++_callCount;
        var key = $"last_mime_{callNumber}";
        _cache.TryAdd(key, result.MimeType);
        Console.WriteLine($"Processing #{callNumber}: {result.MimeType}");
    }

    public ProcessingStage ProcessingStage()
    {
        // This method shares its name with the ProcessingStage type, so a bare
        // "ProcessingStage.Middle" here would bind to the method group and fail to compile.
        // Qualifying with the namespace selects the type.
        return Kreuzberg.ProcessingStage.Middle;
    }

    /// <summary>Always <c>true</c>: every result is processed.</summary>
    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
    {
        return true;
    }

    public ulong EstimatedDurationMs(ExtractionResult result)
    {
        return 5;
    }

    public int Priority()
    {
        return 50;
    }
}
```

View File

@@ -0,0 +1,35 @@
```csharp title="C#"
using Kreuzberg;
// Register a processor, show the active list, unregister it by name, and show the list again.
var removable = new UnregisterableProcessor();

PostProcessorRegistry.Register(removable);
Console.WriteLine("Processor registered");
PrintActiveProcessors();

PostProcessorRegistry.Unregister(removable.Name);
Console.WriteLine("Processor unregistered");
PrintActiveProcessors();

// Prints the currently registered post-processors as a comma-separated list.
static void PrintActiveProcessors()
{
    Console.WriteLine($"Active processors: {string.Join(", ", KreuzbergLib.ListPostProcessors())}");
}
/// <summary>Trivial post-processor used to demonstrate registering and unregistering by name.</summary>
public class UnregisterableProcessor : IPostProcessor
{
    public string Name => "removable-processor";

    public string Version => "1.0.0";

    public void Initialize() { }

    public void Shutdown() { }

    /// <summary>Logs a fixed message; the result is not modified.</summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        Console.WriteLine("Processing...");
    }

    // This method shares its name with the ProcessingStage type, so a bare
    // "ProcessingStage.Middle" here would bind to the method group and fail to compile.
    // Qualifying with the namespace selects the type.
    public ProcessingStage ProcessingStage() => Kreuzberg.ProcessingStage.Middle;

    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config) => true;

    public ulong EstimatedDurationMs(ExtractionResult result) => 10;

    public int Priority() => 50;
}
```

View File

@@ -0,0 +1,62 @@
```csharp title="C#"
using Kreuzberg;
// Create the word-count processor and register it in one step.
PostProcessorRegistry.Register(new WordCountProcessor());
/// <summary>
/// Post-processor that counts whitespace-separated words in the content and prints the total.
/// </summary>
public class WordCountProcessor : IPostProcessor
{
    public string Name => "word-count";

    public string Version => "1.0.0";

    public void Initialize()
    {
        Console.WriteLine("Word count processor initialized");
    }

    public void Shutdown()
    {
        Console.WriteLine("Word count processor shut down");
    }

    /// <summary>
    /// Counts the words, makes sure the result has a metadata object, and logs the count.
    /// The count itself is only printed, not stored on the result.
    /// </summary>
    public void Process(ExtractionResult result, ExtractionConfig config)
    {
        var wordCount = CountWords(result.Content);
        if (result.Metadata == null)
        {
            result.Metadata = new Metadata();
        }
        Console.WriteLine($"Document contains {wordCount} words");
    }

    public ProcessingStage ProcessingStage()
    {
        // This method shares its name with the ProcessingStage type, so a bare
        // "ProcessingStage.Early" here would bind to the method group and fail to compile.
        // Qualifying with the namespace selects the type.
        return Kreuzberg.ProcessingStage.Early;
    }

    /// <summary>Returns <c>true</c> only when the result has non-empty content.</summary>
    public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
    {
        return !string.IsNullOrEmpty(result.Content);
    }

    public ulong EstimatedDurationMs(ExtractionResult result)
    {
        return 5;
    }

    public int Priority()
    {
        return 50;
    }

    /// <summary>
    /// Number of non-empty tokens after splitting on space, tab, newline and carriage return;
    /// 0 for null, empty or all-whitespace input.
    /// </summary>
    private int CountWords(string content)
    {
        if (string.IsNullOrWhiteSpace(content))
        {
            return 0;
        }

        return content.Split(new[] { ' ', '\t', '\n', '\r' }, System.StringSplitOptions.RemoveEmptyEntries).Length;
    }
}
```