83 lines
2.0 KiB
C#
83 lines
2.0 KiB
C#
using Kreuzberg;
|
|
|
|
class MinLengthValidator : IValidator
|
|
{
|
|
private readonly int _minLength;
|
|
|
|
public MinLengthValidator(int minLength)
|
|
{
|
|
_minLength = minLength;
|
|
}
|
|
|
|
public string Name => "min-length";
|
|
public int Priority => 10;
|
|
|
|
public void Validate(ExtractionResult result)
|
|
{
|
|
if (result.Content.Length < _minLength)
|
|
{
|
|
throw new KreuzbergValidationException(
|
|
$"Content too short: {result.Content.Length} < {_minLength}"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
class QualityScoreValidator : IValidator
|
|
{
|
|
private readonly double _minScore;
|
|
|
|
public QualityScoreValidator(double minScore)
|
|
{
|
|
_minScore = minScore;
|
|
}
|
|
|
|
public string Name => "quality-score";
|
|
public int Priority => 5;
|
|
|
|
public void Validate(ExtractionResult result)
|
|
{
|
|
var score = result.QualityScore;
|
|
|
|
if (score < _minScore)
|
|
{
|
|
throw new KreuzbergValidationException(
|
|
$"Quality score too low: {score:F2} < {_minScore:F2}"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
class Program
|
|
{
|
|
static void Main()
|
|
{
|
|
var minLengthValidator = new MinLengthValidator(minLength: 50);
|
|
var qualityValidator = new QualityScoreValidator(minScore: 0.7);
|
|
|
|
KreuzbergLib.RegisterValidator(minLengthValidator);
|
|
KreuzbergLib.RegisterValidator(qualityValidator);
|
|
|
|
try
|
|
{
|
|
var config = new ExtractionConfig
|
|
{
|
|
EnableQualityProcessing = true
|
|
};
|
|
|
|
var result = KreuzbergLib.ExtractFileSync("document.pdf", config);
|
|
|
|
Console.WriteLine("Validation passed");
|
|
Console.WriteLine($"Content length: {result.Content.Length}");
|
|
}
|
|
catch (KreuzbergValidationException ex)
|
|
{
|
|
Console.WriteLine($"Validation failed: {ex.Message}");
|
|
}
|
|
catch (KreuzbergException ex)
|
|
{
|
|
Console.WriteLine($"Error: {ex.Message}");
|
|
}
|
|
}
|
|
}
|