Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
using Kreuzberg;
// OCR settings: "auto" backend with "en" as the language.
var ocrSettings = new OcrConfig
{
    Backend = "auto",
    Language = "en"
};
var extractionConfig = new ExtractionConfig { Ocr = ocrSettings };

// Extract synchronously and print the resulting content.
var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
Console.WriteLine(extraction.Content);

View File

@@ -0,0 +1,78 @@
```csharp title="C#"
using Kreuzberg;
using System.Collections.Generic;
/// <summary>
/// Example custom OCR backend, exposed under the name "cloud-ocr", that sketches how a
/// cloud OCR service could be plugged in through <see cref="IOcrBackend"/>.
/// </summary>
/// <remarks>
/// <see cref="ProcessImage"/> is a placeholder and throws until a real service call is
/// implemented; <see cref="ProcessImageFile"/> delegates to it.
/// </remarks>
public class CloudOcrBackend : IOcrBackend
{
    // Single source of truth for the language codes this backend advertises.
    private static readonly string[] s_languageCodes = { "eng", "deu", "fra" };

    // Assigned once in the constructor; meant for the cloud OCR call in ProcessImage.
    private readonly string _apiKey;

    /// <summary>Backend name.</summary>
    public string Name => "cloud-ocr";

    /// <summary>Backend version string.</summary>
    public string Version => "1.0.0";

    /// <summary>Creates the backend with the API key to use for the cloud service.</summary>
    /// <param name="apiKey">API key for the cloud OCR service; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="apiKey"/> is null.</exception>
    public CloudOcrBackend(string apiKey)
    {
        // Fail fast instead of storing a null key that would only surface later.
        if (apiKey == null)
        {
            throw new ArgumentNullException(nameof(apiKey));
        }

        _apiKey = apiKey;
    }

    /// <summary>No setup is performed in this example.</summary>
    public void Initialize()
    {
    }

    /// <summary>No teardown is performed in this example.</summary>
    public void Shutdown()
    {
    }

    /// <summary>Placeholder for the cloud OCR call on in-memory image data.</summary>
    /// <param name="imageBytes">Raw image bytes to recognize.</param>
    /// <param name="config">OCR configuration (for example, the language to use).</param>
    /// <returns>Never returns in this example.</returns>
    /// <exception cref="NotImplementedException">Always thrown until implemented.</exception>
    public ExtractionResult ProcessImage(byte[] imageBytes, OcrConfig config)
    {
        // Call cloud OCR API with imageBytes and config.Language
        // Return ExtractionResult with extracted text
        throw new NotImplementedException();
    }

    /// <summary>Reads the image at <paramref name="path"/> and forwards it to <see cref="ProcessImage"/>.</summary>
    /// <param name="path">Path of the image file to read.</param>
    /// <param name="config">OCR configuration passed through unchanged.</param>
    /// <returns>The result produced by <see cref="ProcessImage"/>.</returns>
    public ExtractionResult ProcessImageFile(string path, OcrConfig config)
    {
        var imageBytes = File.ReadAllBytes(path);
        return ProcessImage(imageBytes, config);
    }

    /// <summary>Reports whether <paramref name="language"/> is one of the advertised codes.</summary>
    /// <param name="language">Language code to look up (exact match).</param>
    /// <returns><c>true</c> for "eng", "deu" or "fra"; otherwise <c>false</c>.</returns>
    public bool SupportsLanguage(string language)
    {
        // Membership test against the shared array; avoids building a list per call.
        return Array.IndexOf(s_languageCodes, language) >= 0;
    }

    /// <summary>Returns <c>OcrBackendType.Cloud</c>.</summary>
    public OcrBackendType BackendType()
    {
        return OcrBackendType.Cloud;
    }

    /// <summary>Returns a new list containing the advertised language codes.</summary>
    public List<string> SupportedLanguages()
    {
        // Hand out a fresh copy each time so callers cannot mutate the shared array.
        return new List<string>(s_languageCodes);
    }

    /// <summary>Table detection is not offered by this backend.</summary>
    public bool SupportsTableDetection()
    {
        return false;
    }

    /// <summary>Whole-document processing is not offered by this backend.</summary>
    public bool SupportsDocumentProcessing()
    {
        return false;
    }

    /// <summary>Not supported; always throws.</summary>
    /// <param name="path">Path of the document (unused).</param>
    /// <param name="config">OCR configuration (unused).</param>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public ExtractionResult ProcessDocument(string path, OcrConfig config)
    {
        throw new NotSupportedException("Document processing not supported by CloudOcrBackend");
    }
}
/// <summary>Entry point showing how the custom OCR backend is registered.</summary>
class Program
{
    static void Main()
    {
        // Build the backend with a placeholder API key and hand it to the bridge.
        OcrBackendBridge.Register(new CloudOcrBackend(apiKey: "your-api-key"));
    }
}
```

View File

@@ -0,0 +1,14 @@
using Kreuzberg;
// OCR settings: "easyocr" backend, "en" language, GPU usage switched on.
var easyOcrSettings = new OcrConfig
{
    Backend = "easyocr",
    Language = "en",
    UseGpu = true
};
var extractionConfig = new ExtractionConfig { Ocr = easyOcrSettings };

// Extract synchronously and print the resulting content.
var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", extractionConfig);
Console.WriteLine(extraction.Content);

View File

@@ -0,0 +1,14 @@
using Kreuzberg;
// OCR settings: "tesseract" backend with the "eng" language.
var tesseractSettings = new OcrConfig
{
    Backend = "tesseract",
    Language = "eng"
};

// ForceOcr is enabled alongside the OCR settings.
var extractionConfig = new ExtractionConfig
{
    ForceOcr = true,
    Ocr = tesseractSettings
};

// Extract synchronously and print the resulting content.
var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
Console.WriteLine(extraction.Content);

View File

@@ -0,0 +1,21 @@
```csharp title="C#"
using Kreuzberg;
// Image extraction settings.
var imageSettings = new ImageExtractionConfig
{
    ExtractImages = true,
    TargetDpi = 200,
    MaxImageDimension = 2048,
    // Set InjectPlaceholders to false to extract images without markdown references.
    InjectPlaceholders = true,
    AutoAdjustDpi = true
};
var extractionConfig = new ExtractionConfig { Images = imageSettings };

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("document.pdf", null, extractionConfig);

// Print at most the first 100 characters of the content.
string text = extraction.Content;
string preview = text;
if (text.Length > 100)
{
    preview = text[..100];
}
Console.WriteLine($"Extracted: {preview}");
```

View File

@@ -0,0 +1,27 @@
```csharp title="C#"
using Kreuzberg;
// Image preprocessing: 300 DPI target, denoise, deskew, contrast enhancement,
// and the "otsu" binarization method.
var preprocessing = new ImagePreprocessingConfig
{
    TargetDpi = 300,
    Denoise = true,
    Deskew = true,
    ContrastEnhance = true,
    BinarizationMethod = "otsu"
};

// Nest the preprocessing settings: Tesseract config -> OCR config -> extraction config.
var tesseractSettings = new TesseractConfig { Preprocessing = preprocessing };
var ocrSettings = new OcrConfig { TesseractConfig = tesseractSettings };
var extractionConfig = new ExtractionConfig { Ocr = ocrSettings };

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", null, extractionConfig);

// Print at most the first 100 characters of the content.
string text = extraction.Content;
string preview = text;
if (text.Length > 100)
{
    preview = text[..100];
}
Console.WriteLine($"Content: {preview}");
```

View File

@@ -0,0 +1,23 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "easyocr" backend with the "en" language.
// EasyOCR-specific options (use_gpu, beam_width, etc.) can be supplied through
// OcrConfig's EasyocrConfig field if available, or via backend-specific configuration.
var easyOcrSettings = new OcrConfig
{
    Backend = "easyocr",
    Language = "en"
};
var extractionConfig = new ExtractionConfig { Ocr = easyOcrSettings };

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", null, extractionConfig);

// Report a preview (at most 100 characters) and the total character count.
string text = extraction.Content;
string preview;
if (text.Length > 100)
{
    preview = text[..100];
}
else
{
    preview = text;
}
Console.WriteLine($"Extracted content (preview): {preview}");
Console.WriteLine($"Total characters: {text.Length}");
```

View File

@@ -0,0 +1,29 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "paddle-ocr" backend with the "en" language.
var paddleSettings = new OcrConfig
{
    Backend = "paddle-ocr",
    Language = "en"
};
var extractionConfig = new ExtractionConfig { Ocr = paddleSettings };

var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", extractionConfig);

// OcrElements may be null, in which case nothing is printed.
var ocrElements = extraction.OcrElements;
if (ocrElements is not null)
{
    foreach (var item in ocrElements)
    {
        Console.WriteLine($"Text: {item.Text}");
        Console.WriteLine($"Confidence: {item.Confidence.Recognition:F2}");
        Console.WriteLine($"Geometry: {item.Geometry}");

        // Rotation is optional; print the angle only when it is present.
        var rotation = item.Rotation;
        if (rotation is not null)
        {
            Console.WriteLine($"Rotation: {rotation.Angle}°");
        }

        // Blank line between elements.
        Console.WriteLine();
    }
}
```

View File

@@ -0,0 +1,21 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "tesseract" backend with the "eng" language.
var tesseractSettings = new OcrConfig
{
    Backend = "tesseract",
    Language = "eng"
};
var extractionConfig = new ExtractionConfig { Ocr = tesseractSettings };

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", null, extractionConfig);

// Report a preview (at most 100 characters) and the total character count.
string text = extraction.Content;
int characterCount = text.Length;
string preview = characterCount > 100 ? text[..100] : text;
Console.WriteLine($"Extracted content (preview): {preview}");
Console.WriteLine($"Total characters: {characterCount}");
```

View File

@@ -0,0 +1,21 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "tesseract" backend; no language is set here.
var tesseractSettings = new OcrConfig { Backend = "tesseract" };

// ForceOcr is enabled alongside the OCR settings.
var extractionConfig = new ExtractionConfig
{
    Ocr = tesseractSettings,
    ForceOcr = true
};

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("document.pdf", null, extractionConfig);

// Report a preview (at most 100 characters) and the total character count.
string text = extraction.Content;
string preview = text;
if (text.Length > 100)
{
    preview = text[..100];
}
Console.WriteLine($"Extracted content (preview): {preview}");
Console.WriteLine($"Total characters: {text.Length}");
```

View File

@@ -0,0 +1,21 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "tesseract" backend with the combined language string "eng+deu+fra".
var multilingualSettings = new OcrConfig
{
    Backend = "tesseract",
    Language = "eng+deu+fra"
};
var extractionConfig = new ExtractionConfig { Ocr = multilingualSettings };

// The second argument is passed as null; the configuration goes third.
var extraction = KreuzbergLib.ExtractFileSync("multilingual.pdf", null, extractionConfig);

// Report a preview (at most 100 characters) and the total character count.
string text = extraction.Content;
int characterCount = text.Length;
string preview;
if (characterCount > 100)
{
    preview = text[..100];
}
else
{
    preview = text;
}
Console.WriteLine($"Extracted content (preview): {preview}");
Console.WriteLine($"Total characters: {characterCount}");
```

View File

@@ -0,0 +1,16 @@
```csharp title="C#"
using Kreuzberg;
// OCR settings: "paddle-ocr" backend with the "en" language.
// For maximum accuracy, additionally set:
//   PaddleOcrConfig = new PaddleOcrConfig { ModelTier = "server" }
var paddleSettings = new OcrConfig
{
    Backend = "paddle-ocr",
    Language = "en"
};
var extractionConfig = new ExtractionConfig { Ocr = paddleSettings };

// Extract synchronously and print the resulting content.
var extraction = KreuzbergLib.ExtractFileSync("scanned.pdf", extractionConfig);
Console.WriteLine(extraction.Content);
```

View File

@@ -0,0 +1,17 @@
using Kreuzberg;
// Tesseract-specific options: Psm is set to 3.
var tesseractOptions = new TesseractConfig { Psm = 3 };

// OCR settings: "tesseract" backend with the combined language string "eng+deu+fra".
var ocrSettings = new OcrConfig
{
    Backend = "tesseract",
    Language = "eng+deu+fra",
    TesseractConfig = tesseractOptions
};
var extractionConfig = new ExtractionConfig { Ocr = ocrSettings };

// Extract synchronously and print the resulting content.
var extraction = KreuzbergLib.ExtractFileSync("document.pdf", extractionConfig);
Console.WriteLine(extraction.Content);