Files
fil/docs/snippets/go/metadata/metadata.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.7 KiB

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)

func main() {
	result, err := kreuzberg.ExtractFileSync("document.pdf", nil)
	if err != nil {
		log.Fatalf("extract pdf: %v", err)
	}

	// Access PDF metadata
	if pdf, ok := result.Metadata.PdfMetadata(); ok {
		if pdf.PageCount != nil {
			fmt.Printf("Pages: %d\n", *pdf.PageCount)
		}
		if pdf.Author != nil {
			fmt.Printf("Author: %s\n", *pdf.Author)
		}
		if pdf.Title != nil {
			fmt.Printf("Title: %s\n", *pdf.Title)
		}
	}

	// Access HTML metadata
	htmlResult, err := kreuzberg.ExtractFileSync("page.html", nil)
	if err != nil {
		log.Fatalf("extract html: %v", err)
	}
	if html, ok := htmlResult.Metadata.HTMLMetadata(); ok {
		if html.Title != nil {
			fmt.Printf("Title: %s\n", *html.Title)
		}
		if html.Description != nil {
			fmt.Printf("Description: %s\n", *html.Description)
		}

		// Access keywords as array
		if len(html.Keywords) > 0 {
			fmt.Printf("Keywords: %s\n", strings.Join(html.Keywords, ", "))
		}

		// Access canonical URL (renamed from canonical)
		if html.CanonicalURL != nil {
			fmt.Printf("Canonical URL: %s\n", *html.CanonicalURL)
		}

		// Access Open Graph fields from map
		if len(html.OpenGraph) > 0 {
			if image, ok := html.OpenGraph["image"]; ok {
				fmt.Printf("Open Graph Image: %s\n", image)
			}
			if ogTitle, ok := html.OpenGraph["title"]; ok {
				fmt.Printf("Open Graph Title: %s\n", ogTitle)
			}
			if ogType, ok := html.OpenGraph["type"]; ok {
				fmt.Printf("Open Graph Type: %s\n", ogType)
			}
		}

		// Access Twitter Card fields from map
		if len(html.TwitterCard) > 0 {
			if card, ok := html.TwitterCard["card"]; ok {
				fmt.Printf("Twitter Card Type: %s\n", card)
			}
			if creator, ok := html.TwitterCard["creator"]; ok {
				fmt.Printf("Twitter Creator: %s\n", creator)
			}
		}

		// Access new fields
		if html.Language != nil {
			fmt.Printf("Language: %s\n", *html.Language)
		}

		if html.TextDirection != nil {
			fmt.Printf("Text Direction: %s\n", *html.TextDirection)
		}

		// Access headers
		if len(html.Headers) > 0 {
			headers := make([]string, len(html.Headers))
			for i, h := range html.Headers {
				headers[i] = h.Text
			}
			fmt.Printf("Headers: %s\n", strings.Join(headers, ", "))
		}

		// Access links
		if len(html.Links) > 0 {
			for _, link := range html.Links {
				fmt.Printf("Link: %s (%s)\n", link.Href, link.Text)
			}
		}

		// Access images
		if len(html.Images) > 0 {
			for _, image := range html.Images {
				fmt.Printf("Image: %s\n", image.Src)
			}
		}

		// Access structured data
		if len(html.StructuredData) > 0 {
			fmt.Printf("Structured data items: %d\n", len(html.StructuredData))
		}
	}
}