Files
LifeFaq/app/services/metadata_processor.py
2024-12-11 23:56:15 +01:00

84 lines
3.1 KiB
Python

import os
import markdown
import json
from typing import List, Dict
class MetadataProcessor:
    """
    Scan a directory tree for Markdown files, extract their front matter
    metadata, and write a structured JSON index.

    The generated structure has two top-level lists:

    * ``categories`` -- one entry (``name``, ``path``, ``author``) for every
      Markdown file that carries metadata.
    * ``favorites`` -- one entry (``name``, ``image``, ``description``) for
      every such file whose ``favorite`` metadata value is ``true``
      (case-insensitive).
    """

    def __init__(self, input_dir: str, output_file: str):
        """
        Initialize the MetadataProcessor.

        Args:
            input_dir (str): Directory containing Markdown files.
            output_file (str): Path to save the generated JSON file.
        """
        self.input_dir = input_dir
        self.output_file = output_file
        self.data = {"categories": [], "favorites": []}

    def _extract_metadata(self, file_path: str) -> Dict[str, str]:
        """
        Extract front matter metadata using the 'markdown' package.

        Args:
            file_path (str): Path to the Markdown file (read as UTF-8).

        Returns:
            dict: Metadata key mapped to a single string. The Meta extension
            stores each value as a list of lines; the lines are joined with
            a space. Empty when the file has no front matter.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            markdown_content = file.read()

        # The Meta extension only populates ``md.Meta`` as a side effect of a
        # conversion, so the document has to be converted even though the
        # rendered HTML is discarded.
        md = markdown.Markdown(extensions=["extra", "nl2br", "meta"])
        md.convert(markdown_content)

        # Metadata is stored in md.Meta as a dictionary of lists.
        raw_meta = getattr(md, "Meta", None) or {}
        return {key: " ".join(value) for key, value in raw_meta.items()}

    def _add_entry(self, metadata: Dict[str, str], rel_dir: str) -> None:
        """
        Record one Markdown file's metadata in ``self.data``.

        Args:
            metadata (dict): Non-empty metadata as returned by
                ``_extract_metadata``.
            rel_dir (str): '/'-separated directory of the file, relative to
                the input directory.
        """
        name = metadata.get("name", "Unknown")
        self.data["categories"].append({
            "name": name,
            "path": rel_dir,
            "author": metadata.get("author", "Unknown"),
        })

        # Only files explicitly flagged ``favorite: true`` are favorites.
        if metadata.get("favorite", "").lower() == "true":
            self.data["favorites"].append({
                "name": name,
                "image": metadata.get("image", "images/default.jpg"),
                "description": metadata.get("summary", "No description provided"),
            })

    def _process_directory(self) -> None:
        """
        Recursively scan the input directory for Markdown files
        and extract metadata to build the JSON structure.

        Any previously collected data is discarded first, so scanning twice
        does not duplicate entries. Directories and files are visited in
        sorted order to make the output reproducible.
        """
        # Start from a clean slate: without this, a second scan would append
        # every entry again.
        self.data = {"categories": [], "favorites": []}

        for root, dirs, files in os.walk(self.input_dir):
            # Sorting in place steers the order in which os.walk descends.
            dirs.sort()
            # "path" is the containing directory ("." for the input
            # directory itself), not the file path.
            rel_dir = os.path.relpath(root, self.input_dir).replace(os.sep, "/")

            for file_name in sorted(files):
                if not file_name.endswith(".md"):
                    continue
                metadata = self._extract_metadata(os.path.join(root, file_name))
                # Files without front matter are left out of the index.
                if metadata:
                    self._add_entry(metadata, rel_dir)

    def generate_json(self) -> None:
        """
        Generate the JSON structure and save it to the output file.

        The parent directory of the output file is created if it does not
        exist yet. The file is written as UTF-8 with non-ASCII characters
        kept as-is.
        """
        self._process_directory()

        # Make sure the destination directory exists; a bare file name has
        # no directory component and needs nothing created.
        output_dir = os.path.dirname(self.output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save JSON to file
        with open(self.output_file, "w", encoding="utf-8") as json_file:
            json.dump(self.data, json_file, indent=4, ensure_ascii=False)
        print(f"Generated JSON saved to {self.output_file}")