generated from hjess/PythonTemplateProject
103 lines
3.9 KiB
Python
103 lines
3.9 KiB
Python
import os
|
|
import markdown
|
|
import json
|
|
from typing import List, Dict
|
|
|
|
|
|
class MetadataProcessor:
    """
    Scan Markdown files, extract front matter metadata,
    and generate a structured JSON file.

    The collected data lives in ``self.data`` as a dictionary with two lists:
    ``"categories"`` (one entry per directory path, deduplicated when the
    JSON is generated) and ``"favorites"`` (one entry per file whose
    ``favorite`` metadata value is ``true``).
    """

    def __init__(self, input_dir: str, output_file: str):
        """
        Initialize the MetadataProcessor.

        Args:
            input_dir (str): Directory containing Markdown files.
            output_file (str): Path to save the generated JSON file.
        """
        self.input_dir = input_dir
        self.output_file = output_file
        self.data = {"categories": [], "favorites": []}

    def _extract_metadata(self, file_path: str) -> Dict:
        """
        Extract front matter metadata using the 'markdown' package.

        Args:
            file_path (str): Path to the Markdown file.

        Returns:
            dict: A dictionary containing the extracted metadata. Each value
            is the metadata entry's lines joined with single spaces. The
            dictionary is empty when the file declares no metadata.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            markdown_content = file.read()

        # The 'meta' extension fills in md.Meta while the document is converted.
        md = markdown.Markdown(extensions=["extra", "nl2br", "meta"])
        md.convert(markdown_content)

        # Metadata is stored in md.Meta as a dictionary of lists; flatten each
        # list into one string. Fall back to an empty mapping if it is missing.
        raw_meta = getattr(md, "Meta", None) or {}
        return {key: " ".join(value) for key, value in raw_meta.items()}

    def _process_directory(self):
        """
        Recursively scan the input directory for Markdown files
        and extract metadata to build the JSON structure.

        Entries are appended to ``self.data["categories"]`` and
        ``self.data["favorites"]``; both lists are created when absent.
        Directories and files are visited in sorted order so the collected
        entries (and therefore the generated JSON) do not depend on the
        order in which the filesystem happens to list them.
        """
        # Create the containers up front so appending can never hit a KeyError.
        categories = self.data.setdefault("categories", [])
        favorites = self.data.setdefault("favorites", [])

        for root, dirs, files in os.walk(self.input_dir):
            # Sorting in place steers os.walk's (top-down) traversal order.
            dirs.sort()

            # Every file in this directory shares the same category path.
            category_path = os.path.relpath(root, self.input_dir).replace(os.sep, "/")

            for file in sorted(files):
                if not file.endswith(".md"):
                    continue

                metadata = self._extract_metadata(os.path.join(root, file))
                if not metadata:
                    # Files without front matter contribute nothing.
                    continue

                name = metadata.get("name", "Unknown")

                # Add to 'categories'
                categories.append({
                    "name": name,
                    "path": category_path,
                    "author": metadata.get("author", "Unknown"),
                })

                # Add to 'favorites' if 'favorite' is true (case-insensitive)
                if metadata.get("favorite", "").lower() == "true":
                    favorites.append({
                        "name": name,
                        "image": metadata.get("image", "images/default.jpg"),
                        "description": metadata.get("summary", "No description provided"),
                    })

    def generate_json(self):
        """
        Generate the JSON structure, deduplicate and sort categories by 'path',
        then save it to the output file.

        The first category seen for a given path wins; entries that are not
        dictionaries are dropped. The parent directory of the output file is
        created when it does not exist yet.

        Note:
            Every call re-scans the input directory and appends to
            ``self.data``. Categories are deduplicated by path, but favorites
            are not, so calling this repeatedly on the same instance repeats
            the favorites.
        """
        self._process_directory()  # Extract all markdown data into self.data

        # Deduplicate 'categories' using 'path' as the unique key (first wins).
        unique_categories = {}
        for category in self.data.get("categories", []):
            if isinstance(category, dict):  # Ensure valid category structure
                unique_categories.setdefault(category.get("path", "unknown"), category)

        # Replace the 'categories' list with a sorted version by 'path'
        self.data["categories"] = sorted(
            unique_categories.values(),
            key=lambda entry: entry.get("path", "unknown"),
        )

        # A bare file name has no directory component and needs no directory.
        output_dir = os.path.dirname(self.output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save the updated JSON to file
        with open(self.output_file, "w", encoding="utf-8") as json_file:
            json.dump(self.data, json_file, indent=4, ensure_ascii=False)

        print(f"Generated JSON saved to {self.output_file}")
|