import json
import logging
import os
from typing import Dict, List, Optional

import markdown
from fastapi import FastAPI

from app.services.image_service import FileHandler, ImageService

logger = logging.getLogger(__name__)


class MetadataProcessor:
    """
    Scan Markdown files, extract front matter metadata, and generate a
    structured JSON file.

    The collected structure lives in ``self.data`` and has two lists:
    ``"categories"`` (one entry per directory path after de-duplication) and
    ``"favorites"`` (one entry per Markdown file flagged ``favorite: true``).
    """

    def __init__(self, input_dir: str, output_file: str, app: Optional[FastAPI] = None):
        """
        Initialize the MetadataProcessor.

        Args:
            input_dir (str): Directory containing Markdown files.
            output_file (str): Path to save the generated JSON file.
            app (FastAPI, optional): Application object handed to
                ``ImageService`` when favorites are processed.
        """
        self.input_dir = input_dir
        self.output_file = output_file
        self.app = app
        self.data = {"categories": [], "favorites": []}

    def _extract_metadata(self, file_path: str) -> Dict:
        """
        Extract front matter metadata using the 'markdown' package.

        Args:
            file_path (str): Path to the Markdown file.

        Returns:
            dict: Metadata keys mapped to strings; multi-line values are
            joined with a single space. Empty when the file has no metadata.
        """
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise sit in front of the first metadata key.
        with open(file_path, "r", encoding="utf-8-sig") as file:
            markdown_content = file.read()

        # The 'meta' extension fills md.Meta as a side effect of convert().
        md = markdown.Markdown(extensions=["extra", "nl2br", "meta"])
        md.convert(markdown_content)

        # md.Meta maps each key to a list of lines.
        if not md.Meta:
            return {}
        return {key: " ".join(value) for key, value in md.Meta.items()}

    def _process_directory(self):
        """
        Recursively scan the input directory for Markdown files and extract
        metadata to build the JSON structure.

        Both result lists are rebuilt from scratch on every call so that
        repeated scans do not pile up duplicate favorites. Directories and
        files are visited in sorted order so the result does not depend on
        the order in which the filesystem lists them.
        """
        self.data["categories"] = []
        self.data["favorites"] = []

        image_type = "thumbnails"
        # Built on the first favorite only, so a tree without favorites never
        # instantiates ImageService (matching the original behaviour there).
        image_service = None
        default_size = None

        for root, dirs, files in os.walk(self.input_dir):
            # Sorting in place steers the order of the top-down walk.
            dirs.sort()
            category_path = os.path.relpath(root, self.input_dir).replace(os.sep, "/")

            for filename in sorted(files):
                if not filename.endswith(".md"):
                    continue

                metadata = self._extract_metadata(os.path.join(root, filename))
                if not metadata:
                    continue

                self.data["categories"].append(self._category_entry(metadata, category_path))

                if metadata.get("favorite", "").lower() != "true":
                    continue

                file_handler = FileHandler(
                    category=category_path,
                    image_type=image_type,
                    filename=metadata.get("image"),
                )
                if image_service is None:
                    image_service = ImageService(self.app)
                    default_size = image_service.get_image_size(image_type)

                # The tag is only logged here; the call is kept because its
                # side effects, if any, are not visible from this module.
                # NOTE(review): the "Unkown" fallback (sic) is preserved
                # byte-for-byte, while FileHandler above receives None for a
                # missing image - confirm the intended handling.
                image_tag = image_service.image_tag(
                    category_path, image_type, metadata.get("image", "Unkown")
                )
                logger.debug("Favorite image file: %s", file_handler.dest_filename_webp)
                logger.debug("Favorite image tag: %s", image_tag)

                self.data["favorites"].append({
                    "name": metadata.get("name", "Unknown"),
                    "image": file_handler.dest_filename_webp,
                    "height": default_size.get("height"),
                    "width": default_size.get("width"),
                    "description": metadata.get("summary", "No description provided"),
                    "path": category_path,
                })

    @staticmethod
    def _category_entry(metadata: Dict, category_path: str) -> Dict:
        """Build the 'categories' record for one Markdown file."""
        return {
            "name": metadata.get("name", "Unknown"),
            "path": category_path,
            "author": metadata.get("author", "Unknown"),
        }

    def generate_json(self) -> bool:
        """
        Generate the JSON structure, deduplicate and sort categories by
        'path', then save it to the output file.

        Returns:
            bool: Always True once the file has been written.
        """
        self._process_directory()  # Rebuilds self.data from the Markdown files

        # Keep the first entry seen for each path; later ones are dropped.
        unique_categories: Dict[str, Dict] = {}
        for category in self.data["categories"]:
            if isinstance(category, dict):  # Skip anything that is not a record
                unique_categories.setdefault(category.get("path", "unknown"), category)

        ordered: List[Dict] = sorted(
            unique_categories.values(),
            key=lambda entry: entry.get("path", "unknown"),
        )
        self.data["categories"] = ordered

        with open(self.output_file, "w", encoding="utf-8") as json_file:
            json.dump(self.data, json_file, indent=4, ensure_ascii=False)

        print(f"Generated JSON saved to {self.output_file}")
        return True