import os

from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader

from app.services.markdown_render import render_markdown_with_jinja  # Your custom renderer


class MarkdownProcessor:
    """
    Process Markdown files and generate one 'index.html' per category directory.

    Every sub-directory found below ``input_dir`` is treated as a category.
    Each ``.md`` file in a category is passed to ``render_markdown_with_jinja``
    and the results are combined through the ``combined_template.html``
    Jinja2 template into a single page written inside that category directory.
    """

    def __init__(self, input_dir: str, templates_dir: str):
        """
        Initialize the MarkdownProcessor.

        Args:
            input_dir (str): Root directory containing category subdirectories.
            templates_dir (str): Directory containing Jinja2 templates.
        """
        self.input_dir = input_dir
        self.env = Environment(loader=FileSystemLoader(templates_dir))

    def _process_markdown_files_in_directory(self, directory_path: str) -> list:
        """
        Render every Markdown file that sits directly inside a directory.

        Files are visited in sorted name order so the resulting sections have
        a stable order. Only regular files whose name ends in ``.md`` are
        read; sub-directories are not descended into.

        Args:
            directory_path (str): Path to the category directory.

        Returns:
            list: One dict per Markdown file with the keys ``name``,
            ``content``, ``summary`` and ``author``. Empty when the directory
            holds no Markdown files.
        """
        sections = []
        for filename in sorted(os.listdir(directory_path)):
            if not filename.endswith(".md"):
                continue

            file_path = os.path.join(directory_path, filename)
            # A directory may also be named "*.md"; opening it would raise,
            # so only regular files are processed.
            if not os.path.isfile(file_path):
                continue

            with open(file_path, "r", encoding="utf-8") as md_file:
                markdown_content = md_file.read()

            # The renderer returns the rendered content together with a
            # metadata mapping; missing metadata keys fall back to defaults.
            rendered_content, metadata = render_markdown_with_jinja(markdown_content)
            sections.append({
                "name": metadata.get("title", "Untitled"),
                "content": rendered_content,
                "summary": metadata.get("summary", ""),
                "author": metadata.get("author", "Unknown"),
            })
        return sections

    def _generate_index_html(self, directory_path: str, sections: list, output_file: str):
        """
        Generate the index.html file for a category using the combined sections.

        The page title is the capitalized name of the category directory. The
        rendered template is re-formatted with BeautifulSoup before it is
        written to ``output_file``.

        Args:
            directory_path (str): Path to the category directory.
            sections (list): List of processed Markdown content and metadata.
            output_file (str): Path to save the generated index.html.
        """
        # normpath drops a trailing separator, which would otherwise make
        # basename() return an empty string and leave the page untitled.
        category_name = os.path.basename(os.path.normpath(directory_path))

        template = self.env.get_template("combined_template.html")
        rendered_html = template.render(
            title=category_name.capitalize(),
            sections=sections,
        )

        # Format the HTML before opening the output file so that a failure
        # here cannot leave a truncated index.html behind.
        soup = BeautifulSoup(rendered_html, "html.parser")
        cleaned_html = soup.prettify(formatter="html5")

        # Make sure the directory that will actually receive the file exists.
        # dirname() is empty for a bare file name, and makedirs("") raises.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_file, "w", encoding="utf-8") as output:
            output.write(cleaned_html)

        print(f"Generated: {output_file}")

    def run(self):
        """
        Run the Markdown processing workflow: one 'index.html' per category.

        Every directory below ``input_dir`` (at any depth) is treated as a
        category; ``input_dir`` itself is not. A category without Markdown
        files produces no output file.
        """
        for root, dirs, _ in os.walk(self.input_dir):
            # Sorting in place makes the traversal order deterministic.
            dirs.sort()
            for directory in dirs:
                category_path = os.path.join(root, directory)
                output_file = os.path.join(category_path, "index.html")

                sections = self._process_markdown_files_in_directory(category_path)
                if sections:
                    self._generate_index_html(category_path, sections, output_file)