Files
LifeFaq/app/services/markdown_processor.py

93 lines
3.6 KiB
Python
Raw Normal View History

2024-12-11 23:56:15 +01:00
import os
from bs4 import BeautifulSoup
2024-12-11 23:56:15 +01:00
from app.services.markdown_render import render_markdown_with_jinja # Your custom renderer
from jinja2 import Environment, FileSystemLoader
class MarkdownProcessor:
    """
    A class to process Markdown files, extract metadata, and generate a single
    'index.html' per category directory using a custom rendering engine.

    Each directory found below ``input_dir`` is treated as a category: its
    ``*.md`` files are rendered with ``render_markdown_with_jinja`` and the
    resulting sections are combined through the ``combined_template.html``
    Jinja2 template.
    """

    def __init__(self, input_dir: str, templates_dir: str):
        """
        Initialize the MarkdownProcessor.

        Args:
            input_dir (str): Root directory containing category subdirectories.
            templates_dir (str): Directory containing Jinja2 templates.
        """
        self.input_dir = input_dir
        self.env = Environment(loader=FileSystemLoader(templates_dir))

    def _process_markdown_files_in_directory(self, directory_path: str) -> list:
        """
        Process all Markdown files in a directory using Markdown and Jinja2 custom tags.

        Files are visited in sorted name order so the section order is
        deterministic. Only regular files whose name ends in ".md" are read;
        sub-directories are ignored even if their name ends in ".md".

        Args:
            directory_path (str): Path to the category directory.

        Returns:
            list: One dict per Markdown file with the keys "name", "content",
            "summary" and "author". Empty when the directory holds no
            Markdown files.
        """
        sections = []
        for entry in sorted(os.listdir(directory_path)):
            file_path = os.path.join(directory_path, entry)
            # Guard against directories named "*.md": opening one would raise
            # and abort the whole run.
            if not entry.endswith(".md") or not os.path.isfile(file_path):
                continue

            with open(file_path, "r", encoding="utf-8") as md_file:
                markdown_content = md_file.read()

            # Process Markdown and Jinja2
            rendered_content, metadata = render_markdown_with_jinja(markdown_content)

            # Missing metadata keys fall back to neutral placeholders.
            sections.append({
                "name": metadata.get("title", "Untitled"),
                "content": rendered_content,
                "summary": metadata.get("summary", ""),
                "author": metadata.get("author", "Unknown"),
            })
        return sections

    def _generate_index_html(self, directory_path: str, sections: list, output_file: str):
        """
        Generate the index.html file for a category using the combined sections.

        The page title is the capitalized base name of ``directory_path``.
        The HTML is fully rendered and prettified before the output file is
        opened, so a rendering failure does not leave a truncated file behind.

        Args:
            directory_path (str): Path to the category directory.
            sections (list): List of processed Markdown content and metadata.
            output_file (str): Path to save the generated index.html.
        """
        # normpath strips a trailing separator, which would otherwise make
        # basename() return an empty title.
        category_name = os.path.basename(os.path.normpath(directory_path))

        # Render the template with the combined sections
        template = self.env.get_template("combined_template.html")
        rendered_html = template.render(
            title=category_name.capitalize(),
            sections=sections,
        )

        # Clean up the markup before touching the filesystem.
        soup = BeautifulSoup(rendered_html, "html.parser")
        cleaned_html = soup.prettify(formatter="html5")

        # Ensure the directory that will actually receive the file exists.
        # dirname() is empty for a bare file name, and makedirs("") would raise.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Write the rendered HTML to index.html
        with open(output_file, "w", encoding="utf-8") as output:
            output.write(cleaned_html)
        print(f"Generated: {output_file}")

    def run(self):
        """
        Run the Markdown processing workflow: one 'index.html' per category.

        Every directory found while walking ``input_dir`` (at any depth) is
        treated as a category; ``input_dir`` itself is not. Directories
        without Markdown files produce no output.
        """
        for root, dirs, _ in os.walk(self.input_dir):
            for directory in dirs:
                category_path = os.path.join(root, directory)
                output_file = os.path.join(category_path, "index.html")

                # Process all Markdown files in the current category directory
                sections = self._process_markdown_files_in_directory(category_path)
                if sections:
                    self._generate_index_html(category_path, sections, output_file)