import markdown import re import os import glob from typing import List def markdown_to_html_paragraphs(markdown_text: str) -> List[str]: """ Convert markdown text into a list of HTML paragraphs. Supports mathematical equations using LaTeX syntax. Args: markdown_text (str): The markdown text to convert Returns: List[str]: A list of HTML paragraphs, each wrapped in

tags """ # Prepend "md/" to image paths if they don't already start with md/ markdown_text = re.sub(r'!\[(.*?)\]\((?!md/)([^/].*?\.assets/.*?)\)', r'![\1](/blog/md/\2)', markdown_text) # Check if the first line starts with a # for h1 title lines = markdown_text.split('\n') has_h1_title = False bold_title = None if lines and lines[0].strip().startswith('#'): has_h1_title = True title_line = lines[0].strip().lstrip('#').strip() bold_title = f'

{title_line}

' # Remove the title from the markdown to avoid duplicate processing markdown_text = '\n'.join(lines[1:]) else: raise ValueError("No title found in the markdown file") # Configure markdown with math extensions extensions = [ 'markdown.extensions.extra', # For blockquotes and other features 'markdown.extensions.fenced_code', # For code blocks 'markdown.extensions.codehilite', # For syntax highlighting 'markdown.extensions.attr_list', # For attributes 'markdown.extensions.md_in_html', # For markdown inside HTML 'mdx_math', # For math support ] try: # Try to use python-markdown-math which outputs compatible with MathJax 3 import pymdownx.arithmatex extensions.remove('mdx_math') extensions.append('pymdownx.arithmatex') extension_configs = { 'pymdownx.arithmatex': { 'generic': True # Uses \(...\) for inline and \[...\] for display math } } except ImportError: # Fallback to mdx_math extension_configs = { 'mdx_math': { 'enable_dollar_delimiter': True, # Enable $...$ for inline math } } # Convert markdown to HTML with math support html = markdown.markdown( markdown_text, extensions=extensions, extension_configs=extension_configs ) html = re.sub(r'

\s*(]+>)\s*

', r'\1', html, flags=re.IGNORECASE) # Convert image followed by blockquote to figure with caption html = re.sub( r']+)>\s*
\s*

(.*?)

\s*
', r'
\n \n
\2
\n
', html, flags=re.DOTALL ) # Add "link" class and target="_blank" to all tags html = re.sub(r'', r'', html) html = re.sub(r'', r'', html) html = re.sub(r'', r'', html) # Split the HTML into paragraphs paragraphs = html.split('\n\n') # Clean up and ensure each paragraph is properly wrapped cleaned_paragraphs = [] # Add the bold title as the first element if it exists if has_h1_title and bold_title: cleaned_paragraphs.append(bold_title) for p in paragraphs: p = p.strip() if p: # If the paragraph doesn't already have

tags, add them if not (p.startswith('<') and not p.startswith('

')): p = f'

{p}

' cleaned_paragraphs.append(p) return cleaned_paragraphs, title_line def insert_markdown_into_template(template_path: str, markdown_text: str) -> str: """ Insert parsed markdown content into the template HTML file. Args: template_path (str): Path to the template HTML file markdown_text (str): The markdown text to convert and insert Returns: str: Complete HTML with markdown content inserted """ # Parse markdown into HTML paragraphs html_paragraphs, title_line = markdown_to_html_paragraphs(markdown_text) # Read the template with open(template_path, 'r') as f: template = f.read() # Join paragraphs into a single string content_html = '\n'.join(html_paragraphs) # Insert the content into the template complete_html = template.replace('{{ content }}', content_html) # Replace {{ title }} placeholders with the extracted title complete_html = complete_html.replace('{{ title }}', title_line) return complete_html def process_all_markdown_files(): """ Process all markdown files in blog/md/ directory and generate HTML files in blog/html/. """ # Get all markdown files in blog/md/ md_files = glob.glob("dist/blog/md/*.md") template_path = "dist/blog/template.html" for md_file in md_files: # Extract base filename without extension base_name = os.path.basename(md_file)[:-3] # Remove .md extension html_file = f"dist/blog/html/{base_name}.html" print(f"Processing {md_file} -> {html_file}") try: # Read the markdown content with open(md_file, "r") as f: markdown_text = f.read() # Generate HTML content complete_html = insert_markdown_into_template(template_path, markdown_text) # Write HTML output with open(html_file, "w") as f: f.write(complete_html) except Exception as e: print(f"Error processing {md_file}: {str(e)}") if __name__ == "__main__": process_all_markdown_files()