168 lines
No EOL
5.8 KiB
Python
168 lines
No EOL
5.8 KiB
Python
import markdown
|
|
import re
|
|
import os
|
|
import glob
|
|
from typing import List
|
|
|
|
|
|
def markdown_to_html_paragraphs(markdown_text: str) -> List[str]:
|
|
"""
|
|
Convert markdown text into a list of HTML paragraphs.
|
|
Supports mathematical equations using LaTeX syntax.
|
|
|
|
Args:
|
|
markdown_text (str): The markdown text to convert
|
|
|
|
Returns:
|
|
List[str]: A list of HTML paragraphs, each wrapped in <p> tags
|
|
"""
|
|
# Prepend "md/" to image paths if they don't already start with md/
|
|
markdown_text = re.sub(r'!\[(.*?)\]\((?!md/)([^/].*?\.assets/.*?)\)', r'', markdown_text)
|
|
|
|
# Check if the first line starts with a # for h1 title
|
|
lines = markdown_text.split('\n')
|
|
has_h1_title = False
|
|
bold_title = None
|
|
|
|
if lines and lines[0].strip().startswith('#'):
|
|
has_h1_title = True
|
|
title_line = lines[0].strip().lstrip('#').strip()
|
|
bold_title = f'<p class="blog-title">{title_line}</p>'
|
|
# Remove the title from the markdown to avoid duplicate processing
|
|
markdown_text = '\n'.join(lines[1:])
|
|
else:
|
|
raise ValueError("No title found in the markdown file")
|
|
|
|
# Configure markdown with math extensions
|
|
extensions = [
|
|
'markdown.extensions.extra', # For blockquotes and other features
|
|
'markdown.extensions.fenced_code', # For code blocks
|
|
'markdown.extensions.codehilite', # For syntax highlighting
|
|
'markdown.extensions.attr_list', # For attributes
|
|
'markdown.extensions.md_in_html', # For markdown inside HTML
|
|
'mdx_math', # For math support
|
|
]
|
|
|
|
try:
|
|
# Try to use python-markdown-math which outputs compatible with MathJax 3
|
|
import pymdownx.arithmatex
|
|
extensions.remove('mdx_math')
|
|
extensions.append('pymdownx.arithmatex')
|
|
extension_configs = {
|
|
'pymdownx.arithmatex': {
|
|
'generic': True # Uses \(...\) for inline and \[...\] for display math
|
|
}
|
|
}
|
|
except ImportError:
|
|
# Fallback to mdx_math
|
|
extension_configs = {
|
|
'mdx_math': {
|
|
'enable_dollar_delimiter': True, # Enable $...$ for inline math
|
|
}
|
|
}
|
|
|
|
# Convert markdown to HTML with math support
|
|
html = markdown.markdown(
|
|
markdown_text,
|
|
extensions=extensions,
|
|
extension_configs=extension_configs
|
|
)
|
|
|
|
html = re.sub(r'<p>\s*(<img[^>]+>)\s*</p>', r'\1', html, flags=re.IGNORECASE)
|
|
# Convert image followed by blockquote to figure with caption
|
|
html = re.sub(
|
|
r'<img([^>]+)>\s*<blockquote>\s*<p>(.*?)</p>\s*</blockquote>',
|
|
r'<figure class="figure">\n <img\1 class="figure-img img-fluid rounded">\n <figcaption class="figure-caption">\2</figcaption>\n</figure>',
|
|
html,
|
|
flags=re.DOTALL
|
|
)
|
|
|
|
# Add "link" class and target="_blank" to all <a> tags
|
|
html = re.sub(r'<a(.*?)>', r'<a\1 class="link" target="_blank">', html)
|
|
html = re.sub(r'<a(.*?)class="(.*?)"(.*?)class="(.*?)"(.*?)>', r'<a\1class="\2 \4"\3\5>', html)
|
|
html = re.sub(r'<a(.*?)target="(.*?)"(.*?)target="(.*?)"(.*?)>', r'<a\1target="\2"\3\5>', html)
|
|
|
|
# Split the HTML into paragraphs
|
|
paragraphs = html.split('\n\n')
|
|
|
|
# Clean up and ensure each paragraph is properly wrapped
|
|
cleaned_paragraphs = []
|
|
|
|
# Add the bold title as the first element if it exists
|
|
if has_h1_title and bold_title:
|
|
cleaned_paragraphs.append(bold_title)
|
|
|
|
for p in paragraphs:
|
|
p = p.strip()
|
|
if p:
|
|
# If the paragraph doesn't already have <p> tags, add them
|
|
if not (p.startswith('<') and not p.startswith('<p>')):
|
|
p = f'<p>{p}</p>'
|
|
cleaned_paragraphs.append(p)
|
|
|
|
return cleaned_paragraphs, title_line
|
|
|
|
|
|
def insert_markdown_into_template(template_path: str, markdown_text: str) -> str:
|
|
"""
|
|
Insert parsed markdown content into the template HTML file.
|
|
|
|
Args:
|
|
template_path (str): Path to the template HTML file
|
|
markdown_text (str): The markdown text to convert and insert
|
|
|
|
Returns:
|
|
str: Complete HTML with markdown content inserted
|
|
"""
|
|
# Parse markdown into HTML paragraphs
|
|
html_paragraphs, title_line = markdown_to_html_paragraphs(markdown_text)
|
|
|
|
# Read the template
|
|
with open(template_path, 'r') as f:
|
|
template = f.read()
|
|
|
|
# Join paragraphs into a single string
|
|
content_html = '\n'.join(html_paragraphs)
|
|
|
|
# Insert the content into the template
|
|
complete_html = template.replace('{{ content }}', content_html)
|
|
|
|
# Replace {{ title }} placeholders with the extracted title
|
|
complete_html = complete_html.replace('{{ title }}', title_line)
|
|
|
|
return complete_html
|
|
|
|
|
|
def process_all_markdown_files():
|
|
"""
|
|
Process all markdown files in blog/md/ directory and generate HTML files in blog/html/.
|
|
"""
|
|
# Get all markdown files in blog/md/
|
|
md_files = glob.glob("dist/blog/md/*.md")
|
|
template_path = "dist/blog/template.html"
|
|
|
|
for md_file in md_files:
|
|
# Extract base filename without extension
|
|
base_name = os.path.basename(md_file)[:-3] # Remove .md extension
|
|
html_file = f"dist/blog/html/{base_name}.html"
|
|
|
|
print(f"Processing {md_file} -> {html_file}")
|
|
|
|
try:
|
|
# Read the markdown content
|
|
with open(md_file, "r") as f:
|
|
markdown_text = f.read()
|
|
|
|
# Generate HTML content
|
|
complete_html = insert_markdown_into_template(template_path, markdown_text)
|
|
|
|
# Write HTML output
|
|
with open(html_file, "w") as f:
|
|
f.write(complete_html)
|
|
|
|
except Exception as e:
|
|
print(f"Error processing {md_file}: {str(e)}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
process_all_markdown_files() |