First commit
This commit is contained in:
commit
ed9566d057
76 changed files with 4005 additions and 0 deletions
168
parser/md.py
Normal file
168
parser/md.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import markdown
|
||||
import re
|
||||
import os
|
||||
import glob
|
||||
from typing import List
|
||||
|
||||
|
||||
def markdown_to_html_paragraphs(markdown_text: str) -> List[str]:
|
||||
"""
|
||||
Convert markdown text into a list of HTML paragraphs.
|
||||
Supports mathematical equations using LaTeX syntax.
|
||||
|
||||
Args:
|
||||
markdown_text (str): The markdown text to convert
|
||||
|
||||
Returns:
|
||||
List[str]: A list of HTML paragraphs, each wrapped in <p> tags
|
||||
"""
|
||||
# Prepend "md/" to image paths if they don't already start with md/
|
||||
markdown_text = re.sub(r'!\[(.*?)\]\((?!md/)([^/].*?\.assets/.*?)\)', r'', markdown_text)
|
||||
|
||||
# Check if the first line starts with a # for h1 title
|
||||
lines = markdown_text.split('\n')
|
||||
has_h1_title = False
|
||||
bold_title = None
|
||||
|
||||
if lines and lines[0].strip().startswith('#'):
|
||||
has_h1_title = True
|
||||
title_line = lines[0].strip().lstrip('#').strip()
|
||||
bold_title = f'<p class="blog-title">{title_line}</p>'
|
||||
# Remove the title from the markdown to avoid duplicate processing
|
||||
markdown_text = '\n'.join(lines[1:])
|
||||
else:
|
||||
raise ValueError("No title found in the markdown file")
|
||||
|
||||
# Configure markdown with math extensions
|
||||
extensions = [
|
||||
'markdown.extensions.extra', # For blockquotes and other features
|
||||
'markdown.extensions.fenced_code', # For code blocks
|
||||
'markdown.extensions.codehilite', # For syntax highlighting
|
||||
'markdown.extensions.attr_list', # For attributes
|
||||
'markdown.extensions.md_in_html', # For markdown inside HTML
|
||||
'mdx_math', # For math support
|
||||
]
|
||||
|
||||
try:
|
||||
# Try to use python-markdown-math which outputs compatible with MathJax 3
|
||||
import pymdownx.arithmatex
|
||||
extensions.remove('mdx_math')
|
||||
extensions.append('pymdownx.arithmatex')
|
||||
extension_configs = {
|
||||
'pymdownx.arithmatex': {
|
||||
'generic': True # Uses \(...\) for inline and \[...\] for display math
|
||||
}
|
||||
}
|
||||
except ImportError:
|
||||
# Fallback to mdx_math
|
||||
extension_configs = {
|
||||
'mdx_math': {
|
||||
'enable_dollar_delimiter': True, # Enable $...$ for inline math
|
||||
}
|
||||
}
|
||||
|
||||
# Convert markdown to HTML with math support
|
||||
html = markdown.markdown(
|
||||
markdown_text,
|
||||
extensions=extensions,
|
||||
extension_configs=extension_configs
|
||||
)
|
||||
|
||||
html = re.sub(r'<p>\s*(<img[^>]+>)\s*</p>', r'\1', html, flags=re.IGNORECASE)
|
||||
# Convert image followed by blockquote to figure with caption
|
||||
html = re.sub(
|
||||
r'<img([^>]+)>\s*<blockquote>\s*<p>(.*?)</p>\s*</blockquote>',
|
||||
r'<figure class="figure">\n <img\1 class="figure-img img-fluid rounded">\n <figcaption class="figure-caption">\2</figcaption>\n</figure>',
|
||||
html,
|
||||
flags=re.DOTALL
|
||||
)
|
||||
|
||||
# Add "link" class and target="_blank" to all <a> tags
|
||||
html = re.sub(r'<a(.*?)>', r'<a\1 class="link" target="_blank">', html)
|
||||
html = re.sub(r'<a(.*?)class="(.*?)"(.*?)class="(.*?)"(.*?)>', r'<a\1class="\2 \4"\3\5>', html)
|
||||
html = re.sub(r'<a(.*?)target="(.*?)"(.*?)target="(.*?)"(.*?)>', r'<a\1target="\2"\3\5>', html)
|
||||
|
||||
# Split the HTML into paragraphs
|
||||
paragraphs = html.split('\n\n')
|
||||
|
||||
# Clean up and ensure each paragraph is properly wrapped
|
||||
cleaned_paragraphs = []
|
||||
|
||||
# Add the bold title as the first element if it exists
|
||||
if has_h1_title and bold_title:
|
||||
cleaned_paragraphs.append(bold_title)
|
||||
|
||||
for p in paragraphs:
|
||||
p = p.strip()
|
||||
if p:
|
||||
# If the paragraph doesn't already have <p> tags, add them
|
||||
if not (p.startswith('<') and not p.startswith('<p>')):
|
||||
p = f'<p>{p}</p>'
|
||||
cleaned_paragraphs.append(p)
|
||||
|
||||
return cleaned_paragraphs, title_line
|
||||
|
||||
|
||||
def insert_markdown_into_template(template_path: str, markdown_text: str) -> str:
|
||||
"""
|
||||
Insert parsed markdown content into the template HTML file.
|
||||
|
||||
Args:
|
||||
template_path (str): Path to the template HTML file
|
||||
markdown_text (str): The markdown text to convert and insert
|
||||
|
||||
Returns:
|
||||
str: Complete HTML with markdown content inserted
|
||||
"""
|
||||
# Parse markdown into HTML paragraphs
|
||||
html_paragraphs, title_line = markdown_to_html_paragraphs(markdown_text)
|
||||
|
||||
# Read the template
|
||||
with open(template_path, 'r') as f:
|
||||
template = f.read()
|
||||
|
||||
# Join paragraphs into a single string
|
||||
content_html = '\n'.join(html_paragraphs)
|
||||
|
||||
# Insert the content into the template
|
||||
complete_html = template.replace('{{ content }}', content_html)
|
||||
|
||||
# Replace {{ title }} placeholders with the extracted title
|
||||
complete_html = complete_html.replace('{{ title }}', title_line)
|
||||
|
||||
return complete_html
|
||||
|
||||
|
||||
def process_all_markdown_files():
|
||||
"""
|
||||
Process all markdown files in blog/md/ directory and generate HTML files in blog/html/.
|
||||
"""
|
||||
# Get all markdown files in blog/md/
|
||||
md_files = glob.glob("dist/blog/md/*.md")
|
||||
template_path = "dist/blog/template.html"
|
||||
|
||||
for md_file in md_files:
|
||||
# Extract base filename without extension
|
||||
base_name = os.path.basename(md_file)[:-3] # Remove .md extension
|
||||
html_file = f"dist/blog/html/{base_name}.html"
|
||||
|
||||
print(f"Processing {md_file} -> {html_file}")
|
||||
|
||||
try:
|
||||
# Read the markdown content
|
||||
with open(md_file, "r") as f:
|
||||
markdown_text = f.read()
|
||||
|
||||
# Generate HTML content
|
||||
complete_html = insert_markdown_into_template(template_path, markdown_text)
|
||||
|
||||
# Write HTML output
|
||||
with open(html_file, "w") as f:
|
||||
f.write(complete_html)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing {md_file}: {str(e)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
process_all_markdown_files()
|
||||
Loading…
Add table
Add a link
Reference in a new issue