First commit

2025-05-13 10:48:42 +02:00 · 2025-05-13 10:48:42 +02:00 · ed9566d057
commit ed9566d057
76 changed files with 4005 additions and 0 deletions
--- a/parser/md.py
+++ b/parser/md.py
@@ -0,0 +1,168 @@
+import glob
+import os
+import re
+from typing import List, Tuple
+
+import markdown
+
+
+def _add_link_attributes(match) -> str:
+    """Return the matched opening <a> tag with class "link" and a target attribute."""
+    attrs = match.group(1)
+    existing_class = re.search(r'class="(.*?)"', attrs)
+    if existing_class:
+        # Merge into the existing class attribute instead of emitting a second one
+        merged = f'class="{existing_class.group(1)} link"'
+        attrs = attrs[:existing_class.start()] + merged + attrs[existing_class.end():]
+    else:
+        attrs += ' class="link"'
+    # A target that is already present wins over the default
+    if 'target="' not in attrs:
+        attrs += ' target="_blank"'
+    return f'<a{attrs}>'
+
+
+def markdown_to_html_paragraphs(markdown_text: str) -> Tuple[List[str], str]:
+    """
+    Convert markdown text into a list of HTML blocks plus the title text.
+    Supports mathematical equations using LaTeX syntax.
+
+    The first line must start with '#'. It is removed from the markdown body
+    and emitted as a <p class="blog-title"> element at the head of the list.
+
+    Args:
+        markdown_text (str): The markdown text to convert
+
+    Returns:
+        Tuple[List[str], str]: The HTML blocks (title element first) and the
+        title text with its leading '#' characters and whitespace stripped
+
+    Raises:
+        ValueError: If the first line does not start with '#'
+    """
+    # Prefix relative "*.assets/" image paths with "/blog/md/" unless the path
+    # already starts with "md/" or "/"
+    markdown_text = re.sub(r'!\[(.*?)\]\((?!md/)([^/].*?\.assets/.*?)\)', r'![\1](/blog/md/\2)', markdown_text)
+
+    # The first line has to be the title heading
+    lines = markdown_text.split('\n')
+    first_line = lines[0].strip()
+    if not first_line.startswith('#'):
+        raise ValueError("No title found in the markdown file")
+
+    title_line = first_line.lstrip('#').strip()
+    bold_title = f'<p class="blog-title">{title_line}</p>'
+    # Remove the title from the markdown to avoid duplicate processing
+    markdown_text = '\n'.join(lines[1:])
+
+    # Pick the math extension: pymdownx.arithmatex when it can be imported,
+    # mdx_math otherwise
+    try:
+        import pymdownx.arithmatex  # noqa: F401 -- imported only to probe availability
+        math_extension = 'pymdownx.arithmatex'
+        extension_configs = {
+            'pymdownx.arithmatex': {
+                'generic': True  # Uses \(...\) for inline and \[...\] for display math
+            }
+        }
+    except ImportError:
+        math_extension = 'mdx_math'
+        extension_configs = {
+            'mdx_math': {
+                'enable_dollar_delimiter': True,  # Enable $...$ for inline math
+            }
+        }
+
+    extensions = [
+        'markdown.extensions.extra',  # For blockquotes and other features
+        'markdown.extensions.fenced_code',  # For code blocks
+        'markdown.extensions.codehilite',  # For syntax highlighting
+        'markdown.extensions.attr_list',  # For attributes
+        'markdown.extensions.md_in_html',  # For markdown inside HTML
+        math_extension,  # For math support
+    ]
+
+    # Convert markdown to HTML with math support
+    html = markdown.markdown(
+        markdown_text,
+        extensions=extensions,
+        extension_configs=extension_configs
+    )
+
+    # Unwrap images that sit alone inside a paragraph
+    html = re.sub(r'<p>\s*(<img[^>]+>)\s*</p>', r'\1', html, flags=re.IGNORECASE)
+    # Convert image followed by blockquote to figure with caption. The optional
+    # trailing "/" of a self-closing <img ... /> is dropped so the class
+    # attribute is not appended after it.
+    html = re.sub(
+        r'<img([^>]*?)\s*/?>\s*<blockquote>\s*<p>(.*?)</p>\s*</blockquote>',
+        r'<figure class="figure">\n  <img\1 class="figure-img img-fluid rounded">\n  <figcaption class="figure-caption">\2</figcaption>\n</figure>',
+        html,
+        flags=re.DOTALL
+    )
+
+    # Add "link" class and target="_blank" to all <a> tags. The \b keeps tags
+    # such as <abbr> or <article> from matching, and handling one tag per match
+    # keeps several links on the same line independent of each other.
+    html = re.sub(r'<a\b([^>]*)>', _add_link_attributes, html)
+
+    # Split the HTML on blank lines and wrap bare text chunks in <p> tags
+    # NOTE(review): a blank line inside a multi-line element (e.g. a code
+    # block) is also treated as a separator here -- confirm this is acceptable.
+    cleaned_paragraphs = [bold_title]
+    for chunk in html.split('\n\n'):
+        chunk = chunk.strip()
+        if not chunk:
+            continue
+        # Chunks that already start with a tag (including <p>) are kept as-is
+        if not chunk.startswith('<'):
+            chunk = f'<p>{chunk}</p>'
+        cleaned_paragraphs.append(chunk)
+
+    return cleaned_paragraphs, title_line
+
+
+def insert_markdown_into_template(template_path: str, markdown_text: str) -> str:
+    """
+    Insert parsed markdown content into the template HTML file.
+
+    Every "{{ content }}" placeholder in the template is replaced with the
+    converted HTML and every "{{ title }}" placeholder with the title text.
+
+    Args:
+        template_path (str): Path to the template HTML file
+        markdown_text (str): The markdown text to convert and insert
+
+    Returns:
+        str: Complete HTML with markdown content inserted
+
+    Raises:
+        ValueError: Propagated from markdown_to_html_paragraphs when the
+            markdown has no title line
+    """
+    # Parse markdown into HTML paragraphs
+    html_paragraphs, title_line = markdown_to_html_paragraphs(markdown_text)
+
+    # Read the template; "utf-8-sig" decodes UTF-8 and drops a leading BOM
+    with open(template_path, 'r', encoding='utf-8-sig') as f:
+        template = f.read()
+
+    replacements = {
+        'content': '\n'.join(html_paragraphs),
+        'title': title_line,
+    }
+    # Fill both placeholders in a single pass so that placeholder-like text
+    # inside the inserted content is never substituted again. A function is
+    # used as the replacement so backslashes in the content stay literal.
+    return re.sub(
+        r'\{\{ (content|title) \}\}',
+        lambda match: replacements[match.group(1)],
+        template,
+    )
+
+
+def process_all_markdown_files():
+    """
+    Process all markdown files in dist/blog/md/ and generate HTML files in dist/blog/html/.
+
+    Each file is rendered through dist/blog/template.html. A failure on one
+    file is printed and the remaining files are still processed.
+    """
+    template_path = "dist/blog/template.html"
+    output_dir = "dist/blog/html"
+
+    # Sorted so that files are processed in a stable order
+    md_files = sorted(glob.glob("dist/blog/md/*.md"))
+    if md_files:
+        # Make sure the output directory exists before the first write
+        os.makedirs(output_dir, exist_ok=True)
+
+    for md_file in md_files:
+        # Extract base filename without the .md extension
+        base_name = os.path.splitext(os.path.basename(md_file))[0]
+        html_file = f"{output_dir}/{base_name}.html"
+
+        print(f"Processing {md_file} -> {html_file}")
+
+        try:
+            # Read the markdown content; "utf-8-sig" drops a leading BOM that
+            # would otherwise hide the '#' of the title line
+            with open(md_file, "r", encoding="utf-8-sig") as f:
+                markdown_text = f.read()
+
+            # Generate HTML content
+            complete_html = insert_markdown_into_template(template_path, markdown_text)
+
+            # Write HTML output
+            with open(html_file, "w", encoding="utf-8") as f:
+                f.write(complete_html)
+
+        except Exception as e:
+            # Best effort: report the failure and continue with the next file
+            print(f"Error processing {md_file}: {e}")
+
+
+# Run the batch conversion only when this file is executed as a script
+if __name__ == "__main__":
+    process_all_markdown_files()