|  | 
|  | 1 | +import re | 
|  | 2 | +from pathlib import Path | 
|  | 3 | + | 
|  | 4 | +# Mapping section titles to their corresponding filenames | 
# Mapping of README "## <title>" section headings to the output filename that
# each extracted section is written to by process_readme().
SECTION_MAPPING = {
    # NOTE(review): the empty title makes extract_section() match the first
    # "## " heading it finds — presumably intended as the landing page;
    # confirm this does not duplicate the "About the project" content.
    "": "home_page.md",
    "About the project": "about.md",
    "Installation": "installation.md",
    "Documentation": "docs.md",
    "License": "license.md",
    "Credits and acknowledgements": "credits.md",
    "Contact and feedback": "contact.md",
}
|  | 14 | + | 
|  | 15 | + | 
def extract_section(readme, section_title):
    """Extract the content between a ``## <section_title>`` heading and the
    next ``## `` heading (or end of text).

    The heading is anchored at the start of a line (``re.MULTILINE``) so that
    a literal "## " appearing mid-line — e.g. inside prose or a code sample —
    is not mistaken for a section heading.

    Returns the stripped section body, or "" when the heading is absent.
    """
    pattern = rf"^## {re.escape(section_title)}(.*?)(?=\n## |\Z)"
    match = re.search(pattern, readme, flags=re.DOTALL | re.MULTILINE)
    return match.group(1).strip() if match else ""
|  | 21 | + | 
|  | 22 | + | 
def extract_links_from_readme(readme):
    """Collect reference-style link definitions (``[ref]: url``) into a dict.

    Later definitions for the same reference overwrite earlier ones.
    """
    return dict(re.findall(r"\[([^\]]+)\]: (\S+)", readme))
|  | 33 | + | 
|  | 34 | + | 
def convert_gfm_to_sphinx(content, links):
    """Convert GitHub Flavored Markdown to Sphinx-style syntax."""

    def _admonition_to_directive(match):
        # Drop the leading "> " quote marker from every body line.
        body = re.sub(r"^> ", "", match.group(4), flags=re.MULTILINE).strip()
        # Triple braces: literal "{" + admonition name + literal "}" gives a
        # MyST directive such as ":::{NOTE}".
        return f"\n:::{{{match.group(2)}}}\n{body}\n:::\n"

    # Rewrite GFM admonition blocks (e.g. "> [!IMPORTANT]" / "> [!NOTE]").
    content = re.sub(
        r"(^|\n)> \[!(\w+)\]([^\n]*)((?:\n> [^\n]*)*)",
        _admonition_to_directive,
        content,
    )

    # Substitute each "[ref]" occurrence with "(url)"; trailing "[ref]: url"
    # definition lines thereby become "(url): url", which clean_trailing_links
    # later removes.
    for ref, url in links.items():
        content = re.sub(rf"\[{re.escape(ref)}\]", f"({url})", content)

    return content
|  | 51 | + | 
|  | 52 | + | 
def decrease_header_levels(content):
    """Promote every level-2..6 Markdown header by one level (## -> #)."""
    promoted = []
    for raw_line in content.splitlines():
        # A header line starts with 2-6 hashes followed by whitespace; the
        # anchored match guarantees the first character is a removable "#".
        if re.match(r"^(#{2,6})\s", raw_line):
            promoted.append(raw_line[1:])
        else:
            promoted.append(raw_line)
    return "\n".join(promoted)
|  | 65 | + | 
|  | 66 | + | 
def clean_trailing_links(content):
    """Remove leftover link-definition lines and collapse extra blank lines."""
    # Whole-line patterns for the three definition shapes left behind after
    # link substitution: "[label]: url", "(url): url" and "(mailto:..): ..".
    definition_patterns = (
        r"^\[.+?\]:\s+\S+$",
        r"^\(https?://[^\s)]+\):\s*https?://[^\s)]+$",
        r"^\(mailto:[^\s)]+\):\s*mailto:[^\s)]+$",
    )
    for pattern in definition_patterns:
        content = re.sub(pattern, "", content, flags=re.MULTILINE)
    # Collapse runs of blank lines and trim surrounding whitespace.
    return re.sub(r"\n{2,}", "\n\n", content).strip()
|  | 81 | + | 
|  | 82 | + | 
def process_readme(readme_path, output_dir):
    """Split README.md into per-section MyST files and convert CONTRIBUTING.md.

    Parameters
    ----------
    readme_path : str | Path
        Location of the README.md to split.
    output_dir : str | Path
        Directory that receives the generated .md files (created if missing).

    Raises
    ------
    ValueError
        If a section listed in SECTION_MAPPING is missing from the README.
    FileNotFoundError
        If CONTRIBUTING.md does not exist next to the README.
    """
    # Accept plain strings as well as Path objects; the original only wrapped
    # the read call, so str inputs crashed later at `readme_path.parent`.
    readme_path = Path(readme_path)
    output_dir = Path(output_dir)

    readme = readme_path.read_text(encoding="utf-8")

    # Link definitions are shared by every section of the README.
    links = extract_links_from_readme(readme)

    output_dir.mkdir(exist_ok=True, parents=True)

    for section_title, filename in SECTION_MAPPING.items():
        content = extract_section(readme, section_title)
        if not content:
            raise ValueError(f"Section '{section_title}' not found in README")
        myst_content = (
            f"## {section_title}\n\n{convert_gfm_to_sphinx(content, links)}"
        )
        # The contact page ends in link definitions that must not be published.
        if filename.lower() == "contact.md":
            myst_content = clean_trailing_links(myst_content)
        myst_content = decrease_header_levels(myst_content)
        # Write with an explicit encoding for platform-independent output.
        (output_dir / filename).write_text(myst_content, encoding="utf-8")
        print(f"Generated {filename}")

    # CONTRIBUTING.md lives next to the README and carries its own references.
    contrib_path = readme_path.parent / "CONTRIBUTING.md"
    try:
        raw_contrib = contrib_path.read_text(encoding="utf-8")
    except FileNotFoundError as e:
        raise FileNotFoundError(f"CONTRIBUTING.md not found at {contrib_path}") from e

    contrib_links = extract_links_from_readme(raw_contrib)
    contrib_converted = convert_gfm_to_sphinx(raw_contrib, contrib_links)
    # Remove trailing link definitions left behind by the substitution pass.
    contrib_converted = clean_trailing_links(contrib_converted)

    (output_dir / "contributing.md").write_text(contrib_converted, encoding="utf-8")
    print("Generated contributing.md")
|  | 123 | + | 
|  | 124 | + | 
if __name__ == "__main__":
    # README.md sits one directory above the directory containing this script.
    readme_file = Path(__file__).resolve().parent.parent / "README.md"
    process_readme(readme_file, Path("./sections_readme"))