Skip to content

Commit c707527

Browse files
sayalaruano and enryH authored
✨ Add script to split readme and organize documentation (#29)
* ✨ Feat(docs/split_readme.py): Add script to split readme and organize documentation * 🎨 Add split_readme script on the conf.py file and update index * 🔧 activate admonitions in myst_nb - Rendering of tool tips etc has to be activated * Remove white space above overview Co-authored-by: Henry Webel <[email protected]> --------- Co-authored-by: Henry Webel <[email protected]>
1 parent e702489 commit c707527

File tree

3 files changed

+158
-3
lines changed

3 files changed

+158
-3
lines changed

docs/conf.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
# https://myst-nb.readthedocs.io/en/latest/computation/execute.html
4646
nb_execution_mode = "auto"
4747

48-
myst_enable_extensions = ["dollarmath", "amsmath"]
48+
myst_enable_extensions = ["dollarmath", "amsmath", "colon_fence"]
4949

5050
# Plotly support through require javascript library
5151
# https://myst-nb.readthedocs.io/en/latest/render/interactive.html#plotly
@@ -143,6 +143,14 @@
143143
PROJECT_ROOT = Path(__file__).parent.parent
144144
PACKAGE_ROOT = PROJECT_ROOT / "src" / "vuecore"
145145

146+
def run_split_readme(_):
    """Split the project README into per-section files for the docs.

    Registered as a ``builder-inited`` handler in ``setup``; the single
    positional argument supplied by the event is ignored.
    """
    print("[conf.py] Splitting README.md into sections...")
    # Imported lazily so the helper module is only loaded when the hook runs.
    from split_readme import process_readme

    process_readme(
        PROJECT_ROOT / "README.md",
        PROJECT_ROOT / "docs" / "sections_readme",
    )
153+
146154
def run_apidoc(_):
147155
from sphinx.ext import apidoc
148156

@@ -161,4 +169,5 @@ def run_apidoc(_):
161169
)
162170

163171
def setup(app):
    """Hook the pre-build helpers into the ``builder-inited`` event."""
    # Registration order is kept: the README split is connected before apidoc.
    for handler in (run_split_readme, run_apidoc):
        app.connect("builder-inited", handler)

docs/index.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
<!-- https://myst-parser.readthedocs.io/en/latest/faq/index.html
22
#include-a-file-from-outside-the-docs-folder-like-readme-md -->
33

4-
```{include} ../README.md
5-
:start-line: 0
4+
```{include} ./sections_readme/home_page.md
65
:relative-docs: docs
76
:relative-images:
87
```
98

9+
```{toctree}
10+
:maxdepth: 1
11+
:caption: Overview
12+
13+
sections_readme/about
14+
sections_readme/installation
15+
sections_readme/execution
16+
sections_readme/license
17+
```
18+
1019
```{toctree}
1120
:maxdepth: 1
1221
:caption: API Usage Examples
@@ -25,6 +34,15 @@ api_examples/box_plot
2534
reference/vuecore
2635
```
2736

37+
```{toctree}
38+
:maxdepth: 1
39+
:caption: Project Support
40+
41+
sections_readme/contributing
42+
sections_readme/credits
43+
sections_readme/contact
44+
```
45+
2846
```{toctree}
2947
:maxdepth: 1
3048
:caption: Extra Materials

docs/split_readme.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import re
2+
from pathlib import Path
3+
4+
# README level-2 heading text -> name of the file generated for that section.
# Iteration order is the order in which the files are written.
SECTION_MAPPING = {
    (
        "![VueCore Logo]"
        "(https://raw.githubusercontent.com/Multiomics-Analytics-Group/"
        "vuecore/HEAD/docs/images/logo/vuecore_logo.svg)"
    ): "home_page.md",
    "About the project": "about.md",
    "Installation": "installation.md",
    "Documentation": "docs.md",
    "License": "license.md",
    "Credits and acknowledgements": "credits.md",
    "Contact and feedback": "contact.md",
}
14+
15+
16+
def extract_section(readme, section_title):
    """Return the body of the ``## <section_title>`` section of ``readme``.

    The heading must sit on its own line: it has to start at the beginning of
    a line and be followed only by optional spaces/tabs. This prevents a match
    inside a deeper heading (``### Installation``) or on a heading that merely
    starts with the title (``## Licenses`` for ``License``).

    Args:
        readme: Full markdown text to search.
        section_title: Heading text without the leading ``## ``; it is
            matched literally (regex metacharacters are escaped).

    Returns:
        The text between the heading and the next level-2 heading (or the end
        of the text), stripped of surrounding whitespace. An empty string is
        returned when the heading is absent or its section is empty.
    """
    pattern = rf"^## {re.escape(section_title)}[ \t]*$(.*?)(?=\n## |\Z)"
    # MULTILINE anchors ^/$ to individual lines; DOTALL lets the lazy body
    # capture span several lines up to the next "## " heading.
    match = re.search(pattern, readme, flags=re.DOTALL | re.MULTILINE)
    return match.group(1).strip() if match else ""
21+
22+
23+
def extract_links_from_readme(readme):
    """Collect markdown link reference definitions into a dictionary.

    A definition is a line of the form ``[label]: target``. Only lines that
    start with the definition (after at most three spaces of indentation) are
    considered, so inline text such as ``see [note]: details`` is not mistaken
    for a definition. Any run of spaces/tabs may separate the colon from the
    target.

    Args:
        readme: Markdown text to scan.

    Returns:
        A dict mapping each label to its target. When a label is defined more
        than once, the last definition wins.
    """
    link_pattern = r"^ {0,3}\[([^\]]+)\]:[ \t]+(\S+)"
    # findall yields (label, target) pairs, which dict() consumes directly.
    return dict(re.findall(link_pattern, readme, flags=re.MULTILINE))
33+
34+
35+
def convert_gfm_to_sphinx(content, links):
    """Convert GitHub Flavored Markdown constructs to MyST/Sphinx syntax.

    Two rewrites are applied:

    1. GFM alerts (``> [!NOTE]`` followed by ``>``-prefixed lines) become
       colon-fenced directives (``:::{NOTE} ... :::``). Bare ``>`` lines
       inside the alert are kept as blank lines instead of cutting the
       admonition short. Text on the ``[!TYPE]`` line itself is dropped.
    2. Every literal ``[ref]`` whose ``ref`` is a key of ``links`` is replaced
       by ``(url)``, turning ``[text][ref]`` into ``[text](url)``. The
       replacement is purely textual, so a definition line ``[ref]: url``
       becomes ``(url): url``; ``clean_trailing_links`` removes such lines.

    Args:
        content: Markdown text to convert.
        links: Mapping of reference label to target URL.

    Returns:
        The converted markdown text.
    """

    def _to_directive(match):
        # Drop the leading "> " (or bare ">") from each continuation line.
        body = re.sub(r"^> ?", "", match.group(4), flags=re.MULTILINE).strip()
        return f"\n:::{{{match.group(2)}}}\n{body}\n:::\n"

    content = re.sub(
        r"(^|\n)> \[!(\w+)\]([^\n]*)((?:\n>[^\n]*)*)",
        _to_directive,
        content,
    )

    # Plain string replacement: the URL must not be interpreted as a regex
    # replacement template (backslashes in a URL would raise or be mangled).
    for ref, url in links.items():
        content = content.replace(f"[{ref}]", f"({url})")

    return content
51+
52+
53+
def decrease_header_levels(content):
    """Promote every ATX heading of level 2-6 by one level.

    ``## Title`` becomes ``# Title``, ``### Sub`` becomes ``## Sub`` and so
    on; level-1 headings are left untouched. Lines inside fenced code blocks
    (backtick or tilde fences) are never modified, so shell comments such as
    ``## install`` in a code sample keep their text.

    Args:
        content: Markdown text.

    Returns:
        The text with adjusted headings. Lines are re-joined with ``\\n``, so
        a trailing newline in ``content`` is not preserved.
    """
    heading = re.compile(r"^(#{2,6})\s")
    fence = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")

    open_fence = None  # marker string of the fence currently open, if any
    new_lines = []
    for line in content.splitlines():
        fence_match = fence.match(line)
        if fence_match:
            marker, rest = fence_match.groups()
            if open_fence is None:
                # A backtick fence cannot carry backticks in its info string;
                # such a line is an inline code span, not a fence opener.
                if not (marker[0] == "`" and "`" in rest):
                    open_fence = marker
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                open_fence = None
            new_lines.append(line)
            continue

        if open_fence is None and heading.match(line):
            # Dropping the first "#" lowers the heading level by one.
            line = line[1:]
        new_lines.append(line)
    return "\n".join(new_lines)
65+
66+
67+
def clean_trailing_links(content):
    """Remove link-definition lines and collapse surplus blank lines.

    Removed line shapes:

    * ``[label]: target`` - untouched reference definitions;
    * ``(url): url`` - what a definition turns into after
      ``convert_gfm_to_sphinx`` has substituted ``[label]`` with ``(url)``.
      Besides ``http(s)`` and ``mailto`` targets, any line whose
      parenthesised part repeats verbatim after the colon is removed, which
      also covers relative paths and ``#anchor`` targets.

    Args:
        content: Markdown text.

    Returns:
        The cleaned text with runs of blank lines reduced to a single blank
        line and surrounding whitespace stripped.
    """
    removable_lines = (
        # [label]: link style
        r"^\[.+?\]:\s+\S+$",
        # (url): url style for http(s) and mailto targets
        r"^\(https?://[^\s)]+\):\s*https?://[^\s)]+$",
        r"^\(mailto:[^\s)]+\):\s*mailto:[^\s)]+$",
        # (target): target for any other scheme; the backreference demands
        # both sides be identical, keeping ordinary "(a): b" prose intact
        r"^\(([^\s)]+)\):[ \t]*\1[ \t]*$",
    )
    for pattern in removable_lines:
        content = re.sub(pattern, "", content, flags=re.MULTILINE)
    # Collapse the blank lines left behind by the removals.
    return re.sub(r"\n{2,}", "\n\n", content).strip()
81+
82+
83+
def process_readme(readme_path, output_dir):
    """Split the README into per-section MyST files and convert CONTRIBUTING.md.

    For every entry of ``SECTION_MAPPING`` the matching ``##`` section is
    extracted, converted with ``convert_gfm_to_sphinx``, promoted by one
    heading level and written to ``output_dir``. ``CONTRIBUTING.md`` next to
    the README is converted with its own link references and written as
    ``contributing.md``. All files are read and written as UTF-8.

    Args:
        readme_path: Path (``str`` or ``Path``) of the README file.
        output_dir: Directory (``str`` or ``Path``) receiving the generated
            files; created, including parents, when missing.

    Raises:
        ValueError: If a mapped section is missing from the README or empty.
        FileNotFoundError: If ``CONTRIBUTING.md`` does not exist next to the
            README.
    """
    # Accept plain strings as well as Path objects for both arguments.
    readme_path = Path(readme_path)
    output_dir = Path(output_dir)

    readme = readme_path.read_text(encoding="utf-8")

    # Extract links from README
    links = extract_links_from_readme(readme)

    # Create output directory
    output_dir.mkdir(exist_ok=True, parents=True)

    for section_title, filename in SECTION_MAPPING.items():
        content = extract_section(readme, section_title)
        if not content:
            raise ValueError(f"Section '{section_title}' not found in README")

        myst_content = f"## {section_title}\n\n{convert_gfm_to_sphinx(content, links)}"
        if filename.lower() == "contact.md":
            myst_content = clean_trailing_links(myst_content)
        myst_content = decrease_header_levels(myst_content)
        # Explicit encoding: the platform default may not be able to encode
        # non-ASCII README content.
        (output_dir / filename).write_text(myst_content, encoding="utf-8")
        print(f"Generated {filename}")

    # Include CONTRIBUTING.md with its own link references
    contrib_path = readme_path.parent / "CONTRIBUTING.md"
    try:
        # Only the read is guarded, so a failure while writing the output is
        # not misreported as a missing CONTRIBUTING.md.
        raw_contrib = contrib_path.read_text(encoding="utf-8")
    except FileNotFoundError as e:
        raise FileNotFoundError(f"CONTRIBUTING.md not found at {contrib_path}") from e

    contrib_links = extract_links_from_readme(raw_contrib)

    # Convert content
    contrib_converted = convert_gfm_to_sphinx(raw_contrib, contrib_links)

    # Remove trailing link definitions
    contrib_converted = clean_trailing_links(contrib_converted)

    # Write output
    (output_dir / "contributing.md").write_text(contrib_converted, encoding="utf-8")
    print("Generated contributing.md")
123+
124+
125+
if __name__ == "__main__":
    # Anchor both paths to this file's location so the script behaves the
    # same regardless of the directory it is launched from.
    docs_dir = Path(__file__).resolve().parent
    default_readme = docs_dir.parent / "README.md"
    output_sections_readme = docs_dir / "sections_readme"
    process_readme(default_readme, output_sections_readme)

0 commit comments

Comments
 (0)