diff --git a/hack/mkdocs/__tests__/test_breakage.py b/hack/mkdocs/__tests__/test_breakage.py new file mode 100644 index 0000000000..0ba26a882c --- /dev/null +++ b/hack/mkdocs/__tests__/test_breakage.py @@ -0,0 +1,414 @@ +# Copyright 2025 The Kubernetes Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import shutil +import sys +import unittest +from pathlib import Path +from types import SimpleNamespace + +sys.path.insert(0, str(Path(__file__).parent)) + +import hack.mkdocs_linking as linking +from hack.mkdocs_linking import on_config, prepare_docs + + +class TestLinkBreakageScenarios(unittest.TestCase): + """Tests focused on how links break and how to prevent/handle breakage.""" + + def setUp(self) -> None: + """Set up test environment.""" + self.test_dir = Path("./temp_test_link_breakage") + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + self.original_docs_dir = linking.DOCS_DIR + self.original_redirect_file = linking.REDIRECT_MAP_FILE + + linking.DOCS_DIR = self.test_dir / "docs" + linking.REDIRECT_MAP_FILE = self.test_dir / "redirect_map.json" + linking.DOCS_DIR.mkdir(parents=True, exist_ok=True) + + def tearDown(self) -> None: + """Clean up test environment.""" + linking.DOCS_DIR = self.original_docs_dir + linking.REDIRECT_MAP_FILE = self.original_redirect_file + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + # === ID COLLISION AND CONFLICTS === + + def test_id_collision_when_files_move_to_same_path_structure(self) 
-> None: + """Test when multiple files would generate the same ID after restructuring.""" + # Initial structure + (linking.DOCS_DIR / "user-guide.md").write_text("# User Guide") + (linking.DOCS_DIR / "admin").mkdir() + (linking.DOCS_DIR / "admin" / "guide.md").write_text("# Admin Guide") + + prepare_docs() + + # Simulate restructuring where both files move to create ID collision + # user-guide.md -> guides/user.md (would generate 'guides-user') + # admin/guide.md -> guides/user.md (would also generate 'guides-user') + + # This is a real scenario: two different guides both get moved to guides/user.md + # at different times, or one file replaces another + + original_map = json.loads(linking.REDIRECT_MAP_FILE.read_text()) + + # Verify we have the expected initial state + self.assertEqual(original_map["user-guide"], "user-guide.md") + self.assertEqual(original_map["admin-guide"], "admin/guide.md") + + # Now simulate what happens if both files are moved to the same new location + # This would break internal_link() because two IDs point to the same file + (linking.DOCS_DIR / "guides").mkdir() + + # File 1 moves first + shutil.move( + linking.DOCS_DIR / "user-guide.md", linking.DOCS_DIR / "guides" / "user.md" + ) + + # File 2 overwrites it (common in refactoring) + shutil.move( + linking.DOCS_DIR / "admin" / "guide.md", + linking.DOCS_DIR / "guides" / "user.md", + ) + + # Update the moved file to have both old IDs (impossible situation) + content = """--- +id: user-guide +old_id: admin-guide +--- +# Merged Guide""" + (linking.DOCS_DIR / "guides" / "user.md").write_text(content) + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace( + file=SimpleNamespace(src_path="guides/user.md"), url="/guides/user/" + ) + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + # The internal_link macro should handle this somehow + result_config = on_config(mock_config) + + if ( + "internal_link" + in 
result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # Both old IDs should resolve to the same page (or one should fail gracefully) + try: + url1 = internal_link("user-guide") + self.assertEqual(url1, "/guides/user/") + except ValueError: + pass # Acceptable if it fails gracefully + + # The admin-guide ID no longer exists as a separate page + with self.assertRaises(ValueError): + internal_link("admin-guide") + + def test_id_changes_during_refactoring(self) -> None: + """Test when someone manually changes IDs in frontmatter, breaking existing links.""" + # Initial setup + (linking.DOCS_DIR / "api.md").write_text("""--- +id: api-reference +title: API Reference +--- +# API Reference""") + + (linking.DOCS_DIR / "tutorial.md").write_text("""--- +id: getting-started +title: Getting Started +--- +# Getting Started + +See the {{ internal_link('api-reference') }} for details.""") + + prepare_docs() + + # Someone manually changes the API page ID during editing + (linking.DOCS_DIR / "api.md").write_text("""--- +id: api-docs-v2 +title: API Reference +--- +# API Reference""") + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace(file=SimpleNamespace(src_path="api.md"), url="/api/"), + SimpleNamespace( + file=SimpleNamespace(src_path="tutorial.md"), url="/tutorial/" + ), + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + result_config = on_config(mock_config) + + if ( + "internal_link" + in result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # Old ID should fail + with self.assertRaises(ValueError) as context: + internal_link("api-reference") + self.assertIn("api-reference", str(context.exception)) + + # New ID should work + self.assertEqual(internal_link("api-docs-v2"), "/api/") + + 
def test_circular_id_references_and_dependency_loops(self) -> None: + """Test handling of circular references in ID mappings.""" + # This can happen if redirect map gets corrupted or manually edited + + # Create initial files + (linking.DOCS_DIR / "a.md").write_text("---\nid: page-a\n---\n# Page A") + (linking.DOCS_DIR / "b.md").write_text("---\nid: page-b\n---\n# Page B") + + prepare_docs() + + # Manually corrupt the redirect map to create circular references + corrupt_map = { + "page-a": "b.md", # page-a points to b.md + "page-b": "a.md", # page-b points to a.md (circular!) + "page-c": "nonexistent.md", # broken reference + } + linking.REDIRECT_MAP_FILE.write_text(json.dumps(corrupt_map)) + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace(file=SimpleNamespace(src_path="a.md"), url="/a/"), + SimpleNamespace(file=SimpleNamespace(src_path="b.md"), url="/b/"), + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + # Should handle corrupted redirect map gracefully + result_config = on_config(mock_config) + + # The macro should work based on actual current file IDs, not the corrupt map + if ( + "internal_link" + in result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # Should work based on actual current frontmatter, not redirect map + self.assertEqual(internal_link("page-a"), "/a/") + self.assertEqual(internal_link("page-b"), "/b/") + + # === FILE SYSTEM CHANGES THAT BREAK LINKS === + + def test_case_sensitivity_issues_across_filesystems(self) -> None: + """Test link breakage due to case sensitivity differences.""" + # Create file with specific casing + (linking.DOCS_DIR / "API-Guide.md").write_text( + "---\nid: API-Guide\n---\n# API Guide" + ) + + prepare_docs() + + # Simulate file being renamed with different case (common on case-insensitive filesystems) + original_content = (linking.DOCS_DIR 
/ "API-Guide.md").read_text() + (linking.DOCS_DIR / "API-Guide.md").unlink() + (linking.DOCS_DIR / "api-guide.md").write_text(original_content) + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace( + file=SimpleNamespace(src_path="api-guide.md"), url="/api-guide/" + ) + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + result_config = on_config(mock_config) + + if ( + "internal_link" + in result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # The ID should still work (case-sensitive match required) + self.assertEqual(internal_link("API-Guide"), "/api-guide/") + + def test_unicode_normalization_issues(self) -> None: + """Test link breakage due to Unicode normalization differences.""" + import unicodedata + + # Create file with Unicode characters + # Using different Unicode normalization forms that look the same + filename1 = "café.md" # é as single character + filename2 = unicodedata.normalize("NFD", "café.md") # é as e + combining accent + + # These look the same but are different at byte level + self.assertNotEqual(filename1, filename2) + + # Create file with one form + (linking.DOCS_DIR / filename1).write_text( + "---\nid: cafe-menu\n---\n# Café Menu" + ) + + prepare_docs() + + # File system or Git might change the normalization + original_content = (linking.DOCS_DIR / filename1).read_text() + (linking.DOCS_DIR / filename1).unlink() + (linking.DOCS_DIR / filename2).write_text(original_content) + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace(file=SimpleNamespace(src_path=filename2), url="/cafe/") + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + result_config = on_config(mock_config) + + if ( + "internal_link" + in result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = 
result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # Should still work despite Unicode normalization change + self.assertEqual(internal_link("cafe-menu"), "/cafe/") + + def test_redirect_map_corruption_scenarios(self) -> None: + """Test various ways the redirect map can become corrupted.""" + # Create initial files + (linking.DOCS_DIR / "page.md").write_text("---\nid: test-page\n---\n# Test") + + prepare_docs() + + # Test various corruption scenarios + corruption_scenarios = [ + '{"invalid": json syntax}', # Invalid JSON + '{"valid": "json", "but": "wrong", "structure": true}', # Wrong structure + "not json at all", # Not JSON + "", # Empty file + "{}", # Empty but valid JSON + '{"key-with-no-value":}', # Malformed JSON + '{"unicode-test": "café\\ud83d\\ude00"}', # Unicode issues + ] + + for i, corrupt_content in enumerate(corruption_scenarios): + with self.subTest(scenario=i): + # Corrupt the redirect map + linking.REDIRECT_MAP_FILE.write_text(corrupt_content) + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace( + file=SimpleNamespace(src_path="page.md"), url="/page/" + ) + ], + "plugins": {"macros": {"config": {"python_macros": {}}}}, + } + + # Should handle all corruption gracefully + try: + result_config = on_config(mock_config) + self.assertIsNotNone(result_config) + except Exception as e: + # Should not crash with unhandled exceptions + self.assertIsInstance( + e, (json.JSONDecodeError, KeyError, ValueError) + ) + + def test_internal_link_macro_with_invalid_inputs(self) -> None: + """Test internal_link macro with various invalid inputs that could break pages.""" + (linking.DOCS_DIR / "test.md").write_text("---\nid: test-page\n---\n# Test") + + # Run prepare_docs first to create the redirect map + prepare_docs() + + mock_config = { + "docs_dir": str(linking.DOCS_DIR), + "pages": [ + SimpleNamespace(file=SimpleNamespace(src_path="test.md"), url="/test/") + ], + "plugins": {"macros": 
{"config": {"python_macros": {}}}}, + } + + result_config = on_config(mock_config) + + if ( + "internal_link" + in result_config["plugins"]["macros"]["config"]["python_macros"] + ): + internal_link = result_config["plugins"]["macros"]["config"][ + "python_macros" + ]["internal_link"] + + # Test various invalid inputs that could come from template errors + invalid_inputs = [ + None, # None value + "", # Empty string + " ", # Whitespace only + "non-existent-page", # Non-existent ID + "test page", # Spaces in ID + "test/page", # Slashes in ID + "test-page\n", # Newlines + 123, # Non-string type + ["test-page"], # List instead of string + {"id": "test-page"}, # Dict instead of string + ] + + for invalid_input in invalid_inputs: + with self.subTest(input=repr(invalid_input)): + try: + result = internal_link(invalid_input) + # If it somehow succeeds, result should be reasonable + self.assertIsInstance(result, str) + self.assertTrue(result.startswith("/")) + except (ValueError, TypeError, AttributeError) as e: + # Expected for invalid inputs + self.assertIsInstance( + e, (ValueError, TypeError, AttributeError) + ) + except Exception as e: + # Should not crash with unexpected exceptions + self.fail( + f"Unexpected exception for input {invalid_input}: {e}" + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/hack/mkdocs/__tests__/test_cli.py b/hack/mkdocs/__tests__/test_cli.py new file mode 100644 index 0000000000..ee52cae276 --- /dev/null +++ b/hack/mkdocs/__tests__/test_cli.py @@ -0,0 +1,145 @@ +# Copyright 2025 The Kubernetes Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +import hack.mkdocs_linking as linking + + +class TestCommandLineInterface(unittest.TestCase): + docs_path: Path + + def setUp(self) -> None: + """Set up a temporary directory structure for each test.""" + self.test_dir = Path("./temp_test_convert_links") + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + self.docs_path = self.test_dir / "docs" + self.redirect_map_file = self.test_dir / "redirect_map.json" + self.docs_path.mkdir(parents=True) + + self.linking_module = linking # module object imported above; a "linking" key is not guaranteed in sys.modules + self.original_globals = { + "DOCS_DIR": self.linking_module.DOCS_DIR, + "REDIRECT_MAP_FILE": self.linking_module.REDIRECT_MAP_FILE, + } + self.linking_module.DOCS_DIR = self.docs_path # type: ignore + self.linking_module.REDIRECT_MAP_FILE = self.redirect_map_file # type: ignore + + def tearDown(self) -> None: + """Restore the patched module globals and remove the temporary directory.""" + for key, value in self.original_globals.items(): + setattr(self.linking_module, key, value) + shutil.rmtree(self.test_dir, ignore_errors=True) + + def test_main_handles_prepare_docs_exceptions(self) -> None: + """Test main() handles exceptions from prepare_docs gracefully.""" + # Arrange: Mock prepare_docs to raise an exception + original_prepare_docs = linking.prepare_docs + + def failing_prepare_docs(docs_dir_path=None): + raise Exception("Test exception from prepare_docs") + + linking.prepare_docs = failing_prepare_docs + + import sys + + original_argv = sys.argv + sys.argv = ["linking.py", "--prepare"] + + try: + # Act & Assert: Exception should propagate (this is expected behavior) + with self.assertRaises(Exception) as context: + linking.main() + + self.assertIn("Test exception from 
prepare_docs", str(context.exception)) + + finally: + # Restore everything + linking.prepare_docs = original_prepare_docs + sys.argv = original_argv + + def test_main_with_prepare_argument(self) -> None: + """Test main() function when called with --prepare argument.""" + # Arrange: Create some test files + (self.docs_path / "test.md").write_text("# Test Document") + (self.docs_path / "guide.md").write_text("# Guide Document") + + # Mock sys.argv to simulate command line arguments + import sys + + original_argv = sys.argv + sys.argv = ["linking.py", "--prepare", "--docs-dir", str(self.docs_path)] + + try: + # Act: Call main function + linking.main() + + # Assert: Verify that prepare_docs was executed + self.assertTrue(self.redirect_map_file.exists()) + redirect_map = json.loads(self.redirect_map_file.read_text()) + self.assertIn("test", redirect_map) + self.assertIn("guide", redirect_map) + + finally: + # Restore original argv + sys.argv = original_argv + + def test_prepare_docs_called_correctly(self) -> None: + """Test that prepare_docs is called when --prepare is used.""" + # Arrange: Create test files and mock prepare_docs + (self.docs_path / "sample.md").write_text("# Sample") + + original_prepare_docs = linking.prepare_docs + prepare_docs_called = False + + def mock_prepare_docs(docs_dir_path=None): + nonlocal prepare_docs_called + prepare_docs_called = True + # Call the original function to ensure it works + original_prepare_docs(docs_dir_path) + + linking.prepare_docs = mock_prepare_docs + + import sys + + original_argv = sys.argv + # Pass docs_dir to ensure the redirect map is created in the temp folder + sys.argv = ["linking.py", "--prepare", "--docs-dir", str(self.docs_path)] + + try: + # Act: Call main + linking.main() + + # Assert: Verify prepare_docs was called + self.assertTrue(prepare_docs_called) + + # Verify it actually worked + self.assertTrue(self.redirect_map_file.exists()) + + finally: + # Restore everything + linking.prepare_docs = 
original_prepare_docs + sys.argv = original_argv + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/hack/mkdocs/__tests__/test_convert.py b/hack/mkdocs/__tests__/test_convert.py new file mode 100644 index 0000000000..24a2807785 --- /dev/null +++ b/hack/mkdocs/__tests__/test_convert.py @@ -0,0 +1,150 @@ +# Copyright 2025 The Kubernetes Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +import hack.mkdocs_linking as linking + + +class TestConvertFromRelativeLinks(unittest.TestCase): + """Tests for the convert_internal_links function.""" + + test_dir: Path + docs_path: Path + redirect_map_file: Path + linking_module: object + original_globals: dict + + def setUp(self) -> None: + """Set up a temporary directory structure for each test.""" + self.test_dir = Path("./temp_test_convert_links") + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + self.docs_path = self.test_dir / "docs" + self.redirect_map_file = self.test_dir / "redirect_map.json" + self.docs_path.mkdir(parents=True) + + self.linking_module = linking # module object imported above; a "linking" key is not guaranteed in sys.modules + self.original_globals = { + "DOCS_DIR": self.linking_module.DOCS_DIR, + "REDIRECT_MAP_FILE": self.linking_module.REDIRECT_MAP_FILE, + } + self.linking_module.DOCS_DIR = self.docs_path # type: ignore + self.linking_module.REDIRECT_MAP_FILE = self.redirect_map_file # type: ignore + + def tearDown(self) -> None: + """Restore the patched module globals and remove the temporary directory.""" + for key, value in self.original_globals.items(): + setattr(self.linking_module, key, value) + shutil.rmtree(self.test_dir, ignore_errors=True) + + def 
test_basic_link_conversion(self) -> None: + """Test that a simple relative link is converted to a macro.""" + # Arrange + (self.docs_path / "index.md").write_text("Link to [About](./about.md).") + (self.docs_path / "about.md").write_text("This is the about page.") + linking.prepare_docs(str(self.docs_path)) + + # Act + linking.convert_internal_links(str(self.docs_path)) + + # Assert + content = (self.docs_path / "index.md").read_text() + expected = '---\nid: index\n---\nLink to [About]({{ internal_link("about") }}).' + self.assertEqual(content, expected) + + def test_file_with_no_links(self) -> None: + """Test that a file with no links is not modified.""" + # Arrange + original_content = "This document has no links. Just plain text." + (self.docs_path / "no-links.md").write_text(original_content) + linking.prepare_docs(str(self.docs_path)) + + # Act + linking.convert_internal_links(str(self.docs_path)) + + # Assert + final_content = (self.docs_path / "no-links.md").read_text() + expected_content = "---\nid: no-links\n---\n" + original_content + self.assertEqual(final_content, expected_content) + + def test_handles_complex_relative_paths(self) -> None: + """Test conversion of links with complex relative paths like ../..""" + # Arrange + (self.docs_path / "guides" / "advanced").mkdir(parents=True) + (self.docs_path / "api" / "v1").mkdir(parents=True) + + (self.docs_path / "guides" / "advanced" / "config.md").write_text( + "See the [Auth API](../../api/v1/auth.md) for details." + ) + (self.docs_path / "api" / "v1" / "auth.md").write_text("Auth API docs.") + linking.prepare_docs(str(self.docs_path)) + + # Act + linking.convert_internal_links(str(self.docs_path)) + + # Assert + content = (self.docs_path / "guides" / "advanced" / "config.md").read_text() + expected = '---\nid: guides-advanced-config\n---\nSee the [Auth API]({{ internal_link("api-v1-auth") }}) for details.' 
+ self.assertEqual(content, expected) + + def test_idempotency_does_not_reconvert_links(self) -> None: + """Test that running the conversion twice doesn't change already converted links.""" + # Arrange + (self.docs_path / "index.md").write_text("Link to [About](./about.md).") + (self.docs_path / "about.md").write_text("This is the about page.") + linking.prepare_docs(str(self.docs_path)) + + # Act + linking.convert_internal_links(str(self.docs_path)) # First run + content_after_first_run = (self.docs_path / "index.md").read_text() + + linking.convert_internal_links(str(self.docs_path)) # Second run + content_after_second_run = (self.docs_path / "index.md").read_text() + + # Assert + expected = '---\nid: index\n---\nLink to [About]({{ internal_link("about") }}).' + self.assertEqual(content_after_first_run, expected) + self.assertEqual( + content_after_second_run, + expected, + "Content should not change on the second run.", + ) + + def test_leaves_broken_links_unchanged(self) -> None: + """Test that a link to a non-existent .md file is not converted.""" + # Arrange + original_content = "This is a [Broken Link](./nonexistent.md)." + (self.docs_path / "index.md").write_text(original_content) + linking.prepare_docs(str(self.docs_path)) + + # Act + linking.convert_internal_links(str(self.docs_path)) + + # Assert + final_content = (self.docs_path / "index.md").read_text() + expected_content = "---\nid: index\n---\n" + original_content + self.assertEqual( + final_content, expected_content, "Broken link should not be modified." + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/hack/mkdocs/__tests__/test_fontmatter.py b/hack/mkdocs/__tests__/test_fontmatter.py new file mode 100644 index 0000000000..80b3b8b20b --- /dev/null +++ b/hack/mkdocs/__tests__/test_fontmatter.py @@ -0,0 +1,161 @@ +# Copyright 2025 The Kubernetes Authors. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +import hack.mkdocs_linking as linking +from hack.mkdocs_linking import prepare_docs + + +class TestYAMLFrontmatterEdgeCases(unittest.TestCase): + """Tests focused on YAML frontmatter parsing edge cases.""" + + def setUp(self) -> None: + """Set up test environment.""" + self.test_dir = Path("./temp_test_yaml") + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + self.original_docs_dir = linking.DOCS_DIR + self.original_redirect_file = linking.REDIRECT_MAP_FILE + + linking.DOCS_DIR = self.test_dir / "docs" + linking.REDIRECT_MAP_FILE = self.test_dir / "redirect_map.json" + linking.DOCS_DIR.mkdir(parents=True, exist_ok=True) + + def tearDown(self) -> None: + """Clean up test environment.""" + linking.DOCS_DIR = self.original_docs_dir + linking.REDIRECT_MAP_FILE = self.original_redirect_file + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + def test_malformed_yaml_frontmatter(self) -> None: + """Test handling of various malformed YAML frontmatter.""" + malformed_files = { + "unclosed-quotes.md": """--- +title: "This quote is never closed +id: broken-yaml +--- +# Content""", + "invalid-structure.md": """--- +title: Valid Title +invalid-yaml: [unclosed list +id: another-broken +--- +# Content""", + "wrong-delimiters.md": """+++ +title: Hugo-style frontmatter +id: 
wrong-format ++++ +# Content""", + "no-end-delimiter.md": """--- +title: Missing end delimiter +id: incomplete +# This should be treated as content""", + } + + for filename, content in malformed_files.items(): + (linking.DOCS_DIR / filename).write_text(content) + + # Should handle malformed YAML gracefully + prepare_docs() + + # Check that redirect map was created (files with valid structure should work) + self.assertTrue(linking.REDIRECT_MAP_FILE.exists()) + redirect_map = json.loads(linking.REDIRECT_MAP_FILE.read_text()) + + # Files with malformed YAML should get auto-generated IDs or extracted IDs + self.assertIn("broken-yaml", redirect_map) # From unclosed-quotes.md + self.assertIn("another-broken", redirect_map) # From invalid-structure.md + self.assertIn("wrong-delimiters", redirect_map) + self.assertIn("no-end-delimiter", redirect_map) + + def test_complex_yaml_structures(self) -> None: + """Test handling of complex YAML structures in frontmatter.""" + complex_content = """--- +title: "Complex YAML Test" +id: complex-yaml +tags: + - testing + - yaml + - complex +metadata: + author: + name: "John Doe" + email: "john@example.com" + created: 2023-01-01 + updated: 2023-12-31 + nested: + deeply: + very: "deep value" +categories: ["cat1", "cat2", "cat3"] +boolean_value: true +null_value: null +number_value: 42 +float_value: 3.14 +multiline: | + This is a multiline + string that spans + multiple lines +--- +# Complex YAML Test""" + + (linking.DOCS_DIR / "complex.md").write_text(complex_content) + prepare_docs() + + # Should preserve all the complex YAML structure + updated_content = (linking.DOCS_DIR / "complex.md").read_text() + + # Verify the ID was preserved and complex structure remains + self.assertIn("id: complex-yaml", updated_content) + self.assertIn("deeply:", updated_content) + self.assertIn("multiline: |", updated_content) + + # Verify redirect map was created correctly + redirect_map = json.loads(linking.REDIRECT_MAP_FILE.read_text()) + 
self.assertEqual(redirect_map["complex-yaml"], "complex.md") + + def test_unicode_in_yaml_frontmatter(self) -> None: + """Test handling of Unicode characters in YAML frontmatter.""" + unicode_content = """--- +title: "测试文档 🚀 Café" +id: unicode-test +description: "This contains émojis 🎉 and ñoñ-ASCII çhars" +author: "José García-Martínez" +tags: ["日本語", "español", "français"] +--- +# Unicode Test Document""" + + (linking.DOCS_DIR / "unicode.md").write_text(unicode_content, encoding="utf-8") + prepare_docs() + + # Should handle Unicode correctly + updated_content = (linking.DOCS_DIR / "unicode.md").read_text(encoding="utf-8") + self.assertIn("🚀", updated_content) + self.assertIn("José García-Martínez", updated_content) + self.assertIn("日本語", updated_content) + + redirect_map = json.loads(linking.REDIRECT_MAP_FILE.read_text()) + self.assertEqual(redirect_map["unicode-test"], "unicode.md") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/hack/mkdocs/__tests__/test_internal_link.py b/hack/mkdocs/__tests__/test_internal_link.py new file mode 100644 index 0000000000..2e94d75fd5 --- /dev/null +++ b/hack/mkdocs/__tests__/test_internal_link.py @@ -0,0 +1,125 @@ +# Copyright 2025 The Kubernetes Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from pathlib import Path +import shutil +import sys + +sys.path.insert(0, str(Path(__file__).parent)) + +from hack.mkdocs_main import PageResolver + + +class TestPageResolver(unittest.TestCase): + def setUp(self): + """Set up a temporary directory structure for each test.""" + self.test_dir = Path("./temp_test_main") + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + self.docs_path = self.test_dir / "docs" + self.docs_path.mkdir(parents=True) + + # Create some test markdown files with frontmatter IDs + (self.docs_path / "index.md").write_text("---\nid: home\n---\n# Home") + (self.docs_path / "about.md").write_text("---\nid: about-us\n---\n# About") + (self.docs_path / "guides").mkdir() + (self.docs_path / "guides" / "first.md").write_text( + "---\nid: first-guide\n---\n# First Guide" + ) + (self.docs_path / "guides" / "second.md").write_text( + "---\nid: second-guide\n---\n# Second Guide" + ) + (self.docs_path / "guides" / "subsection").mkdir() + (self.docs_path / "guides" / "subsection" / "deep.md").write_text( + "---\nid: deep-page\n---\n# Deep Page" + ) + + self.resolver = PageResolver(docs_dir=self.docs_path) + + def tearDown(self): + """Clean up the temporary directory after each test.""" + shutil.rmtree(self.test_dir) + + def test_resolve_page_link_no_context(self): + """Test resolving page links without a current page context.""" + self.assertEqual(self.resolver.resolve_page_link("home"), "index.md") + self.assertEqual( + self.resolver.resolve_page_link("first-guide"), "guides/first.md" + ) + + def test_resolve_page_link_from_root(self): + """Test resolving page links from a page in the docs root.""" + self.assertEqual( + self.resolver.resolve_page_link("about-us", "index.md"), "about.md" + ) + self.assertEqual( + self.resolver.resolve_page_link("first-guide", "index.md"), + "guides/first.md", + ) + + def test_resolve_page_link_from_subdir(self): + """Test resolving page links from a page in a subdirectory.""" + 
self.assertEqual( + self.resolver.resolve_page_link("home", "guides/first.md"), "../index.md" + ) + self.assertEqual( + self.resolver.resolve_page_link("second-guide", "guides/first.md"), + "second.md", + ) + self.assertEqual( + self.resolver.resolve_page_link("about-us", "guides/first.md"), + "../about.md", + ) + self.assertEqual( + self.resolver.resolve_page_link("deep-page", "guides/first.md"), + "subsection/deep.md", + ) + + def test_resolve_page_link_from_deep_subdir(self): + """Test resolving page links from a deeply nested page.""" + self.assertEqual( + self.resolver.resolve_page_link("home", "guides/subsection/deep.md"), + "../../index.md", + ) + self.assertEqual( + self.resolver.resolve_page_link("first-guide", "guides/subsection/deep.md"), + "../first.md", + ) + + def test_resolve_page_link_not_found(self): + """Test that resolving a non-existent page ID raises a ValueError.""" + with self.assertRaises(ValueError): + self.resolver.resolve_page_link("non-existent-id") + + def test_id_changes_are_picked_up(self): + """Test that the resolver picks up changes to page IDs.""" + self.assertEqual(self.resolver.resolve_page_link("home"), "index.md") + + # Modify the ID in a file + (self.docs_path / "index.md").write_text("---\nid: new-home\n---\n# Home") + + # Clear the resolver's cache to force it to re-scan the files + self.resolver._page_cache = None + + # The new ID should now resolve correctly + self.assertEqual(self.resolver.resolve_page_link("new-home"), "index.md") + + # The old ID should no longer be found + with self.assertRaises(ValueError): + self.resolver.resolve_page_link("home") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/hack/mkdocs/__tests__/test_migration.py b/hack/mkdocs/__tests__/test_migration.py new file mode 100644 index 0000000000..f1aec05942 --- /dev/null +++ b/hack/mkdocs/__tests__/test_migration.py @@ -0,0 +1,297 @@ +# Copyright 2025 The Kubernetes Authors. 
def setUp(self) -> None:
    """Create an isolated project tree and point the linking module at it.

    Sets ``self.test_dir``, ``self.docs_path`` and ``self.redirect_map_file``,
    and records the linking module's original ``DOCS_DIR`` /
    ``REDIRECT_MAP_FILE`` in ``self.original_globals`` so ``tearDown`` can
    restore them.
    """
    # Local import: only this fixture needs it.
    import tempfile

    # A unique directory per test avoids depending on (and polluting) the
    # current working directory; tearDown removes ``self.test_dir``.
    self.test_dir = Path(tempfile.mkdtemp(prefix="temp_test_project_"))
    self.docs_path = self.test_dir / "docs"
    self.redirect_map_file = self.test_dir / "redirect_map.json"
    self.docs_path.mkdir(parents=True)

    # The script under test uses module-level globals for configuration.
    # To keep tests isolated, those globals are temporarily redirected to
    # the test directory.
    #
    # Use the imported module object directly: the module is registered in
    # ``sys.modules`` under its real name (``hack.mkdocs_linking``), so
    # looking it up as ``sys.modules["linking"]`` raises KeyError.
    self.linking_module = linking
    self.original_globals = {
        "DOCS_DIR": self.linking_module.DOCS_DIR,
        "REDIRECT_MAP_FILE": self.linking_module.REDIRECT_MAP_FILE,
    }
    self.linking_module.DOCS_DIR = self.docs_path  # type: ignore
    self.linking_module.REDIRECT_MAP_FILE = self.redirect_map_file  # type: ignore
def test_on_files_one_file_moved(self) -> None:
    """Check that ``on_files`` accepts a moved page and returns the file list."""
    moved_src_path = "new/path/for/doc.md"

    # Record the "before" state: the page lives at old-path.md.
    original_page = self.docs_path / "old-path.md"
    original_page.write_text("Content")
    prepare_docs()

    # Simulate the move by writing a page carrying the same ID at the new
    # location.
    relocated_page = self.docs_path / moved_src_path
    relocated_page.parent.mkdir(parents=True, exist_ok=True)
    relocated_page.write_text("---\nid: old-path\n---\nContent")

    # on_files only prints its findings, so this test checks that the hook
    # runs without raising and hands back the files it was given.
    files = [SimpleNamespace(src_path=moved_src_path)]
    result = linking.on_files(files, {"docs_dir": str(self.docs_path)})
    self.assertEqual(result, files)
def test_prepare_multiple_subdirectories(self) -> None:
    """Check that nested files get IDs built from their path components."""
    # One row per page: (path relative to docs, file content, expected ID).
    pages = [
        ("api/v1/auth.md", "# Authentication v1", "api-v1-auth"),
        ("api/v1/users.md", "# Users API v1", "api-v1-users"),
        ("api/v2/auth.md", "# Authentication v2", "api-v2-auth"),
        ("api/v2/users.md", "# Users API v2", "api-v2-users"),
        (
            "guides/getting-started/installation.md",
            "# Installation",
            "guides-getting-started-installation",
        ),
        (
            "guides/getting-started/quickstart.md",
            "# Quick Start",
            "guides-getting-started-quickstart",
        ),
        (
            "guides/advanced/configuration.md",
            "# Advanced Configuration",
            "guides-advanced-configuration",
        ),
        (
            "guides/advanced/deployment.md",
            "# Deployment Guide",
            "guides-advanced-deployment",
        ),
    ]

    # Arrange: write every page, creating its directory on demand.
    for relative_path, text, _ in pages:
        page = self.docs_path / relative_path
        page.parent.mkdir(parents=True, exist_ok=True)
        page.write_text(text)

    # Act
    prepare_docs()

    # Assert: each expected ID maps back to the page's relative path.
    redirect_map = json.loads(self.redirect_map_file.read_text())
    for relative_path, _, page_id in pages:
        self.assertEqual(redirect_map.get(page_id), relative_path)
def test_prepare_handles_empty_docs_directory(self) -> None:
    """Check that preparing a docs directory with no pages writes an empty map."""
    # Arrange: nothing to do, since setUp leaves the docs directory empty.

    # Act
    prepare_docs()

    # Assert: the map file is still written, and it holds no entries.
    map_file = self.redirect_map_file
    self.assertTrue(map_file.exists())
    saved_map = json.loads(map_file.read_text())
    self.assertEqual(len(saved_map), 0)
class TestPreviewDocs(unittest.TestCase):
    """Tests for the preview_docs function."""

    def setUp(self) -> None:
        """Create an isolated, empty docs directory for each test."""
        # Local import: only this fixture needs it.
        import tempfile

        # A unique directory per test (instead of a fixed path under the
        # current working directory) keeps runs independent and parallel
        # safe; the cleanup is registered before anything else can fail.
        self.test_dir = Path(tempfile.mkdtemp(prefix="temp_test_preview_"))
        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)
        self.docs_dir = self.test_dir / "my-docs"
        self.docs_dir.mkdir(parents=True)

    def _capture_preview(self, docs_dir: str) -> str:
        """Run ``preview_docs`` on *docs_dir* and return what it printed."""
        captured_output = io.StringIO()
        with patch("sys.stdout", captured_output):
            linking.preview_docs(docs_dir)
        return captured_output.getvalue()

    def test_preview_with_mixed_files(self):
        """Preview lists files needing an ID separately from those that have one."""
        # Arrange
        (self.docs_dir / "index.md").write_text("# Welcome", encoding="utf-8")
        (self.docs_dir / "has-id.md").write_text(
            "---\nid: existing-id\n---\n# Has ID", encoding="utf-8"
        )

        # Act
        output = self._capture_preview(str(self.docs_dir))

        # Assert
        self.assertIn("Files that would be modified (1):", output)
        self.assertIn("+ index.md -> ID: 'index'", output)
        self.assertIn("Files already with IDs (1):", output)
        self.assertIn("* has-id.md -> ID: 'existing-id'", output)
        self.assertIn("Would create/update redirect map", output)

    def test_preview_with_empty_directory(self):
        """Preview reports that an empty directory has no markdown files."""
        # Arrange: setUp leaves the directory empty.

        # Act
        output = self._capture_preview(str(self.docs_dir))

        # Assert
        self.assertIn("No markdown files found", output)

    def test_preview_with_nonexistent_directory(self):
        """Preview reports an error for a directory that does not exist."""
        # Arrange
        non_existent_path = str(self.test_dir / "non-existent")

        # Act
        output = self._capture_preview(non_existent_path)

        # Assert
        self.assertIn("ERROR: Directory", output)
        self.assertIn("does not exist", output)


if __name__ == "__main__":
    unittest.main(verbosity=2)
class TestErrorRecovery(unittest.TestCase):
    """Tests focused on error recovery and system robustness."""

    def setUp(self) -> None:
        """Create an isolated docs tree and redirect the linking globals to it."""
        # Local import: only this fixture needs it.
        import tempfile

        # A unique directory per test avoids depending on (and polluting)
        # the current working directory.
        self.test_dir = Path(tempfile.mkdtemp(prefix="temp_test_robustness_"))
        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)

        # Remember the module configuration and restore it after the test,
        # even if the rest of setUp or the test itself fails.
        self.original_docs_dir = linking.DOCS_DIR
        self.original_redirect_file = linking.REDIRECT_MAP_FILE
        self.addCleanup(self._restore_linking_globals)

        linking.DOCS_DIR = self.test_dir / "docs"
        linking.REDIRECT_MAP_FILE = self.test_dir / "redirect_map.json"
        linking.DOCS_DIR.mkdir(parents=True, exist_ok=True)

    def _restore_linking_globals(self) -> None:
        """Put back the linking module's original path configuration."""
        linking.DOCS_DIR = self.original_docs_dir
        linking.REDIRECT_MAP_FILE = self.original_redirect_file

    def test_concurrent_access_simulation(self) -> None:
        """Test robustness when files are modified during processing."""
        import threading
        import time

        # Create initial files
        for i in range(5):
            (linking.DOCS_DIR / f"concurrent-{i}.md").write_text(
                f"---\nid: concurrent-{i}\n---\n# Concurrent Test {i}",
                encoding="utf-8",
            )

        def modify_files() -> None:
            """Simulate another process modifying a file during processing."""
            time.sleep(0.1)  # Give prepare_docs a chance to start
            try:
                (linking.DOCS_DIR / "concurrent-2.md").write_text(
                    "---\nid: concurrent-2-modified\n---\n"
                    "# Modified During Processing",
                    encoding="utf-8",
                )
            except OSError:
                # Best effort only: the file may be missing or locked
                # (a lock surfaces as PermissionError, also an OSError).
                pass

        modifier_thread = threading.Thread(target=modify_files)
        modifier_thread.start()
        try:
            # Run preparation while the background writer is active.
            prepare_docs()
        finally:
            # Always wait for the writer so it cannot outlive the test and
            # touch the directory while it is being cleaned up.
            modifier_thread.join()

        # Should complete successfully despite concurrent modifications
        self.assertTrue(linking.REDIRECT_MAP_FILE.exists())
        redirect_map = json.loads(linking.REDIRECT_MAP_FILE.read_text())

        # Should have processed most files
        self.assertGreaterEqual(len(redirect_map), 4)

    def test_special_characters_in_filenames(self) -> None:
        """Test handling of files with special characters in names."""
        special_files = {
            "file with spaces.md": "file-with-spaces",
            "file-with-üñïçødé.md": "file-with-unicode",
            "file.with.dots.md": "file-with-dots",
            "file[with]brackets.md": "file-with-brackets",
            "file(with)parens.md": "file-with-parens",
            "file&with&symbols.md": "file-with-symbols",
        }

        for filename, expected_id in special_files.items():
            try:
                (linking.DOCS_DIR / filename).write_text(
                    f"---\nid: {expected_id}\n---\n# Test File", encoding="utf-8"
                )
            except (OSError, UnicodeEncodeError):
                # Some filesystems reject certain characters (OSError), and
                # some filesystem encodings cannot represent the name at all
                # (UnicodeEncodeError). Skip what cannot be created.
                continue

        prepare_docs()

        redirect_map = json.loads(linking.REDIRECT_MAP_FILE.read_text())

        # Only files that could actually be created are checked.
        for filename, expected_id in special_files.items():
            if (linking.DOCS_DIR / filename).exists():
                self.assertIn(expected_id, redirect_map)
                self.assertEqual(redirect_map[expected_id], filename)
class TestUpdateMkdocsYml(unittest.TestCase):
    """Tests for the _update_mkdocs_yml_redirects function."""

    def setUp(self) -> None:
        """Create a temporary mkdocs.yml location and route the function to it."""
        # Local import: only this fixture needs it.
        import tempfile

        self.test_dir = Path(tempfile.mkdtemp(prefix="temp_test_yml_updates_"))
        self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)
        self.mkdocs_yml_path = self.test_dir / "mkdocs.yml"

        # Replace ``Path`` inside the module under test so that its
        # ``Path("mkdocs.yml")`` call yields the temporary file.
        # patch.object targets the already-imported module object; the
        # string form ``patch("linking.Path")`` would try to import a
        # top-level module named ``linking``, which does not exist (the
        # module is ``hack.mkdocs_linking``).
        self.patcher = patch.object(linking, "Path")
        self.mock_path = self.patcher.start()
        # Cleanups run last-in-first-out, so the patch is undone before the
        # directory is removed, and it is undone even if removal fails.
        self.addCleanup(self.patcher.stop)
        self.mock_path.return_value = self.mkdocs_yml_path

    def _load_config(self):
        """Parse and return the current contents of the temporary mkdocs.yml."""
        with open(self.mkdocs_yml_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)

    def test_updates_yml_with_no_plugins_section(self):
        """Test that the function adds plugins and redirects correctly."""
        # Arrange
        self.mkdocs_yml_path.write_text("site_name: My Docs", encoding="utf-8")
        redirects = {"old/path.md": "new/path.md"}

        # Act
        result = linking._update_mkdocs_yml_redirects(redirects)

        # Assert
        self.assertTrue(result)
        config = self._load_config()
        self.assertIn("plugins", config)
        self.assertIn({"redirects": {"redirect_maps": redirects}}, config["plugins"])

    def test_updates_yml_with_string_redirects_plugin(self):
        """Test updating when 'redirects' is just a string in the plugins list."""
        # Arrange
        self.mkdocs_yml_path.write_text("plugins:\n - redirects", encoding="utf-8")
        redirects = {"old/path.md": "new/path.md"}

        # Act
        result = linking._update_mkdocs_yml_redirects(redirects)

        # Assert
        self.assertTrue(result)
        config = self._load_config()
        self.assertIn({"redirects": {"redirect_maps": redirects}}, config["plugins"])
        self.assertNotIn("redirects", config["plugins"])

    def test_updates_yml_with_null_redirect_maps(self):
        """Test handling of 'redirect_maps: null'."""
        # Arrange: ``redirect_maps`` must be indented deeper than the
        # ``redirects`` key so that it parses as that plugin's option with a
        # null value, i.e. [{"redirects": {"redirect_maps": None}}].
        self.mkdocs_yml_path.write_text(
            "plugins:\n  - redirects:\n      redirect_maps:\n", encoding="utf-8"
        )
        redirects = {"old/path.md": "new/path.md"}

        # Act
        result = linking._update_mkdocs_yml_redirects(redirects)

        # Assert
        self.assertTrue(result)
        config = self._load_config()
        self.assertEqual(config["plugins"][0]["redirects"]["redirect_maps"], redirects)

    def test_does_not_write_if_no_changes_needed(self):
        """Test that the file is not modified if redirects are already present."""
        # Arrange
        redirects = {"old/path.md": "new/path.md"}
        config_dict = {"plugins": [{"redirects": {"redirect_maps": redirects}}]}
        self.mkdocs_yml_path.write_text(yaml.dump(config_dict), encoding="utf-8")
        initial_bytes = self.mkdocs_yml_path.read_bytes()
        initial_mtime = self.mkdocs_yml_path.stat().st_mtime_ns

        # Act
        result = linking._update_mkdocs_yml_redirects(redirects)

        # Assert
        # NOTE(review): the function's docstring says it returns False when
        # no changes were needed, while this test expects True. Confirm
        # which is intended.
        self.assertTrue(result)
        # Nanosecond mtime plus a byte comparison: a coarse float mtime alone
        # can miss a rewrite that happens within the timestamp resolution.
        self.assertEqual(initial_mtime, self.mkdocs_yml_path.stat().st_mtime_ns)
        self.assertEqual(initial_bytes, self.mkdocs_yml_path.read_bytes())
# --- Configuration ---
# Global constants defining key file paths and metadata keys.
DOCS_DIR: Path = Path("docs")
REDIRECT_MAP_FILE: Path = Path("redirect_map.json")
FRONTMATTER_ID_KEY: str = "id"

# A YAML frontmatter block (``---`` ... ``---``) at the very start of a file.
# The closing fence may be followed by a newline or by the end of the file.
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?\n)---\s*(?:\n|\Z)", re.DOTALL)

# An inline Markdown link whose target ends in ``.md``:
#   \[([^\]]+)\]            - the link text inside [ ]
#   \((?!\{\{)([^)]+\.md)\) - the target inside ( ); the negative lookahead
#                             skips links already converted to the macro.
_MD_LINK_RE = re.compile(r"\[([^\]]+)\]\((?!\{\{)([^)]+\.md)\)")

# A target that carries a URL scheme (``https:``, ``mailto:`` ...) or is
# protocol-relative (``//host/...``) points outside the docs tree.
_EXTERNAL_URL_RE = re.compile(r"^(?:[A-Za-z][A-Za-z0-9+.-]*:|//)")


def _get_frontmatter(content: str) -> Tuple[Dict[str, str], int]:
    """Extracts simple key-value frontmatter using only the standard library.

    Only flat ``key: value`` lines are interpreted; lines without a colon
    (list items, continuation lines) are ignored by the parse.

    Args:
        content: The full text content of a file.

    Returns:
        A tuple containing the frontmatter data as a dictionary and the
        character position where the frontmatter section ends. Returns
        ``({}, 0)`` when the content does not start with a frontmatter block.
    """
    match = _FRONTMATTER_RE.match(content)
    if not match:
        return {}, 0

    data: Dict[str, str] = {}
    for line in match.group(1).strip().split("\n"):
        # Split on the first colon only so values may themselves contain
        # colons (e.g. URLs).
        key, separator, value = line.partition(":")
        if separator:
            data[key.strip()] = value.strip()
    return data, match.end()


def _format_frontmatter_str(data: Dict[str, str]) -> str:
    """Formats a simple dictionary into a YAML-like string.

    Each item becomes one ``key: value`` line; the result always ends with a
    newline.
    """
    lines: List[str] = [f"{key}: {value}" for key, value in data.items()]
    return "\n".join(lines) + "\n"


def _inject_frontmatter_id(content: str, page_id: str) -> str:
    """Returns *content* with ``id: page_id`` present in its frontmatter.

    An existing frontmatter block is kept verbatim, including list items,
    nested values and comments that the flat key-value parser cannot
    represent. The ID line replaces an existing top-level ``id`` line if
    there is one and is appended otherwise. Content without frontmatter gets
    a new block that holds only the ID.
    """
    id_line = f"{FRONTMATTER_ID_KEY}: {page_id}"
    match = _FRONTMATTER_RE.match(content)
    if not match:
        return f"---\n{id_line}\n---\n{content}"

    lines = match.group(1).rstrip("\n").split("\n")
    # Search from the end: when a key is repeated, the parser keeps the last
    # occurrence, so that is the line that must carry the new value.
    for index in range(len(lines) - 1, -1, -1):
        key, separator, _ = lines[index].partition(":")
        # Only an unindented key counts; an indented "id" belongs to a
        # nested structure and must not be rewritten.
        if separator and key.rstrip() == FRONTMATTER_ID_KEY:
            lines[index] = id_line
            break
    else:
        lines.append(id_line)

    frontmatter_text = "\n".join(lines)
    return f"---\n{frontmatter_text}\n---\n{content[match.end():]}"


def prepare_docs(docs_dir_path: Optional[str] = None) -> None:
    """
    Scan the documentation directory for Markdown files, inject a unique permanent
    ID into the frontmatter of each file if missing, and create a redirect map
    (JSON) of page IDs to file paths. This prepares the docs for safe refactoring
    and enables robust internal linking and redirect generation.

    Generated IDs are the file's path relative to the docs directory, without
    the suffix and with path components joined by "-". Paths stored in the
    map always use "/" separators, regardless of platform. Files are processed
    in sorted order so the output is reproducible. If two files carry the same
    ID, a warning is printed and the later file wins.

    Args:
        docs_dir_path: Optional path to the documentation directory. If None,
            uses the default DOCS_DIR (and REDIRECT_MAP_FILE for the map);
            otherwise the map is written to "redirect_map.json" next to the
            given directory.

    Returns:
        None. Writes changes to Markdown files and creates/updates the redirect
        map JSON file on disk.
    """
    print("Starting documentation preparation...")

    # The module globals are read at call time so callers (and tests) can
    # repoint them.
    if docs_dir_path is None:
        docs_dir = DOCS_DIR
        redirect_map_file = REDIRECT_MAP_FILE
    else:
        docs_dir = Path(docs_dir_path)
        redirect_map_file = docs_dir.parent / "redirect_map.json"

    redirect_map: Dict[str, str] = {}
    for md_file in sorted(docs_dir.rglob("*.md")):
        relative_path = md_file.relative_to(docs_dir)
        content = md_file.read_text("utf-8")
        frontmatter, _ = _get_frontmatter(content)
        page_id = frontmatter.get(FRONTMATTER_ID_KEY)

        if not page_id:
            # No permanent ID yet: derive one from the path and add it to
            # the file without rewriting the rest of the frontmatter.
            page_id = "-".join(relative_path.with_suffix("").parts)
            print(f" - Assigning ID '{page_id}' to {md_file}")
            md_file.write_text(_inject_frontmatter_id(content, page_id), "utf-8")
        else:
            print(f" - Found existing ID '{page_id}' in {md_file}")

        if page_id in redirect_map:
            print(
                f" Warning: ID '{page_id}' is used by both "
                f"{redirect_map[page_id]} and {relative_path.as_posix()}; "
                "the latter is recorded."
            )

        # Record the "before" location of the page under its permanent ID.
        redirect_map[page_id] = relative_path.as_posix()

    # Persist the map; the MkDocs hook uses it to generate redirects.
    redirect_map_file.write_text(json.dumps(redirect_map, indent=2), "utf-8")
    print(f"Preparation complete. Map saved to {redirect_map_file}")


def _make_link_replacer(current_dir: Path, docs_root: Path, path_to_id_map: Dict[str, str]):
    """Builds the ``re.sub`` callback that rewrites links found in one file.

    Args:
        current_dir: Directory of the file being processed; relative link
            targets are resolved against it.
        docs_root: Resolved docs directory, used to turn a resolved target
            into a lookup key.
        path_to_id_map: Map of docs-relative POSIX paths to page IDs.
    """

    def replace_link(match: "re.Match") -> str:
        link_text, link_url = match.group(1), match.group(2)

        # Leave anchors and external URLs alone. The check looks for a real
        # URL scheme: a plain ``startswith("http")`` would also skip relative
        # files whose name merely begins with "http", such as ``http.md``.
        if link_url.startswith("#") or _EXTERNAL_URL_RE.match(link_url):
            return match.group(0)

        target_file = (current_dir / link_url).resolve()
        try:
            target_key = target_file.relative_to(docs_root).as_posix()
        except ValueError:
            # The link points outside the docs directory.
            return match.group(0)

        target_id = path_to_id_map.get(target_key)
        if not target_id:
            # Unknown target (e.g. a broken link or a page without an ID).
            return match.group(0)
        return f'[{link_text}]({{{{ internal_link("{target_id}") }}}})'

    return replace_link


def convert_internal_links(docs_dir_path: str = "docs") -> None:
    """
    Converts relative Markdown links between pages into internal_link macros.

    Every Markdown file under the docs directory is scanned. A link of the
    form [Link Text](relative/path.md) whose target is a page with an ID in
    its frontmatter is rewritten to:
        [Link Text]({{ internal_link("page_id") }})
    Anchor links, links with a URL scheme (http:, https:, mailto:, ...),
    links that do not end in ".md", links pointing outside the docs
    directory and links to pages without an ID are left unchanged.

    Args:
        docs_dir_path: Path to the root documentation directory containing
            Markdown files. Defaults to 'docs'.

    Returns:
        None. Files whose content changed are rewritten in place, and
        progress plus a summary are printed to stdout.
    """
    print("Starting internal link conversion...")
    docs_dir = Path(docs_dir_path)
    docs_root = docs_dir.resolve()
    md_files = sorted(docs_dir.rglob("*.md"))

    # Step 1: Map each page's docs-relative POSIX path to its ID up front,
    # so link targets do not have to be re-read for every link.
    path_to_id_map: Dict[str, str] = {}
    for md_file in md_files:
        frontmatter, _ = _get_frontmatter(md_file.read_text("utf-8"))
        page_id = frontmatter.get(FRONTMATTER_ID_KEY)
        if page_id:
            path_to_id_map[md_file.relative_to(docs_dir).as_posix()] = page_id

    print(f" - Built a map of {len(path_to_id_map)} page IDs.")

    # Step 2: Rewrite the links of each file.
    files_converted = 0
    for md_file in md_files:
        original_content = md_file.read_text("utf-8")
        replacer = _make_link_replacer(md_file.parent, docs_root, path_to_id_map)
        content = _MD_LINK_RE.sub(replacer, original_content)

        if content != original_content:
            files_converted += 1
            md_file.write_text(content, "utf-8")
            print(f" - Converted links in {md_file.relative_to(docs_dir)}")

    print(f"Link conversion complete. Modified {files_converted} files.")


def on_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """
    MkDocs plugin hook for the config phase. Ensures the redirects plugin, if
    configured, has a ``redirect_maps`` container, supporting both dict and
    object plugin representations. Does not modify files on disk.

    Args:
        config: The MkDocs configuration dictionary.

    Returns:
        The same configuration object, with the redirects plugin initialized
        if present.
    """
    print("Running MkDocs migration hook (config phase)...")
    # ``or {}`` also covers an explicit ``plugins: None``.
    plugins = config.get("plugins") or {}
    for plugin_name, plugin_instance in plugins.items():
        if plugin_name != "redirects":
            continue

        if isinstance(plugin_instance, dict):
            plugin_instance.setdefault("config", {}).setdefault("redirect_maps", {})
        else:
            # Assume an object exposing its settings through ``.config``.
            if not hasattr(plugin_instance, "config"):
                plugin_instance.config = type("Config", (), {})()
            if not hasattr(plugin_instance.config, "redirect_maps"):
                plugin_instance.config.redirect_maps = {}  # type: ignore
        print(" Redirects plugin configured.")
        break
    return config
+ Raises: + None explicitly, but prints warnings or errors to the console if: + - The 'mkdocs.yml' file is missing or not a valid YAML dictionary. + - There are issues parsing or writing the YAML file. + Notes: + - The function uses PyYAML for parsing and writing YAML. + - Existing comments and formatting may not be fully preserved due to PyYAML limitations. + - The function is idempotent: if the provided redirects are already present, no changes are made. + - If the 'redirects' plugin is not present, it is added in the correct format. + - If the 'redirect_maps' section is missing, it is created. + - The function prints informative messages about its actions and any issues encountered. + """ + mkdocs_yml_path = Path("mkdocs.yml") + if not mkdocs_yml_path.exists(): + print(" Warning: mkdocs.yml not found. Cannot update redirects.") + return False + + try: + with open(mkdocs_yml_path, "r", encoding="utf-8") as f: + config = yaml.safe_load(f) + + if not isinstance(config, dict): + print(" Warning: mkdocs.yml is not a valid YAML dictionary.") + return False + + # Step 1: Ensure 'plugins' section exists + plugins = config.setdefault("plugins", []) + + # Step 2: Find the first redirects plugin configuration + redirects_plugin_entry = None + for plugin in plugins: + if isinstance(plugin, dict) and "redirects" in plugin: + redirects_plugin_entry = plugin + break + elif isinstance(plugin, str) and plugin == "redirects": + # Found a string entry, which we will replace with a dict + redirects_plugin_entry = plugin + break + + # Step 3: If no entry exists, create a new one + if redirects_plugin_entry is None: + redirects_plugin_entry = {"redirects": {"redirect_maps": {}}} + plugins.append(redirects_plugin_entry) + + # Step 4: If the entry was a string, replace it with a proper dict structure + if isinstance(redirects_plugin_entry, str): + plugins[plugins.index(redirects_plugin_entry)] = { + "redirects": {"redirect_maps": {}} + } + redirects_plugin_entry = plugins[ + 
plugins.index({"redirects": {"redirect_maps": {}}}) + ] + + # Step 5: Get the config dict for the redirects plugin + redirects_plugin_config = redirects_plugin_entry.setdefault("redirects", {}) + + # Step 6: Handle case where config is `redirects: null` + if redirects_plugin_config is None: + redirects_plugin_config = {} + redirects_plugin_entry["redirects"] = redirects_plugin_config + + # Step 7: Ensure 'redirect_maps' exists + redirect_maps = redirects_plugin_config.setdefault("redirect_maps", {}) + + # Step 8: Handle case where `redirect_maps:` is present but empty (evaluates to None) + if redirect_maps is None: + redirect_maps = {} + redirects_plugin_config["redirect_maps"] = redirect_maps + + # Step 9: Check if there are actual changes to be made before writing the file + if not any( + redirect_updates.get(k) != redirect_maps.get(k) for k in redirect_updates + ): + print(" No new redirect updates needed in mkdocs.yml.") + return True + + # Step 10: Update redirect_maps with new redirects + redirect_maps.update(redirect_updates) + + # Step 11: Write the updated config back to mkdocs.yml + with open(mkdocs_yml_path, "w", encoding="utf-8") as f: + yaml.dump(config, f, default_flow_style=False, sort_keys=False, indent=2) + + print(f" Updated mkdocs.yml with {len(redirect_updates)} redirect rules.") + return True + + except (yaml.YAMLError, IOError) as e: + print(f" Error updating mkdocs.yml: {e}") + return False + + +def on_files(files, config): + """ + Handles the MkDocs 'files' plugin event to manage page redirects and internal linking. + This function performs the following steps: + 1. Loads a mapping of page IDs to their previous file paths from a JSON file (REDIRECT_MAP_FILE). + 2. Scans the current set of Markdown files in the documentation, extracting their page IDs from frontmatter, + and builds a mapping of page IDs to their new file paths. + 3. 
Compares the old and new mappings to detect moved or renamed pages, and generates redirect rules for any + pages whose paths have changed. + 4. Attempts to update the 'mkdocs.yml' configuration file with the new redirect rules for use with the + mkdocs-redirects plugin. If automatic updating fails, prints instructions for manual addition. + 5. Prints debug and status information throughout the process. + Args: + files (list): List of MkDocs file objects representing the documentation files. + config (dict): MkDocs configuration dictionary. + Returns: + list: The (possibly unmodified) list of file objects, to be passed along in the MkDocs build process. + Side Effects: + - Reads from REDIRECT_MAP_FILE to obtain previous page mappings. + - May write to 'mkdocs.yml' to update redirect rules. + - Prints status and debug information to the console. + Notes: + - This function assumes the existence of certain global constants and helper functions: + - REDIRECT_MAP_FILE: Path to the JSON file containing the old page ID to path mapping. + - FRONTMATTER_ID_KEY: The key in the frontmatter that uniquely identifies a page. + - _get_frontmatter: Function to extract frontmatter from a Markdown file. + - _update_mkdocs_yml_redirects: Function to update mkdocs.yml with new redirect rules. + - The function is designed to be used as a plugin hook in the MkDocs build process. + """ + """Generates redirects and sets up the internal link macro for MkDocs.""" + print("Running MkDocs migration hook (files phase)...") + + # Step 1: Load the "before" state map + if not REDIRECT_MAP_FILE.exists(): + print(f" Warning: {REDIRECT_MAP_FILE} not found. 
Skipping.") + return files + + old_paths_map: Dict[str, str] = json.loads(REDIRECT_MAP_FILE.read_text()) + + # Step 2: Build the "after" state map + after_paths_map: Dict[str, str] = {} + + print( + f" Debug: MkDocs found these files: {[f.src_path for f in files if f.src_path.endswith('.md')]}" + ) + + for file in files: + if file.src_path.endswith(".md"): + abs_path = Path(config["docs_dir"]) / file.src_path + if abs_path.exists(): + content = abs_path.read_text("utf-8") + frontmatter, _ = _get_frontmatter(content) + page_id = frontmatter.get(FRONTMATTER_ID_KEY) + + if page_id: + after_paths_map[page_id] = file.src_path + + # Step 3: Generate redirect rules and write them to mkdocs.yml + # (since programmatic config updates don't seem to work) + redirect_updates = {} + count = 0 + for page_id, old_path in old_paths_map.items(): + new_path = after_paths_map.get(page_id) + if new_path and new_path != old_path: + # Only create redirect for the .md file path + # The mkdocs-redirects plugin handles URL generation automatically + redirect_updates[old_path] = new_path + print(f" Would add redirect: {old_path} -> {new_path}") + count += 1 + + if count > 0: + print(f" Generated {count} redirect rules.") + print(f" Redirect rules: {redirect_updates}") + + # Try to update mkdocs.yml automatically + if _update_mkdocs_yml_redirects(redirect_updates): + print(" ✓ Updated mkdocs.yml with redirect rules.") + else: + print(" ✗ Could not update mkdocs.yml automatically.") + print(" Manual addition required:") + print(" plugins:") + print(" - redirects:") + print(" redirect_maps:") + for old, new in redirect_updates.items(): + print(f" {old}: {new}") + else: + print(" No new redirects needed.") + + print(" `internal_link` macro is ready (provided by main.py).") + return files + + +def preview_docs(docs_dir_path: str = "docs") -> None: + """ + Simulates the process of preparing documentation files for unique page IDs without making any changes. 
+ This function scans the specified documentation directory (default: 'docs') for Markdown (.md) files. + It analyzes each file to determine whether it already contains a unique page ID in its frontmatter. + For files missing an ID, it shows what ID would be generated based on the file's relative path. + For files with existing IDs, it lists them and their IDs. + No files are modified; this is a dry-run/preview mode. + The function also indicates where a redirect map (redirect_map.json) would be created or updated + if changes were to be applied. + Args: + docs_dir_path (str): Path to the documentation directory to scan. Defaults to 'docs'. + Prints: + - The number of Markdown files found. + - A list of files that would be modified with new IDs, showing the generated ID for each. + - A list of files that already have IDs, showing the existing ID for each. + - The path where the redirect map would be created or updated. + Notes: + - This function does not modify any files or create the redirect map. + - Errors encountered while reading files are reported, but do not stop the process. + - Requires the existence of helper functions/constants: _get_frontmatter and FRONTMATTER_ID_KEY. + """ + """Shows what the prepare_docs function would do without making changes.""" + docs_dir = Path(docs_dir_path) + + print(f"Scanning documentation directory: {docs_dir}") + + # Step 1: Validate that the documentation directory exists. + if not docs_dir.exists(): + print(f" ERROR: Directory {docs_dir} does not exist") + return + + # Step 2: Find all markdown files in the documentation directory. + md_files = list(docs_dir.rglob("*.md")) + if not md_files: + print(" No markdown files found") + return + + print(f" Found {len(md_files)} markdown files") + + files_needing_ids = [] + files_with_ids = [] + + # Step 3: Analyze each markdown file to determine its current state. 
+ for md_file in md_files: + try: + content = md_file.read_text("utf-8") + frontmatter, _ = _get_frontmatter(content) + page_id = frontmatter.get(FRONTMATTER_ID_KEY) + + if page_id: + files_with_ids.append((md_file, page_id)) + else: + relative_path = md_file.relative_to(docs_dir) + generated_id = str(relative_path.with_suffix("")).replace( + os.path.sep, "-" + ) + files_needing_ids.append((md_file, generated_id)) + except Exception as e: + print(f" Warning: Could not process {md_file}: {e}") + + # Step 4: Display files that would be modified with new IDs. + if files_needing_ids: + print(f"\nFiles that would be modified ({len(files_needing_ids)}):") + for md_file, generated_id in files_needing_ids: + relative_path = md_file.relative_to(docs_dir) + print(f" + {relative_path} -> ID: '{generated_id}'") + + # Step 5: Display files that already have IDs and would be preserved. + if files_with_ids: + print(f"\nFiles already with IDs ({len(files_with_ids)}):") + for md_file, existing_id in files_with_ids: + relative_path = md_file.relative_to(docs_dir) + print(f" * {relative_path} -> ID: '{existing_id}'") + + # Step 6: Show where the redirect map would be created or updated. + print( + f"\nWould create/update redirect map: {Path(docs_dir_path).parent / 'redirect_map.json'}" + ) + + +def main() -> None: + """Parses command line arguments and runs the preparation script.""" + parser = argparse.ArgumentParser( + description="MkDocs migration helper - prepares docs for safe refactoring.", + prog="linking", + ) + parser.add_argument( + "--prepare", + action="store_true", + help="Scan docs folder, inject IDs, and create redirect map.", + ) + parser.add_argument( + "--convert-links", + action="store_true", + help="Convert all relative Markdown links to the internal_link macro.", + ) + parser.add_argument( + "--docs-dir", default="docs", help="Documentation directory (default: docs)." 
+ ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be done without making changes.", + ) + args = parser.parse_args() + + if args.prepare: + if args.dry_run: + print("DRY RUN: Preview of changes that would be made") + print("=" * 50) + preview_docs(args.docs_dir) + else: + prepare_docs(args.docs_dir) + elif args.convert_links: + if args.dry_run: + print( + "DRY RUN for link conversion is not implemented. This action directly modifies files." + ) + else: + convert_internal_links(args.docs_dir) + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/hack/mkdocs_main.py b/hack/mkdocs_main.py new file mode 100644 index 0000000000..3e0a0f90b5 --- /dev/null +++ b/hack/mkdocs_main.py @@ -0,0 +1,115 @@ +# Copyright 2025 The Kubernetes Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Main module for mkdocs-macros plugin. +This provides the internal_link macro for resilient documentation linking. 
import os
import re
from pathlib import Path
from typing import Dict, Optional


class PageResolver:
    """Resolves frontmatter page IDs to Markdown file references.

    The ID -> file mapping is built lazily on the first lookup by scanning
    ``docs_dir`` recursively for ``*.md`` files, and is then reused for the
    lifetime of the instance.
    """

    def __init__(self, docs_dir: Path = Path("docs")):
        self.docs_dir = docs_dir
        # Filled on first use by resolve_page_link(); None means "not built".
        self._page_cache: Optional[Dict[str, Path]] = None

    def _extract_frontmatter_id(self, content: str) -> Optional[str]:
        """Return the top-level ``id`` value from YAML frontmatter, if any.

        This is a line-based scan, not a YAML parse. It accepts LF and CRLF
        line endings, ignores indented (nested) keys, and strips one pair of
        matching surrounding quotes from the value.

        Args:
            content: Full text of a Markdown file.

        Returns:
            The page ID, or None when there is no frontmatter, no top-level
            ``id`` key, or the value is empty.
        """
        if not content.startswith("---"):
            return None

        match = re.match(r"^---\r?\n(.*?)\r?\n---", content, re.DOTALL)
        if not match:
            return None

        for line in match.group(1).splitlines():
            # Indented lines belong to a nested mapping, not the page itself.
            if line[:1].isspace():
                continue
            key, separator, value = line.partition(":")
            if not separator or key.strip() != "id":
                continue
            value = value.strip()
            # `id: "foo"` and `id: 'foo'` denote the same ID as `id: foo`.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
                value = value[1:-1]
            return value or None
        return None

    def _build_page_cache(self) -> Dict[str, Path]:
        """Build the page ID -> file path mapping for ``docs_dir``.

        Files are visited in sorted order and the first file claiming an ID
        keeps it, so the result does not depend on directory listing order.
        Files that cannot be read or decoded are skipped.
        """
        cache: Dict[str, Path] = {}
        for md_file in sorted(self.docs_dir.rglob("*.md")):
            try:
                content = md_file.read_text("utf-8")
            except (OSError, UnicodeDecodeError):
                continue  # Skip files that can't be read
            page_id = self._extract_frontmatter_id(content)
            if page_id:
                cache.setdefault(page_id, md_file)
        return cache

    def resolve_page_link(
        self, page_id: str, current_page_path: Optional[str] = None
    ) -> str:
        """Resolve a page ID to its Markdown file reference.

        Args:
            page_id: The frontmatter ID of the target page.
            current_page_path: Path of the page containing the link, relative
                to the docs root. When omitted, the result is relative to the
                docs root instead of to a page.

        Returns:
            A ``/``-separated path to the target Markdown file.

        Raises:
            ValueError: If no page with ``page_id`` exists under ``docs_dir``.
        """
        # Build cache on first use
        if self._page_cache is None:
            self._page_cache = self._build_page_cache()

        file_path = self._page_cache.get(page_id)
        if file_path is None:
            raise ValueError(f"Page with ID '{page_id}' not found")

        target_path = file_path.relative_to(self.docs_dir)

        # If no current page context, return the path from the docs root.
        # as_posix() keeps the separator consistent with the branch below.
        if not current_page_path:
            return target_path.as_posix()

        # Calculate relative path from current page to target page
        current_dir = Path(current_page_path).parent
        rel_path = os.path.relpath(str(target_path), str(current_dir))
        return rel_path.replace(os.path.sep, "/")
def define_env(env):
    """Hook for mkdocs-macros plugin functions and variables.

    Registers the ``internal_link`` macro on ``env``. The resolver scans the
    ``docs_dir`` found in ``env.conf`` when one is available and otherwise
    falls back to PageResolver's default directory.

    Args:
        env: The mkdocs-macros environment object. Only ``conf``, ``page``,
            ``variables`` and ``macro`` are used, and the first three are
            optional.
    """
    # NOTE(review): assumes env.conf is the MkDocs config mapping; confirm
    # against the installed mkdocs-macros version.
    conf = getattr(env, "conf", None)
    docs_dir = conf.get("docs_dir") if hasattr(conf, "get") else None
    resolver = PageResolver(Path(docs_dir)) if docs_dir else PageResolver()

    def _current_page_src_path():
        """Return the source path of the page being rendered, if known."""
        page = getattr(env, "page", None)
        if page is None:
            variables = getattr(env, "variables", None)
            if variables:
                page = variables.get("page")
        page_file = getattr(page, "file", None)
        return getattr(page_file, "src_path", None)

    @env.macro
    def internal_link(page_id: str) -> str:
        """
        Looks up a page by ID and returns its Markdown file reference.
        This provides resilient linking that survives file moves.

        The reference is relative to the current page when that page is
        known, and relative to the docs root otherwise. An unknown ID yields
        a visible ``[LINK ERROR: ...]`` marker instead of raising.
        """
        try:
            current_page_path = _current_page_src_path()
        except Exception:
            # Page context is best-effort; never fail a render because of it.
            current_page_path = None

        if current_page_path:
            try:
                return resolver.resolve_page_link(page_id, current_page_path)
            except Exception:
                # Deliberate fallback: retry below without page context.
                pass

        try:
            return resolver.resolve_page_link(page_id, None)
        except ValueError:
            return f"[LINK ERROR: Page '{page_id}' not found]"
redirect_maps: - 'guides/getting-started.md': 'guides/index.md' - 'concepts/gamma.md': 'mesh/index.md' - 'concepts/service-facets.md': 'mesh/service-facets.md' - 'concepts/guidelines.md': 'guides/api-design.md' - 'contributing/community.md': 'contributing/index.md' - 'contributing/gamma.md': 'mesh/index.md#contributing' - 'reference/implementers-guide.md': 'guides/implementers.md' - 'references/implementers-guide.md': 'guides/implementers.md' - 'references/spec.md': 'reference/spec.md' - 'references/policy-attachment.md': 'reference/policy-attachment.md' + module_name: mkdocs-main.py + # Do not add manual redirects here, they will be overwritten. Add them to + # hack/redirect_map.json if needed in exceptional cases, but the mkdocs_linking.py + # hook and accompanying macro largely negate the need for manual redirects: + # https://github.com/kubernetes-sigs/gateway-api/pull/3999 + - redirects: - mermaid2 markdown_extensions: - admonition