|
| 1 | +"""Tests for sanitization utilities.""" |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from lightspeed_evaluation.core.constants import MAX_RUN_NAME_LENGTH |
| 6 | +from lightspeed_evaluation.core.utils import sanitize_run_name |
| 7 | + |
| 8 | + |
class TestSanitizeRunName:
    """Behavioral tests for ``sanitize_run_name``.

    Each test pins one observable rule of the sanitizer: trimming,
    replacement of unsafe characters, collapsing of separator runs,
    stripping of edge underscores, length capping, and Unicode handling.
    """

    def test_basic_alphanumeric(self):
        """Already-safe names are returned unchanged."""
        assert sanitize_run_name("test123") == "test123"
        assert sanitize_run_name("rh124_filesystem_basics") == "rh124_filesystem_basics"

    def test_empty_string(self):
        """An empty input yields an empty output."""
        assert sanitize_run_name("") == ""

    def test_whitespace_trimming(self):
        """Leading and trailing whitespace is removed."""
        assert sanitize_run_name(" test ") == "test"
        assert sanitize_run_name("\ttest\n") == "test"
        # Padding wider than one character must be trimmed completely too.
        assert sanitize_run_name("   test   ") == "test"

    def test_filesystem_unsafe_characters(self):
        """Each filesystem-unsafe character is replaced with an underscore."""
        unsafe_chars = ["/", "\\", ":", "*", "?", '"', "'", "`", "<", ">", "|"]
        for char in unsafe_chars:
            # The message names the character so a failure is easy to locate.
            assert sanitize_run_name(f"test{char}run") == "test_run", (
                f"unsafe character {char!r} was not replaced"
            )

    def test_multiple_special_characters(self):
        """Several unsafe characters in one name are all replaced."""
        assert sanitize_run_name("test/run:123") == "test_run_123"
        assert sanitize_run_name("rh124: filesystem basics") == "rh124_filesystem_basics"
        assert sanitize_run_name("test's `command`") == "test_s_command"

    def test_space_collapsing(self):
        """A run of spaces becomes a single underscore."""
        assert sanitize_run_name("multiple spaces") == "multiple_spaces"
        assert sanitize_run_name("test run") == "test_run"
        # Inputs with genuine runs of spaces: without these the test would
        # also pass for a plain one-to-one space -> underscore replacement.
        assert sanitize_run_name("multiple    spaces") == "multiple_spaces"
        assert sanitize_run_name("test   run") == "test_run"

    def test_underscore_collapsing(self):
        """A run of underscores becomes a single underscore."""
        assert sanitize_run_name("test___run") == "test_run"
        assert sanitize_run_name("test_____run") == "test_run"

    def test_mixed_whitespace_underscore_collapsing(self):
        """Mixed runs of spaces and underscores become a single underscore."""
        assert sanitize_run_name("test _ _ run") == "test_run"
        assert sanitize_run_name("test _ run") == "test_run"

    def test_leading_trailing_underscores_stripped(self):
        """Underscores at either end of the result are removed."""
        assert sanitize_run_name("/test/") == "test"
        assert sanitize_run_name(":test:") == "test"
        assert sanitize_run_name("_test_") == "test"

    def test_max_length_enforcement(self):
        """Names longer than ``MAX_RUN_NAME_LENGTH`` are truncated to it."""
        long_string = "a" * (MAX_RUN_NAME_LENGTH + 50)
        result = sanitize_run_name(long_string)
        assert len(result) <= MAX_RUN_NAME_LENGTH
        assert result == "a" * MAX_RUN_NAME_LENGTH

    def test_max_length_with_trailing_underscores(self):
        """Truncation must not leave a trailing underscore behind."""
        # The underscore sits exactly at the cut-off position, so a naive
        # slice to MAX_RUN_NAME_LENGTH would end with "_".
        long_string = "a" * (MAX_RUN_NAME_LENGTH - 1) + "_" + "b" * 50
        result = sanitize_run_name(long_string)
        assert len(result) <= MAX_RUN_NAME_LENGTH
        assert not result.endswith("_")

    def test_control_characters(self):
        """ASCII control characters are replaced with an underscore."""
        assert sanitize_run_name("test\x00run") == "test_run"
        assert sanitize_run_name("test\x1frun") == "test_run"

    def test_unicode_characters_preserved(self):
        """Non-ASCII characters (emoji, kana, kanji, hanzi) are preserved."""
        # Emojis
        assert sanitize_run_name("test🚀run") == "test🚀run"
        assert sanitize_run_name("📊evaluation") == "📊evaluation"

        # Japanese text
        assert sanitize_run_name("テスト実行") == "テスト実行"
        assert sanitize_run_name("test_日本語_run") == "test_日本語_run"

        # Chinese characters
        assert sanitize_run_name("测试运行") == "测试运行"

        # Mix of Unicode and ASCII
        assert sanitize_run_name("test_🎯_goal") == "test_🎯_goal"

    def test_unicode_with_unsafe_characters(self):
        """Unsafe characters are replaced while surrounding Unicode is kept."""
        assert sanitize_run_name("テスト/実行") == "テスト_実行"
        assert sanitize_run_name("test🚀:run") == "test🚀_run"
        assert sanitize_run_name("評価 💯 test") == "評価_💯_test"

    def test_real_world_yaml_filenames(self):
        """Names resembling real YAML file stems sanitize as expected."""
        assert sanitize_run_name("rh124_lesson_01") == "rh124_lesson_01"
        assert sanitize_run_name("filesystem-basics") == "filesystem-basics"
        assert sanitize_run_name("Module 1: Introduction") == "Module_1_Introduction"
        # Parentheses are valid and must be left in place.
        assert sanitize_run_name("test (copy)") == "test_(copy)"
0 commit comments