aboutcode-org · arshad-muhammad · Oct 4, 2024 · Oct 4, 2024
diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py
@@ -26,6 +26,29 @@
 
 from cluecode import copyrights_hint
 
+from cluecode.normalizer import normalize_copyright_symbols
+
+def detect_copyrights(file_path, *, write_back=True):
+    """Return file_path's UTF-8 text run through normalize_copyright_symbols.
+
+    write_back=True (the default, as before) saves a changed text back over
+    file_path; pass write_back=False to leave the file on disk untouched.
+    """
+    with open(file_path, 'r', encoding='utf-8') as file:
+        text = file.read()
+    normalized_text = normalize_copyright_symbols(text)
+    if write_back and normalized_text != text:  # never rewrite a no-op
+        with open(file_path, 'w', encoding='utf-8') as file:
+            file.write(normalized_text)
+    return normalized_text
+
+# Demo entry point, guarded so that merely importing this module does not
+# open, rewrite and print "./copyright.py" from the current directory.
+if __name__ == "__main__":
+    file_path = "./copyright.py"
+    normalized_content = detect_copyrights(file_path)
+    print(normalized_content)
+
 # Tracing flags
 TRACE = False or os.environ.get('SCANCODE_DEBUG_COPYRIGHT', False)
 

diff --git a/src/cluecode/copyrights_hint.py b/src/cluecode/copyrights_hint.py
@@ -14,6 +14,29 @@
 # A regex to match a string that may contain a copyright year.
 # This is a year between 1960 and today prefixed and suffixed with
 # either a white-space or some punctuation.
+from cluecode.normalizer import normalize_copyright_symbols
+
+def detect_copyrights(file_path, *, write_back=True):
+    """Return file_path's UTF-8 text run through normalize_copyright_symbols.
+
+    write_back=True (the default, as before) saves a changed text back over
+    file_path; pass write_back=False to leave the file on disk untouched.
+    """
+    with open(file_path, 'r', encoding='utf-8') as file:
+        text = file.read()
+    normalized_text = normalize_copyright_symbols(text)
+    if write_back and normalized_text != text:  # never rewrite a no-op
+        with open(file_path, 'w', encoding='utf-8') as file:
+            file.write(normalized_text)
+    return normalized_text
+
+# Demo entry point, guarded so that merely importing this module does not
+# open, rewrite and print "./copyright.py" from the current directory.
+if __name__ == "__main__":
+    file_path = "./copyright.py"
+    normalized_content = detect_copyrights(file_path)
+    print(normalized_content)
+
 
 all_years = tuple(str(year) for year in range(1960, datetime.today().year))
 years = r'[\(\.,\-\)\s]+(' + '|'.join(all_years) + r')([\(\.,\-\)\s]+|$)'

diff --git a/src/cluecode/normalizer.py b/src/cluecode/normalizer.py
@@ -0,0 +1,9 @@
+import re
+
+def normalize_copyright_symbols(text):
+    """
+    Return ``text`` with each bracketed ``[C]`` or ``[c]`` marker rewritten
+    as ``(C)``, to help copyright detection.
+    """
+    bracketed_c = re.compile(r'\[C\]', re.IGNORECASE)
+    return bracketed_c.sub('(C)', text)