Skip to content

Commit

Permalink
aboutcode-org#3659 Fix copyright detection normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
arshad-muhammad committed Oct 4, 2024
1 parent 0d2ce3f commit 5f167e1
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,29 @@

from cluecode import copyrights_hint

from cluecode.normalizer import normalize_copyright_symbols

def detect_copyrights(file_path, write_back=True):
    """
    Return the text of the file at `file_path` after passing it through
    `normalize_copyright_symbols`.

    The file is read as UTF-8. When `write_back` is true (the default, which
    keeps the historical behavior) the normalized text is saved back to
    `file_path`, but only if normalization actually changed the content, so an
    already-normalized file is never rewritten. Pass `write_back=False` to
    leave the file untouched in every case.

    Raises whatever `open()` raises (for instance `FileNotFoundError`) and
    `UnicodeDecodeError` if the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Normalize the text before processing it
    normalized_text = normalize_copyright_symbols(text)

    # Skip the write when nothing changed: this avoids touching the mtime of,
    # or failing on, a read-only file that needed no normalization.
    # NOTE(review): text mode translates line endings on read and on write, so
    # a rewritten file gets the platform's native line endings.
    if write_back and normalized_text != text:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(normalized_text)

    return normalized_text

# Manual smoke run. Guarded so that importing this module never reads,
# rewrites or prints a file as a side effect; it only runs when the module is
# executed directly as a script.
if __name__ == "__main__":
    # Specify the path to your document directly here
    file_path = "./copyright.py"

    # Call the function and print the result
    normalized_content = detect_copyrights(file_path)
    print(normalized_content)

# Tracing flag: takes the value of the SCANCODE_DEBUG_COPYRIGHT environment
# variable when it is set, and is False otherwise. Change the leading False to
# True to force tracing on regardless of the environment.
TRACE = False or os.getenv('SCANCODE_DEBUG_COPYRIGHT', False)

Expand Down
23 changes: 23 additions & 0 deletions src/cluecode/copyrights_hint.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,29 @@
# A regex to match a string that may contain a copyright year.
# This is a year between 1960 and today prefixed and suffixed with
# either a white-space or some punctuation.
from cluecode.normalizer import normalize_copyright_symbols

def detect_copyrights(file_path, write_back=True):
    """
    Return the text of the file at `file_path` after passing it through
    `normalize_copyright_symbols`.

    The file is read as UTF-8. When `write_back` is true (the default, which
    keeps the historical behavior) the normalized text is saved back to
    `file_path`, but only if normalization actually changed the content, so an
    already-normalized file is never rewritten. Pass `write_back=False` to
    leave the file untouched in every case.

    Raises whatever `open()` raises (for instance `FileNotFoundError`) and
    `UnicodeDecodeError` if the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Normalize the text before processing it
    normalized_text = normalize_copyright_symbols(text)

    # Skip the write when nothing changed: this avoids touching the mtime of,
    # or failing on, a read-only file that needed no normalization.
    # NOTE(review): text mode translates line endings on read and on write, so
    # a rewritten file gets the platform's native line endings.
    if write_back and normalized_text != text:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(normalized_text)

    return normalized_text

# Manual smoke run. Guarded so that importing this module never reads,
# rewrites or prints a file as a side effect; it only runs when the module is
# executed directly as a script.
if __name__ == "__main__":
    # Specify the path to your document directly here
    file_path = "./copyright.py"

    # Call the function and print the result
    normalized_content = detect_copyrights(file_path)
    print(normalized_content)


# Every candidate copyright year as a string, from 1960 through the current
# year inclusive. range() excludes its stop value, hence the "+ 1"; without it
# the current year would never be matched.
all_years = tuple(
    str(year) for year in range(1960, datetime.today().year + 1)
)
# One of the years above, preceded by whitespace or punctuation and followed
# by whitespace, punctuation or the end of the string.
years = r'[\(\.,\-\)\s]+(' + '|'.join(all_years) + r')([\(\.,\-\)\s]+|$)'
Expand Down

0 comments on commit 5f167e1

Please sign in to comment.