diff --git a/tools/license_header.py b/tools/license_header.py index 0211c2c9feb5..4afd987f24e8 100755 --- a/tools/license_header.py +++ b/tools/license_header.py @@ -58,7 +58,10 @@ under the License.""" # if a file contains any str in the list, then consider it has been licensed -_LICENSE_PATTERNS = ['Licensed to the Apache Software Foundation'] +_APACHE_LICENSE_PATTERNS = ['Licensed to the Apache Software Foundation'] +_OTHER_LICENSE_PATTERNS = ['THE SOFTWARE IS PROVIDED \"AS IS\"', + 'THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS'] +TOP_LEVEL_LICENSE_FILE = 'LICENSE' # the folders or files that will be ignored _WHITE_LIST = [ @@ -107,9 +110,12 @@ # Julia package metadata, generated by Pkg3.jl 'julia/Project.toml', - + # Licensed under Apache 2.0 license - 'example/image-classification/predict-cpp/image-classification-predict.cc' + 'example/image-classification/predict-cpp/image-classification-predict.cc', + + # This file + 'tools/license_header.py' ] # language extensions and the according commment mark @@ -135,8 +141,46 @@ def is_mxnet_root(path: str) -> bool: return curpath -def _lines_have_license(lines): - return any([any([p in l for p in _LICENSE_PATTERNS]) for l in lines]) +def _lines_have_multiple_license(lines): + has_apache_license = False + has_other_license = False + for l in lines: + if any(p in l for p in _APACHE_LICENSE_PATTERNS): + has_apache_license = True + elif any(p in l for p in _OTHER_LICENSE_PATTERNS): + has_other_license = True + return (has_apache_license and has_other_license) + + +def _lines_have_apache_license(lines): + return any([any([p in l for p in _APACHE_LICENSE_PATTERNS]) for l in lines]) + + +def _file_listed_in_top_level_license(fname): + with open(TOP_LEVEL_LICENSE_FILE, 'r', encoding="utf-8") as f: + lines = f.readlines() + return any([fname in l for l in lines]) + + +def file_have_valid_license(fname): + with open(fname, 'r', encoding="utf-8") as f: + lines = f.readlines() + if not lines: + return True + if (_lines_have_apache_license(lines) and (not _lines_have_multiple_license(lines))): + return True + elif _lines_have_multiple_license(lines): + if _file_listed_in_top_level_license(fname): + return True + else: + logging.error("File %s has multiple license", fname) + return False + else: + if _file_listed_in_top_level_license(fname): + return True + else: + logging.error("File %s doesn't have a valid license", fname) + return False def _get_license(comment_mark): @@ -173,13 +217,7 @@ def file_has_license(fname): if not should_have_license(fname): return True try: - with open(fname, 'r', encoding="utf-8") as f: - lines = f.readlines() - if not lines or _lines_have_license(lines): - return True - else: - logging.error("File %s doesn't have a license", fname) - return False + return file_have_valid_license(fname) except UnicodeError: return True return True @@ -188,9 +226,7 @@ def file_has_license(fname): def file_add_license(fname): if not should_have_license(fname): return - with open(fname, 'r', encoding="utf-8") as f: - lines = f.readlines() - if _lines_have_license(lines): + if file_have_valid_license(fname): return _, ext = os.path.splitext(fname) with open(fname, 'w', encoding="utf-8") as f: @@ -260,7 +296,8 @@ def main(): files = file_generator(get_mxnet_root()) if action == 'check': - if not all(map(file_has_license, files)): + logging.info("Start to check %d files", (len(files))) + if False in list(map(file_has_license, files)): return 1 else: logging.info("All known and whitelisted files have license")