25
25
from pygmars import Token
26
26
from pygmars .tree import Tree
27
27
28
-
29
28
from cluecode import copyrights_hint
30
29
from textcode .markup import strip_known_markup_from_text
31
30
@@ -107,8 +106,24 @@ def detect_copyrights(
107
106
Strip markup from text if ``demarkup`` is True.
108
107
Run for up to ``deadline`` seconds and return results found so far.
109
108
"""
109
+ from cluecode .linux_credits import detect_credits_authors
110
+
110
111
from textcode .analysis import numbered_text_lines
111
112
113
+ if include_authors :
114
+ author_detections = list (detect_credits_authors (location ))
115
+
116
+ if TRACE :
117
+ logger_debug ('detect_copyrights: detect_credits_authors' )
118
+ for detecta in author_detections :
119
+ logger_debug (f' { detecta } ' )
120
+
121
+ # bail out if we have a credits file with credits
122
+ if author_detections :
123
+ for a in author_detections :
124
+ yield a
125
+ return
126
+
112
127
numbered_lines = list (numbered_text_lines (location , demarkup = True ))
113
128
114
129
if TRACE or TRACE_TOK :
@@ -661,8 +676,9 @@ def build_detection_from_node(
661
676
# Slovenian: avtorske pravice
662
677
# Ukrainian: авторське право
663
678
664
- # rare typo copyrighy
679
+ # rare typos in copyright
665
680
(r'^Copyrighy$' , 'COPY' ),
681
+ (r'^Copyirght$' , 'COPY' ),
666
682
667
683
# OSGI
668
684
(r'^Bundle-Copyright' , 'COPY' ),
@@ -904,6 +920,7 @@ def build_detection_from_node(
904
920
(r'^[Ss]tring$' , 'JUNK' ),
905
921
(r'^Implementation-Vendor$' , 'JUNK' ),
906
922
(r'^dnl$' , 'JUNK' ),
923
+ (r'^ifndef$' , 'JUNK' ),
907
924
908
925
(r'^as$' , 'NN' ),
909
926
(r'^[Vv]isit$' , 'JUNK' ),
@@ -939,7 +956,6 @@ def build_detection_from_node(
939
956
(r'^Add$' , 'JUNK' ),
940
957
(r'^Average$' , 'JUNK' ),
941
958
(r'^Taken$' , 'JUNK' ),
942
- (r'^LAWS\.?$' , 'JUNK' ),
943
959
(r'^design$' , 'JUNK' ),
944
960
(r'^Driver$' , 'JUNK' ),
945
961
(r'^[Cc]ontribution\.?' , 'JUNK' ),
@@ -949,7 +965,7 @@ def build_detection_from_node(
949
965
(r'^Last-Translator$' , 'JUNK' ),
950
966
(r'^Translated$' , 'JUNK' ),
951
967
(r'^OMAP730$' , 'JUNK' ),
952
- ( r'^Law\.$' , 'JUNK' ),
968
+
953
969
(r'^dylid$' , 'JUNK' ),
954
970
(r'^BeOS$' , 'JUNK' ),
955
971
(r'^Generates?$' , 'JUNK' ),
@@ -991,7 +1007,6 @@ def build_detection_from_node(
991
1007
(r'^Disclaimer$' , 'JUNK' ),
992
1008
(r'^Directive.?$' , 'JUNK' ),
993
1009
(r'^LAWS\,?$' , 'JUNK' ),
994
- (r'^[Ll]aws?,?$' , 'JUNK' ),
995
1010
(r'^me$' , 'JUNK' ),
996
1011
(r'^Derived$' , 'JUNK' ),
997
1012
(r'^Limitations?$' , 'JUNK' ),
@@ -1062,7 +1077,15 @@ def build_detection_from_node(
1062
1077
(r'^Much$' , 'JUNK' ),
1063
1078
(r'^remains?,?$' , 'JUNK' ),
1064
1079
(r'^earlier$' , 'JUNK' ),
1065
- (r'^[lL]aws?$' , 'JUNK' ),
1080
+
1081
+ # there is a Mr. Law
1082
+ (r'^Law[\.,]?$' , 'NN' ),
1083
+ (r'^laws?[\.,]?$' , 'JUNK' ),
1084
+ (r'^Laws[\.,]?$' , 'JUNK' ),
1085
+ (r'^LAWS?[\.,]?$' , 'JUNK' ),
1086
+ (r'^LAWS?$' , 'NN' ),
1087
+
1088
+ (r'^taken$' , 'NN' ),
1066
1089
(r'^Insert$' , 'JUNK' ),
1067
1090
(r'^url$' , 'JUNK' ),
1068
1091
(r'^[Ss]ee$' , 'JUNK' ),
@@ -1083,6 +1106,7 @@ def build_detection_from_node(
1083
1106
(r'^[Ii]nterfaces?,?$' , 'JUNK' ),
1084
1107
(r'^than$' , 'JUNK' ),
1085
1108
(r'^whom$' , 'JUNK' ),
1109
+ (r'^Definitions?$' , 'JUNK' ),
1086
1110
(r'^However,?$' , 'JUNK' ),
1087
1111
(r'^[Cc]ollectively$' , 'JUNK' ),
1088
1112
(r'^following$' , 'FOLLOWING' ),
@@ -1190,7 +1214,8 @@ def build_detection_from_node(
1190
1214
(r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
1191
1215
1192
1216
(r'^\$?Guid$' , 'JUNK' ),
1193
- #(r'^Small$', 'NN'),
1217
+ # there is a Mr Small
1218
+ # (r'^Small$', 'NN'),
1194
1219
(r'^implementing$' , 'JUNK' ),
1195
1220
(r'^Unlike$' , 'JUNK' ),
1196
1221
(r'^using$' , 'JUNK' ),
@@ -1271,6 +1296,7 @@ def build_detection_from_node(
1271
1296
(r'^[Ss]tatements?.?$' , 'JUNK' ),
1272
1297
(r'^issues?.?$' , 'JUNK' ),
1273
1298
(r'^retain?.?$' , 'JUNK' ),
1299
+ (r'^Sun3x$' , 'JUNK' ),
1274
1300
1275
1301
############################################################################
1276
1302
# Nouns and proper Nouns
@@ -1281,7 +1307,7 @@ def build_detection_from_node(
1281
1307
(r'^This_file_is_part_of_KDE$' , 'NAME' ),
1282
1308
1283
1309
# K.K. (a company suffix), needs special handling
1284
- (r'^K.K.,?$' , 'NAME ' ),
1310
+ (r'^K.K.,?$' , 'COMP ' ),
1285
1311
1286
1312
# MIT is problematic
1287
1313
# With a comma, always CAPS (MIT alone is too error prone to be always tagged as CAPS)
@@ -1362,6 +1388,7 @@ def build_detection_from_node(
1362
1388
(r'^DATED$' , 'NN' ),
1363
1389
(r'^Delay' , 'NN' ),
1364
1390
(r'^Derivative' , 'NN' ),
1391
+ (r'^Direct$' , 'NN' ),
1365
1392
(r'^DISCLAIMED' , 'NN' ),
1366
1393
(r'^Docs?$' , 'NN' ),
1367
1394
(r'^DOCUMENTATION' , 'NN' ),
@@ -1451,10 +1478,13 @@ def build_detection_from_node(
1451
1478
(r'^GPLd?\.?$' , 'NN' ),
1452
1479
(r'^GPL\'d$' , 'NN' ),
1453
1480
(r'^Gnome$' , 'NN' ),
1481
+ (r'^Port$' , 'NN' ),
1454
1482
(r'^GnuPG$' , 'NN' ),
1455
1483
(r'^Government.' , 'NNP' ),
1456
1484
(r'^OProfile$' , 'NNP' ),
1457
1485
(r'^Government$' , 'COMP' ),
1486
+ # there is a Ms. Grant
1487
+ (r'^Grant$' , 'NNP' ),
1458
1488
(r'^Grants?\.?,?$' , 'NN' ),
1459
1489
(r'^Header' , 'NN' ),
1460
1490
(r'^HylaFAX$' , 'NN' ),
@@ -1491,7 +1521,6 @@ def build_detection_from_node(
1491
1521
(r'^List$' , 'NN' ),
1492
1522
(r'^Set$' , 'NN' ),
1493
1523
(r'^Last$' , 'NN' ),
1494
- (r'^LAW' , 'NN' ),
1495
1524
(r'^Legal$' , 'NN' ),
1496
1525
(r'^LegalTrademarks$' , 'NN' ),
1497
1526
(r'^Library$' , 'NN' ),
@@ -1644,6 +1673,11 @@ def build_detection_from_node(
1644
1673
(r'^CodeMirror$' , 'NN' ),
1645
1674
(r'^They$' , 'JUNK' ),
1646
1675
(r'^Branched$' , 'NN' ),
1676
+ (r'^Partial$' , 'NN' ),
1677
+ (r'^Fixed$' , 'NN' ),
1678
+ (r'^Later$' , 'NN' ),
1679
+ (r'^Rear$' , 'NN' ),
1680
+ (r'^Left$' , 'NN' ),
1647
1681
1648
1682
(r'^Improved$' , 'NN' ),
1649
1683
(r'^Designed$' , 'NN' ),
@@ -1712,11 +1746,12 @@ def build_detection_from_node(
1712
1746
(r'^Compression$' , 'NN' ),
1713
1747
(r'^Letter$' , 'NN' ),
1714
1748
(r'^Moved$' , 'NN' ),
1749
+ (r'^More$' , 'NN' ),
1715
1750
(r'^Phone$' , 'NN' ),
1751
+ (r'^[Tt]ests?$' , 'JUNK' ),
1716
1752
1717
1753
(r'^Inputs?$' , 'NN' ),
1718
1754
1719
-
1720
1755
# dual caps that are not NNP
1721
1756
(r'^Make[A-Z]' , 'JUNK' ),
1722
1757
(r'^Create[A-Z]' , 'JUNK' ),
@@ -1904,12 +1939,11 @@ def build_detection_from_node(
1904
1939
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe|KG)[,\.]?$' , 'COMP' ),
1905
1940
# French SARL
1906
1941
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$' , 'COMP' ),
1907
- # More company suffix : a.s. in Czechia and otehrs
1942
+ # More company suffix : a.s. in Czechia and others
1908
1943
(r'^(a\.s\.|S\.r\.l\.?)$' , 'COMP' ),
1909
1944
(r'^Vertriebsges\.m\.b\.H\.?,?$' , 'COMP' ),
1910
1945
# Iceland
1911
1946
(r'^(ehf|hf|svf|ohf)\.,?$' , 'COMP' ),
1912
-
1913
1947
# More company abbreviations
1914
1948
(r'^(SPRL|srl)[\.,]?$' , 'COMP' ),
1915
1949
# Poland
@@ -2176,6 +2210,7 @@ def build_detection_from_node(
2176
2210
(r'^Meridian\'93$' , 'NNP' ),
2177
2211
(r'^Xiph.Org$' , 'NNP' ),
2178
2212
(r'^iClick,?$' , 'NNP' ),
2213
+ (r'^electronics?$' , 'NNP' ),
2179
2214
2180
2215
# proper nouns with digits
2181
2216
(r'^([A-Z][a-z0-9]+){1,2}[\.,]?$' , 'NNP' ),
@@ -2203,6 +2238,9 @@ def build_detection_from_node(
2203
2238
(r'^AT$' , '<at>' ),
2204
2239
(r'^DOT$' , 'DOT' ),
2205
2240
2241
+ # exceptions to CAPS
2242
+ (r'^MMC$' , 'JUNK' ),
2243
+
2206
2244
# all CAPS word, at least 1 char long such as MIT, including an optional trailing comma or dot
2207
2245
(r'^[A-Z0-9]+,?$' , 'CAPS' ),
2208
2246
@@ -2272,10 +2310,9 @@ def build_detection_from_node(
2272
2310
(r'__MyCompanyName__[\.,]?$' , 'NAME' ),
2273
2311
2274
2312
# email in brackets <brett_AT_jdom_DOT_org>
2275
- #(karl AT indy.rr.com)
2276
- #<fdlibm-comments AT sun.com>
2313
+ # (karl AT indy.rr.com)
2314
+ # <fdlibm-comments AT sun.com>
2277
2315
(r'(?i:^[<\(][\w\.\-\+]+at[\w\.\-\+]+(dot)?[\w\.\-\+]+[/)>]$)' , 'EMAIL' ),
2278
-
2279
2316
2280
2317
# Code variable names including snake case
2281
2318
(r'^.*(_.*)+$' , 'JUNK' ),
@@ -2311,7 +2348,6 @@ def build_detection_from_node(
2311
2348
(r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
2312
2349
2313
2350
(r'^semiconductors?[\.,]?$' , 'NNP' ),
2314
-
2315
2351
2316
2352
############################################################################
2317
2353
# catch all other as Nouns
@@ -2589,6 +2625,7 @@ def build_detection_from_node(
2589
2625
NAME-YEAR: {<YR-RANGE> <NAME-EMAIL|COMPANY>+ <CC> <YR-RANGE>} #540
2590
2626
2591
2627
NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
2628
+
2592
2629
NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
2593
2630
2594
2631
NAME: {<NNP><NNP>} #561
@@ -2622,8 +2659,13 @@ def build_detection_from_node(
2622
2659
#also accept trailing email and URLs
2623
2660
# and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <[email protected] >
2624
2661
NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
2662
+
2663
+ # Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
2664
+ NAME-YEAR: {<NAME-YEAR> <NN> <DASH> <NAME>} # 5701.1
2665
+
2625
2666
NAME-YEAR: {<NAME-YEAR>+} #5702
2626
2667
2668
+
2627
2669
NAME: {<NNP> <OF> <NNP>} #580
2628
2670
NAME: {<NAME> <NNP>} #590
2629
2671
NAME: {<NN|NNP|CAPS>+ <CC> <OTH>} #600
@@ -2843,6 +2885,10 @@ def build_detection_from_node(
2843
2885
# Copyright (c) 2013-2015 Streams Standard Reference Implementation Authors
2844
2886
COPYRIGHT: {<COPY>+ <NAME-YEAR> <NN|NNP>+ <AUTHS>} #1566
2845
2887
2888
+ # Nicolas Pitre, (c) 2002 Monta Vista Software Inc
2889
+ # Cliff Brake, (c) 2001
2890
+ #COPYRIGHT: {<NAME> <COPY> <NAME-YEAR> <NAME> <COPY> <YR-RANGE>} #1566.1
2891
+
2846
2892
# copyright: Copyright (c) Joe Joyce and contributors, 2016-2019.
2847
2893
COPYRIGHT: {<COPY>+ <NAME> <CC> <NN> <YR-RANGE>} #1579992
2848
2894
@@ -3027,8 +3073,11 @@ def build_detection_from_node(
3027
3073
# Author: Jeff LaBundy <[email protected] >
3028
3074
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
3029
3075
3076
+
3030
3077
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
3031
3078
3079
+ COPYRIGHT: {<COPYRIGHT2> <BY> <NAME-YEAR|NAME-EMAIL> <BY>? <NAME-YEAR|NAME-EMAIL>? } #2280-4
3080
+
3032
3081
# using #2280 above: Copyright 2018 Developers of the Rand project
3033
3082
COPYRIGHT: {<COPYRIGHT2> <MAINT> <OF> <COMPANY>} #2280.123
3034
3083
@@ -3151,7 +3200,8 @@ def build_detection_from_node(
3151
3200
COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD|CDS> <COMPANY> <NAME>} #2009.1
3152
3201
3153
3202
# COPYRIGHT (c) 2006 - 2009 DIONYSOS
3154
- COPYRIGHT: {<COPYRIGHT2> <CAPS>} #2009
3203
+ # Copyright 2003 ICT CAS
3204
+ COPYRIGHT: {<COPYRIGHT2> <CAPS>+} #2009
3155
3205
3156
3206
# Copyright (C) 2000 See Beyond Communications Corporation
3157
3207
COPYRIGHT2: {<COPYRIGHT2> <JUNK> <COMPANY>} # 2010
@@ -3349,7 +3399,7 @@ def build_detection_from_node(
3349
3399
#Copyright (C) 2012-2016 by the following authors:
3350
3400
#- Wladimir J. van der Laan <[email protected] >
3351
3401
3352
- NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3402
+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3353
3403
NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3354
3404
COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
3355
3405
@@ -3888,6 +3938,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
3888
3938
'a' ,
3889
3939
'</p>' ,
3890
3940
'or' ,
3941
+ 'taken' ,
3942
+ 'from' ,
3891
3943
])
3892
3944
3893
3945
# these final holders are ignored.
@@ -4398,7 +4450,7 @@ def remove_code_comment_markers(s):
4398
4450
Return ``s`` removing code comments such as C and C++ style comment markers and assimilated
4399
4451
4400
4452
>>> remove_code_comment_markers(r"\\ *#%; /\\ /*a*/b/*c\\ d#e%f \\ *#%; /")
4401
- 'a b c\\ \d e f'
4453
+ 'a b c\\ \\ d e f'
4402
4454
"""
4403
4455
return (s
4404
4456
.replace ('/*' , ' ' )
@@ -4474,7 +4526,7 @@ def prepare_text_line(line):
4474
4526
.replace ('\\ XA9' , ' (c) ' )
4475
4527
.replace ('\\ A9' , ' (c) ' )
4476
4528
.replace ('\\ a9' , ' (c) ' )
4477
- .replace ('<A9>' , ' (c) ' )
4529
+ .replace ('<A9>' , ' (c) ' )
4478
4530
.replace ('XA9;' , ' (c) ' )
4479
4531
.replace ('Xa9;' , ' (c) ' )
4480
4532
.replace ('xA9;' , ' (c) ' )
@@ -4525,7 +4577,7 @@ def prepare_text_line(line):
4525
4577
.replace ('year>' , " " )
4526
4578
.replace ('<year>' , " " )
4527
4579
.replace ('<name>' , " " )
4528
-
4580
+
4529
4581
)
4530
4582
4531
4583
if TRACE_TOK :
0 commit comments