Skip to content

Commit 10d65c7

Browse files
committed
Merge remote-tracking branch 'upstream/develop' into misc-updates
2 parents bf670c3 + 7d0d91a commit 10d65c7

File tree

585 files changed

+96975
-175
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

585 files changed

+96975
-175
lines changed

src/cluecode/copyrights.py

+73-21
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from pygmars import Token
2626
from pygmars.tree import Tree
2727

28-
2928
from cluecode import copyrights_hint
3029
from textcode.markup import strip_known_markup_from_text
3130

@@ -107,8 +106,24 @@ def detect_copyrights(
107106
Strip markup from text if ``demarkup`` is True.
108107
Run for up to ``deadline`` seconds and return results found so far.
109108
"""
109+
from cluecode.linux_credits import detect_credits_authors
110+
110111
from textcode.analysis import numbered_text_lines
111112

113+
if include_authors:
114+
author_detections = list(detect_credits_authors(location))
115+
116+
if TRACE:
117+
logger_debug('detect_copyrights: detect_credits_authors')
118+
for detecta in author_detections:
119+
logger_debug(f' {detecta}')
120+
121+
# bail out if we have a credits file with credits
122+
if author_detections:
123+
for a in author_detections:
124+
yield a
125+
return
126+
112127
numbered_lines = list(numbered_text_lines(location, demarkup=True))
113128

114129
if TRACE or TRACE_TOK:
@@ -661,8 +676,9 @@ def build_detection_from_node(
661676
# Slovenian: avtorske pravice
662677
# Ukrainian: авторське право
663678

664-
# rare typo copyrighy
679+
# rare typos in copyright
665680
(r'^Copyrighy$', 'COPY'),
681+
(r'^Copyirght$', 'COPY'),
666682

667683
# OSGI
668684
(r'^Bundle-Copyright', 'COPY'),
@@ -904,6 +920,7 @@ def build_detection_from_node(
904920
(r'^[Ss]tring$', 'JUNK'),
905921
(r'^Implementation-Vendor$', 'JUNK'),
906922
(r'^dnl$', 'JUNK'),
923+
(r'^ifndef$', 'JUNK'),
907924

908925
(r'^as$', 'NN'),
909926
(r'^[Vv]isit$', 'JUNK'),
@@ -939,7 +956,6 @@ def build_detection_from_node(
939956
(r'^Add$', 'JUNK'),
940957
(r'^Average$', 'JUNK'),
941958
(r'^Taken$', 'JUNK'),
942-
(r'^LAWS\.?$', 'JUNK'),
943959
(r'^design$', 'JUNK'),
944960
(r'^Driver$', 'JUNK'),
945961
(r'^[Cc]ontribution\.?', 'JUNK'),
@@ -949,7 +965,7 @@ def build_detection_from_node(
949965
(r'^Last-Translator$', 'JUNK'),
950966
(r'^Translated$', 'JUNK'),
951967
(r'^OMAP730$', 'JUNK'),
952-
(r'^Law\.$', 'JUNK'),
968+
953969
(r'^dylid$', 'JUNK'),
954970
(r'^BeOS$', 'JUNK'),
955971
(r'^Generates?$', 'JUNK'),
@@ -991,7 +1007,6 @@ def build_detection_from_node(
9911007
(r'^Disclaimer$', 'JUNK'),
9921008
(r'^Directive.?$', 'JUNK'),
9931009
(r'^LAWS\,?$', 'JUNK'),
994-
(r'^[Ll]aws?,?$', 'JUNK'),
9951010
(r'^me$', 'JUNK'),
9961011
(r'^Derived$', 'JUNK'),
9971012
(r'^Limitations?$', 'JUNK'),
@@ -1062,7 +1077,15 @@ def build_detection_from_node(
10621077
(r'^Much$', 'JUNK'),
10631078
(r'^remains?,?$', 'JUNK'),
10641079
(r'^earlier$', 'JUNK'),
1065-
(r'^[lL]aws?$', 'JUNK'),
1080+
1081+
# there is a Mr. Law
1082+
(r'^Law[\.,]?$', 'NN'),
1083+
(r'^laws?[\.,]?$', 'JUNK'),
1084+
(r'^Laws[\.,]?$', 'JUNK'),
1085+
(r'^LAWS?[\.,]?$', 'JUNK'),
1086+
(r'^LAWS?$', 'NN'),
1087+
1088+
(r'^taken$', 'NN'),
10661089
(r'^Insert$', 'JUNK'),
10671090
(r'^url$', 'JUNK'),
10681091
(r'^[Ss]ee$', 'JUNK'),
@@ -1083,6 +1106,7 @@ def build_detection_from_node(
10831106
(r'^[Ii]nterfaces?,?$', 'JUNK'),
10841107
(r'^than$', 'JUNK'),
10851108
(r'^whom$', 'JUNK'),
1109+
(r'^Definitions?$', 'JUNK'),
10861110
(r'^However,?$', 'JUNK'),
10871111
(r'^[Cc]ollectively$', 'JUNK'),
10881112
(r'^following$', 'FOLLOWING'),
@@ -1190,7 +1214,8 @@ def build_detection_from_node(
11901214
(r'^[a-z]{3,10}[A-Z][a-z]{3,10}$', 'JUNK'),
11911215

11921216
(r'^\$?Guid$', 'JUNK'),
1193-
#(r'^Small$', 'NN'),
1217+
# there is a Mr Small
1218+
# (r'^Small$', 'NN'),
11941219
(r'^implementing$', 'JUNK'),
11951220
(r'^Unlike$', 'JUNK'),
11961221
(r'^using$', 'JUNK'),
@@ -1271,6 +1296,7 @@ def build_detection_from_node(
12711296
(r'^[Ss]tatements?.?$', 'JUNK'),
12721297
(r'^issues?.?$', 'JUNK'),
12731298
(r'^retain?.?$', 'JUNK'),
1299+
(r'^Sun3x$', 'JUNK'),
12741300

12751301
############################################################################
12761302
# Nouns and proper Nouns
@@ -1281,7 +1307,7 @@ def build_detection_from_node(
12811307
(r'^This_file_is_part_of_KDE$', 'NAME'),
12821308

12831309
# K.K. (a company suffix), needs special handling
1284-
(r'^K.K.,?$', 'NAME'),
1310+
(r'^K.K.,?$', 'COMP'),
12851311

12861312
# MIT is problematic
12871313
# With a comma, always CAPS (MIT alone is too error prone to be always tagged as CAPS
@@ -1362,6 +1388,7 @@ def build_detection_from_node(
13621388
(r'^DATED$', 'NN'),
13631389
(r'^Delay', 'NN'),
13641390
(r'^Derivative', 'NN'),
1391+
(r'^Direct$', 'NN'),
13651392
(r'^DISCLAIMED', 'NN'),
13661393
(r'^Docs?$', 'NN'),
13671394
(r'^DOCUMENTATION', 'NN'),
@@ -1451,10 +1478,13 @@ def build_detection_from_node(
14511478
(r'^GPLd?\.?$', 'NN'),
14521479
(r'^GPL\'d$', 'NN'),
14531480
(r'^Gnome$', 'NN'),
1481+
(r'^Port$', 'NN'),
14541482
(r'^GnuPG$', 'NN'),
14551483
(r'^Government.', 'NNP'),
14561484
(r'^OProfile$', 'NNP'),
14571485
(r'^Government$', 'COMP'),
1486+
# there is a Ms. Grant
1487+
(r'^Grant$', 'NNP'),
14581488
(r'^Grants?\.?,?$', 'NN'),
14591489
(r'^Header', 'NN'),
14601490
(r'^HylaFAX$', 'NN'),
@@ -1491,7 +1521,6 @@ def build_detection_from_node(
14911521
(r'^List$', 'NN'),
14921522
(r'^Set$', 'NN'),
14931523
(r'^Last$', 'NN'),
1494-
(r'^LAW', 'NN'),
14951524
(r'^Legal$', 'NN'),
14961525
(r'^LegalTrademarks$', 'NN'),
14971526
(r'^Library$', 'NN'),
@@ -1644,6 +1673,11 @@ def build_detection_from_node(
16441673
(r'^CodeMirror$', 'NN'),
16451674
(r'^They$', 'JUNK'),
16461675
(r'^Branched$', 'NN'),
1676+
(r'^Partial$', 'NN'),
1677+
(r'^Fixed$', 'NN'),
1678+
(r'^Later$', 'NN'),
1679+
(r'^Rear$', 'NN'),
1680+
(r'^Left$', 'NN'),
16471681

16481682
(r'^Improved$', 'NN'),
16491683
(r'^Designed$', 'NN'),
@@ -1712,11 +1746,12 @@ def build_detection_from_node(
17121746
(r'^Compression$', 'NN'),
17131747
(r'^Letter$', 'NN'),
17141748
(r'^Moved$', 'NN'),
1749+
(r'^More$', 'NN'),
17151750
(r'^Phone$', 'NN'),
1751+
(r'^[Tt]ests?$', 'JUNK'),
17161752

17171753
(r'^Inputs?$', 'NN'),
17181754

1719-
17201755
# dual caps that are not NNP
17211756
(r'^Make[A-Z]', 'JUNK'),
17221757
(r'^Create[A-Z]', 'JUNK'),
@@ -1904,12 +1939,11 @@ def build_detection_from_node(
19041939
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe|KG)[,\.]?$', 'COMP'),
19051940
# French SARL
19061941
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$', 'COMP'),
1907-
# More company suffix : a.s. in Czechia and otehrs
1942+
# More company suffixes: a.s. in Czechia and others
19081943
(r'^(a\.s\.|S\.r\.l\.?)$', 'COMP'),
19091944
(r'^Vertriebsges\.m\.b\.H\.?,?$', 'COMP'),
19101945
# Iceland
19111946
(r'^(ehf|hf|svf|ohf)\.,?$', 'COMP'),
1912-
19131947
# More company abbreviations
19141948
(r'^(SPRL|srl)[\.,]?$', 'COMP'),
19151949
# Poland
@@ -2176,6 +2210,7 @@ def build_detection_from_node(
21762210
(r'^Meridian\'93$', 'NNP'),
21772211
(r'^Xiph.Org$', 'NNP'),
21782212
(r'^iClick,?$', 'NNP'),
2213+
(r'^electronics?$', 'NNP'),
21792214

21802215
# proper nouns with digits
21812216
(r'^([A-Z][a-z0-9]+){1,2}[\.,]?$', 'NNP'),
@@ -2203,6 +2238,9 @@ def build_detection_from_node(
22032238
(r'^AT$', '<at>'),
22042239
(r'^DOT$', 'DOT'),
22052240

2241+
# exceptions to CAPS
2242+
(r'^MMC$', 'JUNK'),
2243+
22062244
# all CAPS word, at least 1 char long such as MIT, including an optional trailing comma
22072245
(r'^[A-Z0-9]+,?$', 'CAPS'),
22082246

@@ -2272,10 +2310,9 @@ def build_detection_from_node(
22722310
(r'__MyCompanyName__[\.,]?$', 'NAME'),
22732311

22742312
# email in brackets <brett_AT_jdom_DOT_org>
2275-
#(karl AT indy.rr.com)
2276-
#<fdlibm-comments AT sun.com>
2313+
# (karl AT indy.rr.com)
2314+
# <fdlibm-comments AT sun.com>
22772315
(r'(?i:^[<\(][\w\.\-\+]+at[\w\.\-\+]+(dot)?[\w\.\-\+]+[/)>]$)', 'EMAIL'),
2278-
22792316

22802317
# Code variable names including snake case
22812318
(r'^.*(_.*)+$', 'JUNK'),
@@ -2311,7 +2348,6 @@ def build_detection_from_node(
23112348
(r'^(?:=>|->|<-|<=)$', 'JUNK'),
23122349

23132350
(r'^semiconductors?[\.,]?$', 'NNP'),
2314-
23152351

23162352
############################################################################
23172353
# catch all other as Nouns
@@ -2589,6 +2625,7 @@ def build_detection_from_node(
25892625
NAME-YEAR: {<YR-RANGE> <NAME-EMAIL|COMPANY>+ <CC> <YR-RANGE>} #540
25902626
25912627
NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
2628+
25922629
NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
25932630
25942631
NAME: {<NNP><NNP>} #561
@@ -2622,8 +2659,13 @@ def build_detection_from_node(
26222659
#also accept trailing email and URLs
26232660
# and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <[email protected]>
26242661
NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
2662+
2663+
# Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
2664+
NAME-YEAR: {<NAME-YEAR> <NN> <DASH> <NAME>} # 5701.1
2665+
26252666
NAME-YEAR: {<NAME-YEAR>+} #5702
26262667
2668+
26272669
NAME: {<NNP> <OF> <NNP>} #580
26282670
NAME: {<NAME> <NNP>} #590
26292671
NAME: {<NN|NNP|CAPS>+ <CC> <OTH>} #600
@@ -2843,6 +2885,10 @@ def build_detection_from_node(
28432885
# Copyright (c) 2013-2015 Streams Standard Reference Implementation Authors
28442886
COPYRIGHT: {<COPY>+ <NAME-YEAR> <NN|NNP>+ <AUTHS>} #1566
28452887
2888+
# Nicolas Pitre, (c) 2002 Monta Vista Software Inc
2889+
# Cliff Brake, (c) 2001
2890+
#COPYRIGHT: {<NAME> <COPY> <NAME-YEAR> <NAME> <COPY> <YR-RANGE>} #1566.1
2891+
28462892
# copyright: Copyright (c) Joe Joyce and contributors, 2016-2019.
28472893
COPYRIGHT: {<COPY>+ <NAME> <CC> <NN> <YR-RANGE>} #1579992
28482894
@@ -3027,8 +3073,11 @@ def build_detection_from_node(
30273073
# Author: Jeff LaBundy <[email protected]>
30283074
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
30293075
3076+
30303077
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
30313078
3079+
COPYRIGHT: {<COPYRIGHT2> <BY> <NAME-YEAR|NAME-EMAIL> <BY>? <NAME-YEAR|NAME-EMAIL>? } #2280-4
3080+
30323081
# using #2280 above: Copyright 2018 Developers of the Rand project
30333082
COPYRIGHT: {<COPYRIGHT2> <MAINT> <OF> <COMPANY>} #2280.123
30343083
@@ -3151,7 +3200,8 @@ def build_detection_from_node(
31513200
COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD|CDS> <COMPANY> <NAME>} #2009.1
31523201
31533202
# COPYRIGHT (c) 2006 - 2009 DIONYSOS
3154-
COPYRIGHT: {<COPYRIGHT2> <CAPS>} #2009
3203+
# Copyright 2003 ICT CAS
3204+
COPYRIGHT: {<COPYRIGHT2> <CAPS>+} #2009
31553205
31563206
# Copyright (C) 2000 See Beyond Communications Corporation
31573207
COPYRIGHT2: {<COPYRIGHT2> <JUNK> <COMPANY>} # 2010
@@ -3349,7 +3399,7 @@ def build_detection_from_node(
33493399
#Copyright (C) 2012-2016 by the following authors:
33503400
#- Wladimir J. van der Laan <[email protected]>
33513401
3352-
NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3402+
NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
33533403
NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
33543404
COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
33553405
@@ -3888,6 +3938,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
38883938
'a',
38893939
'</p>',
38903940
'or',
3941+
'taken',
3942+
'from',
38913943
])
38923944

38933945
# these final holders are ignored.
@@ -4398,7 +4450,7 @@ def remove_code_comment_markers(s):
43984450
Return ``s`` removing code comments such as C and C++ style comment markers and assimilated
43994451
44004452
>>> remove_code_comment_markers(r"\\*#%; /\\/*a*/b/*c\\d#e%f \\*#%; /")
4401-
'a b c\\\d e f'
4453+
'a b c\\\\d e f'
44024454
"""
44034455
return (s
44044456
.replace('/*', ' ')
@@ -4474,7 +4526,7 @@ def prepare_text_line(line):
44744526
.replace('\\XA9', ' (c) ')
44754527
.replace('\\A9', ' (c) ')
44764528
.replace('\\a9', ' (c) ')
4477-
.replace('<A9>', ' (c) ')
4529+
.replace('<A9>', ' (c) ')
44784530
.replace('XA9;', ' (c) ')
44794531
.replace('Xa9;', ' (c) ')
44804532
.replace('xA9;', ' (c) ')
@@ -4525,7 +4577,7 @@ def prepare_text_line(line):
45254577
.replace('year>', " ")
45264578
.replace('<year>', " ")
45274579
.replace('<name>', " ")
4528-
4580+
45294581
)
45304582

45314583
if TRACE_TOK:

0 commit comments

Comments
 (0)