Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add detection of Twitter bot #2487

Merged
merged 1 commit into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 57 additions & 37 deletions src/lib/inc_generated/ndpi_crawlers_match.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x22628880 /* 34.98.136.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x226288C0 /* 34.98.136.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628900 /* 34.98.137.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628A00 /* 34.98.138.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628A40 /* 34.98.138.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628A00 /* 34.98.138.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628B00 /* 34.98.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628B80 /* 34.98.139.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22628C00 /* 34.98.140.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
Expand Down Expand Up @@ -87,12 +86,15 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x22742400 /* 34.116.36.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742440 /* 34.116.36.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742500 /* 34.116.37.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742700 /* 34.116.39.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742800 /* 34.116.40.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742900 /* 34.116.41.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x22742B00 /* 34.116.43.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x22764200 /* 34.118.66.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x2276FE00 /* 34.118.254.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x227EB260 /* 34.126.178.96/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x227F8000 /* 34.127.128.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x227F8040 /* 34.127.128.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x22929690 /* 34.146.150.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x22936E90 /* 34.147.110.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x22974A90 /* 34.151.74.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -119,15 +121,16 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x23BB8A00 /* 35.187.138.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8C00 /* 35.187.140.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8C80 /* 35.187.140.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8D00 /* 35.187.141.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8D00 /* 35.187.141.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8D40 /* 35.187.141.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8D80 /* 35.187.141.128/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8E00 /* 35.187.142.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8E40 /* 35.187.142.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8F00 /* 35.187.143.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23BB8F40 /* 35.187.143.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31000 /* 35.243.16.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31080 /* 35.243.16.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31100 /* 35.243.17.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31140 /* 35.243.17.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31100 /* 35.243.17.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31200 /* 35.243.18.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31240 /* 35.243.18.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x23F31300 /* 35.243.19.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -141,6 +144,8 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x23F7F3F0 /* 35.247.243.240/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x284D8B00 /* 40.77.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x284DA700 /* 40.77.167.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x284DB100 /* 40.77.177.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x284DB200 /* 40.77.178.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x284DBC00 /* 40.77.188.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x284DCA00 /* 40.77.202.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x284F83D0 /* 40.79.131.208/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -149,15 +154,20 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x33694300 /* 51.105.67.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x34A79000 /* 52.167.144.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x34E79400 /* 52.231.148.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x398D0000 /* 57.141.0.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x398D0800 /* 57.141.8.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x398D0C00 /* 57.141.12.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x39900000 /* 57.144.0.0/14 */, 14, NDPI_HTTP_CRAWLER_BOT },
{ 0x4137D200 /* 65.55.210.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94000 /* 66.249.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94000 /* 66.249.64.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94080 /* 66.249.64.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F940E0 /* 66.249.64.224/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94100 /* 66.249.65.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F941A0 /* 66.249.65.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F941C0 /* 66.249.65.192/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94200 /* 66.249.66.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94280 /* 66.249.66.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F942A0 /* 66.249.66.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F942C0 /* 66.249.66.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94400 /* 66.249.68.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94440 /* 66.249.68.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -171,12 +181,15 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x42F94E00 /* 66.249.78.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F94F00 /* 66.249.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95700 /* 66.249.87.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95900 /* 66.249.89.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95900 /* 66.249.89.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95980 /* 66.249.89.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F959E0 /* 66.249.89.224/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95A00 /* 66.249.90.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95A60 /* 66.249.90.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95A80 /* 66.249.90.128/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95B00 /* 66.249.91.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95C00 /* 66.249.92.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95C00 /* 66.249.92.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95C60 /* 66.249.92.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95C80 /* 66.249.92.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x42F95CC0 /* 66.249.92.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -195,18 +208,22 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x66846000 /* 102.132.96.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x67046000 /* 103.4.96.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C000 /* 107.178.192.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C080 /* 107.178.192.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C0A0 /* 107.178.192.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C0C0 /* 107.178.192.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C100 /* 107.178.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C100 /* 107.178.193.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C180 /* 107.178.193.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C1C0 /* 107.178.193.192/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C200 /* 107.178.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C300 /* 107.178.195.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C360 /* 107.178.195.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C380 /* 107.178.195.128/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2C400 /* 107.178.196.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2CA00 /* 107.178.202.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2CA80 /* 107.178.202.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2CAC0 /* 107.178.202.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2CB00 /* 107.178.203.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x6BB2E000 /* 107.178.224.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x6CB10200 /* 108.177.2.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x81860000 /* 129.134.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT },
{ 0x8BD93400 /* 139.217.52.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -219,9 +236,13 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0xB93CD800 /* 185.60.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959D800 /* 185.89.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xBFE9CCE0 /* 191.233.204.224/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0xC0854C00 /* 192.133.76.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xC0B20500 /* 192.178.5.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0xC0B20600 /* 192.178.6.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0xC0B21100 /* 192.178.17.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0xC7109C00 /* 199.16.156.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xC71E1800 /* 199.30.24.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0xC73B9400 /* 199.59.148.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xCF2E0D00 /* 207.46.13.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xD155EE00 /* 209.85.238.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -230,15 +251,14 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
};

static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2001:4860:4801:2::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:c::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:f::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:10::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:18::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:1c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:1e::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:10::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:20::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:30::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:31::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:32::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:34::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:38::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:3c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:3e::", 64, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -254,14 +274,13 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2001:4860:4801:80::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:88::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:90::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2008::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2008::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:200c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2010::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2018::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:201c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:201e::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2010::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2020::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2030::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2031::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2032::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2034::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:2038::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:203c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2001:4860:4801:203e::", 64, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -284,7 +303,7 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2404:f340:4010:4004::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2404:f340:4010:4006::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:8::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:c::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:f::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:10::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:15::", 64, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -298,7 +317,9 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2600:1900:0:34::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:36::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:40::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:50::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:51::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:52::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:54::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:58::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:5c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:5e::", 64, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -314,18 +335,20 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2600:1900:0:90::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:94::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:a0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:a4::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:a4::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:b0::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:b8::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:c0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:c4::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:c6::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:d0::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:e0::", 59, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:e0::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:f0::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:f8::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:fa::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:fc::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:100::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:110::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:114::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:116::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:110::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:118::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:11a::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:11c::", 62, NDPI_HTTP_CRAWLER_BOT },
Expand All @@ -341,18 +364,14 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2600:1900:0:164::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:166::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:170::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:180::", 61, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:188::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:18c::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:18e::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:180::", 60, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:190::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:192::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1a0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1c0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1c4::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1d0::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1e0::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1e2::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1e0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1f0::", 62, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:1f4::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:200::", 62, NDPI_HTTP_CRAWLER_BOT },
Expand Down Expand Up @@ -384,10 +403,11 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = {
{ "2600:1900:0:330::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:332::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:340::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:350::", 63, NDPI_HTTP_CRAWLER_BOT },
{ "2600:1900:0:352::", 64, NDPI_HTTP_CRAWLER_BOT },
{ "2620:0:1c00::", 40, NDPI_HTTP_CRAWLER_BOT },
{ "2a03:2880::", 31, NDPI_HTTP_CRAWLER_BOT },
{ "2a03:2887:ff2c::", 47, NDPI_HTTP_CRAWLER_BOT },
{ "2a03:2887:ff42::", 48, NDPI_HTTP_CRAWLER_BOT },
{ "2a03:83e0::", 32, NDPI_HTTP_CRAWLER_BOT },
{ "2a10:f781:10:cee0::", 64, NDPI_HTTP_CRAWLER_BOT },
/* End */
Expand Down
12 changes: 11 additions & 1 deletion utils/crawlers_ip_addresses_download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ TMP2=/tmp/bot_google_c2.json
TMP3=/tmp/bot_google_c3.json
TMP_BING=/tmp/bot_bing.json
TMP_FB=/tmp/bot_fb.list
TMP_TW=/tmp/bot_tw.list
LIST=/tmp/bot.list
LIST6=/tmp/bot.list6
LIST_MERGED=/tmp/bot.list_m
Expand All @@ -24,6 +25,8 @@ ORIGIN3="https://developers.google.com/static/search/apis/ipranges/user-triggere
#Bing Bot
ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"
#Facebook Bot: https://developers.facebook.com/docs/sharing/webmasters/crawler/
#TwitterBot
ORIGIN_TW="https://developer.x.com/en/docs/twitter-for-websites/cards/guides/troubleshooting-cards"

echo "(1) Downloading file... ${ORIGIN1}"
http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
Expand All @@ -49,13 +52,20 @@ echo "(1) Downloading FB crawlers routes... "
whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB
is_file_empty "${TMP_FB}"

echo "(1) Downloading page... ${ORIGIN_TW}"
http_response=$(curl -s -o $TMP_TW -w "%{http_code}" ${ORIGIN_TW})
check_http_response "${http_response}"
is_file_empty "${TMP_TW}"


echo "(2) Processing IP addresses..."
{
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP1
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP2
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP3
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING
grep -v route6 $TMP_FB | tr -d 'route:^ '
grep "IP ranges are" $TMP_TW | grep -E -o "[^^][0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,2}" | tr -d ' ' # TODO: ipv4 only
} > $LIST
is_file_empty "${LIST}"
./mergeipaddrlist.py "${LIST}" > "${LIST_MERGED}"
Expand All @@ -72,7 +82,7 @@ is_file_empty "${LIST6}"
is_file_empty "${LIST6_MERGED}"
./ipaddr2list.py $LIST_MERGED NDPI_HTTP_CRAWLER_BOT $LIST6_MERGED > $DEST
is_file_empty "${DEST}"
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST $LIST6 $LIST_MERGED $LIST6_MERGED
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $TMP_TW $LIST $LIST6 $LIST_MERGED $LIST6_MERGED

echo "(3) Crawlers IPs are available in $DEST"
exit 0
Loading