Skip to content

Commit

Permalink
Improve detection of crawlers/bots (#1968)
Browse files Browse the repository at this point in the history
Add support for Facebook crawler
  • Loading branch information
IvanNardi authored May 9, 2023
1 parent 79c1dbe commit 684e041
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
- name: Install Ubuntu Prerequisites
run: |
sudo apt-get update
sudo apt-get install python3-netaddr git
sudo apt-get install python3-netaddr git whois
- name: Run Scripts
run: |
echo 'Running ./utils/bitcoinnodes.sh'
Expand Down
232 changes: 232 additions & 0 deletions src/lib/inc_generated/ndpi_crawlers_match.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,238 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = {
{ 0x284D8B00 /* 40.77.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT },
{ 0x144AC500 /* 20.74.197.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT },
{ 0x140F85A0 /* 20.15.133.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x42DC9000 /* 66.220.144.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB800 /* 69.63.184.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x4A774C00 /* 74.119.76.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABFF00 /* 69.171.255.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xADFC4000 /* 173.252.64.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABE000 /* 69.171.224.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABE000 /* 69.171.224.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x67046000 /* 103.4.96.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xADFC4000 /* 173.252.64.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4000 /* 31.13.64.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D1800 /* 31.13.24.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x42DC9800 /* 66.220.152.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABEF00 /* 69.171.239.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABF000 /* 69.171.240.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4000 /* 31.13.64.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4000 /* 31.13.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4100 /* 31.13.65.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4300 /* 31.13.67.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4400 /* 31.13.68.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4500 /* 31.13.69.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4600 /* 31.13.70.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4700 /* 31.13.71.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4800 /* 31.13.72.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4900 /* 31.13.73.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4A00 /* 31.13.74.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4B00 /* 31.13.75.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4C00 /* 31.13.76.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4D00 /* 31.13.77.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D6000 /* 31.13.96.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4200 /* 31.13.66.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xADFC6000 /* 173.252.96.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4E00 /* 31.13.78.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D4F00 /* 31.13.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5000 /* 31.13.80.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5200 /* 31.13.82.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5300 /* 31.13.83.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5400 /* 31.13.84.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5500 /* 31.13.85.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5600 /* 31.13.86.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5700 /* 31.13.87.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5800 /* 31.13.88.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5900 /* 31.13.89.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5B00 /* 31.13.91.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5C00 /* 31.13.92.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5D00 /* 31.13.93.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5E00 /* 31.13.94.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5F00 /* 31.13.95.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5100 /* 31.13.81.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB33CC000 /* 179.60.192.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xB33CC000 /* 179.60.192.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB33CC100 /* 179.60.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB33CC200 /* 179.60.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB33CC300 /* 179.60.195.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB93CD800 /* 185.60.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x2D402800 /* 45.64.40.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0xB93CD800 /* 185.60.216.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB93CD900 /* 185.60.217.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB93CDA00 /* 185.60.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB93CDB00 /* 185.60.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81860000 /* 129.134.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00000 /* 157.240.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00800 /* 157.240.8.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00000 /* 157.240.0.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00100 /* 157.240.1.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00200 /* 157.240.2.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00300 /* 157.240.3.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00500 /* 157.240.5.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00600 /* 157.240.6.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00700 /* 157.240.7.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00900 /* 157.240.9.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00A00 /* 157.240.10.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01000 /* 157.240.16.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01300 /* 157.240.19.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00B00 /* 157.240.11.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00C00 /* 157.240.12.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00D00 /* 157.240.13.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00E00 /* 157.240.14.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00F00 /* 157.240.15.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01100 /* 157.240.17.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01200 /* 157.240.18.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01400 /* 157.240.20.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01500 /* 157.240.21.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01600 /* 157.240.22.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01700 /* 157.240.23.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF00000 /* 157.240.0.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT },
{ 0x45ABFA00 /* 69.171.250.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C000 /* 157.240.192.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C600 /* 157.240.198.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846000 /* 102.132.96.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846000 /* 102.132.96.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846100 /* 102.132.97.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01A00 /* 157.240.26.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01B00 /* 157.240.27.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01C00 /* 157.240.28.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01D00 /* 157.240.29.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01E00 /* 157.240.30.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861C00 /* 129.134.28.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861D00 /* 129.134.29.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D000 /* 157.240.208.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C100 /* 157.240.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C200 /* 157.240.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C300 /* 157.240.195.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C500 /* 157.240.197.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C400 /* 157.240.196.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C800 /* 157.240.200.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C900 /* 157.240.201.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CB00 /* 157.240.203.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CC00 /* 157.240.204.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CD00 /* 157.240.205.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CE00 /* 157.240.206.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CF00 /* 157.240.207.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D100 /* 157.240.209.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D200 /* 157.240.210.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D300 /* 157.240.211.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D400 /* 157.240.212.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D500 /* 157.240.213.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D600 /* 157.240.214.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D700 /* 157.240.215.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D800 /* 157.240.216.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DE00 /* 157.240.222.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861E00 /* 129.134.30.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861F00 /* 129.134.31.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861E00 /* 129.134.30.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861900 /* 129.134.25.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861A00 /* 129.134.26.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81861B00 /* 129.134.27.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846300 /* 102.132.99.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846500 /* 102.132.101.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864000 /* 129.134.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864100 /* 129.134.65.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864200 /* 129.134.66.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864300 /* 129.134.67.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DB00 /* 157.240.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0D900 /* 157.240.217.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DA00 /* 157.240.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C700 /* 157.240.199.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81867F00 /* 129.134.127.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DF00 /* 157.240.223.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0C000 /* 157.240.192.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DD00 /* 157.240.221.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0DC00 /* 157.240.220.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xADFC5800 /* 173.252.88.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864400 /* 129.134.68.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864500 /* 129.134.69.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864600 /* 129.134.70.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01800 /* 157.240.24.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01900 /* 157.240.25.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x66846400 /* 102.132.100.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF01F00 /* 157.240.31.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E000 /* 157.240.224.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864700 /* 129.134.71.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E100 /* 157.240.225.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E200 /* 157.240.226.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E300 /* 157.240.227.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81860000 /* 129.134.0.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864800 /* 129.134.72.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864900 /* 129.134.73.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864A00 /* 129.134.74.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DA00 /* 185.89.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DB00 /* 185.89.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DA00 /* 185.89.218.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E400 /* 157.240.228.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E500 /* 157.240.229.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864C00 /* 129.134.76.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864B00 /* 129.134.75.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0EF00 /* 157.240.239.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F000 /* 157.240.240.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F100 /* 157.240.241.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E700 /* 157.240.231.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E800 /* 157.240.232.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0E900 /* 157.240.233.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0EA00 /* 157.240.234.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0EB00 /* 157.240.235.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0EC00 /* 157.240.236.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864D00 /* 129.134.77.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864E00 /* 129.134.78.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81864F00 /* 129.134.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0ED00 /* 157.240.237.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0EE00 /* 157.240.238.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F200 /* 157.240.242.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F300 /* 157.240.243.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81867000 /* 129.134.112.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06400 /* 157.240.100.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06200 /* 157.240.98.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06000 /* 157.240.96.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06300 /* 157.240.99.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06500 /* 157.240.101.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81867100 /* 129.134.113.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81867200 /* 129.134.114.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF06100 /* 157.240.97.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x81867300 /* 129.134.115.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F400 /* 157.240.244.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F500 /* 157.240.245.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F600 /* 157.240.246.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F700 /* 157.240.247.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F800 /* 157.240.248.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0F900 /* 157.240.249.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0FA00 /* 157.240.250.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468000 /* 163.70.128.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT },
{ 0xA34D8000 /* 163.77.128.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0FB00 /* 157.240.251.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0FC00 /* 157.240.252.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0FD00 /* 157.240.253.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0FE00 /* 157.240.254.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB200 /* 69.63.178.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x9DF0CA00 /* 157.240.202.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0x1F0D5A00 /* 31.13.90.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468000 /* 163.70.128.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468100 /* 163.70.129.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468200 /* 163.70.130.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468300 /* 163.70.131.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468400 /* 163.70.132.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468800 /* 163.70.136.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468600 /* 163.70.134.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468700 /* 163.70.135.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xA3468500 /* 163.70.133.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DB00 /* 185.89.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DA00 /* 185.89.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959DA00 /* 185.89.218.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT },
{ 0xB959D800 /* 185.89.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB800 /* 69.63.184.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT },
{ 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
{ 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT },
/* End */
{ 0x0, 0, 0 }
};
4 changes: 2 additions & 2 deletions src/lib/protocols/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,8 @@ static void ndpi_check_user_agent(struct ndpi_detection_module_struct *ndpi_stru
Amazon-Route53-Health-Check-Service (ref 68784dad-be98-49e4-a63c-9fbbe2816d7c; report http://amzn.to/1vsZADi)
Anonymous Crawler/1.0 (Webcrawler developed with StormCrawler; http://example.com/; [email protected])
*/
if((strstr(ua, "+http") != NULL)
|| (strstr(ua, " http") != NULL)
if((strstr(ua, "+http:") != NULL)
|| (strstr(ua, " http:") != NULL)
|| ndpi_strncasestr(ua, "Crawler", ua_len)
|| ndpi_strncasestr(ua, "Bot", ua_len) /* bot/robot */
) {
Expand Down
Binary file not shown.
25 changes: 25 additions & 0 deletions tests/cfgs/default/result/crawler_false_positive.pcapng.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Guessed flow protos: 0

DPI Packets (TCP): 8 (8.00 pkts/flow)
Confidence DPI : 1 (flows)
Num dissector calls: 12 (12.00 diss/flow)
LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
LRU cache stun: 0/0/0 (insert/search/found)
LRU cache tls_cert: 0/0/0 (insert/search/found)
LRU cache mining: 0/0/0 (insert/search/found)
LRU cache msteams: 0/0/0 (insert/search/found)
LRU cache stun_zoom: 0/0/0 (insert/search/found)
Automa host: 0/0 (search/found)
Automa domain: 0/0 (search/found)
Automa tls cert: 0/0 (search/found)
Automa risk mask: 0/0 (search/found)
Automa common alpns: 0/0 (search/found)
Patricia risk mask: 2/0 (search/found)
Patricia risk: 0/0 (search/found)
Patricia protocols: 1/1 (search/found)

OCSP 12 1842 1

1 TCP 192.168.12.156:38291 <-> 93.184.220.29:80 [proto: 7.63/HTTP.OCSP][IP: 288/Edgecast][ClearText][Confidence: DPI][DPI packets: 8][cat: Web/5][7 pkts/705 bytes <-> 5 pkts/1137 bytes][Goodput ratio: 33/70][0.04 sec][Hostname/SNI: ocsp.digicert.com][bytes ratio: -0.235 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 5/6 8/10 4/4][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 101/227 284/865 75/319][StatusCode: 200][Req Content-Type: application/ocsp-request][Content-Type: application/ocsp-response][Server: ECS (mil/6CF7)][User-Agent: zbtls http][PLAIN TEXT (ConnectionTP/1.1)][Plen Bins: 33,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
9 changes: 7 additions & 2 deletions utils/crawlers_ip_addresses_download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ TMP1=/tmp/bot_google_c1.json
TMP2=/tmp/bot_google_c2.json
TMP3=/tmp/bot_google_c3.json
TMP_BING=/tmp/bot_bing.json
TMP_FB=/tmp/bot_fb.list
LIST=/tmp/bot.list
#Google Common crawlers
ORIGIN1="https://developers.google.com/static/search/apis/ipranges/googlebot.json"
Expand All @@ -18,7 +19,7 @@ ORIGIN2="https://developers.google.com/static/search/apis/ipranges/special-crawl
ORIGIN3="https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers.json"
#Bing Bot
ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"

#Facebook Bot: https://developers.facebook.com/docs/sharing/webmasters/crawler/

echo "(1) Downloading file... ${ORIGIN1}"
http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
Expand Down Expand Up @@ -48,15 +49,19 @@ if [ "$http_response" != "200" ]; then
exit 1
fi

echo "(1) Downloading FB crawlers routes... "
# Facebook crawler prefixes are taken from the route objects registered
# for AS32934 in the RADB whois database.
# The pipeline status is grep's exit code: it is non-zero both when whois
# fails and when the answer contains no "route" lines. Abort in that case
# instead of silently generating a list without the Facebook prefixes.
if ! whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB; then
    echo "Error: unable to retrieve FB crawlers routes from whois.radb.net"
    exit 1
fi

echo "(2) Processing IP addresses..."
{
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP1 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP2 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP3 # TODO: ipv6
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6
grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6
} > $LIST
./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $LIST
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST

echo "(3) Crawlers IPs are available in $DEST"
exit 0

0 comments on commit 684e041

Please sign in to comment.