Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved helper scripts to be more restrictive. #1986

Merged
merged 1 commit into from
May 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ LT_LIB_M
PKG_PROG_PKG_CONFIG

AC_PROG_CC
AC_PROG_CPP_WERROR
AC_C_INLINE

SYSTEM=`uname -s`
if test $SYSTEM = "Darwin"; then
Expand Down
7 changes: 5 additions & 2 deletions utils/asn_update.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
#!/usr/bin/env bash

set -e

FAILED_ASN=0
TOTAL_ASN=0
Expand Down Expand Up @@ -28,6 +30,7 @@ function create_list() {
}

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

echo "(1) Downloading Apple routes..."
DEST="../src/lib/inc_generated/ndpi_asn_apple.c.inc"
Expand Down Expand Up @@ -194,7 +197,7 @@ DEST=../src/lib/inc_generated/ndpi_asn_nvidia.c.inc
create_list NDPI_PROTOCOL_NVIDIA $DEST "AS60977" "AS50889" "AS20347" "AS11414"
echo "(3) Nvidia IPs are available in $DEST"

if [ ${TOTAL_ASN} -eq ${FAILED_ASN} ]; then
if [ ${TOTAL_ASN} -eq 0 -o ${TOTAL_ASN} -eq ${FAILED_ASN} ]; then
printf '%s: %s\n' "${0}" "All download(s) failed, ./get_routes_by_asn.sh broken?"
exit 1
else
Expand Down
13 changes: 8 additions & 5 deletions utils/aws_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_amazon_aws_match.c.inc
TMP=/tmp/aws.json
Expand All @@ -10,15 +13,15 @@ ORIGIN=https://ip-ranges.amazonaws.com/ip-ranges.json

echo "(1) Downloading file..."
http_response=$(curl -s -o $TMP -w "%{http_code}" ${ORIGIN})
if [ $http_response != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP}"

echo "(2) Processing IP addresses..."
jq -r '.prefixes | .[].ip_prefix' $TMP > $LIST # TODO: ipv6
is_file_empty "${LIST}"
./ipaddr2list.py $LIST NDPI_PROTOCOL_AMAZON_AWS > $DEST
rm -f $TMP $LIST
is_file_empty "${DEST}"

echo "(3) Amazon AWS IPs are available in $DEST"
exit 0
24 changes: 10 additions & 14 deletions utils/azure_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_azure_match.c.inc
LINK_TMP=/tmp/azure_link.txt
Expand All @@ -14,30 +15,25 @@ LINK_ORIGIN="https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519

echo "(1) Downloading file... ${LINK_ORIGIN}"
http_response=$(curl -s -o ${LINK_TMP} -w "%{http_code}" ${LINK_ORIGIN})
if [ "${http_response}" != "200" ]; then
echo "Error $http_response: you probably need to update the link origin url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${LINK_TMP}"

ORIGIN="$(grep -E 'ServiceTags_Public_[[:digit:]]+.json' ${LINK_TMP} | grep -o -E 'href=\"[^"]+' | sed 's/href="//' | uniq)"
ORIGIN="$(grep -E 'ServiceTags_Public_[[:digit:]]+.json' ${LINK_TMP} | grep -o -E 'href="[^"]+' | sed 's/href="//' | uniq)"
rm -f ${LINK_TMP}
if [ -z "${ORIGIN}" ]; then
echo "Error ${LINK_ORIGIN} does not contain the url format!"
exit 1
fi
is_str_empty "${ORIGIN}" "${LINK_ORIGIN} does not contain the url format!"

echo "(2) Downloading file... ${ORIGIN}"
http_response=$(curl -s -o $TMP -w "%{http_code}" ${ORIGIN})
if [ "${http_response}" != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP}"

echo "(3) Processing IP addresses..."
# Note: the last "grep -v :" is used to skip IPv6 addresses
tr -d '\r' < $TMP | grep / | tr -d '"' | tr -d " " | tr -d "," | grep -v : > $LIST
is_file_empty "${LIST}"
./ipaddr2list.py $LIST NDPI_PROTOCOL_MICROSOFT_AZURE > $DEST
rm -f $TMP $LIST
is_file_empty "${DEST}"

echo "(4) Microsoft Azure IPs are available in $DEST"
exit 0
8 changes: 6 additions & 2 deletions utils/bitcoinnodes.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#!/bin/sh
#!/usr/bin/env bash
#
# List all the current bitcoin nodes
#

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

# NOTE: JQ can be found at https://stedolan.github.io/jq/

curl -s -H "Accept: application/json; indent=4" https://bitnodes.io/api/v1/snapshots/latest/ | jq -r '.nodes|keys[] as $k | "\($k)"' | grep -v onion | grep -v ']' | cut -d ':' -f 1
RESULT="$(curl -s -H "Accept: application/json; indent=4" https://bitnodes.io/api/v1/snapshots/latest/ | jq -r '.nodes|keys[] as $k | "\($k)"' | grep -v onion | grep -v ']' | cut -d ':' -f 1)"
is_str_empty "${RESULT}" "String empty, please review this script."
12 changes: 7 additions & 5 deletions utils/cachefly_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_cachefly_match.c.inc
LIST=/tmp/cachefly.list
Expand All @@ -9,14 +12,13 @@ ORIGIN='https://cachefly.cachefly.net/ips/cdn.txt'

echo "(1) Downloading file..."
http_response=$(curl -s -o "${LIST}" -w "%{http_code}" "${ORIGIN}")
if [ "${http_response}" != "200" ]; then
echo "Error ${http_response}: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${LIST}"

echo "(2) Processing IP addresses..."
./ipaddr2list.py "${LIST}" NDPI_PROTOCOL_CACHEFLY > "${DEST}"
rm -f "${LIST}"
is_file_empty "${DEST}"

echo "(3) Cachefly IPs are available in ${DEST}"
exit 0
12 changes: 7 additions & 5 deletions utils/cloudflare_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_cloudflare_match.c.inc
LIST=/tmp/cloudflare.list
Expand All @@ -10,14 +13,13 @@ ORIGIN="https://www.cloudflare.com/ips-v4"

echo "(1) Downloading file... ${ORIGIN}"
http_response=$(curl -s -o $LIST -w "%{http_code}" ${ORIGIN})
if [ $http_response != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${LIST}"

echo "(2) Processing IP addresses..."
./ipaddr2list.py $LIST NDPI_PROTOCOL_CLOUDFLARE > $DEST
rm -f $LIST
is_file_empty "${DEST}"

echo "(3) Cloudflare IPs are available in $DEST"
exit 0
39 changes: 39 additions & 0 deletions utils/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env sh

printf 'Running script: %s\n' "$(basename ${0})" >&2

#######################################
# Abort the calling script unless an HTTP status code is exactly "200".
# Globals:   http_response (written)
# Arguments: $1 - HTTP status code, e.g. as printed by curl -w "%{http_code}"
# Outputs:   an error message on stderr when the code is not "200"
# Returns:   0 when the code is "200"; otherwise the shell exits with
#            status 1 instead of returning.
#######################################
function check_http_response()
{
  http_response="${1}"

  # Success path first: anything other than a literal 200 is fatal.
  case "${http_response}" in
    200)
      return 0
      ;;
  esac

  printf '%s error: %s\n' "${0}" "HTTP Response code ${http_response}; you probably need to update the list url!" >&2
  exit 1
}

#######################################
# Abort the calling script if a file is missing, unreadable or empty.
# Globals:   file (written)
# Arguments: $1 - path of the file to check
# Outputs:   an error message on stderr when the check fails
# Returns:   0 when the file is readable and has a size greater than zero;
#            otherwise the shell exits with status 1 instead of returning.
#######################################
function is_file_empty()
{
  file="${1}"

  if [ ! -r "${file}" ]; then
    printf '%s error: %s\n' "${0}" "file ${file} not found or not readable!" >&2
    exit 1
  fi

  # Check the size, not the newline count: a download that consists of a
  # single line without a trailing newline has content but zero newlines,
  # and must not be reported as empty.
  if [ ! -s "${file}" ]; then
    printf '%s error: %s\n' "${0}" "file ${file} empty!" >&2
    exit 1
  fi
}

#######################################
# Abort the calling script if a string is empty.
# Globals:   str, errmsg (written)
# Arguments: $1 - string to check
#            $2 - message to print when the string is empty
# Outputs:   the given message on stderr when the string is empty
# Returns:   0 when the string is non-empty; otherwise the shell exits
#            with status 1 instead of returning.
#######################################
function is_str_empty()
{
  str="${1}"
  errmsg="${2}"

  # Nothing to report for a non-empty string.
  if [ -n "${str}" ]; then
    return 0
  fi

  printf '%s error: %s\n' "${0}" "${errmsg}" >&2
  exit 1
}
30 changes: 13 additions & 17 deletions utils/crawlers_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_crawlers_match.c.inc
TMP1=/tmp/bot_google_c1.json
Expand All @@ -23,34 +24,27 @@ ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json"

echo "(1) Downloading file... ${ORIGIN1}"
http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1})
if [ "$http_response" != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP1}"

echo "(1) Downloading file... ${ORIGIN2}"
http_response=$(curl -s -o $TMP2 -w "%{http_code}" ${ORIGIN2})
if [ "$http_response" != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP2}"

echo "(1) Downloading file... ${ORIGIN3}"
http_response=$(curl -s -o $TMP3 -w "%{http_code}" ${ORIGIN3})
if [ "$http_response" != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP3}"

echo "(1) Downloading file... ${ORIGIN_BING}"
http_response=$(curl -s -o $TMP_BING -w "%{http_code}" ${ORIGIN_BING})
if [ "$http_response" != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP_BING}"

echo "(1) Downloading FB crawlers routes... "
whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB
is_file_empty "${TMP_FB}"

echo "(2) Processing IP addresses..."
{
Expand All @@ -60,7 +54,9 @@ echo "(2) Processing IP addresses..."
jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6
grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6
} > $LIST
is_file_empty "${LIST}"
./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST
is_file_empty "${DEST}"
rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST

echo "(3) Crawlers IPs are available in $DEST"
Expand Down
13 changes: 8 additions & 5 deletions utils/ethereum_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_ethereum_match.c.inc
TMP=/tmp/ethereum
Expand All @@ -10,16 +13,16 @@ ORIGIN="https://raw.githubusercontent.com/ethereum/go-ethereum/master/params/boo

echo "(1) Downloading file... ${ORIGIN}"
http_response=$(curl -s -o $TMP -w "%{http_code}" ${ORIGIN})
if [ $http_response != "200" ]; then
echo "Error $http_response: you probably need to update the list url!"
exit 1
fi
check_http_response "${http_response}"
is_file_empty "${TMP}"

echo "(2) Processing IP addresses..."
grep 'enode' $TMP | grep -v '^/' | grep ':' | cut -d '@' -f 2 | cut -d ':' -f 1 > $LIST
is_file_empty "${LIST}"

./ipaddr2list.py $LIST NDPI_PROTOCOL_MINING > $DEST
rm -f $TMP $LIST
is_file_empty "${DEST}"

echo "(3) Ethereum/Mining IPs are available in $DEST"
exit 0
6 changes: 5 additions & 1 deletion utils/gambling_sites_download.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
#!/usr/bin/env sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_gambling_match.c.inc
LIST=/tmp/gambling.list

printf '(1) %s\n' "Scraping Illegal Gambling Sites (Belgium)"
DOMAINS="$(curl -s 'https://www.gamingcommission.be/en/gaming-commission/illegal-games-of-chance/list-of-illegal-gambling-sites' | sed -n 's/^<td[^>]\+>\(.\+\.[a-zA-Z0-9]\+\)\(\|\/.*[^<]*\)<\/td>/\1/gp' || exit 1)"
is_str_empty "${DOMAINS}" "Please check gambling sites URL and sed REGEX."

printf '(2) %s\n' "Processing IP addresses..."
echo "${DOMAINS}" >${LIST}
./hostname2list.py "${LIST}" "Gambling" NDPI_PROTOCOL_GAMBLING NDPI_PROTOCOL_CATEGORY_WEB NDPI_PROTOCOL_UNSAFE >${DEST}
rm -f "${LIST}"
is_file_empty "${DEST}"

exit 0
3 changes: 2 additions & 1 deletion utils/google_cloud_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_google_cloud_match.c.inc
TMP=/tmp/google_c.json
Expand Down
7 changes: 5 additions & 2 deletions utils/google_ip_addresses_download.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/bin/sh
#!/usr/bin/env bash

set -e

cd "$(dirname "${0}")" || exit 1
. ./common.sh || exit 1

DEST=../src/lib/inc_generated/ndpi_google_match.c.inc
LIST=/tmp/google.list
Expand All @@ -13,8 +14,10 @@ echo "(1) Downloading file..."
echo "(2) Processing IP addresses..."
#https://cloud.google.com/vpc/docs/configure-private-google-access#ip-addr-defaults
python3 google.py > $LIST
is_file_empty "${LIST}"
./ipaddr2list.py $LIST NDPI_PROTOCOL_GOOGLE > $DEST
#rm -f $TMP $LIST
rm -f "${TMP}" "${LIST}"
is_file_empty "${DEST}"

echo "(3) Google IPs are available in $DEST"
exit 0
Loading