|
| 1 | +""" |
| 2 | +Updates asn.mmdb and cc.mmdb in /var/lib/ooniapi/ |
| 3 | +
|
| 4 | +""" |
| 5 | + |
| 6 | +import sys |
| 7 | +import gzip |
| 8 | +import timeit |
| 9 | +import shutil |
| 10 | +import logging |
| 11 | + |
| 12 | +import geoip2.database |
| 13 | +from pathlib import Path |
| 14 | +from datetime import datetime, timezone |
| 15 | +from urllib.error import HTTPError |
| 16 | +from urllib.request import urlopen, Request |
| 17 | + |
| 18 | +from prometheus_client import metrics |
| 19 | + |
| 20 | + |
class Metrics:
    """Prometheus metrics published by the GeoIP updater.

    All attributes are class-level so they are registered once, when the
    module is imported.
    """

    # Node count / build epoch read from the metadata of the ASN database.
    GEOIP_ASN_NODE_CNT = metrics.Gauge("geoip_asn_node_cnt", "Geoip asn node count")
    GEOIP_ASN_EPOCH = metrics.Gauge("geoip_asn_epoch", "Geoip current ASN epoch")
    # Node count / build epoch read from the metadata of the country database.
    GEOIP_CC_NODE_CNT = metrics.Gauge("geoip_cc_node_cnt", "Geoip cc node count")
    GEOIP_CC_EPOCH = metrics.Gauge("geoip_cc_epoch", "Geoip current CC epoch")
    # Incremented whenever a freshly downloaded database fails its sanity check.
    GEOIP_CHECKFAIL = metrics.Counter(
        "ooni_geoip_checkfail", "How many times did the check fail in geo ip fail"
    )
    # Incremented after both databases were replaced successfully.
    GEOIP_UPDATED = metrics.Counter(
        "ooni_geoip_updated", "How many times was the geoip database updated"
    )
    # Wall-clock duration of one download + decompress + check + install cycle.
    GEOIP_DOWNLOAD_TIME = metrics.Histogram(
        "geoip_download_time", "How long it takes to download the DB"
    )
| 35 | + |
# Module-level logger: DEBUG and above, echoed through a stream handler
# bound to sys.stdout.
log = logging.getLogger("ooni_download_geoip")
log.setLevel(logging.DEBUG)
log.addHandler(logging.StreamHandler(sys.stdout))
| 40 | + |
| 41 | + |
def get_request(url, timeout=60.0):
    """Open *url* and return the response object produced by ``urlopen``.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on blocking socket operations before giving
            up. Without a timeout a stalled server would hang the updater
            indefinitely.

    Returns:
        The open response; the caller is responsible for closing it
        (it can be used as a context manager).

    Raises:
        urllib.error.HTTPError: if the server answers with an error status.
        urllib.error.URLError: if the connection fails or times out.
    """
    # We need to set the user-agent otherwise db-ip gives us a 403
    req = Request(url, headers={"User-Agent": "ooni-downloader"})
    return urlopen(req, timeout=timeout)
| 47 | + |
| 48 | + |
def is_already_updated(db_dir: Path, ts: str) -> bool:
    """Tell whether the timestamp marker in *db_dir* already equals *ts*.

    The marker is the ``geoipdbts`` file inside *db_dir*. A missing marker
    counts as "not updated".
    """
    marker = db_dir / "geoipdbts"
    try:
        stored_ts = marker.read_text()
    except FileNotFoundError:
        return False
    return stored_ts == ts
| 57 | + |
| 58 | + |
def is_latest_available(url: str) -> bool:
    """Check whether *url* can currently be fetched.

    Returns:
        True when the server answers with status 200; False when it answers
        with an HTTP error status (404 is logged as "not updated yet", any
        other error status is logged as unexpected).
    """
    log.info(f"fetching {url}")
    try:
        # Only the status is needed; the context manager closes the
        # connection without reading the body.
        with get_request(url) as resp:
            return resp.status == 200
    except HTTPError as err:
        # The status must come from the exception: when get_request raises,
        # no response object was ever bound.
        if err.code == 404:
            log.info(f"{url} hasn't been updated yet")
            return False
        log.info(f"unexpected status code '{err.code}' in {url}")
        return False
| 70 | + |
| 71 | + |
def check_geoip_db(path: Path) -> None:
    """Sanity-check the database at *path* and publish its metadata.

    The file name decides how the database is probed: a name containing
    "asn" is queried with an ASN lookup, otherwise a name containing "cc"
    is queried with a country lookup. In both cases 8.8.8.8 is looked up
    and the node count and build epoch are written to the matching gauges.

    Raises:
        AssertionError: if the name contains neither "cc" nor "asn", or the
            lookup yields None.
    """
    name = path.name
    is_asn = "asn" in name
    is_cc = "cc" in name
    assert is_cc or is_asn, "invalid path"

    with geoip2.database.Reader(str(path)) as reader:
        if is_asn:
            record = reader.asn("8.8.8.8")
            node_gauge = Metrics.GEOIP_ASN_NODE_CNT
            epoch_gauge = Metrics.GEOIP_ASN_EPOCH
        elif is_cc:
            record = reader.country("8.8.8.8")
            node_gauge = Metrics.GEOIP_CC_NODE_CNT
            epoch_gauge = Metrics.GEOIP_CC_EPOCH
        else:
            # Only reachable when assertions are disabled; nothing to check.
            return

        assert record is not None, "database file is invalid"
        meta = reader.metadata()
        node_gauge.set(meta.node_count)
        epoch_gauge.set(meta.build_epoch)
| 89 | + |
| 90 | + |
def _unlink_quietly(path: Path) -> None:
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


def download_geoip(db_dir: Path, url: str, filename: str) -> None:
    """Download, decompress, verify and install one GeoIP database.

    The gzip archive at *url* is saved to ``<filename>.gz.tmp`` in *db_dir*,
    decompressed to ``<filename>.tmp``, checked with ``check_geoip_db`` and
    only then moved over ``<filename>``. Temporary files are removed on
    every exit path, so a failed run leaves nothing behind.

    Args:
        db_dir: Directory holding the databases.
        url: Location of the gzip-compressed database.
        filename: Final file name inside *db_dir*.

    Raises:
        Exception: whatever the download, decompression or consistency
            check raised; check failures are also counted in
            ``Metrics.GEOIP_CHECKFAIL``.
    """
    start_time = timeit.default_timer()  # Start timer
    log.info(f"Updating geoip database for {url} ({filename})")

    tmp_gz_out = db_dir / f"{filename}.gz.tmp"
    tmp_out = db_dir / f"{filename}.tmp"

    try:
        with get_request(url) as resp, tmp_gz_out.open("wb") as out_file:
            shutil.copyfileobj(resp, out_file)
        with gzip.open(str(tmp_gz_out)) as in_file, tmp_out.open("wb") as out_file:
            shutil.copyfileobj(in_file, out_file)
    except Exception:
        # A partially written decompressed file is useless; drop it.
        _unlink_quietly(tmp_out)
        raise
    finally:
        # The compressed copy is never needed past this point.
        _unlink_quietly(tmp_gz_out)

    try:
        check_geoip_db(tmp_out)
    except Exception as exc:
        log.error(f"consistenty check on the geoip DB failed: {exc}")
        Metrics.GEOIP_CHECKFAIL.inc()
        _unlink_quietly(tmp_out)
        raise

    # replace() overwrites an existing target on every platform.
    tmp_out.replace(db_dir / filename)
    endtime = timeit.default_timer()  # End timer
    Metrics.GEOIP_DOWNLOAD_TIME.observe(endtime - start_time)
| 116 | + |
| 117 | + |
def update_geoip(db_dir: Path, ts: str, asn_url: str, cc_url: str) -> None:
    """Fetch both databases into *db_dir* and record *ts* as the new marker.

    The ASN database is downloaded first, then the country database. Only
    after both downloads return is the ``geoipdbts`` marker written and the
    update counter incremented.
    """
    db_dir.mkdir(parents=True, exist_ok=True)

    downloads = ((asn_url, "asn.mmdb"), (cc_url, "cc.mmdb"))
    for source_url, target_name in downloads:
        download_geoip(db_dir, source_url, target_name)

    (db_dir / "geoipdbts").write_text(ts)

    log.info("Updated GeoIP databases")
    Metrics.GEOIP_UPDATED.inc()
| 128 | + |
| 129 | + |
def try_update(db_dir: str):
    """Update the databases in *db_dir* if this month's release is published.

    The release tag is the current UTC year and month. Nothing is downloaded
    when the marker in *db_dir* already carries that tag, or when either of
    the two release URLs is not available yet.
    """
    target_dir = Path(db_dir)
    month_tag = datetime.now(timezone.utc).strftime("%Y-%m")

    if is_already_updated(target_dir, month_tag):
        log.debug("Database already updated. Exiting.")
        return

    asn_url = f"https://download.db-ip.com/free/dbip-asn-lite-{month_tag}.mmdb.gz"
    cc_url = f"https://download.db-ip.com/free/dbip-country-lite-{month_tag}.mmdb.gz"

    # The country URL is only probed when the ASN one is available.
    both_published = is_latest_available(asn_url) and is_latest_available(cc_url)
    if not both_published:
        log.debug("Update not available yet. Exiting.")
        return

    update_geoip(target_dir, month_tag, asn_url, cc_url)
0 commit comments