
add initial tests and testing infrastructure
ohnorobo committed Sep 16, 2020
1 parent 623626c commit 2f3e24b
Showing 6 changed files with 57 additions and 7 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -53,3 +53,9 @@ existing tables.
`python table/main.py`

Runs queries to recreate any tables derived from the base tables.

## Testing

To run all tests, run

`python3 -m unittest`
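
A single test module can also be targeted; for example, given the package layout added in this commit, running

`python3 -m unittest pipeline.metadata.test_ip_metadata`

should exercise only the IP metadata tests.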
Empty file added __init__.py
Empty file.
Empty file added pipeline/__init__.py
Empty file.
16 changes: 9 additions & 7 deletions pipeline/metadata/ip_metadata.py
@@ -113,7 +113,7 @@ def get_asn_db(self, date: str) -> pyasn.pyasn:

try:
filepath = match[0].metadata_list[0].path
- lines = self.read_gcs_compressed_file_as_list(filepath)
+ lines = self.read_compressed_file(filepath)
except IndexError:
raise FileNotFoundError(filepath_pattern)

@@ -145,7 +145,7 @@ def get_org_name_to_country_map(self) -> Dict[str, Tuple[str, str]]:
ex: {"8X8INC-ARIN": ("8x8, Inc.","US")}
"""
filepath = CLOUD_DATA_LOCATION + "as-organizations/20200701.as-org2info.txt.gz"
- lines = self.read_gcs_compressed_file_as_list(filepath)
+ lines = self.read_compressed_file(filepath)

data_start_index = lines.index(ORG_TO_COUNTRY_HEADER) + 1
data_end_index = lines.index(AS_TO_ORG_HEADER)
@@ -173,7 +173,7 @@ def get_as_to_org_map(
The final 2 fields may be None
"""
filepath = CLOUD_DATA_LOCATION + "as-organizations/20200701.as-org2info.txt.gz"
- lines = self.read_gcs_compressed_file_as_list(filepath)
+ lines = self.read_compressed_file(filepath)

data_start_index = lines.index(AS_TO_ORG_HEADER) + 1
as_to_org_lines = lines[data_start_index:]
@@ -200,7 +200,7 @@ def get_as_to_type_map(self) -> Dict[int, str]:
ex {398243 : "Enterprise", 13335: "Content", 4: "Transit/Access"}
"""
filepath = CLOUD_DATA_LOCATION + "as-classifications/20200801.as2types.txt.gz"
- lines = self.read_gcs_compressed_file_as_list(filepath)
+ lines = self.read_compressed_file(filepath)

# filter comments
data_lines = [line for line in lines if line[0] != "#"]
@@ -213,8 +213,9 @@ def get_as_to_type_map(self) -> Dict[int, str]:

return as_to_type_map

- def read_gcs_compressed_file_as_list(self, filepath: str) -> List[str]:
- """Read in a compressed GCS file as a list of strings.
+ @staticmethod
+ def read_compressed_file(filepath: str) -> List[str]:
+ """Read in a compressed file as a list of strings.
We have to read the whole file into memory because some operations
(removing comments, using only the second half of the file)
@@ -239,7 +240,8 @@ def read_gcs_compressed_file_as_list(self, filepath: str) -> List[str]:

return lines

- def previous_day(self, date: str) -> str:
+ @staticmethod
+ def previous_day(date: str) -> str:
"""Given a date string return the date string of the day before.
Args:
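
The two methods converted to @staticmethod above, read_compressed_file and previous_day, are what the new unit tests call directly without building an IpMetadata instance. As a rough sketch only, assuming a plain gzip text file and ISO date strings (the real methods in this repository may read through a GCS-aware file layer and are not reproduced here), the behavior the tests rely on looks roughly like this:

import datetime
import gzip
from typing import List


class IpMetadata:  # illustrative fragment, not the full class

  @staticmethod
  def read_compressed_file(filepath: str) -> List[str]:
    # Read the whole gzip file into memory so callers can slice it into
    # sections (drop comment lines, keep only the second half, and so on).
    with gzip.open(filepath, mode="rt") as f:
      return [line.rstrip("\n") for line in f]

  @staticmethod
  def previous_day(date: str) -> str:
    # "2020-01-02" -> "2020-01-01"
    day = datetime.date.fromisoformat(date)
    return str(day - datetime.timedelta(days=1))

With a two-line fixture like pipeline/metadata/test_file.txt.gz, read_compressed_file would return ["test line 1", "test line 2"], matching the assertion in the new test below.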
Binary file added pipeline/metadata/test_file.txt.gz
Binary file not shown.
42 changes: 42 additions & 0 deletions pipeline/metadata/test_ip_metadata.py
@@ -0,0 +1,42 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
from pipeline.metadata.ip_metadata import IpMetadata


class IpMetadataTest(unittest.TestCase):

  def test_init_and_lookup(self):
    # This E2E test requires the user to have get access to the
    # gs://censoredplanet_geolocation bucket.
    ip_metadata = IpMetadata("2018-07-27")
    metadata = ip_metadata.lookup("1.1.1.1")

    self.assertEqual(metadata, ("1.1.1.0/24", 13335, "CLOUDFLARENET",
                                "Cloudflare, Inc.", "Content", "US"))

  def test_previous_day(self):
    day = "2020-01-02"
    previous_day = IpMetadata.previous_day(day)
    self.assertEqual(previous_day, "2020-01-01")

  def test_read_compressed_file(self):
    filepath = "pipeline/metadata/test_file.txt.gz"
    lines = IpMetadata.read_compressed_file(filepath)
    self.assertListEqual(lines, ["test line 1", "test line 2"])


if __name__ == "__main__":
  unittest.main()
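
The binary fixture pipeline/metadata/test_file.txt.gz added in this commit is assumed here to be a plain two-line gzip text file, since that is what test_read_compressed_file asserts; a similar fixture could be regenerated with something along these lines:

import gzip

# Write the two lines the read_compressed_file test expects into a gzip file.
with gzip.open("pipeline/metadata/test_file.txt.gz", mode="wt") as f:
  f.write("test line 1\ntest line 2\n")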
