|
1 | 1 | import copy
|
2 |
| -import json |
3 | 2 | import operator
|
4 | 3 | import os
|
5 |
| -import zipfile |
| 4 | +import pickle |
6 | 5 | from collections import defaultdict
|
7 | 6 | from pathlib import Path
|
8 | 7 | from typing import Optional
|
9 | 8 |
|
| 9 | +import lz4.frame |
10 | 10 | import pycountry
|
11 | 11 |
|
12 | 12 |
|
def decompress_and_unpickle(compressed_file):
    """Read an LZ4-frame-compressed pickle from disk and return the object.

    Args:
        compressed_file: Path of the file to read; it is opened in binary
            mode and read in full.

    Returns:
        Whatever object ``pickle.loads`` reconstructs from the decompressed
        bytes.

    Note:
        Unpickling can execute arbitrary code, so only pass files that come
        from a trusted source.
    """
    with open(compressed_file, 'rb') as handle:
        payload = handle.read()
    # Decompress the whole LZ4 frame in memory, then unpickle the raw bytes.
    return pickle.loads(lz4.frame.decompress(payload))
| 19 | + |
| 20 | + |
13 | 21 | def _query(search_set, key):
|
14 | 22 | key = key.strip().title()
|
15 | 23 | if key in search_set:
|
@@ -54,16 +62,16 @@ class NameDataset:
|
def __init__(self, load_first_names=True, load_last_names=True):
    """Load the first-name and/or last-name tables shipped next to this module.

    Args:
        load_first_names: When true, read ``v3/first_names.lz4`` into
            ``self.first_names``; otherwise that attribute is ``None``.
        load_last_names: When true, read ``v3/last_names.lz4`` into
            ``self.last_names``; otherwise that attribute is ``None``.

    Raises:
        ValueError: If both flags are false, since nothing would be loaded.
    """
    if not (load_first_names or load_last_names):
        raise ValueError('Select either [load_first_names=True] and/or [load_last_names=True].')

    # Both data files are resolved relative to the directory of this module.
    package_dir = Path(os.path.dirname(__file__))
    first_names_path = package_dir / 'v3/first_names.lz4'
    last_names_path = package_dir / 'v3/last_names.lz4'

    if load_first_names:
        self.first_names = self._read_json_from_zip(first_names_path)
    else:
        self.first_names = None

    if load_last_names:
        self.last_names = self._read_json_from_zip(last_names_path)
    else:
        self.last_names = None
|
61 | 69 |
|
@staticmethod
def _read_json_from_zip(zip_file):
    """Load one names table from its data file.

    The method name is historical: the file is no longer a zip archive
    holding JSON but an LZ4-compressed pickle, and reading it is delegated
    to ``decompress_and_unpickle``. The name is kept so existing callers
    continue to work.

    Args:
        zip_file: Path of the ``.lz4`` data file to read.

    Returns:
        The object returned by ``decompress_and_unpickle`` for that file.
    """
    # No debug print here: a library should not write to stdout on load.
    return decompress_and_unpickle(zip_file)
67 | 75 |
|
68 | 76 | def search(self, name: str):
|
69 | 77 | key = name.strip().title()
|
|
0 commit comments