|
4 | 4 |
|
5 | 5 | import msgpack
|
6 | 6 | import plyvel
|
7 |
| -from fastnumbers import check_int |
8 | 7 | from splitgill.utils import parse_to_timestamp, now, partition
|
9 | 8 |
|
10 | 9 | from dataimporter.lib.model import SourceRecord
|
11 | 10 |
|
12 |
| -# the maximum integer we can represent as a sortable string is 78 digits |
13 |
| -MAX_INT = int("9" * 78) |
14 |
| - |
15 |
| - |
16 |
| -def int_to_sortable_str(number: int) -> str: |
17 |
| - """ |
18 |
| - Encodes the given number and returns a string that when compared to other strings is |
19 |
| - alphanumerically orderable. This fixes the standard 1, 2, 20, 21, 3 problem without |
20 |
| - using zero padding which wastes space and requires a much lower maximum input value. |
21 |
| - The algorithm used is based on the one presented here: |
22 |
| - https://www.arangodb.com/2017/09/sorting-number-strings-numerically/ with a couple |
23 |
| - of tweaks. |
24 |
| -
|
25 |
| - Essentially, we encode the length of the number before the number itself using a |
26 |
| - single ASCII character. This allows sorting to be done properly as the ASCII |
27 |
| - character is compared first and then the number next. For example, the number 1 gets |
28 |
| - the character 1 so is encoded as "1_1", whereas 10 gets the character 2 and is |
29 |
| - encoded "2_10". Because we are restricted to not use . in keys and for low number |
30 |
| - convenience, we start at character point 49 which is the character 1 and therefore |
31 |
| - all numbers less than 1,000,000,000 are encoded with the numbers 1 to 9 which is |
32 |
| - convenient for users. |
33 |
| -
|
34 |
| - This encoding structure can support a number with a maximum length of 78 digits |
35 |
| - (ASCII char 1 (49) to ~ (126)). |
36 |
| -
|
37 |
| - This function only works on positive integers. If the input isn't valid, a |
38 |
| - ValueError is raised. |
39 |
| -
|
40 |
| - :param number: the number to encode, must be positive |
41 |
| - :return: the encoded number as a str object |
42 |
| - """ |
43 |
| - if not check_int(number): |
44 |
| - raise ValueError("Number must be a valid integer") |
45 |
| - if number < 0 or number > MAX_INT: |
46 |
| - raise ValueError(f"Number must be positive and no more than {MAX_INT}") |
47 |
| - return f"{chr(48 + len(str(number)))}_{number}" |
48 |
| - |
49 | 11 |
|
50 | 12 | class DB:
|
51 | 13 | """
|
|
0 commit comments