2
2
3
3
"""Wrapper for requests module with cookies persistence and basic cache."""
4
4
5
+ import gzip
5
6
import hashlib
6
7
import logging
7
8
import pathlib
8
9
import pickle
9
10
import time
10
- from typing import Any
11
+ from typing import Any , Optional
11
12
12
13
import requests
13
14
@@ -61,6 +62,7 @@ def _get_cache_file_candidates(self, url: str) -> list[pathlib.Path]:
61
62
filestem = hashname .hexdigest ()
62
63
63
64
candidates = [
65
+ utils .CACHEDIR / f"{ filestem } .gz" ,
64
66
utils .CACHEDIR / filestem ,
65
67
66
68
# For compatibility with old cache files
@@ -69,6 +71,32 @@ def _get_cache_file_candidates(self, url: str) -> list[pathlib.Path]:
69
71
70
72
return candidates
71
73
74
+ def _try_load_cache (self , cachefile : pathlib .Path , max_cache_age : int
75
+ ) -> Optional [str ]:
76
+ if max_cache_age < 0 :
77
+ use_cache = True
78
+ else :
79
+ age = time .time () - cachefile .stat ().st_mtime
80
+ use_cache = age < max_cache_age
81
+
82
+ if use_cache :
83
+ if cachefile .suffix == ".gz" :
84
+ with gzip .open (cachefile , mode = 'r' ) as gzfile :
85
+ return gzfile .read (- 1 ).decode ()
86
+ else :
87
+ with cachefile .open ('r' ) as file :
88
+ return file .read (- 1 )
89
+
90
+ return None
91
+
92
+ def _create_cache_file (self , cachefile : pathlib .Path , data : str ):
93
+ if cachefile .suffix == ".gz" :
94
+ with gzip .open (cachefile , mode = 'w' ) as gzfile :
95
+ gzfile .write (data .encode ())
96
+ else :
97
+ with cachefile .open ('w' ) as file :
98
+ file .write (data )
99
+
72
100
def get (self , url : str , max_cache_age : int = - 1 ) -> str :
73
101
"""Do a GET request."""
74
102
cache_candidates = self ._get_cache_file_candidates (url )
@@ -77,24 +105,18 @@ def get(self, url: str, max_cache_age: int = -1) -> str:
77
105
78
106
cache_olds = [file for file in cache_candidates if file .exists ()]
79
107
for cachefile in cache_olds :
80
- if max_cache_age < 0 :
81
- use_cache = True
82
- else :
83
- age = time .time () - cachefile .stat ().st_mtime
84
- use_cache = age < max_cache_age
108
+ data = self ._try_load_cache (cachefile , max_cache_age )
109
+ if data :
110
+ logger .debug ("Loaded cache file for %s: %s" , url , cachefile )
111
+ return data
85
112
86
- if use_cache :
87
- logger .debug ("Loading cache file for %s: %s" , url , cachefile )
88
- with cachefile .open ('r' ) as file :
89
- return file .read (- 1 )
90
- else :
91
- cachefile .unlink ()
113
+ cachefile .unlink ()
92
114
93
115
logger .debug ("Fetching %s, cache file will be %s" , url , cache_new_file )
94
116
req = self .session .get (url )
95
117
req .raise_for_status ()
96
- with cache_new_file . open ( 'w' ) as file :
97
- file . write ( req .text )
118
+
119
+ self . _create_cache_file ( cache_new_file , req .text )
98
120
99
121
return req .text
100
122
0 commit comments