Skip to content
This repository was archived by the owner on Nov 5, 2023. It is now read-only.

Commit 97112d7

Browse files
Switch `extract` to return bytes instead of decoding, to avoid errors when downloading subtitles marked with the wrong encoding
1 parent 6eb1341 commit 97112d7

File tree

4 files changed

+16
-22
lines changed

4 files changed

+16
-22
lines changed

docs/Utility-Functions.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ from subwinder.utils import extract, special_hash
1313
* [`extract()`](#extractbytes-encoding)
1414
* [`special_hash()`](#special_hashfilepath)
1515

16-
### `extract(bytes, encoding)`
16+
### `extract(bytes)`
1717

18-
Small helper function that base64 decodes and gzip decompresses `bytes` into from an `encoding` encoded string to a python `str`. This likely won't be useful to many people, but this is the format used to transfer subtitles and previews from the opensubtitles API.
18+
Small helper function that base64-decodes and gzip-decompresses `bytes`, returning the raw decompressed bytes without decoding them to a `str`. This likely won't be useful to many people, but it is the format the opensubtitles API uses to transfer subtitles and previews.
1919

2020
```python
21-
assert "Hi!" == extract(b"H4sIAIjurl4C//PIVAQA2sWeeQMAAAA=", "UTF-8")
21+
assert b"Hi!" == extract(b"H4sIAIjurl4C//PIVAQA2sWeeQMAAAA=")
2222
```
2323

2424
### `special_hash(filepath)`

subwinder/core.py

+8-12
Original file line numberDiff line numberDiff line change
@@ -275,30 +275,25 @@ def download_subtitles(
275275
return download_paths
276276

277277
def _download_subtitles(self, sub_containers, filepaths):
278-
encodings = []
279278
sub_file_ids = []
280-
# Unpack stored info
281-
for sub_container in sub_containers:
282-
encodings.append(sub_container.encoding)
283-
sub_file_ids.append(sub_container.file_id)
279+
# Get all the file ids
280+
sub_file_ids = [sub_container.file_id for sub_container in sub_containers]
284281

285282
data = self._request(Endpoints.DOWNLOAD_SUBTITLES, sub_file_ids)["data"]
286283

287-
for encoding, result, fpath in zip(encodings, data, filepaths):
288-
# Currently pray that python supports all the encodings and is called the
289-
# same as what opensubtitles returns
290-
subtitles = utils.extract(result["data"], encoding)
284+
for result, fpath in zip(data, filepaths):
285+
subtitles = utils.extract(result["data"])
291286

292287
# Create the directories if needed, then save the file
293288
dirpath = fpath.parent
294289
dirpath.mkdir(exist_ok=True)
295290

296291
# Write atomically if possible, otherwise fall back to regular writing
297292
if ATOMIC_DOWNLOADS_SUPPORT:
298-
with atomic_write(fpath) as f:
293+
with atomic_write(fpath, mode="wb") as f:
299294
f.write(subtitles)
300295
else:
301-
with fpath.open("w") as f:
296+
with fpath.open("wb") as f:
302297
f.write(subtitles)
303298

304299
def get_comments(self, sub_containers):
@@ -563,6 +558,7 @@ def _preview_subtitles(self, ids):
563558
encoding = preview["encoding"]
564559
contents = preview["contents"]
565560

566-
previews.append(utils.extract(contents, encoding))
561+
# Extract and decode the previews
562+
previews.append(utils.extract(contents).decode(encoding))
567563

568564
return previews

subwinder/utils.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55
from subwinder.exceptions import SubHashError
66

77

8-
def extract(bytes, encoding):
8+
def extract(bytes):
99
"""
1010
Extract `bytes` from being gzip'd and base64 encoded.
1111
"""
1212
compressed = base64.b64decode(bytes)
13-
# TODO: only previewing cares about getting this as a string. downloading can take
14-
# it directly as bytes
15-
return gzip.decompress(compressed).decode(encoding)
13+
return gzip.decompress(compressed)
1614

1715

1816
# As per API spec with some tweaks to make it a bit nicer

tests/test_utils.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ def test_extract():
88
"wa+2QmHRYOxiZfzuNRrVZv8dQcVk3xP08dSMFps5/4WhRKSPvwBzf2OXZqAAAA"
99
)
1010
IDEAL = (
11-
"Hello there, I'm that good ole compressed and encoded subtitle information"
12-
" that you so dearly want to save"
11+
b"Hello there, I'm that good ole compressed and encoded subtitle information"
12+
b" that you so dearly want to save"
1313
)
1414

15-
assert extract(COMPRESSED, "UTF-8") == IDEAL
15+
assert extract(COMPRESSED) == IDEAL
1616

1717

1818
def test_special_hash():

0 commit comments

Comments
 (0)