Skip to content

Commit

Permalink
[Fixes #8458] Direct download timeout with big files (#8505)
Browse files Browse the repository at this point in the history
  • Loading branch information
italogsfernandes authored Dec 20, 2021
1 parent 546a0fa commit 7fc91ea
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 74 deletions.
36 changes: 36 additions & 0 deletions geonode/proxy/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
Replace these with more appropriate tests for your application.
"""
import json
import os
import io
import gisdata
import zipfile

try:
from unittest.mock import MagicMock
Expand All @@ -38,6 +42,7 @@
from geonode import geoserver
from geonode.base.models import Link
from geonode.layers.models import Layer
from geonode.layers.utils import file_upload
from geonode.decorators import on_ogc_backend
from geonode.tests.base import GeoNodeBaseTestSupport
from geonode.base.populate_test_data import create_models
Expand Down Expand Up @@ -194,6 +199,37 @@ def test_download_url(self):
self.assertTrue(
"No files have been found for this resource. Please, contact a system administrator." in data)

@on_ogc_backend(geoserver.BACKEND_PACKAGE)
def test_download_files(self):
    """Download a layer as a zip and verify headers and archive contents.

    Uploads a shapefile, requests the 'download' view as admin and checks
    that the streamed response is a valid zip archive containing the
    shapefile components plus the dumped ``.metadata/`` entries.
    """
    admin = get_user_model().objects.get(username="admin")
    # upload a shapefile so the layer has physical files to download
    shp_file = os.path.join(
        gisdata.VECTOR_DATA,
        'san_andres_y_providencia_poi.shp')
    layer = file_upload(
        shp_file,
        name="san_andres_y_providencia_poi",
        user=admin,
        overwrite=True,
    )
    self.client.login(username='admin', password='admin')

    response = self.client.get(reverse('download', args=(layer.id,)))
    # headers and status assertions
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.get('content-type'), "application/zip")
    self.assertEqual(
        response.get('content-disposition'),
        'attachment; filename="san_andres_y_providencia_poi.zip"')
    # Inspect content: the response is streaming, so the body must be
    # consumed from `streaming_content` before it can be opened as a zip.
    zip_content = io.BytesIO(b"".join(response.streaming_content))
    # Use a context manager and don't shadow the builtin `zip`.
    with zipfile.ZipFile(zip_content) as zip_archive:
        zip_files = zip_archive.namelist()
    self.assertEqual(len(zip_files), 11)
    # Check each archive member name individually instead of substring
    # matching against "".join(zip_files), which could false-positive
    # across the boundary of two concatenated names.
    self.assertTrue(
        any(name.startswith(".metadata/") for name in zip_files),
        "no .metadata/ entries found in the downloaded zip")
    for extension in (".shp", ".dbf", ".shx", ".prj"):
        self.assertTrue(
            any(name.endswith(extension) for name in zip_files),
            f"no {extension} file found in the downloaded zip")


class OWSApiTestCase(GeoNodeBaseTestSupport):

Expand Down
175 changes: 101 additions & 74 deletions geonode/proxy/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,43 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
import io
import os
import re
import gzip
import io
import json
import shutil
import logging
import tempfile
import re
import traceback
from distutils.version import StrictVersion
from urllib.parse import urljoin, urlparse, urlsplit

from hyperlink import URL
from slugify import slugify
from urllib.parse import urlparse, urlsplit, urljoin

import zipstream
from django.conf import settings
from django.template import loader
from django.http import HttpResponse
from django.views.generic import View
from distutils.version import StrictVersion
from django.http.request import validate_host
from django.core.files.storage import FileSystemStorage
from django.forms.models import model_to_dict
from django.http import HttpResponse, StreamingHttpResponse
from django.http.request import validate_host
from django.template import loader
from django.utils.translation import ugettext as _
from django.core.files.storage import FileSystemStorage
from django.views.decorators.csrf import requires_csrf_token
from django.views.generic import View
from hyperlink import URL
from slugify import slugify

from geonode import geoserver # noqa
from geonode.base import register_event
from geonode.base.enumerations import LINK_TYPES as _LT
from geonode.base.models import Link
from geonode.layers.models import Layer, LayerFile
from geonode.utils import (
resolve_object,
check_ogc_backend,
get_dir_time_suffix,
zip_dir,
get_headers,
http_client,
json_response,
json_serializer_producer)
from geonode.base.enumerations import LINK_TYPES as _LT
json_serializer_producer,
resolve_object,
)

from geonode import geoserver # noqa
from geonode.base import register_event
BUFFER_CHUNK_SIZE = 64 * 1024

TIMEOUT = 30

Expand Down Expand Up @@ -281,14 +278,8 @@ def download(request, resourceid, sender=Layer):
permission_msg=_not_permitted)

if isinstance(instance, Layer):
# Create Target Folder
dirpath = tempfile.mkdtemp(dir=settings.STATIC_ROOT)
dir_time_suffix = get_dir_time_suffix()
target_folder = os.path.join(dirpath, dir_time_suffix)
if not os.path.exists(target_folder):
os.makedirs(target_folder)

layer_files = []
file_list = [] # Store file info to be returned
try:
upload_session = instance.get_upload_session()
if upload_session:
Expand All @@ -299,7 +290,11 @@ def download(request, resourceid, sender=Layer):
for lyr in layer_files:
if storage.exists(str(lyr.file)):
geonode_layer_path = storage.path(str(lyr.file))
shutil.copy2(geonode_layer_path, target_folder)
file_list.append({
"zip_folder": "",
"name": lyr.file.name.split('/')[-1],
"data_src_file": geonode_layer_path,
})
else:
return HttpResponse(
loader.render_to_string(
Expand All @@ -324,9 +319,12 @@ def download(request, resourceid, sender=Layer):
# Let's check for associated SLD files (if any)
try:
for s in instance.styles.all():
sld_file_path = os.path.join(target_folder, "".join([s.name, ".sld"]))
with open(sld_file_path, "w") as sld_file:
sld_file.write(s.sld_body.strip())
sld_file_name = "".join([s.name, ".sld"])
file_list.append({
"zip_folder": "",
"name": sld_file_name,
"data_str": s.sld_body.strip(),
})
try:
# Collecting headers and cookies
headers, access_token = get_headers(request, urlsplit(s.sld_url), s.sld_url)
Expand All @@ -337,9 +335,12 @@ def download(request, resourceid, sender=Layer):
timeout=TIMEOUT,
user=request.user)
sld_remote_content = response.text
sld_file_path = os.path.join(target_folder, "".join([s.name, "_remote.sld"]))
with open(sld_file_path, "w") as sld_file:
sld_file.write(sld_remote_content.strip())
remote_sld_file_name = "".join([s.name, "_remote.sld"])
file_list.append({
"zip_folder": "",
"name": remote_sld_file_name,
"data_str": sld_remote_content,
})
except Exception:
traceback.print_exc()
tb = traceback.format_exc()
Expand All @@ -350,60 +351,89 @@ def download(request, resourceid, sender=Layer):
logger.debug(tb)

# Let's dump metadata
target_md_folder = os.path.join(target_folder, ".metadata")
if not os.path.exists(target_md_folder):
os.makedirs(target_md_folder)

try:
dump_file = os.path.join(target_md_folder, "".join([instance.name, ".dump"]))
with open(dump_file, 'w') as outfile:
serialized_obj = json_serializer_producer(model_to_dict(instance))
json.dump(serialized_obj, outfile)

dump_file_name = "".join([instance.name, ".dump"])
serialized_obj = json_serializer_producer(model_to_dict(instance))
file_list.append({
"zip_folder": ".metadata/",
"name": dump_file_name,
"data_str": json.dumps(serialized_obj),
})
links = Link.objects.filter(resource=instance.resourcebase_ptr)
for link in links:
link_name = slugify(link.name)
link_file = os.path.join(target_md_folder, "".join([link_name, f".{link.extension}"]))
link_file_name = "".join([link_name, f".{link.extension}"])
link_file_obj = None

if link.link_type in ('data'):
# Skipping 'data' download links
continue
elif link.link_type in ('metadata', 'image'):
# Dumping metadata files and images
with open(link_file, "wb"):
try:
# Collecting headers and cookies
headers, access_token = get_headers(request, urlsplit(link.url), link.url)

response, raw = http_client.get(
link.url,
stream=True,
headers=headers,
timeout=TIMEOUT,
user=request.user)
raw.decode_content = True
shutil.copyfileobj(raw, link_file)
except Exception:
traceback.print_exc()
tb = traceback.format_exc()
logger.debug(tb)
try:
# Collecting headers and cookies
headers, access_token = get_headers(request, urlsplit(link.url), link.url)

response, raw = http_client.get(
link.url,
stream=True,
headers=headers,
timeout=TIMEOUT,
user=request.user)
raw.decode_content = True
if raw and raw is not None:
link_file_obj = {
"zip_folder": ".metadata/",
"name": link_file_name,
"data_iter": raw,
}
except Exception:
traceback.print_exc()
tb = traceback.format_exc()
logger.debug(tb)
elif link.link_type.startswith('OGC'):
# Dumping OGC/OWS links
with open(link_file, "w") as link_file:
link_file.write(link.url.strip())
link_file_obj = {
"zip_folder": ".metadata/",
"name": link_file_name,
"data_str": link.url.strip(),
}
# Add file_info to the file list
if link_file_obj is not None:
file_list.append(link_file_obj)
except Exception:
traceback.print_exc()
tb = traceback.format_exc()
logger.debug(tb)

# ZIP everything and return
target_file_name = "".join([instance.name, ".zip"])
target_file = os.path.join(dirpath, target_file_name)
zip_dir(target_folder, target_file)

target_zip = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED, allowZip64=True)

# Iterable: needed when the file_info carries its data as a stream;
# yields the stream in fixed-size chunks so the zip is produced
# without loading the whole file into memory.
def _iterable(source_iter):
    chunk = source_iter.read(BUFFER_CHUNK_SIZE)
    while chunk:
        yield chunk
        chunk = source_iter.read(BUFFER_CHUNK_SIZE)

# Add files to zip
for file_info in file_list:
zip_file_name = "".join([file_info['zip_folder'], file_info['name']])
# The zip can be built from 3 data sources: str, iterable or a file path
if 'data_str' in file_info and file_info['data_str'] is not None:
target_zip.writestr(arcname=zip_file_name, data=bytes(file_info['data_str'], 'utf-8'))
elif 'data_iter' in file_info and file_info['data_iter'] is not None:
target_zip.write_iter(arcname=zip_file_name, iterable=_iterable(file_info['data_iter']))
elif 'data_src_file' in file_info and file_info['data_src_file'] is not None:
target_zip.write(filename=file_info['data_src_file'], arcname=zip_file_name)

register_event(request, 'download', instance)
response = HttpResponse(
content=open(target_file, mode='rb'),
status=200,
content_type="application/zip")

# Streaming content response
response = StreamingHttpResponse(target_zip, content_type='application/zip')
response['Content-Disposition'] = f'attachment; filename="{target_file_name}"'
return response
except NotImplementedError:
Expand All @@ -418,9 +448,6 @@ def download(request, resourceid, sender=Layer):
'error_message': _no_files_found
},
request=request), status=404)
finally:
if target_folder is not None:
shutil.rmtree(target_folder, ignore_errors=True)
return HttpResponse(
loader.render_to_string(
'401.html',
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ wrapt==1.12.1
jsonfield==3.1.0
jsonschema==3.2.0
pyrsistent==0.17.3
zipstream-new==1.1.8

# Django Apps
django-allauth==0.44.0
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ install_requires =
jsonfield==3.1.0
jsonschema==3.2.0
pyrsistent==0.17.3
zipstream-new==1.1.8

# Django Apps
django-allauth==0.44.0
Expand Down

0 comments on commit 7fc91ea

Please sign in to comment.