Skip to content

Commit a5d9882

Browse files
github-actions[bot]giohappymarthamareal
authored andcommitted
[Fixes GeoNode#10134] New simple renderer to generate thumbnails for PDFs (GeoNode#10135) (GeoNode#10136)
* PDF thumbnail renderer * - add unit tests * command to generate thumbnails for docs * flake fix * renamed management command * add requirement to setup.cfg * make command similar to other sync commands * removed unused import * fix flake8 Co-authored-by: marthamareal <[email protected]> Co-authored-by: Giovanni Allegri <[email protected]> Co-authored-by: marthamareal <[email protected]>
1 parent 7f1783a commit a5d9882

File tree

6 files changed

+108
-0
lines changed

6 files changed

+108
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#########################################################################
2+
#
3+
# Copyright (C) 2022 OSGeo
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
#########################################################################
19+
20+
import logging
21+
22+
from django.core.management.base import BaseCommand
23+
from geonode.documents.models import Document
24+
from geonode.documents.tasks import create_document_thumbnail
25+
26+
logger = logging.getLogger(__name__)
27+
28+
29+
class Command(BaseCommand):
    """Management command that updates documents.

    Currently the only supported operation is (re)generating thumbnails
    for documents whose ``thumbnail_url`` is empty, enabled with the
    ``--updatethumbnails`` flag.
    """

    help = ("Update documents. For the moment only thumbnails can be updated")

    def add_arguments(self, parser):
        """Register the ``--updatethumbnails`` boolean flag on *parser*."""
        parser.add_argument(
            '--updatethumbnails',
            action='store_true',
            dest="updatethumbnails",
            default=False,
            help="Update the document thumbnails.")

    def handle(self, *args, **options):
        """Create thumbnails for every document that does not have one.

        Does nothing unless ``--updatethumbnails`` was passed. A failure on
        one document is logged (with traceback) and does not stop the
        processing of the remaining documents.
        """
        # Bail out before touching the database when there is nothing to do.
        if not options.get('updatethumbnails'):
            return

        for doc in Document.objects.all():
            # Only documents with a missing (None or empty) thumbnail are processed.
            if doc.thumbnail_url:
                continue
            try:
                create_document_thumbnail(doc.id)
            except Exception:
                # Best effort: keep going, but preserve the traceback in the log.
                logger.exception(f"[ERROR] Thumbnail for [{doc.name}] couldn't be created")

geonode/documents/tasks.py

+40
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1717
#
1818
#########################################################################
19+
import os
1920
import io
2021

2122
from PIL import Image
23+
import fitz
2224

2325
from celery.utils.log import get_task_logger
2426

@@ -31,6 +33,39 @@
3133
logger = get_task_logger(__name__)
3234

3335

36+
class DocumentRenderer:
    """Render a preview image for supported document file types.

    Rendering is dispatched on the file extension to a method named
    ``render_<filetype>``; every entry of ``FILETYPES`` must therefore
    have a matching ``render_*`` method.
    """

    # Lower-case file extensions (without the dot) that can be rendered.
    FILETYPES = ['pdf']

    def supports(self, filename):
        """Return True if *filename* has an extension listed in ``FILETYPES``."""
        return self._get_filetype(filename) in self.FILETYPES

    def render(self, filename):
        """Return the rendered image bytes for *filename*.

        Returns None when the file type is not supported or when the
        type-specific renderer fails.
        """
        if not self.supports(filename):
            return None
        filetype = self._get_filetype(filename)
        render = getattr(self, f'render_{filetype}')
        return render(filename)

    def render_pdf(self, filename):
        """Render the first page of a PDF as PNG bytes.

        The page is scaled by a factor of 2 on both axes. Any error
        (unreadable file, no pages, ...) is logged as a warning and None
        is returned instead of raising.
        """
        try:
            doc = fitz.open(filename)
            try:
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                return pix.pil_tobytes(format="PNG")
            finally:
                # Always release the underlying file handle.
                doc.close()
        except Exception as e:
            logger.warning(f'Could not generate thumbnail for {filename}: {e}')
            return None

    def _get_filetype(self, filname):
        """Return the lower-cased extension of *filname* without the dot.

        Lower-casing makes the lookup case-insensitive, so ``FILE.PDF`` is
        handled the same as ``file.pdf``. Returns '' when there is no
        extension.
        """
        return os.path.splitext(filname)[1][1:].lower()
64+
65+
66+
doc_renderer = DocumentRenderer()
67+
68+
3469
@app.task(
3570
bind=True,
3671
name='geonode.documents.tasks.create_document_thumbnail',
@@ -75,6 +110,11 @@ def create_document_thumbnail(self, object_id):
75110
if image_file is not None:
76111
image_file.close()
77112

113+
elif doc_renderer.supports(document.files[0]):
114+
try:
115+
thumbnail_content = doc_renderer.render(document.files[0])
116+
except Exception as e:
117+
print(e)
78118
if not thumbnail_content:
79119
logger.warning(f"Thumbnail for document #{object_id} empty.")
80120
ResourceBase.objects.filter(id=document.id).update(thumbnail_url=None)

geonode/documents/tests.py

+18
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ def test_non_image_documents_thumbnail(self):
286286
def test_image_documents_thumbnail(self):
287287
self.client.login(username='admin', password='admin')
288288
try:
289+
# test image doc
289290
with open(os.path.join(f"{self.project_root}", "tests/data/img.gif"), "rb") as f:
290291
data = {
291292
'title': "img File Doc",
@@ -303,8 +304,25 @@ def test_image_documents_thumbnail(self):
303304
self.assertEqual(file.size, (400, 200))
304305
# check thumbnail quality and extension
305306
self.assertEqual(file.format, 'JPEG')
307+
# test pdf doc
308+
with open(os.path.join(f"{self.project_root}", "tests/data/pdf_doc.pdf"), "rb") as f:
309+
data = {
310+
'title': "Pdf File Doc",
311+
'doc_file': f,
312+
'extension': 'pdf',
313+
}
314+
self.client.post(reverse('document_upload'), data=data)
315+
d = Document.objects.get(title='Pdf File Doc')
316+
self.assertIsNotNone(d.thumbnail_url)
317+
thumb_file = os.path.join(
318+
settings.MEDIA_ROOT, f"thumbs/{os.path.basename(urlparse(d.thumbnail_url).path)}"
319+
)
320+
file = Image.open(thumb_file)
321+
# check thumbnail quality and extension
322+
self.assertEqual(file.format, 'JPEG')
306323
finally:
307324
Document.objects.filter(title='img File Doc').delete()
325+
Document.objects.filter(title='Pdf File Doc').delete()
308326

309327
def test_upload_document_form_size_limit(self):
310328
form_data = {
10.7 KB
Binary file not shown.

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ zipstream-new==1.1.8
2525
schema==0.7.5
2626
rdflib==6.1.1
2727
smart_open==6.2.0
28+
PyMuPDF==1.20.2
2829

2930
# Django Apps
3031
django-allauth==0.51.0

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ install_requires =
5151
schema==0.7.5
5252
rdflib==6.1.1
5353
smart_open==6.2.0
54+
PyMuPDF==1.20.2
5455

5556
# Django Apps
5657
django-allauth==0.51.0

0 commit comments

Comments
 (0)