From 75eb550b64f9f77a342fb30eda9999a80fe77e95 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Tue, 20 Nov 2018 08:48:04 -0500 Subject: [PATCH] Add a new `media_file_basename` column to improve performance when a single `auth_user` has may rows in `logger_attachment`. This commit lacks a management command to populate the `media_file_basename` column, but we'll say that it closes #495. --- .../0010_attachment_media_file_basename.py | 19 ++++++++++++++++ onadata/apps/logger/models/attachment.py | 22 +++++++++++++++++++ onadata/apps/viewer/views.py | 21 +++++++++++------- 3 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 onadata/apps/logger/migrations/0010_attachment_media_file_basename.py diff --git a/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py b/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py new file mode 100644 index 000000000..393bfb732 --- /dev/null +++ b/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0009_add_posted_to_kpi_field_to_logger_instance'), + ] + + operations = [ + migrations.AddField( + model_name='attachment', + name='media_file_basename', + field=models.CharField(db_index=True, max_length=260, null=True, blank=True), + ), + ] diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py index f40b48be5..6b3106e2e 100644 --- a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -1,4 +1,5 @@ import os +import re import mimetypes from hashlib import md5 @@ -28,18 +29,39 @@ def hash_attachment_contents(contents): class Attachment(models.Model): instance = models.ForeignKey(Instance, related_name="attachments") media_file = models.FileField(upload_to=upload_to, max_length=380, db_index=True) + media_file_basename = models.CharField( + max_length=260, null=True, blank=True, db_index=True) mimetype = models.CharField( max_length=100, null=False, blank=True, default='') + MEDIA_FILE_BASENAME_PATTERN = re.compile(r'/([^/]+)$') + class Meta: app_label = 'logger' + def _populate_media_file_basename(self): + # TODO: write a management command to call this (and save) for all + # existing attachments? For the moment, the `media_file_basename` + # column can be populated directly in Postgres using: + # UPDATE logger_attachment + # SET media_file_basename = substring(media_file from '/([^/]+)$'); + if self.media_file: + match = re.search( + self.MEDIA_FILE_BASENAME_PATTERN, self.media_file.name) + if match: + self.media_file_basename = match.groups()[0] + else: + self.media_file_basename = '' + def save(self, *args, **kwargs): if self.media_file and self.mimetype == '': # guess mimetype mimetype, encoding = mimetypes.guess_type(self.media_file.name) if mimetype: self.mimetype = mimetype + + self._populate_media_file_basename() + super(Attachment, self).save(*args, **kwargs) @property diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 5e3ccfdf5..36803438c 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -14,6 +14,7 @@ from django.core.files.storage import get_storage_class from django.core.servers.basehttp import FileWrapper from django.core.urlresolvers import reverse +from django.db.models import Q from django.http import ( HttpResponseForbidden, HttpResponseRedirect, HttpResponseNotFound, HttpResponseBadRequest, HttpResponse) @@ -674,26 +675,30 @@ def attachment_url(request, size='medium'): # TODO: how to make sure we have the right media file, # this assumes duplicates are the same file if media_file: - mtch = re.search('^([^\/]+)/attachments(/[^\/]+)$', media_file) + mtch = re.search(r'^([^/]+)/attachments/([^/]+)$', media_file) if mtch: # in cases where the media_file url created by instance.html's # _attachment_url function is in the wrong format, this will # match attachments with the correct owner and the same file name (username, filename) = mtch.groups() - result = Attachment.objects.filter(**{ - 'instance__xform__user__username': username, - }).filter(**{ - 'media_file__endswith': filename, - })[0:1] + result = Attachment.objects.filter( + instance__xform__user__username=username, + ).filter( + Q(media_file_basename=filename) | Q( + media_file_basename=None, + media_file__endswith='/' + filename + ) + )[0:1] else: # search for media_file with exact matching name result = Attachment.objects.filter(media_file=media_file)[0:1] - if len(result) == 0: + try: + attachment = result[0] + except IndexError: media_file_logger.info('attachment not found') return HttpResponseNotFound(_(u'Attachment not found')) - attachment = result[0] if not attachment.mimetype.startswith('image'): return redirect(attachment.media_file.url)