diff --git a/onadata/apps/logger/management/commands/populate_media_file_basename.py b/onadata/apps/logger/management/commands/populate_media_file_basename.py
new file mode 100644
index 000000000..7131196b0
--- /dev/null
+++ b/onadata/apps/logger/management/commands/populate_media_file_basename.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+# vim: ai ts=4 sts=4 et sw=4 coding=utf-8
+from django.conf import settings
+from django.db import connection
+from django.db.models import Q, Func
+from django.db.models.functions import Substr
+from django.core.management.base import BaseCommand, CommandError
+from django.utils.translation import ugettext as _, ugettext_lazy
+from optparse import make_option
+
+from onadata.apps.logger.models.attachment import Attachment
+
+
+class SubstrFromPattern(Func):
+    """
+    SQL `SUBSTRING(<expressions> from '<pattern>')` expression.
+
+    `pattern` is interpolated verbatim into the SQL template (it is not
+    passed as a query parameter), so it must be a trusted constant.
+    """
+    function = "SUBSTRING"
+    template = "%(function)s(%(expressions)s from '%(pattern)s')"
+
+
+class Command(BaseCommand):
+    """
+    Back-fill `Attachment.media_file_basename` where it is NULL or empty.
+
+    Attachments are processed in batches of `--batchsize` ids; each batch is
+    updated by one query which extracts the basename from `media_file`.
+    """
+
+    help = ugettext_lazy("Updates indexed field `media_file_basename` which is empty or null")
+    option_list = BaseCommand.option_list + (
+        make_option(
+            '--batchsize',
+            type='int',
+            default=100,
+            help=ugettext_lazy("Number of records to process per query")),)
+
+    def handle(self, *args, **kwargs):
+        """
+        Populate `media_file_basename` for attachments where it is NULL or empty.
+
+        Raises `CommandError` if `batchsize` is lower than 1.
+        """
+        batchsize = kwargs.get("batchsize", 100)
+        if batchsize < 1:
+            raise CommandError(_("`--batchsize` must be greater than 0"))
+
+        pending = Attachment.objects\
+            .filter(Q(media_file_basename=None) | Q(media_file_basename=""))\
+            .order_by("id")
+
+        # Paginate on the primary key rather than with a growing offset: rows
+        # fixed by a batch stop matching the filter, so an offset would skip
+        # `batchsize` pending rows on every iteration. Resuming after the last
+        # id seen also ends the loop when rows still match after the update
+        # (PostgreSQL's SUBSTRING yields NULL if the pattern does not match).
+        last_id = None
+        while True:
+            batch = pending if last_id is None else pending.filter(id__gt=last_id)
+            attachments_ids = list(batch.values_list("id", flat=True)[:batchsize])
+            if not attachments_ids:
+                break
+
+            self.stdout.write(_("Updating attachments from #{} to #{}\n").format(
+                attachments_ids[0],
+                attachments_ids[-1]))
+
+            Attachment.objects.filter(id__in=attachments_ids)\
+                .update(media_file_basename=SubstrFromPattern("media_file", pattern="/([^/]+)$"))
+
+            last_id = attachments_ids[-1]
diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py
index 6b3106e2e..ed62a852d 100644
--- 
a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -34,33 +34,17 @@ class Attachment(models.Model): mimetype = models.CharField( max_length=100, null=False, blank=True, default='') - MEDIA_FILE_BASENAME_PATTERN = re.compile(r'/([^/]+)$') - class Meta: app_label = 'logger' - def _populate_media_file_basename(self): - # TODO: write a management command to call this (and save) for all - # existing attachments? For the moment, the `media_file_basename` - # column can be populated directly in Postgres using: - # UPDATE logger_attachment - # SET media_file_basename = substring(media_file from '/([^/]+)$'); - if self.media_file: - match = re.search( - self.MEDIA_FILE_BASENAME_PATTERN, self.media_file.name) - if match: - self.media_file_basename = match.groups()[0] - else: - self.media_file_basename = '' - def save(self, *args, **kwargs): - if self.media_file and self.mimetype == '': - # guess mimetype - mimetype, encoding = mimetypes.guess_type(self.media_file.name) - if mimetype: - self.mimetype = mimetype - - self._populate_media_file_basename() + if self.media_file: + self.media_file_basename = self.filename + if self.mimetype == '': + # guess mimetype + mimetype, encoding = mimetypes.guess_type(self.media_file.name) + if mimetype: + self.mimetype = mimetype super(Attachment, self).save(*args, **kwargs) diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 36803438c..820f00cad 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -699,7 +699,6 @@ def attachment_url(request, size='medium'): media_file_logger.info('attachment not found') return HttpResponseNotFound(_(u'Attachment not found')) - if not attachment.mimetype.startswith('image'): return redirect(attachment.media_file.url)