From 7204d59d665f8010afe9bc019efa4a654d995c51 Mon Sep 17 00:00:00 2001 From: FinnStutzenstein Date: Wed, 4 Dec 2019 14:24:30 +0100 Subject: [PATCH] [WIP] External postgres as mediafile store --- .gitignore | 3 + .../media-upload-content.component.ts | 2 +- .../app/shared/models/mediafiles/mediafile.ts | 11 +- .../site/mediafiles/models/view-mediafile.ts | 41 +++-- .../services/mediafiles-sort-list.service.ts | 2 +- .../mediafile/mediafile-slide-data.ts | 2 +- .../mediafile/mediafile-slide.component.ts | 4 +- .../migrations/0007_external_storage_1.py | 37 +++++ .../migrations/0008_external_storage_2.py | 28 ++++ openslides/mediafiles/models.py | 50 ++++--- openslides/mediafiles/projector.py | 2 +- openslides/mediafiles/serializers.py | 65 +------- openslides/mediafiles/utils.py | 27 ++++ openslides/mediafiles/views.py | 140 +++++++++++++++--- openslides/urls.py | 3 +- tests/integration/mediafiles/test_viewset.py | 48 +++++- 16 files changed, 329 insertions(+), 136 deletions(-) create mode 100644 openslides/mediafiles/migrations/0007_external_storage_1.py create mode 100644 openslides/mediafiles/migrations/0008_external_storage_2.py create mode 100644 openslides/mediafiles/utils.py diff --git a/.gitignore b/.gitignore index a3d55c97d..9f2130d4c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ node_modules/* bower_components/* +# OS4-Submodules +openslides-* + # Local user data (settings, database, media, search index, static files) personal_data/* openslides/static/* diff --git a/client/src/app/shared/components/media-upload-content/media-upload-content.component.ts b/client/src/app/shared/components/media-upload-content/media-upload-content.component.ts index 9940d702e..71beedb6b 100644 --- a/client/src/app/shared/components/media-upload-content/media-upload-content.component.ts +++ b/client/src/app/shared/components/media-upload-content/media-upload-content.component.ts @@ -138,7 +138,7 @@ export class MediaUploadContentComponent implements OnInit { input.set('title', fileData.title); const access_groups_id = fileData.form.value.access_groups_id || []; if (access_groups_id.length > 0) { - input.set('access_groups_id', '' + access_groups_id); + input.set('access_groups_id', JSON.stringify(access_groups_id)); } if (this.selectedDirectoryId) { input.set('parent_id', '' + this.selectedDirectoryId); diff --git a/client/src/app/shared/models/mediafiles/mediafile.ts b/client/src/app/shared/models/mediafiles/mediafile.ts index 7973f64c9..c639dcbf8 100644 --- a/client/src/app/shared/models/mediafiles/mediafile.ts +++ b/client/src/app/shared/models/mediafiles/mediafile.ts @@ -1,11 +1,7 @@ import { BaseModelWithListOfSpeakers } from '../base/base-model-with-list-of-speakers'; -interface FileMetadata { - name: string; - type: string; - - // Only for PDFs - pages: number; +interface PdfInformation { + pages?: number; encrypted?: boolean; } @@ -13,7 +9,9 @@ export interface MediafileWithoutNestedModels extends BaseModelWithListOfSpeaker id: number; title: string; media_url_prefix: string; + pdf_information: PdfInformation; filesize?: string; + mimetype?: string; access_groups_id: number[]; create_timestamp: string; parent_id: number | null; @@ -31,7 +29,6 @@ export interface MediafileWithoutNestedModels extends BaseModelWithListOfSpeaker export class Mediafile extends BaseModelWithListOfSpeakers { public static COLLECTIONSTRING = 'mediafiles/mediafile'; public id: number; - public mediafile?: FileMetadata; public get has_inherited_access_groups(): boolean { return typeof this.inherited_access_groups_id !== 'boolean'; diff --git a/client/src/app/site/mediafiles/models/view-mediafile.ts b/client/src/app/site/mediafiles/models/view-mediafile.ts index f86dd4a54..c15d718ac 100644 --- a/client/src/app/site/mediafiles/models/view-mediafile.ts +++ b/client/src/app/site/mediafiles/models/view-mediafile.ts @@ -8,6 +8,19 @@ import { ViewGroup } from 'app/site/users/models/view-group'; export const IMAGE_MIMETYPES = ['image/png', 'image/jpeg', 'image/gif']; export const FONT_MIMETYPES = ['font/ttf', 'font/woff', 'application/font-woff', 'application/font-sfnt']; export const PDF_MIMETYPES = ['application/pdf']; +export const VIDEO_MIMETYPES = [ + 'video/quicktime', + 'video/mp4', + 'video/webm', + 'video/ogg', + 'video/x-flv', + 'application/x-mpegURL', + 'video/MP2T', + 'video/3gpp', + 'video/x-msvideo', + 'video/x-ms-wmv', + 'video/x-matroska' +]; export interface MediafileTitleInformation { title: string; @@ -26,12 +39,8 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers return this.title; } - public get type(): string | null { - return this.mediafile.mediafile ? this.mediafile.mediafile.type : ''; - } - public get pages(): number | null { - return this.mediafile.mediafile ? this.mediafile.mediafile.pages : null; + return this.mediafile.pdf_information.pages || null; } public get timestamp(): string { @@ -39,7 +48,7 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers } public formatForSearch(): SearchRepresentation { - const type = this.is_directory ? 'directory' : this.type; + const type = this.is_directory ? 'directory' : this.mimetype; const properties = [ { key: 'Title', value: this.getTitle() }, { key: 'Path', value: this.path }, @@ -91,7 +100,7 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers * @returns true or false */ public isImage(): boolean { - return IMAGE_MIMETYPES.includes(this.type); + return IMAGE_MIMETYPES.includes(this.mimetype); } /** @@ -100,7 +109,7 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers * @returns true or false */ public isFont(): boolean { - return FONT_MIMETYPES.includes(this.type); + return FONT_MIMETYPES.includes(this.mimetype); } /** @@ -109,7 +118,7 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers * @returns true or false */ public isPdf(): boolean { - return PDF_MIMETYPES.includes(this.type); + return PDF_MIMETYPES.includes(this.mimetype); } /** @@ -118,19 +127,7 @@ export class ViewMediafile extends BaseViewModelWithListOfSpeakers * @returns true or false */ public isVideo(): boolean { - return [ - 'video/quicktime', - 'video/mp4', - 'video/webm', - 'video/ogg', - 'video/x-flv', - 'application/x-mpegURL', - 'video/MP2T', - 'video/3gpp', - 'video/x-msvideo', - 'video/x-ms-wmv', - 'video/x-matroska' - ].includes(this.type); + return VIDEO_MIMETYPES.includes(this.mimetype); } public getIcon(): string { diff --git a/client/src/app/site/mediafiles/services/mediafiles-sort-list.service.ts b/client/src/app/site/mediafiles/services/mediafiles-sort-list.service.ts index 4df9258ee..3b5339b85 100644 --- a/client/src/app/site/mediafiles/services/mediafiles-sort-list.service.ts +++ b/client/src/app/site/mediafiles/services/mediafiles-sort-list.service.ts @@ -23,7 +23,7 @@ export class MediafilesSortListService extends BaseSortListService[] = [ { property: 'title' }, { - property: 'type', + property: 'mimetype', label: this.translate.instant('Type') }, { diff --git a/client/src/app/slides/mediafiles/mediafile/mediafile-slide-data.ts b/client/src/app/slides/mediafiles/mediafile/mediafile-slide-data.ts index 9705ef8b7..6f9ccdb5f 100644 --- a/client/src/app/slides/mediafiles/mediafile/mediafile-slide-data.ts +++ b/client/src/app/slides/mediafiles/mediafile/mediafile-slide-data.ts @@ -1,5 +1,5 @@ export interface MediafileSlideData { path: string; - type: string; + mimetype: string; media_url_prefix: string; } diff --git a/client/src/app/slides/mediafiles/mediafile/mediafile-slide.component.ts b/client/src/app/slides/mediafiles/mediafile/mediafile-slide.component.ts index c40c071b3..4c8bf3f69 100644 --- a/client/src/app/slides/mediafiles/mediafile/mediafile-slide.component.ts +++ b/client/src/app/slides/mediafiles/mediafile/mediafile-slide.component.ts @@ -20,11 +20,11 @@ export class MediafileSlideComponent extends BaseSlideComponent= 1024 * 1024: - mB = size / 1024 / 1024 - size_string = "%d MB" % mB - else: - kB = size / 1024 - size_string = "%d kB" % kB - return size_string + return bytes_to_human(size) @property def is_logo(self): @@ -243,6 +256,7 @@ class Mediafile(RESTModelMixin, ListOfSpeakersMixin, models.Model): @property def is_file(self): + """ Do not check the self.mediafile, becuase this is not a valid indicator. """ return not self.is_directory def get_list_of_speakers_title_information(self): diff --git a/openslides/mediafiles/projector.py b/openslides/mediafiles/projector.py index e2b0adb73..196d5e8c8 100644 --- a/openslides/mediafiles/projector.py +++ b/openslides/mediafiles/projector.py @@ -32,7 +32,7 @@ async def mediafile_slide( return { "path": mediafile["path"], - "type": mediafile["mediafile"]["type"], + "mimetype": mediafile["mimetype"], "media_url_prefix": mediafile["media_url_prefix"], } diff --git a/openslides/mediafiles/serializers.py b/openslides/mediafiles/serializers.py index d1f98295e..0ff48dac5 100644 --- a/openslides/mediafiles/serializers.py +++ b/openslides/mediafiles/serializers.py @@ -1,14 +1,10 @@ -import mimetypes - from django.conf import settings -from django.db import models as dbmodels -from PyPDF2 import PdfFileReader -from PyPDF2.utils import PdfReadError from ..utils.auth import get_group_model from ..utils.rest_api import ( - FileField, + CharField, IdPrimaryKeyRelatedField, + JSONField, ModelSerializer, SerializerMethodField, ValidationError, @@ -16,70 +12,28 @@ from ..utils.rest_api import ( from .models import Mediafile -class AngularCompatibleFileField(FileField): - def to_internal_value(self, data): - if data == "": - return None - return super(AngularCompatibleFileField, self).to_internal_value(data) - - def to_representation(self, value): - if value is None or value.name is None: - return None - filetype = mimetypes.guess_type(value.name)[0] - result = {"name": value.name, "type": filetype} - if filetype == "application/pdf": - try: - if ( - settings.DEFAULT_FILE_STORAGE - == "storages.backends.sftpstorage.SFTPStorage" - ): - remote_path = value.storage._remote_path(value.name) - file_handle = value.storage.sftp.open(remote_path, mode="rb") - else: - file_handle = open(value.path, "rb") - - result["pages"] = PdfFileReader(file_handle).getNumPages() - except FileNotFoundError: - # File was deleted from server. Set 'pages' to 0. - result["pages"] = 0 - except PdfReadError: - # File could be encrypted but not be detected by PyPDF. - result["pages"] = 0 - result["encrypted"] = True - return result - - class MediafileSerializer(ModelSerializer): """ Serializer for mediafile.models.Mediafile objects. """ media_url_prefix = SerializerMethodField() - filesize = SerializerMethodField() + pdf_information = JSONField(required=False) access_groups = IdPrimaryKeyRelatedField( many=True, required=False, queryset=get_group_model().objects.all() ) - - def __init__(self, *args, **kwargs): - """ - This constructor overwrites the FileField field serializer to return the file meta data in a way that the - angualarjs upload module likes - """ - super(MediafileSerializer, self).__init__(*args, **kwargs) - self.serializer_field_mapping[dbmodels.FileField] = AngularCompatibleFileField - - # Make some fields read-oinly for updates (not creation) - if self.instance is not None: - self.fields["mediafile"].read_only = True + original_filename = CharField(write_only=True, required=False, allow_null=True) class Meta: model = Mediafile fields = ( "id", "title", - "mediafile", + "original_filename", "media_url_prefix", "filesize", + "mimetype", + "pdf_information", "access_groups", "create_timestamp", "is_directory", @@ -89,7 +43,7 @@ class MediafileSerializer(ModelSerializer): "inherited_access_groups_id", ) - read_only_fields = ("path",) + read_only_fields = ("path", "filesize", "mimetype", "pdf_information") def validate(self, data): title = data.get("title") @@ -122,8 +76,5 @@ class MediafileSerializer(ModelSerializer): validated_data.pop("parent", None) return super().update(instance, validated_data) - def get_filesize(self, mediafile): - return mediafile.get_filesize() - def get_media_url_prefix(self, mediafile): return settings.MEDIA_URL diff --git a/openslides/mediafiles/utils.py b/openslides/mediafiles/utils.py new file mode 100644 index 000000000..67204a8d6 --- /dev/null +++ b/openslides/mediafiles/utils.py @@ -0,0 +1,27 @@ +from PyPDF2 import PdfFileReader +from PyPDF2.utils import PdfReadError + + +def bytes_to_human(size): + # TODO: Read http://stackoverflow.com/a/1094933 and think about it. + if size < 1024: + size_string = "< 1 kB" + elif size >= 1024 * 1024: + mB = size / 1024 / 1024 + size_string = "%d MB" % mB + else: + kB = size / 1024 + size_string = "%d kB" % kB + return size_string + + +def get_pdf_information(mediafile): + result = {} + try: + pdf = PdfFileReader(mediafile) + result["pages"] = pdf.getNumPages() + except PdfReadError: + # File could be encrypted but not be detected by PyPDF. + result["pages"] = 0 + result["encrypted"] = True + return result diff --git a/openslides/mediafiles/views.py b/openslides/mediafiles/views.py index d0c98b958..683de2030 100644 --- a/openslides/mediafiles/views.py +++ b/openslides/mediafiles/views.py @@ -1,15 +1,39 @@ -from django.http import HttpResponseForbidden, HttpResponseNotFound +import json + +from django.conf import settings +from django.db import connections +from django.http import HttpResponseForbidden, HttpResponseNotFound, JsonResponse from django.http.request import QueryDict +from django.views.decorators.csrf import csrf_exempt from django.views.static import serve from openslides.core.models import Projector +from openslides.utils import logging +from openslides.utils.auth import has_perm, in_some_groups +from openslides.utils.autoupdate import inform_changed_data +from openslides.utils.rest_api import ( + ModelViewSet, + Response, + ValidationError, + list_route, + status, +) -from ..utils.auth import has_perm, in_some_groups -from ..utils.autoupdate import inform_changed_data -from ..utils.rest_api import ModelViewSet, Response, ValidationError, list_route from .access_permissions import MediafileAccessPermissions from .config import watch_and_update_configs from .models import Mediafile +from .utils import bytes_to_human, get_pdf_information + + +logger = logging.getLogger(__name__) + +use_mediafile_database = "mediafiles" in connections +if use_mediafile_database: + logger.info("Using a standalone mediafile database") + +max_upload_size = getattr( + settings, "MEDIAFILE_MAX_SIZE", 100 * 1024 * 1024 +) # default: 100mb # Viewsets for the REST API @@ -47,6 +71,23 @@ class MediafileViewSet(ModelViewSet): result = False return result + def convert_access_groups(self, request): + # convert formdata json representation of "[id, id, ...]" to an real object + if "access_groups_id" in request.data and isinstance(request.data, QueryDict): + access_groups_id = request.data.get("access_groups_id") + if access_groups_id: + try: + access_groups_id = json.loads(access_groups_id) + request.data.setlist("access_groups_id", access_groups_id) + except json.decoder.JSONDecodeError: + raise ValidationError( + { + "detail": "The provided access groups ({access_groups_id}) is not JSON" + } + ) + else: + del request.data["access_groups_id"] + def create(self, request, *args, **kwargs): """ Customized view endpoint to upload a new file. @@ -55,30 +96,77 @@ class MediafileViewSet(ModelViewSet): if isinstance(request.data, QueryDict): request.data._mutable = True - # convert formdata string ", id>" to a list of numbers. - if "access_groups_id" in request.data and isinstance(request.data, QueryDict): - access_groups_id = request.data.get("access_groups_id") - if access_groups_id: - request.data.setlist( - "access_groups_id", [int(x) for x in access_groups_id.split(", ")] - ) - else: - del request.data["access_groups_id"] + self.convert_access_groups(request) is_directory = bool(request.data.get("is_directory", False)) - if is_directory and request.data.get("mediafile"): + mediafile = request.data.get("mediafile") + + # Check, that it is either a file or a directory + if is_directory and mediafile: raise ValidationError( {"detail": "Either create a path or a file, but not both"} ) - if not request.data.get("mediafile") and not is_directory: + if not mediafile and not is_directory: raise ValidationError({"detail": "You forgot to provide a file."}) - return super().create(request, *args, **kwargs) + if mediafile: + if mediafile.size > max_upload_size: + max_size_for_humans = bytes_to_human(max_upload_size) + raise ValidationError( + {"detail": f"The maximum upload file size is {max_size_for_humans}"} + ) + + # set original filename + request.data["original_filename"] = mediafile.name + + # Remove mediafile from request.data, we will put it into the storage ourself. + request.data.pop("mediafile", None) + + # Create mediafile + serializer = self.get_serializer(data=request.data) + serializer.is_valid(raise_exception=True) + db_mediafile = serializer.save() + + # Set filesize, mimetype and check for pdfs. + if mediafile: + db_mediafile.filesize = bytes_to_human(mediafile.size) + db_mediafile.mimetype = mediafile.content_type + if db_mediafile.mimetype == "application/pdf": + db_mediafile.pdf_information = get_pdf_information(mediafile) + else: + db_mediafile.pdf_information = {} + db_mediafile.save() + + # Custom modifications for the database storage: Set original filename here and + # insert the file into the foreing storage + if use_mediafile_database: + with connections["mediafiles"].cursor() as cursor: + cursor.execute( + "INSERT INTO mediafile_data (id, data, mimetype) VALUES (%s, %s, %s)", + [ + db_mediafile.id, + mediafile.open().read(), + mediafile.content_type, + ], + ) + else: + db_mediafile.mediafile = mediafile + db_mediafile.save() + + return Response(data={"id": db_mediafile.id}, status=status.HTTP_201_CREATED) def destroy(self, *args, **kwargs): with watch_and_update_configs(): - response = super().destroy(*args, **kwargs) - return response + mediafile = self.get_object() + deleted_ids = mediafile.delete() + if use_mediafile_database: + with connections["mediafiles"].cursor() as cursor: + cursor.execute( + "DELETE FROM mediafile_data WHERE id IN %s", + [tuple(id for id in deleted_ids)], + ) + + return Response(status=status.HTTP_204_NO_CONTENT) def update(self, *args, **kwargs): with watch_and_update_configs(): @@ -257,3 +345,19 @@ def protected_serve(request, path, document_root=None, show_indexes=False): return serve(request, mediafile.mediafile.name, document_root, show_indexes) else: return HttpResponseForbidden(content="Forbidden.") + + +@csrf_exempt +def check_serve(request, path): + """ + Checks, if the mediafile could be delivered: Responds with a 403 if the access is + forbidden *or* the file was not found. Responds 200 with the mediafile id, if + the access is granted. + """ + try: + mediafile = get_mediafile(request, path) + if not mediafile: + raise Mediafile.DoesNotExist() + except Mediafile.DoesNotExist: + return HttpResponseForbidden() + return JsonResponse({"id": mediafile.id}) diff --git a/openslides/urls.py b/openslides/urls.py index 7043024b4..1d7f70c2b 100644 --- a/openslides/urls.py +++ b/openslides/urls.py @@ -2,7 +2,7 @@ from django.conf import settings from django.conf.urls import include, url from django.views.generic import RedirectView -from openslides.mediafiles.views import protected_serve +from openslides.mediafiles.views import check_serve, protected_serve from openslides.utils.rest_api import router from .core import views as core_views @@ -15,6 +15,7 @@ urlpatterns = [ protected_serve, {"document_root": settings.MEDIA_ROOT}, ), + url(r"^check-media/(?P.*)$", check_serve), # URLs for the rest system, redirect /rest to /rest/ url(r"^rest$", RedirectView.as_view(url="/rest/", permanent=True)), url(r"^rest/", include(router.urls)), diff --git a/tests/integration/mediafiles/test_viewset.py b/tests/integration/mediafiles/test_viewset.py index 1db7c1875..5620986a7 100644 --- a/tests/integration/mediafiles/test_viewset.py +++ b/tests/integration/mediafiles/test_viewset.py @@ -1,3 +1,5 @@ +import json + import pytest from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse @@ -22,6 +24,7 @@ def test_mediafiles_db_queries(): for index in range(10): Mediafile.objects.create( title=f"some_file{index}", + original_filename=f"some_file{index}", mediafile=SimpleUploadedFile(f"some_file{index}", b"some content."), ) @@ -56,6 +59,7 @@ class TestCreation(TestCase): self.assertEqual(mediafile.title, "test_title_ahyo1uifoo9Aiph2av5a") self.assertTrue(mediafile.is_directory) self.assertEqual(mediafile.mediafile.name, "") + self.assertEqual(mediafile.original_filename, "") self.assertEqual(mediafile.path, mediafile.title + "/") def test_file_and_directory(self): @@ -166,9 +170,8 @@ class TestCreation(TestCase): reverse("mediafile-list"), { "title": "test_title_dggjwevBnUngelkdviom", - "is_directory": True, - # This is the format, if it would be provided by JS `FormData`. - "access_groups_id": "2, 4", + "mediafile": self.file, + "access_groups_id": json.dumps([2, 4]), }, ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) @@ -177,6 +180,32 @@ class TestCreation(TestCase): self.assertEqual( sorted([group.id for group in mediafile.access_groups.all()]), [2, 4] ) + self.assertTrue(mediafile.mediafile.name) + self.assertEqual(mediafile.path, mediafile.original_filename) + + def test_with_access_groups_wrong_json(self): + response = self.client.post( + reverse("mediafile-list"), + { + "title": "test_title_dggjwevBnUngelkdviom", + "is_directory": True, + "access_groups_id": json.dumps({"a": 324}), + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertFalse(Mediafile.objects.exists()) + + def test_with_access_groups_wrong_json2(self): + response = self.client.post( + reverse("mediafile-list"), + { + "title": "test_title_dggjwevBnUngelkdviom", + "is_directory": True, + "access_groups_id": "_FWEpwwfkwk", + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertFalse(Mediafile.objects.exists()) # TODO: List and retrieve @@ -194,13 +223,18 @@ class TestUpdate(TestCase): self.client = APIClient() self.client.login(username="admin", password="admin") self.dir = Mediafile.objects.create(title="dir", is_directory=True) - self.fileA = SimpleUploadedFile("some_fileA.ext", b"some content.") + fileA_name = "some_fileA.ext" + self.fileA = SimpleUploadedFile(fileA_name, b"some content.") self.mediafileA = Mediafile.objects.create( - title="mediafileA", mediafile=self.fileA, parent=self.dir + title="mediafileA", + original_filename=fileA_name, + mediafile=self.fileA, + parent=self.dir, ) - self.fileB = SimpleUploadedFile("some_fileB.ext", b"some content.") + fileB_name = "some_fileB.ext" + self.fileB = SimpleUploadedFile(fileB_name, b"some content.") self.mediafileB = Mediafile.objects.create( - title="mediafileB", mediafile=self.fileB + title="mediafileB", original_filename=fileB_name, mediafile=self.fileB ) def test_update(self):