From 75dcbab5d14383266aaba708f5c2f1f5b0b602a2 Mon Sep 17 00:00:00 2001 From: Oskar Hahn Date: Sun, 3 Jan 2016 15:33:51 +0100 Subject: [PATCH] Add search on the server side. --- openslides/assignments/models.py | 11 ++ openslides/core/apps.py | 12 +++ openslides/core/models.py | 8 ++ openslides/core/urls.py | 11 +- openslides/core/views.py | 21 ++++ openslides/mediafiles/models.py | 10 ++ openslides/motions/models.py | 14 +++ openslides/users/models.py | 17 ++- openslides/utils/models.py | 8 ++ openslides/utils/search.py | 173 +++++++++++++++++++++++++++++++ openslides/utils/settings.py.tpl | 3 +- requirements_production.txt | 2 +- tests/old/settings.py | 2 +- tests/settings.py | 2 +- 14 files changed, 284 insertions(+), 10 deletions(-) create mode 100644 openslides/utils/search.py diff --git a/openslides/assignments/models.py b/openslides/assignments/models.py index 30633bec6..96d8f138b 100644 --- a/openslides/assignments/models.py +++ b/openslides/assignments/models.py @@ -16,6 +16,7 @@ from openslides.poll.models import ( ) from openslides.utils.exceptions import OpenSlidesError from openslides.utils.models import RESTModelMixin +from openslides.utils.search import user_name_helper class AssignmentRelatedUser(RESTModelMixin, models.Model): @@ -318,6 +319,16 @@ class Assignment(RESTModelMixin, models.Model): """ return self.agenda_item.pk + def get_search_index_string(self): + """ + Returns a string that can be indexed for the search. 
+ """ + return " ".join(( + self.title, + self.description, + user_name_helper(self.related_users.all()), + " ".join(tag.name for tag in self.tags.all()))) + class AssignmentVote(RESTModelMixin, BaseVote): option = models.ForeignKey('AssignmentOption', related_name='votes') diff --git a/openslides/core/apps.py b/openslides/core/apps.py index 6a9af519b..ac548c7bf 100644 --- a/openslides/core/apps.py +++ b/openslides/core/apps.py @@ -18,6 +18,7 @@ class CoreAppConfig(AppConfig): from openslides.core.signals import config_signal from openslides.utils.autoupdate import inform_changed_data_receiver from openslides.utils.rest_api import router + from openslides.utils.search import index_add_instance, index_del_instance from .signals import setup_general_config from .views import ( ChatMessageViewSet, @@ -45,3 +46,14 @@ class CoreAppConfig(AppConfig): signals.post_delete.connect( inform_changed_data_receiver, dispatch_uid='inform_changed_data_receiver') + + # Update the search when a model is saved or deleted + signals.post_save.connect( + index_add_instance, + dispatch_uid='index_add_instance') + signals.post_delete.connect( + index_del_instance, + dispatch_uid='index_del_instance') + signals.m2m_changed.connect( + index_add_instance, + dispatch_uid='m2m_index_add_instance') diff --git a/openslides/core/models.py b/openslides/core/models.py index 6d8233a5e..a3577cd64 100644 --- a/openslides/core/models.py +++ b/openslides/core/models.py @@ -158,6 +158,14 @@ class CustomSlide(RESTModelMixin, models.Model): def get_agenda_title(self): return self.title + def get_search_index_string(self): + """ + Returns a string that can be indexed for the search. 
+ """ + return " ".join(( + self.title, + self.text)) + class Tag(RESTModelMixin, models.Model): """ diff --git a/openslides/core/urls.py b/openslides/core/urls.py index ce5ba7f5a..a3fc131a6 100644 --- a/openslides/core/urls.py +++ b/openslides/core/urls.py @@ -1,9 +1,8 @@ -from django.conf.urls import patterns, url +from django.conf.urls import url from . import views -urlpatterns = patterns( - '', +urlpatterns = [ url(r'^core/url_patterns/$', views.UrlPatternsView.as_view(), name='core_url_patterns'), @@ -23,7 +22,11 @@ urlpatterns = patterns( # View for the projectors are handelt by angular. url(r'^projector.*$', views.ProjectorView.as_view()), + url(r'^search/$', + views.SearchView.as_view(), + name='core_search'), + # Main entry point for all angular pages. # Has to be the last entry in the urls.py url(r'^.*$', views.IndexView.as_view()), -) +] diff --git a/openslides/core/views.py b/openslides/core/views.py index c79207944..07a72cb74 100644 --- a/openslides/core/views.py +++ b/openslides/core/views.py @@ -2,6 +2,7 @@ import re import uuid from collections import OrderedDict from operator import attrgetter +from urllib.parse import unquote from django.apps import apps from django.conf import settings @@ -27,6 +28,7 @@ from openslides.utils.rest_api import ( ViewSet, detail_route, ) +from openslides.utils.search import search from .config import config from .exceptions import ConfigError, ConfigNotFound @@ -561,3 +563,22 @@ class VersionView(utils_views.APIView): 'description': get_plugin_description(plugin), 'version': get_plugin_version(plugin)}) return result + + +class SearchView(utils_views.APIView): + """ + Accepts a search string and returns a list of objects where each object + is a dictionary with the keys collection and id. + + This view expects a GET argument 'q' with a search string. + + See: https://pythonhosted.org/Whoosh/querylang.html for the format of the + search string. 
+ """ + http_method_names = ['get'] + + def get_context_data(self, **context): + query = self.request.GET.get('q', '') + return super().get_context_data( + elements=search(unquote(query)), + **context) diff --git a/openslides/mediafiles/models.py b/openslides/mediafiles/models.py index 03ba53feb..3e518e0d5 100644 --- a/openslides/mediafiles/models.py +++ b/openslides/mediafiles/models.py @@ -3,6 +3,8 @@ from django.db import models from django.utils.translation import ugettext as _ from django.utils.translation import ugettext_lazy, ugettext_noop +from openslides.utils.search import user_name_helper + from ..utils.models import RESTModelMixin @@ -61,3 +63,11 @@ class Mediafile(RESTModelMixin, models.Model): kB = size / 1024 size_string = '%d kB' % kB return size_string + + def get_search_index_string(self): + """ + Returns a string that can be indexed for the search. + """ + return " ".join(( + self.title, + user_name_helper(self.uploader))) diff --git a/openslides/motions/models.py b/openslides/motions/models.py index 65440db6c..831dfb1ae 100644 --- a/openslides/motions/models.py +++ b/openslides/motions/models.py @@ -18,6 +18,7 @@ from openslides.poll.models import ( CollectDefaultVotesMixin, ) from openslides.utils.models import RESTModelMixin +from openslides.utils.search import user_name_helper from .exceptions import WorkflowError @@ -537,6 +538,19 @@ class Motion(RESTModelMixin, models.Model): """ return config['motions_amendments_enabled'] and self.parent is not None + def get_search_index_string(self): + """ + Returns a string that can be indexed for the search. 
+ """ + return " ".join(( + self.title or '', + self.text or '', + self.reason or '', + str(self.category) if self.category else '', + user_name_helper(self.submitters.all()), + user_name_helper(self.supporters.all()), + " ".join(tag.name for tag in self.tags.all()))) + class MotionVersion(RESTModelMixin, models.Model): """ diff --git a/openslides/users/models.py b/openslides/users/models.py index 22b0873d1..37d04a944 100644 --- a/openslides/users/models.py +++ b/openslides/users/models.py @@ -10,6 +10,8 @@ from django.contrib.auth.models import ( from django.db import models from django.utils.translation import ugettext_lazy, ugettext_noop +from openslides.utils.search import user_name_helper + from ..core.config import config from ..utils.models import RESTModelMixin from .exceptions import UsersError @@ -182,7 +184,7 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser): structure = '(%s)' % self.structure_level if self.structure_level else '' return ' '.join((self.title, self.get_short_name(), structure)).strip() - def get_short_name(self): + def get_short_name(self, sort_by_first_name=None): """ Returns only the name of the user. @@ -195,7 +197,9 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser): # The user has a last_name and a first_name if first_name and last_name: - if config['users_sort_users_by_first_name']: + if sort_by_first_name is None: + sort_by_first_name = config['users_sort_users_by_first_name'] + if sort_by_first_name: name = ' '.join((first_name, last_name)) else: name = ', '.join((last_name, first_name)) @@ -214,3 +218,12 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser): """ from .views import UserViewSet return UserViewSet + + def get_search_index_string(self): + """ + Returns a string that can be indexed for the search. 
+ """ + return " ".join(( + user_name_helper(self), + self.structure_level, + self.about_me)) diff --git a/openslides/utils/models.py b/openslides/utils/models.py index 3626d34fc..43c6643fc 100644 --- a/openslides/utils/models.py +++ b/openslides/utils/models.py @@ -39,3 +39,11 @@ class RESTModelMixin: root_instance = self.get_root_rest_element() rest_url = '%s-detail' % type(root_instance)._meta.object_name.lower() return reverse(rest_url, args=[str(root_instance.pk)]) + + def get_collection_string(self): + """ + Returns the string representing the name of the collection. + """ + # TODO: find a way not to use the url. See #1791 + from .rest_api import get_collection_and_id_from_url + return get_collection_and_id_from_url(self.get_root_rest_url())[0] diff --git a/openslides/utils/search.py b/openslides/utils/search.py new file mode 100644 index 000000000..2226a01ef --- /dev/null +++ b/openslides/utils/search.py @@ -0,0 +1,173 @@ +import os +import shutil + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.core.exceptions import ImproperlyConfigured +from django.db.models import QuerySet +from whoosh import fields +from whoosh.filedb.filestore import RamStorage +from whoosh.index import create_in, exists_in, open_dir +from whoosh.qparser import QueryParser + + +def get_schema(): + """ + This method creates the whoosh schema. It is only needed when the search + index is built. After this, the schema is saved and loaded with the index. + + When the schema is changed, then the index has to be recreated or the index + has to be altered. See: + https://pythonhosted.org/Whoosh/schema.html#modifying-the-schema-after-indexing + """ + return fields.Schema( + id=fields.ID(stored=True), + collection=fields.ID(stored=True), + id_collection=fields.ID(unique=True), + content=fields.TEXT) + + +class Index: + """ + Represents the whoosh index. + """ + + def get_index_path(self): + """ + Returns the index path. 
+ + Raises ImproperlyConfigured if the path is not set in the settings. + """ + try: + return settings.SEARCH_INDEX + except AttributeError: + raise ImproperlyConfigured("Set SEARCH_INDEX into your settings.") + + def create_index(self): + """ + Creates the whoosh index. Deletes an existing index if one exists. + + Returns the index. + """ + path = self.get_index_path() + if path == 'ram': + self.storage = RamStorage().create_index(get_schema()) + else: + if os.path.exists(path): + shutil.rmtree(path) + os.mkdir(path) + self.storage = create_in(path, get_schema()) + return self.storage + + def get_or_create_index(self): + """ + Returns an index object. + + Creates the index if it does not exist. + """ + # Try to return a storage object that was created before. + try: + return self.storage + except AttributeError: + pass + path = self.get_index_path() + if path != 'ram' and exists_in(path): + return open_dir(path) + return self.create_index() + +index = Index() + + +def combine_id_and_collection(instance): + """ + Returns a string where the id and the collection string of an instance + are combined. + """ + return "{}{}".format(instance.id, instance.get_collection_string()) + + +def user_name_helper(users): + """ + Helper to index a user or a list of users. + + Returns a string which contains the names of all users separated by a space. + + users can be a list, a queryset or a user object. If it is something else + then str(users) is returned. + """ + if isinstance(users, list) or isinstance(users, QuerySet): + user_string = " ".join( + user.get_short_name(sort_by_first_name=True) for user in users) + elif isinstance(users, get_user_model()): + user_string = users.get_short_name(sort_by_first_name=True) + else: + user_string = str(users) + return user_string + + +def index_add_instance(sender, instance, **kwargs): + """ + Receiver that should be called by the post_save signal and the m2m_changed + signal. 
+ + If the instance has a method get_search_index_string, then it is written + into the search index. The method has to return a string; it is indexed + as the content field of the document. + """ + # TODO: This method blocks the search index. So in a multi thread environment + # this method can raise whoosh.store.LockError. Therefore it has to + # be done in tornado to support the big mode. + # See: https://pythonhosted.org/Whoosh/indexing.html#indexing-documents + try: + get_search_index_string = instance.get_search_index_string + except AttributeError: + # If the instance is not searchable, then exit this signal early. + return + + created = kwargs.get('created', False) + + writer_kwargs = { + 'id_collection': combine_id_and_collection(instance), + 'id': str(instance.pk), + 'collection': instance.get_collection_string(), + 'content': get_search_index_string()} + + with index.get_or_create_index().writer() as writer: + if created: + writer.add_document(**writer_kwargs) + else: + writer.update_document(**writer_kwargs) + + +def index_del_instance(sender, instance, **kwargs): + """ + Like index_add_instance but deletes the instance from the index. + + Should be called by the post_delete signal. + """ + try: + # Try to get the attribute get_search_index_string. It is not needed + # in this method (and therefore not called) but it tells us if the + # instance is searchable. + instance.get_search_index_string + except AttributeError: + # If the instance is not searchable, then exit this signal early. + return + + with index.get_or_create_index().writer() as writer: + writer.delete_by_term('id_collection', combine_id_and_collection(instance)) + + +def search(query): + """ + Searches for elements. + + query has to be a query string. See: https://pythonhosted.org/Whoosh/querylang.html + + The return value is a list of dictionaries where each dictionary has the keys + id and collection. 
+ """ + parser = QueryParser("content", index.get_or_create_index().schema) + query = parser.parse(query) + result = index.get_or_create_index().searcher().search(query, limit=None) + return [dict(element) for element in result] diff --git a/openslides/utils/settings.py.tpl b/openslides/utils/settings.py.tpl index 8018f60a2..2b9aa4efd 100644 --- a/openslides/utils/settings.py.tpl +++ b/openslides/utils/settings.py.tpl @@ -63,4 +63,5 @@ TEMPLATE_DIRS = ( STATICFILES_DIRS = [os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')] + STATICFILES_DIRS -HAYSTACK_CONNECTIONS['default']['PATH'] = os.path.join(OPENSLIDES_USER_DATA_PATH, 'whoosh_index', '') + +SEARCH_INDEX = os.path.join(OPENSLIDES_USER_DATA_PATH, 'search_index') diff --git a/requirements_production.txt b/requirements_production.txt index 20c36876a..24f296921 100644 --- a/requirements_production.txt +++ b/requirements_production.txt @@ -1,7 +1,6 @@ # Requirements for OpenSlides in production in alphabetical order Django>=1.7.1,<1.9 beautifulsoup4>=4.1,<4.5 -django-haystack>=2.1,<2.5 djangorestframework>=3.2.0,<3.3.0 html5lib>=0.9,<1.0 jsonfield>=0.9.19,<1.1 @@ -10,3 +9,4 @@ reportlab>=3.0,<3.3 roman>=2.0,<2.1 setuptools>=2.2,<19.0 sockjs-tornado>=1.0,<1.1 +Whoosh>=2.7.0,<2.8 diff --git a/tests/old/settings.py b/tests/old/settings.py index eb6f9b9f7..b26e2b13d 100644 --- a/tests/old/settings.py +++ b/tests/old/settings.py @@ -46,7 +46,7 @@ TEMPLATE_DIRS = ( STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')) -HAYSTACK_CONNECTIONS['default']['STORAGE'] = 'ram' +SEARCH_INDEX = 'ram' # Special test settings diff --git a/tests/settings.py b/tests/settings.py index eb6f9b9f7..b26e2b13d 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -46,7 +46,7 @@ TEMPLATE_DIRS = ( STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')) -HAYSTACK_CONNECTIONS['default']['STORAGE'] = 'ram' +SEARCH_INDEX = 'ram' # Special test settings