Add search on the server side.

This commit is contained in:
Oskar Hahn 2016-01-03 15:33:51 +01:00
parent 9a6ebdb2cf
commit 75dcbab5d1
14 changed files with 284 additions and 10 deletions

View File

@ -16,6 +16,7 @@ from openslides.poll.models import (
) )
from openslides.utils.exceptions import OpenSlidesError from openslides.utils.exceptions import OpenSlidesError
from openslides.utils.models import RESTModelMixin from openslides.utils.models import RESTModelMixin
from openslides.utils.search import user_name_helper
class AssignmentRelatedUser(RESTModelMixin, models.Model): class AssignmentRelatedUser(RESTModelMixin, models.Model):
@ -318,6 +319,16 @@ class Assignment(RESTModelMixin, models.Model):
""" """
return self.agenda_item.pk return self.agenda_item.pk
def get_search_index_string(self):
    """
    Returns a string that can be indexed for the search.

    The string is made of the title, the description, the names of the
    related users and the names of the tags, separated by single spaces.
    """
    parts = [self.title, self.description]
    parts.append(user_name_helper(self.related_users.all()))
    parts.append(" ".join(tag.name for tag in self.tags.all()))
    return " ".join(parts)
class AssignmentVote(RESTModelMixin, BaseVote): class AssignmentVote(RESTModelMixin, BaseVote):
option = models.ForeignKey('AssignmentOption', related_name='votes') option = models.ForeignKey('AssignmentOption', related_name='votes')

View File

@ -18,6 +18,7 @@ class CoreAppConfig(AppConfig):
from openslides.core.signals import config_signal from openslides.core.signals import config_signal
from openslides.utils.autoupdate import inform_changed_data_receiver from openslides.utils.autoupdate import inform_changed_data_receiver
from openslides.utils.rest_api import router from openslides.utils.rest_api import router
from openslides.utils.search import index_add_instance, index_del_instance
from .signals import setup_general_config from .signals import setup_general_config
from .views import ( from .views import (
ChatMessageViewSet, ChatMessageViewSet,
@ -45,3 +46,14 @@ class CoreAppConfig(AppConfig):
signals.post_delete.connect( signals.post_delete.connect(
inform_changed_data_receiver, inform_changed_data_receiver,
dispatch_uid='inform_changed_data_receiver') dispatch_uid='inform_changed_data_receiver')
# Update the search when a model is saved or deleted
signals.post_save.connect(
index_add_instance,
dispatch_uid='index_add_instance')
signals.post_delete.connect(
index_del_instance,
dispatch_uid='index_del_instance')
signals.m2m_changed.connect(
index_add_instance,
dispatch_uid='m2m_index_add_instance')

View File

@ -158,6 +158,14 @@ class CustomSlide(RESTModelMixin, models.Model):
def get_agenda_title(self): def get_agenda_title(self):
return self.title return self.title
def get_search_index_string(self):
    """
    Returns a string that can be indexed for the search.

    The string is the title and the text joined by a single space.
    """
    searchable_fields = [self.title, self.text]
    return " ".join(searchable_fields)
class Tag(RESTModelMixin, models.Model): class Tag(RESTModelMixin, models.Model):
""" """

View File

@ -1,9 +1,8 @@
from django.conf.urls import patterns, url from django.conf.urls import url
from . import views from . import views
urlpatterns = patterns( urlpatterns = [
'',
url(r'^core/url_patterns/$', url(r'^core/url_patterns/$',
views.UrlPatternsView.as_view(), views.UrlPatternsView.as_view(),
name='core_url_patterns'), name='core_url_patterns'),
@ -23,7 +22,11 @@ urlpatterns = patterns(
# View for the projectors are handelt by angular. # View for the projectors are handelt by angular.
url(r'^projector.*$', views.ProjectorView.as_view()), url(r'^projector.*$', views.ProjectorView.as_view()),
url(r'^search/$',
views.SearchView.as_view(),
name='core_search'),
# Main entry point for all angular pages. # Main entry point for all angular pages.
# Has to be the last entry in the urls.py # Has to be the last entry in the urls.py
url(r'^.*$', views.IndexView.as_view()), url(r'^.*$', views.IndexView.as_view()),
) ]

View File

@ -2,6 +2,7 @@ import re
import uuid import uuid
from collections import OrderedDict from collections import OrderedDict
from operator import attrgetter from operator import attrgetter
from urllib.parse import unquote
from django.apps import apps from django.apps import apps
from django.conf import settings from django.conf import settings
@ -27,6 +28,7 @@ from openslides.utils.rest_api import (
ViewSet, ViewSet,
detail_route, detail_route,
) )
from openslides.utils.search import search
from .config import config from .config import config
from .exceptions import ConfigError, ConfigNotFound from .exceptions import ConfigError, ConfigNotFound
@ -561,3 +563,22 @@ class VersionView(utils_views.APIView):
'description': get_plugin_description(plugin), 'description': get_plugin_description(plugin),
'version': get_plugin_version(plugin)}) 'version': get_plugin_version(plugin)})
return result return result
class SearchView(utils_views.APIView):
    """
    Accepts a search string and returns a list of objects where each object
    is a dictionary with the keywords collection and id.

    This view expects a get argument 'q' with a search string.

    See: https://pythonhosted.org/Whoosh/querylang.html for the format of the
    search string.
    """
    http_method_names = ['get']

    def get_context_data(self, **context):
        """
        Adds the search result for the get argument 'q' to the context
        under the key 'elements'. A missing argument is treated as an empty
        search string.
        """
        raw_query = self.request.GET.get('q', '')
        # NOTE(review): Django normally hands out GET values already
        # URL-decoded, so unquote() may decode a second time -- confirm
        # against what the client sends.
        found_elements = search(unquote(raw_query))
        return super().get_context_data(elements=found_elements, **context)

View File

@ -3,6 +3,8 @@ from django.db import models
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from django.utils.translation import ugettext_lazy, ugettext_noop from django.utils.translation import ugettext_lazy, ugettext_noop
from openslides.utils.search import user_name_helper
from ..utils.models import RESTModelMixin from ..utils.models import RESTModelMixin
@ -61,3 +63,11 @@ class Mediafile(RESTModelMixin, models.Model):
kB = size / 1024 kB = size / 1024
size_string = '%d kB' % kB size_string = '%d kB' % kB
return size_string return size_string
def get_search_index_string(self):
    """
    Returns a string that can be indexed for the search.

    The string is the title followed by what user_name_helper returns for
    the uploader, separated by a single space.
    """
    parts = [self.title]
    parts.append(user_name_helper(self.uploader))
    return " ".join(parts)

View File

@ -18,6 +18,7 @@ from openslides.poll.models import (
CollectDefaultVotesMixin, CollectDefaultVotesMixin,
) )
from openslides.utils.models import RESTModelMixin from openslides.utils.models import RESTModelMixin
from openslides.utils.search import user_name_helper
from .exceptions import WorkflowError from .exceptions import WorkflowError
@ -537,6 +538,19 @@ class Motion(RESTModelMixin, models.Model):
""" """
return config['motions_amendments_enabled'] and self.parent is not None return config['motions_amendments_enabled'] and self.parent is not None
def get_search_index_string(self):
    """
    Returns a string that can be indexed for the search.

    The string is made of the title, the text, the reason, the category,
    the names of the submitters and supporters and the names of the tags,
    separated by single spaces. Empty values are indexed as empty strings.
    """
    parts = [self.title or '', self.text or '', self.reason or '']
    if self.category:
        parts.append(str(self.category))
    else:
        parts.append('')
    parts.append(user_name_helper(self.submitters.all()))
    parts.append(user_name_helper(self.supporters.all()))
    parts.append(" ".join(tag.name for tag in self.tags.all()))
    return " ".join(parts)
class MotionVersion(RESTModelMixin, models.Model): class MotionVersion(RESTModelMixin, models.Model):
""" """

View File

@ -10,6 +10,8 @@ from django.contrib.auth.models import (
from django.db import models from django.db import models
from django.utils.translation import ugettext_lazy, ugettext_noop from django.utils.translation import ugettext_lazy, ugettext_noop
from openslides.utils.search import user_name_helper
from ..core.config import config from ..core.config import config
from ..utils.models import RESTModelMixin from ..utils.models import RESTModelMixin
from .exceptions import UsersError from .exceptions import UsersError
@ -182,7 +184,7 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser):
structure = '(%s)' % self.structure_level if self.structure_level else '' structure = '(%s)' % self.structure_level if self.structure_level else ''
return ' '.join((self.title, self.get_short_name(), structure)).strip() return ' '.join((self.title, self.get_short_name(), structure)).strip()
def get_short_name(self): def get_short_name(self, sort_by_first_name=None):
""" """
Returns only the name of the user. Returns only the name of the user.
@ -195,7 +197,9 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser):
# The user has a last_name and a first_name # The user has a last_name and a first_name
if first_name and last_name: if first_name and last_name:
if config['users_sort_users_by_first_name']: if sort_by_first_name is None:
sort_by_first_name = config['users_sort_users_by_first_name']
if sort_by_first_name:
name = ' '.join((first_name, last_name)) name = ' '.join((first_name, last_name))
else: else:
name = ', '.join((last_name, first_name)) name = ', '.join((last_name, first_name))
@ -214,3 +218,12 @@ class User(RESTModelMixin, PermissionsMixin, AbstractBaseUser):
""" """
from .views import UserViewSet from .views import UserViewSet
return UserViewSet return UserViewSet
def get_search_index_string(self):
    """
    Returns a string that can be indexed for the search.

    The string is what user_name_helper returns for this user, followed by
    the structure level and the about_me text, separated by single spaces.
    """
    parts = [user_name_helper(self)]
    parts.append(self.structure_level)
    parts.append(self.about_me)
    return " ".join(parts)

View File

@ -39,3 +39,11 @@ class RESTModelMixin:
root_instance = self.get_root_rest_element() root_instance = self.get_root_rest_element()
rest_url = '%s-detail' % type(root_instance)._meta.object_name.lower() rest_url = '%s-detail' % type(root_instance)._meta.object_name.lower()
return reverse(rest_url, args=[str(root_instance.pk)]) return reverse(rest_url, args=[str(root_instance.pk)])
def get_collection_string(self):
    """
    Returns the string representing the name of the collection.

    The name is the first element of what get_collection_and_id_from_url
    returns for the value of self.get_root_rest_url().
    """
    # TODO: find a way not to use the url. See #1791
    from .rest_api import get_collection_and_id_from_url

    root_url = self.get_root_rest_url()
    collection_and_id = get_collection_and_id_from_url(root_url)
    return collection_and_id[0]

173
openslides/utils/search.py Normal file
View File

@ -0,0 +1,173 @@
import os
import shutil
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.exceptions import ImproperlyConfigured
from django.db.models import QuerySet
from whoosh import fields
from whoosh.filedb.filestore import RamStorage
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import QueryParser
def get_schema():
    """
    Creates the whoosh schema.

    The schema is only needed when the search index is built. After this,
    the schema is saved and loaded with the index.

    When the schema is changed, then the index has to be recreated or the
    index has to be altered. See:
    https://pythonhosted.org/Whoosh/schema.html#modifying-the-schema-after-indexing
    """
    schema_fields = dict(
        id=fields.ID(stored=True),
        collection=fields.ID(stored=True),
        id_collection=fields.ID(unique=True),
        content=fields.TEXT,
    )
    return fields.Schema(**schema_fields)
class Index:
    """
    Represents the whoosh index.

    Once an index object has been created or opened, it is kept in the
    attribute ``storage`` and reused by get_or_create_index.
    """

    def get_index_path(self):
        """
        Returns the index path.

        Raises ImproperlyConfigured if the path is not set in the settings.
        """
        try:
            return settings.SEARCH_INDEX
        except AttributeError:
            raise ImproperlyConfigured("Set SEARCH_INDEX into your settings.")

    def create_index(self):
        """
        Creates the whoosh index. Deletes an existing index if it exists.

        If the index path is 'ram', the index is created in memory.
        Otherwise the directory at the index path is removed (if present)
        and created again before the index is written into it.

        Returns the index.
        """
        path = self.get_index_path()
        if path == 'ram':
            self.storage = RamStorage().create_index(get_schema())
        else:
            if os.path.exists(path):
                shutil.rmtree(path)
            # makedirs also creates missing parent directories, where mkdir
            # would raise an error.
            os.makedirs(path)
            self.storage = create_in(path, get_schema())
        return self.storage

    def get_or_create_index(self):
        """
        Returns an index object.

        Creates the index if it does not exist.
        """
        # Try to return an index object that was created or opened before.
        try:
            return self.storage
        except AttributeError:
            pass

        path = self.get_index_path()
        if path != 'ram' and exists_in(path):
            # Remember the opened index so that later calls do not have to
            # open the directory again.
            self.storage = open_dir(path)
            return self.storage
        return self.create_index()
# Module-wide Index object that the signal receivers and search() in this
# module refer to.
index = Index()
def combine_id_and_collection(instance):
    """
    Returns a string where the primary key and the collection string of an
    instance are combined.

    The primary key is read through ``instance.pk`` so that the result
    agrees with the id that index_add_instance stores, also for models
    whose primary key field is not named ``id``.
    """
    return "{}{}".format(instance.pk, instance.get_collection_string())
def user_name_helper(users):
    """
    Helper to index a user or a list of users.

    Returns a string which contains the names of all users separated by a
    space.

    users can be a list, a queryset or a user object. None (for example an
    unset foreign key) results in an empty string, so that the word "None"
    does not end up in the search index. If users is something else, then
    str(users) is returned.
    """
    if users is None:
        return ''

    if isinstance(users, list) or isinstance(users, QuerySet):
        return " ".join(
            user.get_short_name(sort_by_first_name=True) for user in users)

    if isinstance(users, get_user_model()):
        return users.get_short_name(sort_by_first_name=True)

    return str(users)
def index_add_instance(sender, instance, **kwargs):
    """
    Receiver that should be called by the post_save signal and the
    m2m_changed signal.

    If the instance has a method get_search_index_string, then the instance
    is written into the search index. The method has to return the string
    that is stored as content of the document. Instances without this
    method are ignored.
    """
    # TODO: This method blocks the search index. So in a multi thread environment
    #       this method can raise whoosh.store.LockError. Therefore it has to
    #       be done in tornado to support the big mode.
    #       See: https://pythonhosted.org/Whoosh/indexing.html#indexing-documents

    # If the instance is not searchable, then exit this signal early.
    if not hasattr(instance, 'get_search_index_string'):
        return

    document = dict(
        id_collection=combine_id_and_collection(instance),
        id=str(instance.pk),
        collection=instance.get_collection_string(),
        content=instance.get_search_index_string(),
    )

    with index.get_or_create_index().writer() as writer:
        # Only post_save sends 'created'. Everything that is not a newly
        # created instance replaces its existing document.
        if kwargs.get('created', False):
            write_document = writer.add_document
        else:
            write_document = writer.update_document
        write_document(**document)
def index_del_instance(sender, instance, **kwargs):
    """
    Like index_add_instance but deletes the instance from the index.

    Should be called by the post_delete signal.
    """
    # Only the existence of the attribute get_search_index_string matters
    # here: it tells us whether the instance is searchable. The method is
    # not needed in this receiver and therefore not called.
    if not hasattr(instance, 'get_search_index_string'):
        # If the instance is not searchable, then exit this signal early.
        return

    with index.get_or_create_index().writer() as writer:
        document_key = combine_id_and_collection(instance)
        writer.delete_by_term('id_collection', document_key)
def search(query):
    """
    Searches elements.

    query has to be a query string. See:
    https://pythonhosted.org/Whoosh/querylang.html

    The return value is a list of dictionaries where each dictionary has
    the keys id and collection.
    """
    # The module-level Index object is only a wrapper. The schema and the
    # searcher belong to the whoosh index it hands out.
    whoosh_index = index.get_or_create_index()
    parsed_query = QueryParser("content", whoosh_index.schema).parse(query)

    # Use the searcher as a context manager so that it is closed again, and
    # copy the stored fields of the hits while it is still open.
    with whoosh_index.searcher() as searcher:
        hits = searcher.search(parsed_query, limit=None)
        return [dict(hit) for hit in hits]

View File

@ -63,4 +63,5 @@ TEMPLATE_DIRS = (
STATICFILES_DIRS = [os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')] + STATICFILES_DIRS STATICFILES_DIRS = [os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')] + STATICFILES_DIRS
HAYSTACK_CONNECTIONS['default']['PATH'] = os.path.join(OPENSLIDES_USER_DATA_PATH, 'whoosh_index', '')
SEARCH_INDEX = os.path.join(OPENSLIDES_USER_DATA_PATH, 'search_index')

View File

@ -1,7 +1,6 @@
# Requirements for OpenSlides in production in alphabetical order # Requirements for OpenSlides in production in alphabetical order
Django>=1.7.1,<1.9 Django>=1.7.1,<1.9
beautifulsoup4>=4.1,<4.5 beautifulsoup4>=4.1,<4.5
django-haystack>=2.1,<2.5
djangorestframework>=3.2.0,<3.3.0 djangorestframework>=3.2.0,<3.3.0
html5lib>=0.9,<1.0 html5lib>=0.9,<1.0
jsonfield>=0.9.19,<1.1 jsonfield>=0.9.19,<1.1
@ -10,3 +9,4 @@ reportlab>=3.0,<3.3
roman>=2.0,<2.1 roman>=2.0,<2.1
setuptools>=2.2,<19.0 setuptools>=2.2,<19.0
sockjs-tornado>=1.0,<1.1 sockjs-tornado>=1.0,<1.1
Whoosh>=2.7.0,<2.8

View File

@ -46,7 +46,7 @@ TEMPLATE_DIRS = (
STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')) STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static'))
HAYSTACK_CONNECTIONS['default']['STORAGE'] = 'ram' SEARCH_INDEX = 'ram'
# Special test settings # Special test settings

View File

@ -46,7 +46,7 @@ TEMPLATE_DIRS = (
STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static')) STATICFILES_DIRS.insert(0, os.path.join(OPENSLIDES_USER_DATA_PATH, 'static'))
HAYSTACK_CONNECTIONS['default']['STORAGE'] = 'ram' SEARCH_INDEX = 'ram'
# Special test settings # Special test settings