Full text search with haystack and whoosh.

Updated THANKS file.
Portable: Added haystack and whoosh.
This commit is contained in:
Emanuel Schuetze 2013-09-08 14:44:41 +02:00
parent 95fe58eb99
commit e277b8babf
35 changed files with 370 additions and 88 deletions

16
THANKS
View File

@ -9,7 +9,11 @@ OpenSlides uses parts of the following projects:
License: BSD
* Django mptt
<https://github.com/django-mptt/django-mptt>
<https://github.com/django-mptt/django-mptt/>
License: BSD
* Django haystack
<http://haystacksearch.org>
License: BSD
* jQuery
@ -32,7 +36,7 @@ OpenSlides uses parts of the following projects:
<https://github.com/BorisMoore/jquery-tmpl/>
License: MIT/GPLv2
- jQuery bsmSelect
<https://github.com/vicb/bsmSelect>
<https://github.com/vicb/bsmSelect/>
License: MIT/GPLv2
* jQuery UI
@ -68,11 +72,11 @@ OpenSlides uses parts of the following projects:
License: BSD
* Pillow
<https://github.com/python-imaging/Pillow>
<https://github.com/python-imaging/Pillow/>
License: Standard PIL License
* qrcode
<https://github.com/lincolnloop/python-qrcode>
<https://github.com/lincolnloop/python-qrcode/>
License: BSD
* ReportLab
@ -87,6 +91,10 @@ OpenSlides uses parts of the following projects:
<http://font.ubuntu.com>
License: Ubuntu Font Licence 1.0
* Whoosh
<https://bitbucket.org/mchaput/whoosh/wiki/Home/>
License: BSD
* Sphinx
<http://sphinx-doc.org/>
License: BSD

View File

@ -4,7 +4,7 @@ How to create a new portable Windows distribution of OpenSlides:
1.) Follow the OpenSlides installation instructions for Windows,
but add the option "-Z" when executing easy_install, e.g.:
easy_install -Z django django-mptt beautifulsoup4 bleach pillow qrcode reportlab tornado
easy_install -Z django django-mptt beautifulsoup4 bleach pillow qrcode reportlab tornado django-haystack whoosh
2.) To update the version resource of the prebuilt openslides.exe
pywin32 should be installed (it is not strictly required but at

View File

@ -0,0 +1,31 @@
Copyright (c) 2009-2013, Daniel Lindsley.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Haystack nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
Prior to April 17, 2009, this software was released under the MIT license.

View File

@ -1,6 +1,3 @@
Django MPTT
-----------
Copyright (c) 2007, Jonathan Buchanan
Permission is hereby granted, free of charge, to any person obtaining a copy of

View File

@ -0,0 +1,26 @@
Copyright 2011 Matt Chaput. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are
those of the authors and should not be interpreted as representing official
policies, either expressed or implied, of Matt Chaput.

View File

@ -50,7 +50,6 @@ SITE_PACKAGES = {
"django": {
"copy": ["django"],
"exclude": [
r"^django/contrib/admin/",
r"^django/contrib/admindocs/",
r"^django/contrib/comments/",
r"^django/contrib/databrowse/",
@ -103,6 +102,12 @@ SITE_PACKAGES = {
"html5lib": {
"copy": ["html5lib"],
},
"django-haystack": {
"copy": ["haystack"],
},
"whoosh": {
"copy": ["whoosh"],
},
"wx": {
# NOTE: wxpython is a special case, see copy_wx
"copy": [],

View File

@ -1,12 +1,11 @@
import datetime
from haystack.indexes import *
from haystack import site
from openslides.agenda.models import Item
from haystack import indexes
from .models import Item
class AgendaIndex(RealTimeSearchIndex):
title = CharField(model_attr='title', null=True)
text = CharField(document=True, model_attr='text', null=True)
class Index(indexes.SearchIndex, indexes.Indexable):
text = indexes.EdgeNgramField(document=True, use_template=True)
modelfilter_name = "Agenda" # verbose_name of model
modelfilter_value = "agenda.item" # 'app_name.model_name'
site.register(Item, AgendaIndex)
def get_model(self):
return Item

View File

@ -0,0 +1,10 @@
{# Search result entry for an agenda item; only rendered if the user may see the agenda. #}
{% load i18n %}
{% load highlight %}

{% if perms.agenda.can_see_agenda %}
<li>
    <a href="{{ result.object.get_absolute_url }}">{{ result.object }}</a><br>
    {# Label naming the kind of object that was found. #}
    <span class="app">{% trans "Agenda" %}</span><br>
    {# Excerpt of the indexed text with the search term highlighted. #}
    {% highlight result.text with request.GET.q %}
</li>
{% endif %}

View File

@ -0,0 +1,2 @@
{{ object.title }}
{{ object.text }}

View File

@ -0,0 +1,11 @@
from haystack import indexes
from .models import Assignment
# Haystack search index for the Assignment model.
class Index(indexes.SearchIndex, indexes.Indexable):
# Primary document field (document=True); its content is rendered from a
# template (use_template=True) instead of being read from a model attribute.
text = indexes.EdgeNgramField(document=True, use_template=True)
# Label and form value of this model in the search filter. The core search
# view reads both attributes from the class named Index in each app's
# search_indexes module.
modelfilter_name = "Elections" # verbose_name of model
modelfilter_value = "assignment.assignment" # 'app_name.model_name'
def get_model(self):
# Model class covered by this index.
return Assignment

View File

@ -0,0 +1,10 @@
{# Search result entry for an election; only rendered if the user may see assignments. #}
{% load i18n %}
{% load highlight %}

{% if perms.assignment.can_see_assignment %}
<li>
    <a href="{{ result.object.get_absolute_url }}">{{ result.object }}</a><br>
    {# Label naming the kind of object that was found. #}
    <span class="app">{% trans "Election" %}</span><br>
    {# Excerpt of the indexed text with the search term highlighted. #}
    {% highlight result.text with request.GET.q %}
</li>
{% endif %}

View File

@ -0,0 +1,3 @@
{{ object.name }}
{{ object.description }}
{{ object.candidates }}

View File

@ -0,0 +1,56 @@
{% extends 'base.html' %}

{% load i18n %}

{# Search page: query form with model filter, result list and pagination. #}

{% block title %}{{ block.super }} {% trans "Search" %}{% endblock %}

{% block content %}
<h2>{% trans 'Search results' %}</h2>

<form class="form-search well" action="" method="get">
    {% for field in form %}
    <div class="control-group">
        {% if field.name == "q" %}
            <label for="id_q">{% trans 'Search' %}:</label>
            <div class="input-append">
                {# field.value is None while no query has been submitted; show an empty box instead of "None". #}
                <input type="text" id="id_q" name="q" class="" value="{{ field.value|default_if_none:'' }}">
                <button type="submit" class="btn">{% trans "Search" %}</button>
            </div>
        {% elif field.name == "models" %}
            {% trans 'Filter' %}:
            <div class="control-group">
                {# Each entry of models is a (label, form value) pair; get_values lists the values currently selected. #}
                {% for model in models %}
                <label class="checkbox inline">
                    <input type="checkbox" value="{{ model.1 }}" name="models"
                    {% if model.1 in get_values %}checked{% endif %}> {% trans model.0 %}
                </label>
                {% endfor %}
            </div>
        {% endif %}
    </div>
    {% endfor %}
</form>

{% if query %}
    {% for result in page.object_list %}
        {% if forloop.first %}
        <ol class="searchresults">
        {% endif %}
        {# The result snippet is looked up by app label: search/<app_label>-results.html. #}
        {% with result_template=result.app_label|add:"-results.html" %}
            {% include "search/"|add:result_template %}
        {% endwith %}
        {% if forloop.last %}
        </ol>
        {% endif %}
    {% empty %}
        <p><i>{% trans "No results found." %}</i></p>
    {% endfor %}

    {% if page.has_previous or page.has_next %}
    <div>
        {# Paging links keep the URL-encoded query and every selected model filter. #}
        {% if page.has_previous %}<a href="?q={{ query|urlencode }}{% for value in get_values %}&amp;models={{ value|urlencode }}{% endfor %}&amp;page={{ page.previous_page_number }}">{% endif %}&laquo; {% trans "Previous" %}{% if page.has_previous %}</a>{% endif %}
        |
        {% if page.has_next %}<a href="?q={{ query|urlencode }}{% for value in get_values %}&amp;models={{ value|urlencode }}{% endfor %}&amp;page={{ page.next_page_number }}">{% endif %}{% trans "Next" %} &raquo;{% if page.has_next %}</a>{% endif %}
    </div>
    {% endif %}
{% endif %}
{% endblock %}

View File

@ -26,4 +26,8 @@ urlpatterns = patterns(
url(r'^version/$',
views.VersionView.as_view(),
name='core_version',),
url(r'^search/$',
views.SearchView(),
name='search',),
)

View File

@ -11,9 +11,12 @@
"""
from django.conf import settings
from django.core.exceptions import PermissionDenied
from django.utils.importlib import import_module
from haystack.views import SearchView as _SearchView
from openslides import get_git_commit_id, get_version, RELEASE
from openslides.utils.signals import template_manipulation
from openslides.utils.views import TemplateView
@ -64,3 +67,47 @@ class VersionView(TemplateView):
context['versions'].append((plugin_name, plugin_version))
return context
class SearchView(_SearchView):
    """
    Shows search result page.

    Access is restricted to authenticated users. The template context is
    extended with the list of searchable models so that the template can
    render a model filter.
    """
    template = 'core/search.html'

    def __call__(self, request):
        """
        Handles a search request.

        Raises PermissionDenied for anonymous users, otherwise delegates to
        the haystack search view.
        """
        if not request.user.is_authenticated():
            raise PermissionDenied
        return super(SearchView, self).__call__(request)

    def extra_context(self):
        """
        Adds extra context variables to set navigation and search filter.

        Returns a context dictionary.
        """
        context = {}
        # Receivers of this signal may add their own entries to the context.
        template_manipulation.send(
            sender=self.__class__, request=self.request, context=context)
        context['models'] = self.get_indexed_searchmodels()
        # Filter values currently selected, used to pre-check the checkboxes.
        context['get_values'] = self.request.GET.getlist('models')
        return context

    def get_indexed_searchmodels(self):
        """
        Iterate over all INSTALLED_APPS and return a list of models which are
        indexed by haystack/whoosh for using in customized model search filter
        in search template search.html. Each list entry contains a verbose name
        of the model and a special form field value for haystack (app_name.model_name),
        e.g. ['Agenda', 'agenda.item'].

        Apps without a search_indexes module are skipped, and so are apps
        whose module has no Index class providing both modelfilter_name and
        modelfilter_value.
        """
        models = []
        # TODO: cache this query!
        for app in settings.INSTALLED_APPS:
            try:
                module = import_module(app + '.search_indexes')
            except ImportError:
                # The app does not provide a search index.
                continue
            index = getattr(module, 'Index', None)
            name = getattr(index, 'modelfilter_name', None)
            value = getattr(index, 'modelfilter_value', None)
            if name is None or value is None:
                # The module does not follow the Index/modelfilter_* convention
                # (e.g. a third-party app); leave it out of the filter rather
                # than failing the whole search page with an AttributeError.
                continue
            models.append([name, value])
        return models

View File

@ -108,6 +108,7 @@ INSTALLED_APPS = (
'django.contrib.staticfiles',
'django.contrib.humanize',
'mptt',
'haystack', # full-text-search
'openslides.poll',
'openslides.core',
'openslides.account',
@ -118,9 +119,6 @@ INSTALLED_APPS = (
'openslides.participant',
'openslides.mediafile',
'openslides.config',
# full-text-search
'haystack',
)
TEMPLATE_CONTEXT_PROCESSORS = (
@ -146,8 +144,12 @@ TEST_DISCOVER_TOP_LEVEL = os.path.dirname(os.path.dirname(__file__))
# See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts
ALLOWED_HOSTS = ['*']
# Use Haystack with Whoosh for full text search
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine'
},
}
# Full-text search
HAYSTACK_SITECONF = 'openslides.search_sites'
HAYSTACK_SEARCH_ENGINE = 'whoosh'
HAYSTACK_WHOOSH_PATH = os.path.join(os.path.dirname(__file__), 'whoosh_index')
# Haystack updates search index after each save/delete action by apps
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'

View File

@ -70,6 +70,8 @@ INSTALLED_APPS += INSTALLED_PLUGINS
# Example: "/home/media/media.lawrence.com/"
MEDIA_ROOT = %(media_root_path)s
# Path to Whoosh search index
HAYSTACK_CONNECTIONS['default']['PATH'] = %(whoosh_index_path)s
"""
KEY_LENGTH = 30
@ -225,16 +227,19 @@ def create_settings(settings_path, database_path=None):
database_path = get_portable_db_path()
dbpath_value = 'openslides.main.get_portable_db_path()'
media_root_path_value = 'openslides.main.get_portable_media_root_path()'
whoosh_index_path_value = 'openslides.main.get_portable_whoosh_index_path()'
else:
if database_path is None:
database_path = get_user_data_path('openslides', 'database.sqlite')
dbpath_value = repr(fs2unicode(database_path))
media_root_path_value = repr(fs2unicode(get_user_data_path('openslides', 'media', '')))
whoosh_index_path_value = repr(fs2unicode(get_user_data_path('openslides', 'whoosh_index', '')))
settings_content = CONFIG_TEMPLATE % dict(
default_key=base64.b64encode(os.urandom(KEY_LENGTH)),
dbpath=dbpath_value,
media_root_path=media_root_path_value)
media_root_path=media_root_path_value,
whoosh_index_path=whoosh_index_path_value)
if not os.path.exists(settings_module):
os.makedirs(settings_module)
@ -388,6 +393,10 @@ def get_portable_media_root_path():
return get_portable_path('openslides', 'media', '')
def get_portable_whoosh_index_path():
    """
    Returns the path that get_portable_path builds for the whoosh index
    directory ('openslides', 'whoosh_index', '').
    """
    path_components = ('openslides', 'whoosh_index', '')
    return get_portable_path(*path_components)
def win32_get_app_data_path(*args):
shell32 = ctypes.WinDLL("shell32.dll")
SHGetFolderPath = shell32.SHGetFolderPathW

View File

@ -0,0 +1,11 @@
from haystack import indexes
from .models import Mediafile
# Haystack search index for the Mediafile model.
class Index(indexes.SearchIndex, indexes.Indexable):
# Primary document field (document=True); its content is rendered from a
# template (use_template=True) instead of being read from a model attribute.
text = indexes.EdgeNgramField(document=True, use_template=True)
# Label and form value of this model in the search filter. The core search
# view reads both attributes from the class named Index in each app's
# search_indexes module.
modelfilter_name = "Files" # verbose_name of model
modelfilter_value = "mediafile.mediafile" # 'app_name.model_name'
def get_model(self):
# Model class covered by this index.
return Mediafile

View File

@ -0,0 +1 @@
{{ object.title }}

View File

@ -0,0 +1,10 @@
{# Search result entry for a file; only rendered if the user may see media files. #}
{% load i18n %}
{% load highlight %}

{% if perms.mediafile.can_see %}
<li>
    <a href="{{ result.object.get_absolute_url }}">{{ result.object }}</a><br>
    {# Label naming the kind of object that was found. #}
    <span class="app">{% trans "File" %}</span><br>
    {# Excerpt of the indexed text with the search term highlighted. #}
    {% highlight result.text with request.GET.q %}
</li>
{% endif %}

View File

@ -0,0 +1,11 @@
from haystack import indexes
from .models import Motion
# Haystack search index for the Motion model.
class Index(indexes.SearchIndex, indexes.Indexable):
# Primary document field (document=True); its content is rendered from a
# template (use_template=True) instead of being read from a model attribute.
text = indexes.EdgeNgramField(document=True, use_template=True)
# Label and form value of this model in the search filter. The core search
# view reads both attributes from the class named Index in each app's
# search_indexes module.
modelfilter_name = "Motions" # verbose_name of model
modelfilter_value = "motion.motion" # 'app_name.model_name'
def get_model(self):
# Model class covered by this index.
return Motion

View File

@ -0,0 +1,7 @@
{{ object.identifier }}
{{ object.title }}
{{ object.text }}
{{ object.reason }}
{{ object.submitters }}
{{ object.supporters }}
{{ object.category }}

View File

@ -0,0 +1,10 @@
{# Search result entry for a motion; only rendered if the user may see motions. #}
{% load i18n %}
{% load highlight %}

{% if perms.motion.can_see_motion %}
<li>
    <a href="{{ result.object.get_absolute_url }}">{{ result.object }}</a><br>
    {# Label naming the kind of object that was found. #}
    <span class="app">{% trans "Motion" %}</span><br>
    {# Excerpt of the indexed text with the search term highlighted. #}
    {% highlight result.text with request.GET.q %}
</li>
{% endif %}

View File

@ -0,0 +1,12 @@
from haystack import indexes
from .models import User
class Index(indexes.SearchIndex, indexes.Indexable):
    """
    Haystack search index for participants (the User model).
    """
    # Primary document field (document=True); its content is rendered from a
    # template (use_template=True) instead of being read from a model attribute.
    text = indexes.EdgeNgramField(document=True, use_template=True)

    # Label and form value of this model in the search filter. The core search
    # view reads both attributes from the class named Index in each app's
    # search_indexes module.
    modelfilter_name = "Participants"  # verbose_name of model
    modelfilter_value = "participant.user"  # 'app_name.model_name'

    def get_model(self):
        """
        Returns the model class covered by this index.
        """
        return User

View File

@ -0,0 +1,4 @@
{{ object.django_user }}
{{ object.structure_level }}
{{ object.committee }}
{{ object.about_me }}

View File

@ -0,0 +1,10 @@
{# Search result entry for a participant; only rendered if the user may see participants. #}
{% load i18n %}
{% load highlight %}

{% if perms.participant.can_see_participant %}
<li>
    <a href="{{ result.object.get_absolute_url }}">{{ result.object }}</a><br>
    {# Label naming the kind of object that was found. #}
    <span class="app">{% trans "Participant" %}</span><br>
    {# Excerpt of the indexed text with the search term highlighted. #}
    {% highlight result.text with request.GET.q %}
</li>
{% endif %}

View File

@ -1,4 +0,0 @@
import haystack
haystack.autodiscover()

View File

@ -34,6 +34,10 @@ body {
position: absolute;
margin: 8px 0 0 50px;
}
#header .navbar-search {
margin-top: 0px;
}
footer {
margin-bottom: 20px;
}
@ -180,6 +184,15 @@ legend + .control-group {
#dataTable_wrapper .row-fluid:after {
clear: none;
}
.searchresults li {
margin-bottom: 15px;
}
.searchresults li .app {
color: #999999;
}
.highlighted {
font-weight: bold;
}
/** Left sidebar navigation **/

View File

@ -27,22 +27,32 @@
<a href="/" class="logo" title="{% trans 'Home' %}"><img src="{% static 'img/logo.png' %}" alt="{% trans 'Logo' %}" /></a>
<span class="title optional">{% get_config 'event_name' %} {% get_config 'event_description' %}</span>
{% block loginbutton %}
<div class="btn-group pull-right">
{% if user.is_authenticated %}
<a href="#" data-toggle="dropdown" class="btn btn-small dropdown-toggle">
<i class="icon-user"></i> {{ user.username }}
<span class="caret"></span>
</a>
<ul class="dropdown-menu">
{% url 'user_settings' as url_usersettings %}
<li><a href="{% url 'user_settings' %}"><i class="icon-cog"></i> {% trans "Edit profile" %}</a></li>
<li><a href="{% url 'password_change' %}"><i class="icon-lock"></i> {% trans "Change password" %}</a></li>
<li class="divider"></li>
<li><a href="{% url 'user_logout' %}"><i class="icon-off"></i> {% trans "Logout" %}</a></li>
</ul>
{% else %}
<a href="{% url 'user_login' %}" class="btn"><i class="icon-login"></i> {% trans "Login" %}</a>
{% endif %}
<div class="pull-right">
<!-- Search field -->
<form class="navbar-search form-search" action="{% url 'search' %}" method="get">
<div class="input-append">
<input type="text" id="id_q" name="q" class="search-query input-medium" placeholder="{% trans 'Search' %}">
<button type="submit" class="btn"><i class="icon-search"></i></button>
</div>
</form> &nbsp;
<!-- login/logout button -->
<div class="btn-group pull-right">
{% if user.is_authenticated %}
<a href="#" data-toggle="dropdown" class="btn dropdown-toggle">
<i class="icon-user"></i> {{ user.username }}
<span class="caret"></span>
</a>
<ul class="dropdown-menu">
<li><a href="{% url 'user_settings' %}"><i class="icon-cog"></i> {% trans "Edit profile" %}</a></li>
<li><a href="{% url 'password_change' %}"><i class="icon-lock"></i> {% trans "Change password" %}</a></li>
<li class="divider"></li>
<li><a href="{% url 'user_logout' %}"><i class="icon-off"></i> {% trans "Logout" %}</a></li>
</ul>
{% else %}
<a href="{% url 'user_login' %}" class="btn"><i class="icon-login"></i> {% trans "Login" %}</a>
{% endif %}
</div>
</div>
{% endblock %}
</div> <!--/#header-->

View File

@ -1,39 +0,0 @@
{% extends 'base.html' %}
{% block content %}
<h2>Search</h2>
<form method="get" action=".">
<table>
{{ form.as_table }}
<tr>
<td>&nbsp;</td>
<td>
<input type="submit" value="Search">
</td>
</tr>
</table>
{% if query %}
<h3>Results</h3>
{% for result in page.object_list %}
<p>
<a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a>
</p>
{% empty %}
<p>No results found.</p>
{% endfor %}
{% if page.has_previous or page.has_next %}
<div>
{% if page.has_previous %}<a href="?q={{ query }}&amp;page={{ page.previous_page_number }}">{% endif %}&laquo; Previous{% if page.has_previous %}</a>{% endif %}
|
{% if page.has_next %}<a href="?q={{ query }}&amp;page={{ page.next_page_number }}">{% endif %}Next &raquo;{% if page.has_next %}</a>{% endif %}
</div>
{% endif %}
{% else %}
{# Show some example queries to run, maybe query syntax, something else? #}
{% endif %}
</form>
{% endblock %}

View File

@ -26,7 +26,6 @@ urlpatterns = patterns(
(r'^config/', include('openslides.config.urls')),
(r'^projector/', include('openslides.projector.urls')),
(r'^i18n/', include('django.conf.urls.i18n')),
(r'^search/', include('haystack.urls')),
)
js_info_dict = {'packages': []}

View File

@ -10,7 +10,7 @@
:license: GNU GPL, see LICENSE for more details.
"""
from django.core.management import call_command
from django.test import TestCase as _TestCase
from openslides.config.api import config
@ -35,4 +35,6 @@ class TestCase(_TestCase):
except AttributeError:
# The cache has only to be deleted if it exists.
pass
# Clear the whoosh search index
call_command('clear_index', interactive=False, verbosity=0)
return return_value

View File

@ -6,3 +6,5 @@ qrcode==2.7
tornado==3.0.1
bleach==1.2.2
beautifulsoup4==4.2.0
django-haystack==2.1.0
whoosh==2.5.4

View File

@ -37,3 +37,6 @@ INSTALLED_APPS += INSTALLED_PLUGINS
# Absolute path to the directory that holds media.
# Example: "/home/media/media.lawrence.com/"
MEDIA_ROOT = ''
# Use RAM storage for whoosh index
HAYSTACK_CONNECTIONS['default']['STORAGE'] = 'ram'