176 lines
5.4 KiB
Python
176 lines
5.4 KiB
Python
import os
|
|
import shutil
|
|
|
|
from django.conf import settings
|
|
from django.contrib.auth import get_user_model
|
|
from django.core.exceptions import ImproperlyConfigured
|
|
from django.db.models import QuerySet
|
|
from whoosh import fields
|
|
from whoosh.filedb.filestore import RamStorage
|
|
from whoosh.index import create_in, exists_in, open_dir
|
|
from whoosh.qparser import QueryParser
|
|
from whoosh.writing import AsyncWriter
|
|
|
|
|
|
def get_schema():
    """
    Builds the whoosh schema used by the search index.

    The schema is only required while the index is being built. Afterwards
    it is stored together with the index and loaded from there.

    If the schema is changed, the index has to be recreated or altered. See:
    https://pythonhosted.org/Whoosh/schema.html#modifying-the-schema-after-indexing
    """
    # Keyword order is kept so the schema fields are declared in the same
    # sequence as before.
    schema_fields = {
        'id': fields.ID(stored=True),
        'collection': fields.ID(stored=True),
        'id_collection': fields.ID(unique=True),
        'content': fields.TEXT,
    }
    return fields.Schema(**schema_fields)
|
|
|
|
|
|
class Index:
    """
    Represents the whoosh index.

    The index object is cached on the instance as ``self.storage`` after it
    has been created or opened for the first time.
    """

    def get_index_path(self):
        """
        Returns the index path.

        Raises ImproperlyConfigured if the path is not set in the settings.
        """
        try:
            return settings.SEARCH_INDEX
        except AttributeError:
            raise ImproperlyConfigured("Set SEARCH_INDEX into your settings.")

    def create_index(self):
        """
        Creates the whoosh index. Deletes an existing index if one exists.

        If the configured path is the string 'ram', the index is kept in a
        RamStorage. Otherwise the directory at the path is removed (if
        present) and created again before the index is written into it.

        Returns the index.
        """
        path = self.get_index_path()
        if path == 'ram':
            self.storage = RamStorage().create_index(get_schema())
        else:
            if os.path.exists(path):
                shutil.rmtree(path)
            # makedirs also creates missing parent directories, so a nested
            # SEARCH_INDEX path does not fail on a fresh installation.
            os.makedirs(path)
            self.storage = create_in(path, get_schema())
        return self.storage

    def get_or_create_index(self):
        """
        Returns an index object.

        Creates the index if it does not exist.
        """
        # Try to return a storage object that was created or opened before.
        try:
            return self.storage
        except AttributeError:
            pass

        path = self.get_index_path()
        if path != 'ram' and exists_in(path):
            # Remember the opened index so that it is not reopened from disk
            # on every call.
            self.storage = open_dir(path)
            return self.storage
        return self.create_index()
|
|
|
|
# Module-level Index instance that is used by index_add_instance,
# index_del_instance and search in this module.
index = Index()
|
|
|
|
|
|
def combine_id_and_collection(instance):
    """
    Builds the combined key of an instance.

    The result is the id of the instance directly followed by its collection
    string, without any separator.
    """
    identifier = instance.id
    collection = instance.get_collection_string()
    template = "{}{}"
    return template.format(identifier, collection)
|
|
|
|
|
|
def user_name_helper(users):
    """
    Helper that turns a user or several users into text for the index.

    For a list or a queryset the short names of all contained users are
    joined by a single space. For a single user object its short name is
    returned. Any other value is returned as str(users).
    """
    # The two checks stay separate so that the second one is only evaluated
    # when the value is not a list.
    is_collection = isinstance(users, list) or isinstance(users, QuerySet)
    if is_collection:
        short_names = [
            member.get_short_name(sort_by_first_name=True)
            for member in users]
        return " ".join(short_names)

    if isinstance(users, get_user_model()):
        return users.get_short_name(sort_by_first_name=True)

    return str(users)
|
|
|
|
|
|
def index_add_instance(sender, instance, **kwargs):
    """
    Receiver for the post_save signal and the m2m_changed signal.

    Instances that provide the method get_search_index_string are written
    into the search index; the return value of that method is stored as the
    content of the document. All other instances are ignored.

    If the keyword argument 'created' is true, a new document is added,
    otherwise the existing document is updated.

    This function uses whoosh.writing.AsyncWriter.
    """
    try:
        build_content = instance.get_search_index_string
    except AttributeError:
        # Not a searchable instance, so there is nothing to index.
        return

    is_new = kwargs.get('created', False)

    document = dict(
        id_collection=combine_id_and_collection(instance),
        id=str(instance.pk),
        collection=instance.get_collection_string(),
        content=build_content())

    with AsyncWriter(index.get_or_create_index()) as writer:
        store = writer.add_document if is_new else writer.update_document
        store(**document)
|
|
|
|
|
|
def index_del_instance(sender, instance, **kwargs):
    """
    Counterpart of index_add_instance that removes the instance from the
    index.

    Should be called by the post_delete signal.

    This function uses whoosh.writing.AsyncWriter.
    """
    # The method get_search_index_string is not called here. Its presence
    # only tells us whether the instance is searchable at all.
    if not hasattr(instance, 'get_search_index_string'):
        return

    search_index = index.get_or_create_index()
    with AsyncWriter(search_index) as writer:
        term = combine_id_and_collection(instance)
        writer.delete_by_term('id_collection', term)
|
|
|
|
|
|
def search(query):
    """
    Searches elements.

    query has to be a query string. See: https://pythonhosted.org/Whoosh/querylang.html

    The return value is a list of dictionaries where each dictionary has the
    keys id and collection.
    """
    search_index = index.get_or_create_index()
    parser = QueryParser("content", search_index.schema)
    parsed_query = parser.parse(query)
    # The searcher holds open files, so it is used as a context manager to
    # make sure it is closed. The hits are copied into plain dictionaries
    # before the searcher is closed because they are read from it lazily.
    with search_index.searcher() as searcher:
        result = searcher.search(parsed_query, limit=None)
        return [dict(element) for element in result]
|