import json
from collections import defaultdict
from datetime import datetime
from time import sleep
from typing import Any, Callable, Dict, List, Optional, Tuple, Type

from asgiref.sync import async_to_sync
from django.apps import apps

from . import logging
from .cache_providers import (
    Cachable,
    ElementCacheProvider,
    MemoryCacheProvider,
    RedisCacheProvider,
)
from .redis import use_redis
from .schema_version import SchemaVersion, schema_version_handler
from .utils import get_element_id, split_element_id


logger = logging.getLogger(__name__)


class ChangeIdTooLowError(Exception):
    pass


def get_all_cachables() -> List[Cachable]:
    """
    Returns all cachable elements of OpenSlides.
    """
    out: List[Cachable] = []
    for app in apps.get_app_configs():
        try:
            # Get the method get_startup_elements() from an app.
            # This method has to return an iterable of Cachable objects.
            get_startup_elements = app.get_startup_elements
        except AttributeError:
            # Skip apps that do not implement get_startup_elements.
            continue
        out.extend(get_startup_elements())
    return out
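
# Illustrative only: the get_startup_elements() hook looked up above would
# typically live on an app's AppConfig. The names used here (AgendaAppConfig,
# Item) are hypothetical and only sketch the expected shape of the hook.
#
#     class AgendaAppConfig(AppConfig):
#         def get_startup_elements(self):
#             yield Item  # a class implementing the Cachable interface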


class ElementCache:
    """
    Cache for the elements.

    Saves the full_data.

    There is one redis Hash (similar to a python dict) for the full_data.

    The key of the Hash is COLLECTIONSTRING:ID where COLLECTIONSTRING is the
    collection_string of a collection and ID is the id of an element.

    There is a sorted set in redis with the change id as score. The values are
    COLLECTIONSTRING:ID for the elements that have been changed with that
    change id. With this key it is possible to get all elements as full_data
    that are newer than a specific change id.

    All methods of this class are async. You either have to call them with
    await in an async environment or use asgiref.sync.async_to_sync().
    """

    def __init__(
        self,
        cache_provider_class: Type[ElementCacheProvider] = RedisCacheProvider,
        cachable_provider: Callable[[], List[Cachable]] = get_all_cachables,
        default_change_id: Optional[int] = None,
    ) -> None:
        """
        Initializes the cache.
        """
        self.cache_provider = cache_provider_class(self.async_ensure_cache)
        self.cachable_provider = cachable_provider
        self._cachables: Optional[Dict[str, Cachable]] = None
        self.default_change_id: Optional[int] = default_change_id

    @property
    def cachables(self) -> Dict[str, Cachable]:
        """
        Returns all cachables as a dict where the key is the collection_string
        of the cachable.
        """
        # This property is necessary to lazy load the cachables.
        if self._cachables is None:
            self._cachables = {
                cachable.get_collection_string(): cachable
                for cachable in self.cachable_provider()
            }
        return self._cachables

    def ensure_cache(
        self, reset: bool = False, default_change_id: Optional[int] = None
    ) -> None:
        """
        Ensures the existence of the cache; see async_ensure_cache for more info.
        """
        async_to_sync(self.async_ensure_cache)(reset, default_change_id)

    async def async_ensure_cache(
        self, reset: bool = False, default_change_id: Optional[int] = None
    ) -> None:
        """
        Makes sure that the cache exists. Builds the cache if it does not or if
        reset is given as True.
        """
        cache_exists = await self.cache_provider.data_exists()

        if reset or not cache_exists:
            await self.build_cache(default_change_id)

    def ensure_schema_version(self) -> None:
        async_to_sync(self.async_ensure_schema_version)()

    async def async_ensure_schema_version(self) -> None:
        cache_schema_version = await self.cache_provider.get_schema_version()
        schema_changed = not schema_version_handler.compare(cache_schema_version)
        schema_version_handler.log_current()

        cache_exists = await self.cache_provider.data_exists()
        if schema_changed or not cache_exists:
            await self.build_cache(schema_version=schema_version_handler.get())

    async def build_cache(
        self,
        default_change_id: Optional[int] = None,
        schema_version: Optional[SchemaVersion] = None,
    ) -> None:
        lock_name = "build_cache"
        # Set a lock so only one process builds the cache.
        if await self.cache_provider.set_lock(lock_name):
            logger.info("Building up the cache data...")
            try:
                mapping = {}
                for collection_string, cachable in self.cachables.items():
                    for element in cachable.get_elements():
                        mapping.update(
                            {
                                get_element_id(
                                    collection_string, element["id"]
                                ): json.dumps(element)
                            }
                        )
                logger.info("Done building the cache data.")
                logger.info("Saving cache data into the cache...")
                if default_change_id is None:
                    if self.default_change_id is not None:
                        default_change_id = self.default_change_id
                    else:
                        # Use the milliseconds (rounded to full seconds) since
                        # 2016-02-29 as the default change id.
                        default_change_id = int(
                            (datetime.utcnow() - datetime(2016, 2, 29)).total_seconds()
                        )
                        default_change_id *= 1000
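                        # Rough worked example (approximate, for illustration
                        # only): around 2019-09-01 there are about 1.1e8
                        # seconds since 2016-02-29, so the generated
                        # default_change_id would be roughly 1.1e11.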
                await self.cache_provider.reset_full_cache(mapping, default_change_id)
                if schema_version:
                    await self.cache_provider.set_schema_version(schema_version)
                logger.info("Done saving the cache data.")
            finally:
                await self.cache_provider.del_lock(lock_name)
        else:
            logger.info("Wait for another process to build up the cache...")
            while await self.cache_provider.get_lock(lock_name):
                sleep(0.01)
            logger.info("Cache is ready (built by another process).")

    async def change_elements(
        self, elements: Dict[str, Optional[Dict[str, Any]]]
    ) -> int:
        """
        Changes elements in the cache.

        elements is a dict mapping element_id to the changed element. When the
        value is None, it is interpreted as deleted.

        Returns the newly generated change_id.
        """
        # Split elements into changed and deleted.
        deleted_elements = []
        changed_elements = []
        for element_id, data in elements.items():
            if data:
                # The arguments for redis.hset are pairs of key and value.
                changed_elements.append(element_id)
                changed_elements.append(json.dumps(data))
            else:
                deleted_elements.append(element_id)

        return await self.cache_provider.add_changed_elements(
            changed_elements, deleted_elements
        )
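
    # Illustrative usage only (assumes an async context; the collection string
    # and ids are made-up example values):
    #
    #     change_id = await element_cache.change_elements({
    #         "motions/motion:42": {"id": 42, "title": "New title"},
    #         "motions/motion:43": None,  # None means: delete this element
    #     })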

    async def get_all_data_list(
        self, user_id: Optional[int] = None
    ) -> Dict[str, List[Dict[str, Any]]]:
        """
        Returns all data with a list per collection:
        {
            <collection>: [<element>, <element>, ...]
        }
        If the user id is given, the data will be restricted for this user.
        """
        all_data: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
        for element_id, data in (await self.cache_provider.get_all_data()).items():
            collection_string, _ = split_element_id(element_id)
            all_data[collection_string].append(json.loads(data.decode()))

        if user_id is not None:
            for collection_string in all_data.keys():
                restricter = self.cachables[collection_string].restrict_elements
                all_data[collection_string] = await restricter(
                    user_id, all_data[collection_string]
                )
        return dict(all_data)

    async def get_all_data_dict(self) -> Dict[str, Dict[int, Dict[str, Any]]]:
        """
        Returns all data with a dict (id <-> element) per collection:
        {
            <collection>: {
                <id>: <element>
            }
        }
        """
        all_data: Dict[str, Dict[int, Dict[str, Any]]] = defaultdict(dict)
        for element_id, data in (await self.cache_provider.get_all_data()).items():
            collection_string, id = split_element_id(element_id)
            all_data[collection_string][id] = json.loads(data.decode())
        return dict(all_data)

    async def get_collection_data(
        self, collection_string: str
    ) -> Dict[int, Dict[str, Any]]:
        """
        Returns the data for one collection as dict: {id: <element>}
        """
        encoded_collection_data = await self.cache_provider.get_collection_data(
            collection_string
        )
        collection_data = {}
        for id in encoded_collection_data.keys():
            collection_data[id] = json.loads(encoded_collection_data[id].decode())
        return collection_data

    async def get_element_data(
        self, collection_string: str, id: int, user_id: Optional[int] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Returns one element or None if the element does not exist.
        If the user id is given, the data will be restricted for this user.
        """
        encoded_element = await self.cache_provider.get_element_data(
            get_element_id(collection_string, id)
        )

        if encoded_element is None:
            return None
        element = json.loads(encoded_element.decode())  # type: ignore

        if user_id is not None:
            restricter = self.cachables[collection_string].restrict_elements
            restricted_elements = await restricter(user_id, [element])
            element = restricted_elements[0] if restricted_elements else None
        return element
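
    # Illustrative usage only (the collection string, id and user id are
    # made-up example values):
    #
    #     motion = await element_cache.get_element_data("motions/motion", 42, user_id=1)
    #     if motion is None:
    #         ...  # element does not exist or is not visible to this user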

    async def get_data_since(
        self, user_id: Optional[int] = None, change_id: int = 0, max_change_id: int = -1
    ) -> Tuple[Dict[str, List[Dict[str, Any]]], List[str]]:
        """
        Returns all data since change_id until max_change_id (included).
        A max_change_id of -1 means the highest change_id. If the user id is
        given, the data will be restricted for this user.

        Returns two values inside a tuple. The first value is a dict where the
        key is the collection_string and the value is a list of data. The second
        is a list of element_ids of deleted elements.

        Only returns elements with the change_id or newer. When change_id is 0,
        all elements are returned.

        Raises a ChangeIdTooLowError when the lowest change_id in redis is
        higher than the requested change_id. In this case the method has to be
        rerun with change_id=0. This is important because there could be
        deleted elements that the cache does not know about.
        """
        if change_id == 0:
            return (await self.get_all_data_list(user_id), [])

        # This raises a runtime exception if there is no change_id.
        lowest_change_id = await self.get_lowest_change_id()

        if change_id < lowest_change_id:
            # When change_id is lower than the lowest change_id in redis, we
            # cannot inform the user about deleted elements.
            raise ChangeIdTooLowError(
                f"change_id {change_id} is lower than the lowest change_id in redis {lowest_change_id}."
            )

        raw_changed_elements, deleted_elements = await self.cache_provider.get_data_since(
            change_id, max_change_id=max_change_id
        )
        changed_elements = {
            collection_string: [json.loads(value.decode()) for value in value_list]
            for collection_string, value_list in raw_changed_elements.items()
        }

        if user_id is not None:
            # The list(...) is important because `changed_elements` will be
            # altered while iterating and restricting the data.
            for collection_string, elements in list(changed_elements.items()):
                restricter = self.cachables[collection_string].restrict_elements
                restricted_elements = await restricter(user_id, elements)

                # Add objects removed by the restricter to the deleted elements.
                element_ids = set([element["id"] for element in elements])
                restricted_element_ids = set(
                    [element["id"] for element in restricted_elements]
                )
                for id in element_ids - restricted_element_ids:
                    deleted_elements.append(get_element_id(collection_string, id))

                if not restricted_elements:
                    del changed_elements[collection_string]
                else:
                    changed_elements[collection_string] = restricted_elements

        return (changed_elements, deleted_elements)
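
    # Illustrative client-side resync pattern only (last_change_id and user_id
    # are hypothetical variables of the caller):
    #
    #     try:
    #         changed, deleted = await element_cache.get_data_since(user_id, last_change_id)
    #     except ChangeIdTooLowError:
    #         # The cache no longer knows what was deleted; request everything.
    #         changed, deleted = await element_cache.get_data_since(user_id, 0)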

    async def get_current_change_id(self) -> int:
        """
        Returns the current change id.

        Returns default_change_id if there is no change id yet.
        """
        return await self.cache_provider.get_current_change_id()

    async def get_lowest_change_id(self) -> int:
        """
        Returns the lowest change id.
        """
        return await self.cache_provider.get_lowest_change_id()


def load_element_cache() -> ElementCache:
    """
    Generates an element cache instance.
    """
    if use_redis:
        cache_provider_class: Type[ElementCacheProvider] = RedisCacheProvider
    else:
        cache_provider_class = MemoryCacheProvider

    return ElementCache(cache_provider_class=cache_provider_class)


# Set the element_cache
element_cache = load_element_cache()
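
# Illustrative usage from synchronous code only (the collection string and id
# are made-up example values); in an async context, await the coroutine
# directly instead:
#
#     from asgiref.sync import async_to_sync
#     motion = async_to_sync(element_cache.get_element_data)("motions/motion", 1)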