b3f17db2a3
- collection whitelist instead of blacklist - added field mediafile/blob for import
661 lines
23 KiB
Python
661 lines
23 KiB
Python
import json
|
|
import re
|
|
import sys
|
|
from collections import defaultdict
|
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
|
|
|
import fastjsonschema
|
|
import yaml
|
|
|
|
MODELS_YML_PATH = "../../docs/models.yml"
|
|
|
|
DEFAULT_FILES = [
|
|
"../../docker/initial-data.json",
|
|
"../../docs/example-data.json",
|
|
]
|
|
|
|
SCHEMA = fastjsonschema.compile(
|
|
{
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
"title": "Schema for initial and example data.",
|
|
"type": "object",
|
|
"patternProperties": {
|
|
"^[a-z_]+$": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "object",
|
|
"properties": {"id": {"type": "number"}},
|
|
"required": ["id"],
|
|
},
|
|
}
|
|
},
|
|
"additionalProperties": False,
|
|
}
|
|
)
|
|
|
|
|
|
class CheckException(Exception):
|
|
pass
|
|
|
|
|
|
def check_string(value: Any) -> bool:
|
|
return value is None or isinstance(value, str)
|
|
|
|
|
|
color_regex = re.compile("^#[0-9a-f]{6}$")
|
|
|
|
|
|
def check_color(value: Any) -> bool:
|
|
return value is None or bool(isinstance(value, str) and color_regex.match(value))
|
|
|
|
|
|
def check_number(value: Any) -> bool:
|
|
return value is None or isinstance(value, int)
|
|
|
|
|
|
def check_float(value: Any) -> bool:
|
|
return value is None or isinstance(value, int) or isinstance(value, float)
|
|
|
|
|
|
def check_boolean(value: Any) -> bool:
|
|
return value is None or value is False or value is True
|
|
|
|
|
|
def check_string_list(value: Any) -> bool:
|
|
return check_x_list(value, check_string)
|
|
|
|
|
|
def check_number_list(value: Any) -> bool:
|
|
return check_x_list(value, check_number)
|
|
|
|
|
|
def check_x_list(value: Any, fn: Callable) -> bool:
|
|
if value is None:
|
|
return True
|
|
if not isinstance(value, list):
|
|
return False
|
|
return all([fn(sv) for sv in value])
|
|
|
|
|
|
def check_decimal(value: Any) -> bool:
|
|
if value is None:
|
|
return True
|
|
if isinstance(value, str):
|
|
pattern = r"^-?(\d|[1-9]\d+)\.\d{6}$"
|
|
if re.match(pattern, value):
|
|
return True
|
|
return False
|
|
|
|
|
|
def check_json(value: Any, root: bool = True) -> bool:
|
|
if value is None:
|
|
return True
|
|
if not root and (isinstance(value, int) or isinstance(value, str)):
|
|
return True
|
|
if isinstance(value, list):
|
|
return all(check_json(x, root=False) for x in value)
|
|
if isinstance(value, dict):
|
|
return all(check_json(x, root=False) for x in value.values())
|
|
return False
|
|
|
|
|
|
checker_mapping = {
|
|
"string": check_string,
|
|
"HTMLStrict": check_string,
|
|
"HTMLPermissive": check_string,
|
|
"generic-relation": check_string,
|
|
"number": check_number,
|
|
"timestamp": check_number,
|
|
"relation": check_number,
|
|
"float": check_float,
|
|
"boolean": check_boolean,
|
|
"string[]": check_string_list,
|
|
"generic-relation-list": check_string_list,
|
|
"number[]": check_number_list,
|
|
"relation-list": check_number_list,
|
|
"decimal(6)": check_decimal,
|
|
"color": check_color,
|
|
"JSON": check_json,
|
|
}
|
|
|
|
|
|
class Checker:
|
|
def __init__(self, data: Dict[str, List[Any]], is_import: bool = False) -> None:
|
|
self.data = data
|
|
|
|
with open(MODELS_YML_PATH, "rb") as x:
|
|
models_yml = x.read()
|
|
models_yml = models_yml.replace(" yes:".encode(), ' "yes":'.encode())
|
|
models_yml = models_yml.replace(" no:".encode(), ' "no":'.encode())
|
|
self.models = yaml.safe_load(models_yml)
|
|
if is_import:
|
|
self.modify_models_for_import()
|
|
|
|
self.errors: List[str] = []
|
|
|
|
self.template_prefixes: Dict[
|
|
str, Dict[str, Tuple[str, int, int]]
|
|
] = defaultdict(dict)
|
|
self.generate_template_prefixes()
|
|
|
|
def modify_models_for_import(self) -> None:
|
|
collection_allowlist = (
|
|
"user",
|
|
"meeting",
|
|
"group",
|
|
"personal_note",
|
|
"tag",
|
|
"agenda_item",
|
|
"list_of_speakers",
|
|
"speaker",
|
|
"topic",
|
|
"motion",
|
|
"motion_submitter",
|
|
"motion_comment",
|
|
"motion_comment_section",
|
|
"motion_category",
|
|
"motion_block",
|
|
"motion_change_recommendation",
|
|
"motion_state",
|
|
"motion_workflow",
|
|
"motion_statute_paragraph",
|
|
"poll",
|
|
"option",
|
|
"vote",
|
|
"assignment",
|
|
"assignment_candidate",
|
|
"mediafile",
|
|
"projector",
|
|
"projection",
|
|
"projector_message",
|
|
"projector_countdown",
|
|
"chat_group",
|
|
)
|
|
for collection in list(self.models.keys()):
|
|
if collection not in collection_allowlist:
|
|
del self.models[collection]
|
|
self.models["mediafile"]["blob"] = "string"
|
|
|
|
def generate_template_prefixes(self) -> None:
|
|
for collection in self.models.keys():
|
|
for field in self.models[collection]:
|
|
if not self.is_template_field(field):
|
|
continue
|
|
parts = field.split("$")
|
|
prefix = parts[0]
|
|
suffix = parts[1]
|
|
if prefix in self.template_prefixes[collection]:
|
|
raise ValueError(
|
|
f"the template prefix {prefix} is not unique within {collection}"
|
|
)
|
|
self.template_prefixes[collection][prefix] = (
|
|
field,
|
|
len(prefix),
|
|
len(suffix),
|
|
)
|
|
|
|
def is_template_field(self, field: str) -> bool:
|
|
return "$_" in field or field.endswith("$")
|
|
|
|
def is_structured_field(self, field: str) -> bool:
|
|
return "$" in field and not self.is_template_field(field)
|
|
|
|
def is_normal_field(self, field: str) -> bool:
|
|
return "$" not in field
|
|
|
|
def make_structured(self, field: str, replacement: Any) -> str:
|
|
if type(replacement) not in (str, int):
|
|
raise CheckException(
|
|
f"Invalid type {type(replacement)} for the replacement of field {field}"
|
|
)
|
|
parts = field.split("$")
|
|
return parts[0] + "$" + str(replacement) + parts[1]
|
|
|
|
def to_template_field(
|
|
self, collection: str, structured_field: str
|
|
) -> Tuple[str, str]:
|
|
"""Returns template_field, replacement"""
|
|
parts = structured_field.split("$")
|
|
descriptor = self.template_prefixes[collection].get(parts[0])
|
|
if not descriptor:
|
|
raise CheckException(
|
|
f"Unknown template field for prefix {parts[0]} in collection {collection}"
|
|
)
|
|
return (
|
|
descriptor[0],
|
|
structured_field[descriptor[1] + 1 : len(structured_field) - descriptor[2]],
|
|
)
|
|
|
|
def run_check(self) -> None:
|
|
self.check_json()
|
|
self.check_collections()
|
|
for collection, models in self.data.items():
|
|
for model in models:
|
|
self.check_model(collection, model)
|
|
if self.errors:
|
|
errors = [f"\t{error}" for error in self.errors]
|
|
raise CheckException("\n".join(errors))
|
|
|
|
def check_json(self) -> None:
|
|
try:
|
|
SCHEMA(self.data)
|
|
except fastjsonschema.exceptions.JsonSchemaException as e:
|
|
raise CheckException(f"JSON does not match schema: {str(e)}")
|
|
|
|
def check_collections(self) -> None:
|
|
c1 = set(self.data.keys())
|
|
c2 = set(self.models.keys())
|
|
if c1 != c2:
|
|
err = "Collections in JSON file do not match with models.yml."
|
|
if c2 - c1:
|
|
err += f" Missing collections: {', '.join(c2-c1)}."
|
|
if c1 - c2:
|
|
err += f" Invalid collections: {', '.join(c1-c2)}."
|
|
raise CheckException(err)
|
|
|
|
def check_model(self, collection: str, model: Dict[str, Any]) -> None:
|
|
errors = self.check_normal_fields(model, collection)
|
|
|
|
if not errors:
|
|
errors = self.check_template_fields(model, collection)
|
|
|
|
if not errors:
|
|
self.check_types(model, collection)
|
|
self.check_relations(model, collection)
|
|
|
|
def check_normal_fields(self, model: Dict[str, Any], collection: str) -> bool:
|
|
model_fields = set(
|
|
x
|
|
for x in model.keys()
|
|
if self.is_normal_field(x) or self.is_template_field(x)
|
|
)
|
|
collection_fields = set(
|
|
x
|
|
for x in self.models[collection].keys()
|
|
if self.is_normal_field(x) or self.is_template_field(x)
|
|
)
|
|
|
|
errors = False
|
|
if collection_fields - model_fields:
|
|
error = f"{collection}/{model['id']}: Missing fields {', '.join(collection_fields - model_fields)}"
|
|
self.errors.append(error)
|
|
errors = True
|
|
if model_fields - collection_fields:
|
|
error = f"{collection}/{model['id']}: Invalid fields {', '.join(model_fields - collection_fields)}"
|
|
self.errors.append(error)
|
|
errors = True
|
|
return errors
|
|
|
|
def check_template_fields(self, model: Dict[str, Any], collection: str) -> bool:
|
|
"""
|
|
Only checks that for each replacement a structured field exists and
|
|
not too many structured fields. Does not check the content.
|
|
Returns True on errors.
|
|
"""
|
|
errors = False
|
|
for template_field in self.models[collection].keys():
|
|
if not self.is_template_field(template_field):
|
|
continue
|
|
field_error = False
|
|
replacements = model[template_field]
|
|
if not isinstance(replacements, list):
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{template_field}: Replacements for the template field must be a list"
|
|
)
|
|
field_error = True
|
|
continue
|
|
for replacement in replacements:
|
|
if not isinstance(replacement, str):
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{template_field}: Each replacement for the template field must be a string"
|
|
)
|
|
field_error = True
|
|
if field_error:
|
|
error = True
|
|
continue
|
|
|
|
replacement_collection = None
|
|
field_description = self.models[collection][template_field]
|
|
if isinstance(field_description, dict):
|
|
replacement_collection = field_description.get("replacement_collection")
|
|
|
|
for replacement in replacements:
|
|
structured_field = self.make_structured(template_field, replacement)
|
|
if structured_field not in model:
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{template_field}: Missing {structured_field} since it is given as a replacement"
|
|
)
|
|
errors = True
|
|
|
|
if replacement_collection:
|
|
try:
|
|
as_id = int(replacement)
|
|
except (TypeError, ValueError):
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{template_field}: Replacement {replacement} is not an integer"
|
|
)
|
|
if not self.find_model(replacement_collection, as_id):
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{template_field}: Replacement {replacement} does not exist as a model of collection {replacement_collection}"
|
|
)
|
|
|
|
for field in model.keys():
|
|
if self.is_structured_field(field):
|
|
try:
|
|
_template_field, _replacement = self.to_template_field(
|
|
collection, field
|
|
)
|
|
if (
|
|
template_field == _template_field
|
|
and _replacement not in model[template_field]
|
|
):
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{field}: Invalid structured field. Missing replacement {_replacement} in {template_field}"
|
|
)
|
|
errors = True
|
|
except CheckException as e:
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{field} error: " + str(e)
|
|
)
|
|
errors = True
|
|
|
|
return errors
|
|
|
|
def check_types(self, model: Dict[str, Any], collection: str) -> None:
|
|
for field in model.keys():
|
|
if self.is_template_field(field):
|
|
continue
|
|
|
|
field_type = self.get_type_from_collection(field, collection)
|
|
enum = self.get_enum_from_collection_field(field, collection)
|
|
checker = checker_mapping.get(field_type)
|
|
if checker is None:
|
|
raise NotImplementedError(
|
|
f"TODO implement check for field type {field_type}"
|
|
)
|
|
|
|
if not checker(model[field]):
|
|
error = f"{collection}/{model['id']}/{field}: Type error: Type is not {field_type}"
|
|
self.errors.append(error)
|
|
|
|
if enum and model[field] not in enum:
|
|
error = f"{collection}/{model['id']}/{field}: Value error: Value {model[field]} is not a valid enum value"
|
|
self.errors.append(error)
|
|
|
|
def get_type_from_collection(self, field: str, collection: str) -> str:
|
|
if self.is_structured_field(field):
|
|
field, _ = self.to_template_field(collection, field)
|
|
|
|
field_description = self.models[collection][field]
|
|
if isinstance(field_description, dict):
|
|
if field_description["type"] == "template":
|
|
if isinstance(field_description["fields"], dict):
|
|
return field_description["fields"]["type"]
|
|
return field_description["fields"]
|
|
return field_description["type"]
|
|
return field_description
|
|
|
|
field_description = self.get_field_description(field, collection)
|
|
if field_description:
|
|
return field_description["type"]
|
|
return self.models[collection][field]
|
|
|
|
def get_enum_from_collection_field(
|
|
self, field: str, collection: str
|
|
) -> Optional[Set[str]]:
|
|
if self.is_structured_field(field):
|
|
field, _ = self.to_template_field(collection, field)
|
|
|
|
field_description = self.models[collection][field]
|
|
if isinstance(field_description, dict):
|
|
if field_description["type"] == "template":
|
|
if isinstance(field_description["fields"], dict):
|
|
field_description = field_description["fields"]
|
|
else:
|
|
return None
|
|
if "enum" in field_description:
|
|
enum = set(field_description["enum"])
|
|
if not field_description.get("required", False):
|
|
enum.add(None)
|
|
return enum
|
|
return None
|
|
|
|
def check_relations(self, model: Dict[str, Any], collection: str) -> None:
|
|
for field in model.keys():
|
|
try:
|
|
self.check_relation(model, collection, field)
|
|
except CheckException as e:
|
|
self.errors.append(
|
|
f"{collection}/{model['id']}/{field} error: " + str(e)
|
|
)
|
|
|
|
def check_relation(
|
|
self, model: Dict[str, Any], collection: str, field: str
|
|
) -> None:
|
|
if self.is_template_field(field):
|
|
return
|
|
|
|
field_type = self.get_type_from_collection(field, collection)
|
|
basemsg = f"{collection}/{model['id']}/{field}: Relation Error: "
|
|
|
|
replacement = None
|
|
if self.is_structured_field(field):
|
|
_, replacement = self.to_template_field(collection, field)
|
|
|
|
if field_type == "relation":
|
|
foreign_id = model[field]
|
|
if foreign_id is None:
|
|
return
|
|
|
|
foreign_collection, foreign_field = self.get_to(field, collection)
|
|
|
|
self.check_reverse_relation(
|
|
collection,
|
|
model["id"],
|
|
model,
|
|
foreign_collection,
|
|
foreign_id,
|
|
foreign_field,
|
|
basemsg,
|
|
replacement,
|
|
)
|
|
|
|
elif field_type == "relation-list":
|
|
foreign_ids = model[field]
|
|
if foreign_ids is None:
|
|
return
|
|
|
|
foreign_collection, foreign_field = self.get_to(field, collection)
|
|
|
|
for foreign_id in foreign_ids:
|
|
self.check_reverse_relation(
|
|
collection,
|
|
model["id"],
|
|
model,
|
|
foreign_collection,
|
|
foreign_id,
|
|
foreign_field,
|
|
basemsg,
|
|
replacement,
|
|
)
|
|
|
|
elif field_type == "generic-relation" and model[field] is not None:
|
|
foreign_collection, foreign_id = self.split_fqid(model[field])
|
|
foreign_field = self.get_to_generic_case(
|
|
collection, field, foreign_collection
|
|
)
|
|
|
|
self.check_reverse_relation(
|
|
collection,
|
|
model["id"],
|
|
model,
|
|
foreign_collection,
|
|
foreign_id,
|
|
foreign_field,
|
|
basemsg,
|
|
replacement,
|
|
)
|
|
|
|
elif field_type == "generic-relation-list" and model[field] is not None:
|
|
for fqid in model[field]:
|
|
foreign_collection, foreign_id = self.split_fqid(fqid)
|
|
foreign_field = self.get_to_generic_case(
|
|
collection, field, foreign_collection
|
|
)
|
|
|
|
self.check_reverse_relation(
|
|
collection,
|
|
model["id"],
|
|
model,
|
|
foreign_collection,
|
|
foreign_id,
|
|
foreign_field,
|
|
basemsg,
|
|
replacement,
|
|
)
|
|
|
|
def get_to(self, field: str, collection: str) -> Tuple[str, str]:
|
|
if self.is_structured_field(field):
|
|
field, _ = self.to_template_field(collection, field)
|
|
|
|
field_description = self.models[collection][field]
|
|
if field_description["type"] == "template":
|
|
to = field_description["fields"]["to"]
|
|
else:
|
|
to = field_description["to"]
|
|
return to.split("/")
|
|
|
|
def find_model(self, collection: str, id: int) -> Optional[Dict[str, Any]]:
|
|
c = self.data.get(collection, [])
|
|
for model in c:
|
|
if model["id"] == id:
|
|
return model
|
|
return None
|
|
|
|
def check_reverse_relation(
|
|
self,
|
|
collection: str,
|
|
id: int,
|
|
model: Dict[str, Any],
|
|
foreign_collection: str,
|
|
foreign_id: int,
|
|
foreign_field: str,
|
|
basemsg: str,
|
|
replacement: Optional[str],
|
|
) -> None:
|
|
actual_foreign_field = foreign_field
|
|
if self.is_template_field(foreign_field):
|
|
if replacement:
|
|
actual_foreign_field = self.make_structured(foreign_field, replacement)
|
|
else:
|
|
replacement_collection = self.models[foreign_collection][foreign_field][
|
|
"replacement_collection"
|
|
]
|
|
replacement = model.get(f"{replacement_collection}_id")
|
|
if not replacement:
|
|
self.errors.append(
|
|
f"{basemsg} points to {foreign_collection}/{foreign_id}/{foreign_field},"
|
|
f" but there is no replacement for {replacement_collection}"
|
|
)
|
|
actual_foreign_field = self.make_structured(foreign_field, replacement)
|
|
|
|
foreign_model = self.find_model(foreign_collection, foreign_id)
|
|
foreign_value = (
|
|
foreign_model.get(actual_foreign_field)
|
|
if foreign_model is not None
|
|
else None
|
|
)
|
|
foreign_field_type = self.get_type_from_collection(
|
|
foreign_field, foreign_collection
|
|
)
|
|
fqid = f"{collection}/{id}"
|
|
error = False
|
|
if foreign_field_type == "relation":
|
|
error = foreign_value != id
|
|
elif foreign_field_type == "relation-list":
|
|
error = not foreign_value or id not in foreign_value
|
|
elif foreign_field_type == "generic-relation":
|
|
error = foreign_value != fqid
|
|
elif foreign_field_type == "generic-relation-list":
|
|
error = not foreign_value or fqid not in foreign_value
|
|
else:
|
|
raise NotImplementedError()
|
|
|
|
if error:
|
|
self.errors.append(
|
|
f"{basemsg} points to {foreign_collection}/{foreign_id}/{actual_foreign_field},"
|
|
" but the reverse relation for is corrupt"
|
|
)
|
|
|
|
def split_fqid(self, fqid: str) -> Tuple[str, int]:
|
|
try:
|
|
collection, _id = fqid.split("/")
|
|
id = int(_id)
|
|
if collection not in self.models.keys():
|
|
raise CheckException(f"Fqid {fqid} has an invalid collection")
|
|
return collection, id
|
|
except (ValueError, AttributeError):
|
|
raise CheckException(f"Fqid {fqid} is malformed")
|
|
|
|
def split_collectionfield(self, collectionfield: str) -> Tuple[str, str]:
|
|
collection, field = collectionfield.split("/")
|
|
if collection not in self.models.keys():
|
|
raise CheckException(
|
|
f"Collectionfield {collectionfield} has an invalid collection"
|
|
)
|
|
if (
|
|
field not in self.models[collection]
|
|
): # Note: this has to be adopted when supporting template fields
|
|
raise CheckException(
|
|
f"Collectionfield {collectionfield} has an invalid field"
|
|
)
|
|
return collection, field
|
|
|
|
def get_to_generic_case(
|
|
self, collection: str, field: str, foreign_collection: str
|
|
) -> str:
|
|
"""Returns all reverse relations as collectionfields"""
|
|
to = self.models[collection][field]["to"]
|
|
if isinstance(to, dict):
|
|
if foreign_collection not in to["collections"]:
|
|
raise CheckException(
|
|
f"The collection {foreign_collection} is not supported "
|
|
"as a reverse relation in {collection}/{field}"
|
|
)
|
|
return to["field"]
|
|
|
|
for cf in to:
|
|
c, f = self.split_collectionfield(cf)
|
|
if c == foreign_collection:
|
|
return f
|
|
|
|
raise CheckException(
|
|
f"The collection {foreign_collection} is not supported as a reverse relation in {collection}/{field}"
|
|
)
|
|
|
|
|
|
def main() -> int:
|
|
files = sys.argv[1:]
|
|
if not files:
|
|
files = DEFAULT_FILES
|
|
|
|
is_import = "--import" in files
|
|
if is_import:
|
|
files = [x for x in files if x != "--import"]
|
|
|
|
failed = False
|
|
for f in files:
|
|
with open(f) as data:
|
|
try:
|
|
Checker(json.load(data), is_import=is_import).run_check()
|
|
except CheckException as e:
|
|
print(f"Check for {f} failed:\n", e)
|
|
failed = True
|
|
else:
|
|
print(f"Check for {f} successful.")
|
|
return 1 if failed else 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|