Merge pull request #6124 from FinnStutzenstein/datavalidator

Enhance datavalidator for template fields
This commit is contained in:
Finn Stutzenstein 2021-06-21 15:06:54 +02:00 committed by GitHub
commit 603a558b42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 307 additions and 85 deletions

View File

@ -60,7 +60,6 @@
"vote_delegated_vote_$_ids": [], "vote_delegated_vote_$_ids": [],
"assignment_candidate_$_ids": [], "assignment_candidate_$_ids": [],
"projection_$_ids": [], "projection_$_ids": [],
"current_projector_$_ids": [],
"vote_delegated_$_to_id": [], "vote_delegated_$_to_id": [],
"vote_delegations_$_from_ids": [], "vote_delegations_$_from_ids": [],

View File

@ -1,14 +1,15 @@
import json import json
import re import re
import sys import sys
from typing import Any, Callable, Dict, List, Optional, Tuple from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
import fastjsonschema import fastjsonschema
import yaml import yaml
MODELS_YML_PATH = "../../docs/models.yml" MODELS_YML_PATH = "../../docs/models.yml"
CHECKED_FILES = [ DEFAULT_FILES = [
"../../docker/initial-data.json", "../../docker/initial-data.json",
"../../docs/example-data.json", "../../docs/example-data.json",
] ]
@ -43,8 +44,9 @@ def check_string(value: Any) -> bool:
color_regex = re.compile("^#[0-9a-f]{6}$") color_regex = re.compile("^#[0-9a-f]{6}$")
def check_color(value: Any) -> bool: def check_color(value: Any) -> bool:
return value is None or (isinstance(value, str) and color_regex.match(value)) return value is None or bool(isinstance(value, str) and color_regex.match(value))
def check_number(value: Any) -> bool: def check_number(value: Any) -> bool:
@ -85,8 +87,40 @@ def check_decimal(value: Any) -> bool:
return False return False
def check_json(value: Any, root: bool = True) -> bool:
if value is None:
return True
if not root and (isinstance(value, int) or isinstance(value, str)):
return True
if isinstance(value, list):
return all(check_json(x, root=False) for x in value)
if isinstance(value, dict):
return all(check_json(x, root=False) for x in value.values())
return False
checker_mapping = {
"string": check_string,
"HTMLStrict": check_string,
"HTMLPermissive": check_string,
"generic-relation": check_string,
"number": check_number,
"timestamp": check_number,
"relation": check_number,
"float": check_float,
"boolean": check_boolean,
"string[]": check_string_list,
"generic-relation-list": check_string_list,
"number[]": check_number_list,
"relation-list": check_number_list,
"decimal(6)": check_decimal,
"color": check_color,
"JSON": check_json,
}
class Checker: class Checker:
def __init__(self, data: Dict[str, List[Any]]) -> None: def __init__(self, data: Dict[str, List[Any]], is_import: bool = False) -> None:
self.data = data self.data = data
with open(MODELS_YML_PATH, "rb") as x: with open(MODELS_YML_PATH, "rb") as x:
@ -94,9 +128,70 @@ class Checker:
models_yml = models_yml.replace(" yes:".encode(), ' "yes":'.encode()) models_yml = models_yml.replace(" yes:".encode(), ' "yes":'.encode())
models_yml = models_yml.replace(" no:".encode(), ' "no":'.encode()) models_yml = models_yml.replace(" no:".encode(), ' "no":'.encode())
self.models = yaml.safe_load(models_yml) self.models = yaml.safe_load(models_yml)
if is_import:
self.modify_models_for_import()
self.errors: List[str] = [] self.errors: List[str] = []
self.template_prefixes: Dict[
str, Dict[str, Tuple[str, int, int]]
] = defaultdict(dict)
self.generate_template_prefixes()
def modify_models_for_import(self) -> None:
for collection in ("organization", "organization_tag", "resource", "committee"):
del self.models[collection]
def generate_template_prefixes(self) -> None:
for collection in self.models.keys():
for field in self.models[collection]:
if not self.is_template_field(field):
continue
parts = field.split("$")
prefix = parts[0]
suffix = parts[1]
if prefix in self.template_prefixes[collection]:
raise ValueError(
f"the template prefix {prefix} is not unique within {collection}"
)
self.template_prefixes[collection][prefix] = (
field,
len(prefix),
len(suffix),
)
def is_template_field(self, field: str) -> bool:
return "$_" in field or field.endswith("$")
def is_structured_field(self, field: str) -> bool:
return "$" in field and not self.is_template_field(field)
def is_normal_field(self, field: str) -> bool:
return "$" not in field
def make_structured(self, field: str, replacement: Any) -> str:
if type(replacement) not in (str, int):
raise CheckException(
f"Invalid type {type(replacement)} for the replacement of field {field}"
)
parts = field.split("$")
return parts[0] + "$" + str(replacement) + parts[1]
def to_template_field(
self, collection: str, structured_field: str
) -> Tuple[str, str]:
"""Returns template_field, replacement"""
parts = structured_field.split("$")
descriptor = self.template_prefixes[collection].get(parts[0])
if not descriptor:
raise CheckException(
f"Unknown template field for prefix {parts[0]} in collection {collection}"
)
return (
descriptor[0],
structured_field[descriptor[1] + 1 : len(structured_field) - descriptor[2]],
)
def run_check(self) -> None: def run_check(self) -> None:
self.check_json() self.check_json()
self.check_collections() self.check_collections()
@ -125,9 +220,25 @@ class Checker:
raise CheckException(err) raise CheckException(err)
def check_model(self, collection: str, model: Dict[str, Any]) -> None: def check_model(self, collection: str, model: Dict[str, Any]) -> None:
model_fields = set(x for x in model.keys() if "$" not in x) errors = self.check_normal_fields(model, collection)
if not errors:
errors = self.check_template_fields(model, collection)
if not errors:
self.check_types(model, collection)
self.check_relations(model, collection)
def check_normal_fields(self, model: Dict[str, Any], collection: str) -> bool:
model_fields = set(
x
for x in model.keys()
if self.is_normal_field(x) or self.is_template_field(x)
)
collection_fields = set( collection_fields = set(
x for x in self.models[collection].keys() if "$" not in x x
for x in self.models[collection].keys()
if self.is_normal_field(x) or self.is_template_field(x)
) )
errors = False errors = False
@ -139,55 +250,141 @@ class Checker:
error = f"{collection}/{model['id']}: Invalid fields {', '.join(model_fields - collection_fields)}" error = f"{collection}/{model['id']}: Invalid fields {', '.join(model_fields - collection_fields)}"
self.errors.append(error) self.errors.append(error)
errors = True errors = True
return errors
if not errors: def check_template_fields(self, model: Dict[str, Any], collection: str) -> bool:
self.check_types(model, collection) """
self.check_relations(model, collection) Only checks that for each replacement a structured field exists and
not too many structured fields. Does not check the content.
Returns True on errors.
"""
errors = False
for template_field in self.models[collection].keys():
if not self.is_template_field(template_field):
continue
field_error = False
replacements = model[template_field]
if not isinstance(replacements, list):
self.errors.append(
f"{collection}/{model['id']}/{template_field}: Replacements for the template field must be a list"
)
field_error = True
continue
for replacement in replacements:
if not isinstance(replacement, str):
self.errors.append(
f"{collection}/{model['id']}/{template_field}: Each replacement for the template field must be a string"
)
field_error = True
if field_error:
error = True
continue
replacement_collection = None
field_description = self.models[collection][template_field]
if isinstance(field_description, dict):
replacement_collection = field_description.get("replacement_collection")
for replacement in replacements:
structured_field = self.make_structured(template_field, replacement)
if structured_field not in model:
self.errors.append(
f"{collection}/{model['id']}/{template_field}: Missing {structured_field} since it is given as a replacement"
)
errors = True
if replacement_collection:
try:
as_id = int(replacement)
except (TypeError, ValueError):
self.errors.append(
f"{collection}/{model['id']}/{template_field}: Replacement {replacement} is not an integer"
)
if not self.find_model(replacement_collection, as_id):
self.errors.append(
f"{collection}/{model['id']}/{template_field}: Replacement {replacement} does not exist as a model of collection {replacement_collection}"
)
for field in model.keys():
if self.is_structured_field(field):
try:
_template_field, _replacement = self.to_template_field(
collection, field
)
if (
template_field == _template_field
and _replacement not in model[template_field]
):
self.errors.append(
f"{collection}/{model['id']}/{field}: Invalid structured field. Missing replacement {_replacement} in {template_field}"
)
errors = True
except CheckException as e:
self.errors.append(
f"{collection}/{model['id']}/{field} error: " + str(e)
)
errors = True
return errors
def check_types(self, model: Dict[str, Any], collection: str) -> None: def check_types(self, model: Dict[str, Any], collection: str) -> None:
for field in model.keys(): for field in model.keys():
if "$" in field: if self.is_template_field(field):
continue continue
checker = None
field_type = self.get_type_from_collection(field, collection) field_type = self.get_type_from_collection(field, collection)
if field_type in ( enum = self.get_enum_from_collection_field(field, collection)
"string", checker = checker_mapping.get(field_type)
"HTMLStrict", if checker is None:
"HTMLPermissive", raise NotImplementedError(
"generic-relation", f"TODO implement check for field type {field_type}"
): )
checker = check_string
elif field_type in ("number", "timestamp", "relation"): if not checker(model[field]):
checker = check_number
elif field_type == "float":
checker = check_float
elif field_type == "boolean":
checker = check_boolean
elif field_type in ("string[]", "generic-relation-list"):
checker = check_string_list
elif field_type in ("number[]", "relation-list"):
checker = check_number_list
elif field_type == "decimal(6)":
checker = check_decimal
elif field_type == "color":
checker = check_color
elif field_type in (
"JSON",
"template",
):
pass
else:
raise NotImplementedError(f"TODO field type {field_type}")
if checker is not None and not checker(model[field]):
error = f"{collection}/{model['id']}/{field}: Type error: Type is not {field_type}" error = f"{collection}/{model['id']}/{field}: Type error: Type is not {field_type}"
self.errors.append(error) self.errors.append(error)
if enum and model[field] not in enum:
error = f"{collection}/{model['id']}/{field}: Value error: Value {model[field]} is not a valid enum value"
self.errors.append(error)
def get_type_from_collection(self, field: str, collection: str) -> str: def get_type_from_collection(self, field: str, collection: str) -> str:
field_value = self.models[collection][field] if self.is_structured_field(field):
if isinstance(field_value, dict): field, _ = self.to_template_field(collection, field)
return field_value["type"]
return field_value field_description = self.models[collection][field]
if isinstance(field_description, dict):
if field_description["type"] == "template":
if isinstance(field_description["fields"], dict):
return field_description["fields"]["type"]
return field_description["fields"]
return field_description["type"]
return field_description
field_description = self.get_field_description(field, collection)
if field_description:
return field_description["type"]
return self.models[collection][field]
def get_enum_from_collection_field(
self, field: str, collection: str
) -> Optional[Set[str]]:
if self.is_structured_field(field):
field, _ = self.to_template_field(collection, field)
field_description = self.models[collection][field]
if isinstance(field_description, dict):
if field_description["type"] == "template":
if isinstance(field_description["fields"], dict):
field_description = field_description["fields"]
else:
return None
if "enum" in field_description:
enum = set(field_description["enum"])
if not field_description.get("required", False):
enum.add(None)
return enum
return None
def check_relations(self, model: Dict[str, Any], collection: str) -> None: def check_relations(self, model: Dict[str, Any], collection: str) -> None:
for field in model.keys(): for field in model.keys():
@ -201,11 +398,15 @@ class Checker:
def check_relation( def check_relation(
self, model: Dict[str, Any], collection: str, field: str self, model: Dict[str, Any], collection: str, field: str
) -> None: ) -> None:
if "$" in field: if self.is_template_field(field):
return return
field_type = self.get_type_from_collection(field, collection) field_type = self.get_type_from_collection(field, collection)
basemsg = self.get_basemsg(collection, model["id"], field) basemsg = f"{collection}/{model['id']}/{field}: Relation Error: "
replacement = None
if self.is_structured_field(field):
_, replacement = self.to_template_field(collection, field)
if field_type == "relation": if field_type == "relation":
foreign_id = model[field] foreign_id = model[field]
@ -213,16 +414,16 @@ class Checker:
return return
foreign_collection, foreign_field = self.get_to(field, collection) foreign_collection, foreign_field = self.get_to(field, collection)
if "$" in foreign_field:
return
self.check_reverse_relation( self.check_reverse_relation(
collection, collection,
model["id"], model["id"],
model,
foreign_collection, foreign_collection,
foreign_id, foreign_id,
foreign_field, foreign_field,
basemsg, basemsg,
replacement,
) )
elif field_type == "relation-list": elif field_type == "relation-list":
@ -231,17 +432,17 @@ class Checker:
return return
foreign_collection, foreign_field = self.get_to(field, collection) foreign_collection, foreign_field = self.get_to(field, collection)
if "$" in foreign_field:
return
for foreign_id in foreign_ids: for foreign_id in foreign_ids:
self.check_reverse_relation( self.check_reverse_relation(
collection, collection,
model["id"], model["id"],
model,
foreign_collection, foreign_collection,
foreign_id, foreign_id,
foreign_field, foreign_field,
basemsg, basemsg,
replacement,
) )
elif field_type == "generic-relation" and model[field] is not None: elif field_type == "generic-relation" and model[field] is not None:
@ -250,16 +451,15 @@ class Checker:
collection, field, foreign_collection collection, field, foreign_collection
) )
if "$" in foreign_field:
return
self.check_reverse_relation( self.check_reverse_relation(
collection, collection,
model["id"], model["id"],
model,
foreign_collection, foreign_collection,
foreign_id, foreign_id,
foreign_field, foreign_field,
basemsg, basemsg,
replacement,
) )
elif field_type == "generic-relation-list" and model[field] is not None: elif field_type == "generic-relation-list" and model[field] is not None:
@ -269,27 +469,27 @@ class Checker:
collection, field, foreign_collection collection, field, foreign_collection
) )
if "$" in foreign_field:
continue
self.check_reverse_relation( self.check_reverse_relation(
collection, collection,
model["id"], model["id"],
model,
foreign_collection, foreign_collection,
foreign_id, foreign_id,
foreign_field, foreign_field,
basemsg, basemsg,
replacement,
) )
def get_to(self, field: str, collection: str) -> Tuple[str, str]: def get_to(self, field: str, collection: str) -> Tuple[str, str]:
to = self.models[collection][field]["to"] if self.is_structured_field(field):
return to.split("/") field, _ = self.to_template_field(collection, field)
def get_value(self, collection: str, id: int, field: str) -> Any: field_description = self.models[collection][field]
model = self.find_model(collection, id) if field_description["type"] == "template":
if model is None: to = field_description["fields"]["to"]
return None else:
return model.get(field) to = field_description["to"]
return to.split("/")
def find_model(self, collection: str, id: int) -> Optional[Dict[str, Any]]: def find_model(self, collection: str, id: int) -> Optional[Dict[str, Any]]:
c = self.data.get(collection, []) c = self.data.get(collection, [])
@ -298,19 +498,39 @@ class Checker:
return model return model
return None return None
def get_basemsg(self, collection: str, id: int, field: str) -> str:
return f"{collection}/{id}/{field}: RelationError: "
def check_reverse_relation( def check_reverse_relation(
self, self,
collection: str, collection: str,
id: int, id: int,
model: Dict[str, Any],
foreign_collection: str, foreign_collection: str,
foreign_id: int, foreign_id: int,
foreign_field: str, foreign_field: str,
basemsg: str, basemsg: str,
replacement: Optional[str],
) -> None: ) -> None:
foreign_value = self.get_value(foreign_collection, foreign_id, foreign_field) actual_foreign_field = foreign_field
if self.is_template_field(foreign_field):
if replacement:
actual_foreign_field = self.make_structured(foreign_field, replacement)
else:
replacement_collection = self.models[foreign_collection][foreign_field][
"replacement_collection"
]
replacement = model.get(f"{replacement_collection}_id")
if not replacement:
self.errors.append(
f"{basemsg} points to {foreign_collection}/{foreign_id}/{foreign_field},"
f" but there is no replacement for {replacement_collection}"
)
actual_foreign_field = self.make_structured(foreign_field, replacement)
foreign_model = self.find_model(foreign_collection, foreign_id)
foreign_value = (
foreign_model.get(actual_foreign_field)
if foreign_model is not None
else None
)
foreign_field_type = self.get_type_from_collection( foreign_field_type = self.get_type_from_collection(
foreign_field, foreign_collection foreign_field, foreign_collection
) )
@ -329,7 +549,7 @@ class Checker:
if error: if error:
self.errors.append( self.errors.append(
f"{basemsg} points to {foreign_collection}/{foreign_id}/{foreign_field}," f"{basemsg} points to {foreign_collection}/{foreign_id}/{actual_foreign_field},"
" but the reverse relation for is corrupt" " but the reverse relation for is corrupt"
) )
@ -340,7 +560,7 @@ class Checker:
if collection not in self.models.keys(): if collection not in self.models.keys():
raise CheckException(f"Fqid {fqid} has an invalid collection") raise CheckException(f"Fqid {fqid} has an invalid collection")
return collection, id return collection, id
except ValueError: except (ValueError, AttributeError):
raise CheckException(f"Fqid {fqid} is malformed") raise CheckException(f"Fqid {fqid} is malformed")
def split_collectionfield(self, collectionfield: str) -> Tuple[str, str]: def split_collectionfield(self, collectionfield: str) -> Tuple[str, str]:
@ -360,7 +580,7 @@ class Checker:
def get_to_generic_case( def get_to_generic_case(
self, collection: str, field: str, foreign_collection: str self, collection: str, field: str, foreign_collection: str
) -> str: ) -> str:
""" Returns all reverse relations as collectionfields """ """Returns all reverse relations as collectionfields"""
to = self.models[collection][field]["to"] to = self.models[collection][field]["to"]
if isinstance(to, dict): if isinstance(to, dict):
if foreign_collection not in to["collections"]: if foreign_collection not in to["collections"]:
@ -381,11 +601,19 @@ class Checker:
def main() -> int: def main() -> int:
files = sys.argv[1:]
if not files:
files = DEFAULT_FILES
is_import = "--import" in files
if is_import:
files = [x for x in files if x != "--import"]
failed = False failed = False
for f in CHECKED_FILES: for f in files:
with open(f) as data: with open(f) as data:
try: try:
Checker(json.load(data)).run_check() Checker(json.load(data), is_import=is_import).run_check()
except CheckException as e: except CheckException as e:
print(f"Check for {f} failed:\n", e) print(f"Check for {f} failed:\n", e)
failed = True failed = True

View File

@ -67,13 +67,12 @@
"poll_voted_$_ids": ["1"], "poll_voted_$_ids": ["1"],
"poll_voted_$1_ids": [5], "poll_voted_$1_ids": [5],
"option_$_ids": ["1"], "option_$_ids": ["1"],
"option_$1_ids": [3, 4], "option_$1_ids": [5, 7],
"vote_$_ids": ["1"], "vote_$_ids": ["1"],
"vote_$1_ids": [7], "vote_$1_ids": [9],
"projection_$_ids": [], "projection_$_ids": [],
"current_projector_$_ids": [],
"vote_delegated_vote_$_ids": ["1"], "vote_delegated_vote_$_ids": ["1"],
"vote_delegated_vote_$1_ids": [7], "vote_delegated_vote_$1_ids": [9],
"vote_delegated_$_to_id": [], "vote_delegated_$_to_id": [],
"vote_delegations_$_from_ids": [], "vote_delegations_$_from_ids": [],
@ -98,7 +97,7 @@
"last_email_send": null, "last_email_send": null,
"is_demo_user": false, "is_demo_user": false,
"organization_management_level": "", "organization_management_level": null,
"is_present_in_meeting_ids": [], "is_present_in_meeting_ids": [],
"committee_ids": [1], "committee_ids": [1],
@ -126,10 +125,9 @@
"assignment_candidate_$_ids": ["1"], "assignment_candidate_$_ids": ["1"],
"assignment_candidate_$1_ids": [3, 5], "assignment_candidate_$1_ids": [3, 5],
"option_$_ids": ["1"], "option_$_ids": ["1"],
"option_$1_ids": [6, 8], "option_$1_ids": [9, 12],
"vote_$_ids": [], "vote_$_ids": [],
"projection_$_ids": [], "projection_$_ids": [],
"current_projector_$_ids": [],
"vote_delegated_vote_$_ids": [], "vote_delegated_vote_$_ids": [],
"vote_delegated_$_to_id": [], "vote_delegated_$_to_id": [],
"vote_delegations_$_from_ids": [], "vote_delegations_$_from_ids": [],
@ -155,7 +153,7 @@
"last_email_send": null, "last_email_send": null,
"is_demo_user": false, "is_demo_user": false,
"organization_management_level": "", "organization_management_level": null,
"is_present_in_meeting_ids": [], "is_present_in_meeting_ids": [],
"committee_ids": [], "committee_ids": [],
@ -184,10 +182,9 @@
"assignment_candidate_$_ids": ["1"], "assignment_candidate_$_ids": ["1"],
"assignment_candidate_$1_ids": [2, 4], "assignment_candidate_$1_ids": [2, 4],
"option_$_ids": ["1"], "option_$_ids": ["1"],
"option_$1_ids": [5, 7], "option_$1_ids": [8, 11],
"vote_$_ids": [], "vote_$_ids": [],
"projection_$_ids": [], "projection_$_ids": [],
"current_projector_$_ids": [],
"vote_delegated_vote_$_ids": [], "vote_delegated_vote_$_ids": [],
"vote_delegated_$_to_id": [], "vote_delegated_$_to_id": [],
"vote_delegations_$_from_ids": [], "vote_delegations_$_from_ids": [],
@ -2421,7 +2418,6 @@
"candidate_ids": [1, 2, 3], "candidate_ids": [1, 2, 3],
"poll_ids": [3, 4], "poll_ids": [3, 4],
"option_$_ids": [],
"agenda_item_id": 11, "agenda_item_id": 11,
"list_of_speakers_id": 11, "list_of_speakers_id": 11,
"tag_ids": [], "tag_ids": [],
@ -2440,7 +2436,6 @@
"candidate_ids": [4, 5], "candidate_ids": [4, 5],
"poll_ids": [5], "poll_ids": [5],
"option_$_ids": [],
"agenda_item_id": 14, "agenda_item_id": 14,
"list_of_speakers_id": 14, "list_of_speakers_id": 14,
"tag_ids": [2], "tag_ids": [2],