OpenSlides/docs/modelsvalidator/validate.py

376 lines
13 KiB
Python
Raw Normal View History

import json
import re
import sys
from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
import yaml
DEFAULT_FILES = [
"../../docs/models.yml",
]
KEYSEPARATOR = "/"
_collection_regex = r"[a-z]([a-z_]+[a-z]+)?"
_field_regex = r"[a-z][a-z0-9_]*\$?[a-z0-9_]*"
collectionfield_regex = re.compile(
f"^({_collection_regex}){KEYSEPARATOR}({_field_regex})$"
)
collection_regex = re.compile(f"^{_collection_regex}$")
field_regex = re.compile(f"^{_field_regex}$")
decimal_regex = re.compile("^\d+\.\d{6}$")
color_regex = re.compile("^#[0-9a-f]{6}$")
RELATION_TYPES = (
"relation",
"relation-list",
"generic-relation",
"generic-relation-list",
)
DATA_TYPES = (
"string",
"number",
"string[]",
"number[]",
"boolean",
"JSON",
"HTMLStrict",
"HTMLPermissive",
"float",
"decimal(6)",
"timestamp",
"color",
)
VALID_TYPES = DATA_TYPES + RELATION_TYPES + ("template",)
OPTIONAL_ATTRIBUTES = (
"description",
"calculated",
"required",
"read_only",
)
class CheckException(Exception):
pass
class Checker:
def __init__(self, filepath: str) -> None:
with open(filepath, "rb") as x:
self.models = yaml.safe_load(x.read())
self.errors: List[str] = []
def run_check(self) -> None:
self._run_checks()
if self.errors:
errors = [f"\t{error}" for error in self.errors]
raise CheckException("\n".join(errors))
def _run_checks(self) -> None:
for collection in self.models.keys():
if not collection_regex.match(collection):
self.errors.append(f"Collection '{collection}' is not valid.")
if self.errors:
return
for collection, fields in self.models.items():
if not isinstance(fields, dict):
self.errors.append(
f"The fields of collection {collection} must be a dict."
)
continue
for field_name, field in fields.items():
if not field_regex.match(field_name):
self.errors.append(
f"Field name '{field_name}' of collection {collection} is not a valid field name."
)
continue
if not isinstance(field, dict):
self.errors.append(
f"Field '{field_name}' of collection {collection} must be a dict."
)
self.check_field(collection, field_name, field)
if self.errors:
return
for collection, fields in self.models.items():
for field_name, field in fields.items():
is_relation_field = field["type"] in RELATION_TYPES
is_template_relation_field = (
field["type"] == "template"
and isinstance(field["fields"], dict)
and field["fields"]["type"] in RELATION_TYPES
)
if not is_relation_field and not is_template_relation_field:
continue
error = self.check_relation(collection, field_name, field)
if error:
self.errors.append(error)
def check_field(
self,
collection: str,
field_name: str,
field: Union[str, Dict[str, Any]],
nested: bool = False,
) -> None:
collectionfield = f"{collection}{KEYSEPARATOR}{field_name}"
if nested:
if isinstance(field, str):
field = {"type": field}
field[
"restriction_mode"
] = "A" # add restriction_mode to satisfy the checker below.
if field["type"] == "template": # no nested templates
self.errors.append(f"Nested template field in {collectionfield}")
return
type = field.get("type")
if type not in VALID_TYPES:
self.errors.append(
f"Type '{type}' for collectionfield {collectionfield} is invalid."
)
return
required_attributes = [
"type",
"restriction_mode",
]
if type in RELATION_TYPES:
required_attributes.append("to")
if type == "template":
required_attributes.append("fields")
for attr in required_attributes:
if attr not in field:
self.errors.append(
f"Required attribute '{attr}' for collectionfield {collectionfield} is missing."
)
return
if field.get("calculated"):
return
valid_attributes = list(OPTIONAL_ATTRIBUTES) + required_attributes
if type == "string[]":
valid_attributes.append("items")
if "items" in field and "enum" not in field["items"]:
self.errors.append(
f"'items' is missing an inner 'enum' for {collectionfield}"
)
return
for value in field.get("items", {"enum": []})["enum"]:
self.validate_value_for_type("string", value, collectionfield)
if type == "JSON" and "default" in field:
try:
json.loads(json.dumps(field["default"]))
except:
self.errors.append(
f"Default value for {collectionfield}' is not valid json."
)
if type == "number":
valid_attributes.append("minimum")
if not isinstance(field.get("minimum", 0), int):
self.errors.append(f"'minimum' for {collectionfield} is not a number.")
if type == "string":
valid_attributes.append("maxLength")
if not isinstance(field.get("maxLength", 0), int):
self.errors.append(
f"'maxLength' for {collectionfield} is not a number."
)
if type in DATA_TYPES:
valid_attributes.append("default")
if "default" in field:
self.validate_value_for_type(type, field["default"], collectionfield)
valid_attributes.append("enum")
if "enum" in field:
if not isinstance(field["enum"], list):
self.errors.append(f"'enum' for {collectionfield}' is not a list.")
for value in field["enum"]:
self.validate_value_for_type(type, value, collectionfield)
if type in RELATION_TYPES:
valid_attributes.append("on_delete")
if "on_delete" in field and field["on_delete"] not in (
"CASCADE",
"PROTECT",
):
self.errors.append(
f"invalid value for 'on_delete' for {collectionfield}"
)
valid_attributes.append("equal_fields")
if type == "template":
if "$" not in field_name:
self.errors.append(
f"The template field {collectionfield} is missing a $"
)
valid_attributes.append("replacement_collection")
elif "$" in field_name and not nested:
print(field_name, field)
self.errors.append(f"The non-template field {collectionfield} contains a $")
for attr in field.keys():
if attr not in valid_attributes:
self.errors.append(
f"Attribute '{attr}' for collectionfield {collectionfield} is invalid."
)
if not isinstance(field.get("description", ""), str):
self.errors.append(f"Description of {collectionfield} must be a string.")
if type == "template":
self.check_field(collection, field_name, field["fields"], nested=True)
def validate_value_for_type(
self, type_str: str, value: Any, collectionfield: str
) -> None:
basic_types = {
"string": str,
"number": int,
"boolean": bool,
"HTMLStrict": str,
"HTMLPermissive": str,
"timestamp": int,
}
if type_str in basic_types:
if type(value) != basic_types[type_str]:
self.errors.append(
f"Value '{value}' for {collectionfield}' is not a {type_str}."
)
elif type_str in ("string[]", "number[]"):
if not isinstance(value, list):
self.errors.append(
f"Value '{value}' for {collectionfield}' is not a {type_str}."
)
for x in value:
if type(x) != basic_types[type_str[:-2]]:
self.errors.append(
f"Listentry '{x}' for {collectionfield}' is not a {type_str[:-2]}."
)
elif type_str == "JSON":
pass
elif type_str == "float":
if type(value) not in (int, float):
self.errors.append(
f"Value '{value}' for {collectionfield}' is not a float."
)
elif type_str == "decimal(6)":
if not decimal_regex.match(value):
self.errors.append(
f"Value '{value}' for {collectionfield}' is not a decimal(6)."
)
elif type_str == "color":
if not color_regex.match(value):
self.errors.append(
f"Value '{value}' for {collectionfield}' is not a color."
)
else:
raise NotImplementedError(type_str)
def check_relation(
self, collection: str, field_name: str, field: Dict[str, Any]
) -> Optional[str]:
collectionfield = f"{collection}{KEYSEPARATOR}{field_name}"
if field["type"] == "template":
field = field["fields"]
to = field["to"]
if isinstance(to, str):
if not collectionfield_regex.match(to):
return f"'to' of {collectionfield} is not a collectionfield."
return self.check_reverse(collectionfield, to)
elif isinstance(to, list):
for cf in to:
if not collectionfield_regex.match(cf):
return f"The collectionfield in 'to' of {collectionfield} is not valid."
error = self.check_reverse(collectionfield, cf)
if error:
return error
else:
to_field = to["field"]
if not field_regex.match(to_field):
return (
f"The field '{to_field}' in 'to' of {collectionfield} is not valid."
)
for c in to["collections"]:
if not collection_regex.match(c):
self.errors.append(
f"The collection '{c}' in 'to' of {collectionfield} is not a valid collection."
)
error = self.check_reverse(
collectionfield, f"{c}{KEYSEPARATOR}{to['field']}"
)
if error:
return error
return None
def check_reverse(
self, from_collectionfield: str, to_collectionfield: str
) -> Optional[str]:
to_unified = [] # a list of target collectionfields (unififed with all
# the different possibilities for the 'to' field) from the (expected)
# relation in to_collectionfield. The from_collectionfield must be in this
# list.
to_collection, to_field_name = to_collectionfield.split(KEYSEPARATOR)
if to_collection not in self.models:
return f"The collection '{to_collection}' in 'to' of {from_collectionfield} is not a valid collection."
if to_field_name not in self.models[to_collection]:
return f"The collectionfield '{to_collectionfield}' in 'to' of {from_collectionfield} does not exist."
to_field = self.models[to_collection][to_field_name]
if to_field["type"] == "template":
to_field = to_field["fields"]
if not isinstance(to_field, dict):
return f"The 'fields' of the template field '{to_collectionfield}' must be a dict to hold a relation."
if to_field["type"] not in RELATION_TYPES:
return f"{from_collectionfield} points to {to_collectionfield}, but {to_collectionfield} to is not a relation."
to = to_field["to"]
if isinstance(to, str):
to_unified.append(to)
elif isinstance(to, list):
to_unified = to
else:
for c in to["collections"]:
to_unified.append(f"{c}{KEYSEPARATOR}{to['field']}")
if from_collectionfield not in to_unified:
return f"{from_collectionfield} points to {to_collectionfield}, but {to_collectionfield} does not point back."
return None
def split_collectionfield(self, collectionfield: str) -> Tuple[str, str]:
parts = collectionfield.split(KEYSEPARATOR)
return parts[0], parts[1]
def main() -> int:
files = sys.argv[1:]
if not files:
files = DEFAULT_FILES
failed = False
for f in files:
with open(f) as data:
try:
Checker(f).run_check()
except CheckException as e:
print(f"Check for {f} failed:\n", e)
failed = True
else:
print(f"Check for {f} successful.")
return 1 if failed else 0
if __name__ == "__main__":
sys.exit(main())