OpenSlides/server/openslides/utils/validate.py

149 lines
2.9 KiB
Python
Raw Normal View History

2020-04-15 11:59:16 +02:00
from typing import Any, List
2017-01-20 11:34:05 +01:00
import bleach
from .rest_api import ValidationError
2020-04-15 11:59:16 +02:00
# Tag whitelist handed to bleach.clean by validate_html_strict (and, through
# validate_json, applied to every string found in a JSON payload).
allowed_tags_strict = [
    # links and images
    "a",
    "img",
    # text layout
    "br",
    "p",
    "span",
    "blockquote",
    # text formatting
    "strike",
    "del",
    "ins",
    "strong",
    "u",
    "em",
    "sup",
    "sub",
    "pre",
    # headings
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    # lists
    "ol",
    "ul",
    "li",
    # tables
    "table",
    "caption",
    "thead",
    "tbody",
    "th",
    "tr",
    "td",
    # generic container
    "div",
]
2020-04-15 11:59:16 +02:00
# Everything in allowed_tags_strict plus "video"; used by
# validate_html_permissive. Built as a new list so the strict list is not
# modified.
allowed_tags_permissive = [*allowed_tags_strict, "video"]
2020-11-20 13:38:37 +01:00
# Attribute whitelist passed as ``attributes`` to bleach.clean in
# base_validate_html. Entries are kept in alphabetical order, one row per
# initial letter.
allowed_attributes = [
    "align", "alt", "autoplay",
    "background", "bgcolor", "border",
    "class", "colspan", "controls",
    "dir",
    "height", "hidden", "href", "hreflang",
    "id",
    "lang", "loop",
    "muted",
    "poster", "preload",
    "rel", "rowspan",
    "scope", "sizes", "src", "srcset", "start", "style",
    "target", "title",
    "width",
]
2020-04-15 11:59:16 +02:00
2017-01-20 11:34:05 +01:00
# CSS property whitelist passed as ``styles`` to bleach.clean in
# base_validate_html.
allowed_styles = [
    # colours
    "color",
    "background-color",
    # sizing
    "height",
    "width",
    # alignment and flow
    "text-align",
    "vertical-align",
    "float",
    "text-decoration",
    # spacing
    "margin",
    "padding",
    "line-height",
    # size constraints
    "max-width",
    "min-width",
    "max-height",
    "min-height",
    # overflow and wrapping
    "overflow",
    "word-break",
    "word-wrap",
]
2020-04-15 11:59:16 +02:00
def validate_html_strict(html: str) -> str:
    """
    Sanitize ``html`` using the strict tag whitelist (allowed_tags_strict).

    Every field of a model that is loaded as trusted markup in the DOM should
    be validated with this function. Tab characters, which may get spread
    over the markup during copy and paste from Word, are removed as well.

    Returns the cleaned HTML string produced by base_validate_html.
    """
    return base_validate_html(html, allowed_tags_strict)
def validate_html_permissive(html: str) -> str:
    """
    Sanitize ``html`` using the permissive tag whitelist.

    Works like validate_html_strict, but uses allowed_tags_permissive, which
    additionally lets the ``video`` tag through.

    Do not use this to validate input from normal users, only from admins!
    """
    return base_validate_html(html, allowed_tags_permissive)
def base_validate_html(html: str, allowed_tags: List[str]) -> str:
    """
    For internal use only.

    Removes every tab character from ``html`` and then runs bleach.clean on
    the result with ``allowed_tags`` as the tag whitelist and the module-level
    allowed_attributes / allowed_styles as attribute and CSS whitelists.

    Returns the string produced by bleach.clean.
    """
    # Tabs are dropped before sanitising, not replaced by spaces.
    html = html.replace("\t", "")
    # NOTE(review): the ``styles`` keyword exists only in bleach < 5.0 (later
    # releases use ``css_sanitizer`` instead) - confirm the pinned version
    # before upgrading bleach.
    return bleach.clean(
        html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles
    )
def validate_json(json: Any, max_depth: int) -> Any:
    """
    Traverses through the JSON structure (dicts and lists) and runs
    validate_html_strict on every found string.

    A new structure is returned; dicts and lists are rebuilt, strings are
    replaced by their sanitized version and every other value is passed
    through unchanged. Dict keys are kept as they are, only the values are
    traversed.

    Give max_depth to protect against stack overflows. This should be the
    maximum nested depth of the object expected. Each level (including the
    leaf values themselves) consumes one unit of depth.

    Raises ValidationError if the remaining depth is exhausted, i.e. if
    max_depth is zero or negative when a value is about to be inspected.
    """
    # "<= 0" rather than "== 0": a negative starting depth would otherwise
    # never hit the guard and the recursion would be unbounded.
    if max_depth <= 0:
        raise ValidationError({"detail": "The JSON is too nested."})

    if isinstance(json, dict):
        return {key: validate_json(value, max_depth - 1) for key, value in json.items()}
    if isinstance(json, list):
        return [validate_json(item, max_depth - 1) for item in json]
    if isinstance(json, str):
        return validate_html_strict(json)
    # Numbers, booleans, None and any other type are returned untouched.
    return json