2020-04-15 11:59:16 +02:00
|
|
|
from typing import Any, List
|
2019-11-06 15:55:03 +01:00
|
|
|
|
2017-01-20 11:34:05 +01:00
|
|
|
import bleach
|
|
|
|
|
2019-11-06 15:55:03 +01:00
|
|
|
from .rest_api import ValidationError
|
|
|
|
|
2018-07-09 23:22:26 +02:00
|
|
|
|
2020-04-15 11:59:16 +02:00
|
|
|
# Tags that are kept when sanitizing with the strict whitelist.
# This must stay a list: allowed_tags_permissive is built from it via list
# concatenation.
allowed_tags_strict = [
    # links and images
    "a",
    "img",
    # text layout
    "br",
    "p",
    "span",
    "blockquote",
    # text formatting
    "strike",
    "del",
    "ins",
    "strong",
    "u",
    "em",
    "sup",
    "sub",
    "pre",
    # headings
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    # lists
    "ol",
    "ul",
    "li",
    # tables
    "table",
    "caption",
    "thead",
    "tbody",
    "th",
    "tr",
    "td",
    # generic container
    "div",
]
|
2020-04-15 11:59:16 +02:00
|
|
|
# Strict whitelist plus the extra tags that only the permissive validation
# lets through. Currently the only addition is "video".
allowed_tags_permissive = allowed_tags_strict + [
    "video",
]
|
|
|
|
|
2020-11-20 13:38:37 +01:00
|
|
|
# Attribute names handed to bleach.clean() as `attributes`. The list is
# passed as-is for every tag; it is kept in alphabetical order.
allowed_attributes = [
    "align",
    "alt",
    "autoplay",
    "background",
    "bgcolor",
    "border",
    "class",
    "colspan",
    "controls",
    "dir",
    "height",
    "hidden",
    "href",
    "hreflang",
    "id",
    "lang",
    "loop",
    "muted",
    "poster",
    "preload",
    "rel",
    "rowspan",
    "scope",
    "sizes",
    "src",
    "srcset",
    "start",
    "style",
    "target",
    "title",
    "type",
    "width",
]
|
2020-04-15 11:59:16 +02:00
|
|
|
|
2017-01-20 11:34:05 +01:00
|
|
|
# CSS property names handed to bleach.clean() as `styles`.
allowed_styles = [
    "color",
    "background-color",
    "height",
    "width",
    "text-align",
    "vertical-align",
    "float",
    "text-decoration",
    "margin",
    "padding",
    "line-height",
    "max-width",
    "min-width",
    "max-height",
    "min-height",
    "overflow",
    "word-break",
    "word-wrap",
]
|
|
|
|
|
|
|
|
|
2020-04-15 11:59:16 +02:00
|
|
|
def validate_html_strict(html: str) -> str:
    """
    This method takes a string and escapes all non-whitelisted html entries.
    Every field of a model that is loaded trusted in the DOM should be validated.

    The whitelist used here is allowed_tags_strict. Tab characters, which can
    end up scattered over the html when pasting from Word, are removed by
    base_validate_html before the markup is cleaned.

    Returns the sanitized html string.
    """
    return base_validate_html(html, allowed_tags_strict)
|
|
|
|
|
|
|
|
|
|
|
|
def validate_html_permissive(html: str) -> str:
    """
    See validate_html_strict, but validates against allowed_tags_permissive,
    which additionally lets the "video" tag through.
    Do not use on validation for normal users, only for admins!

    Returns the sanitized html string.
    """
    return base_validate_html(html, allowed_tags_permissive)
|
|
|
|
|
|
|
|
|
|
|
|
def base_validate_html(html: str, allowed_tags: List[str]) -> str:
    """
    For internal use only.

    Removes every tab character from `html` and then runs the result through
    bleach.clean() with the given tag whitelist and the module-wide
    allowed_attributes and allowed_styles.

    Returns the cleaned html string.
    """
    # Tabs are dropped entirely (not replaced by a space) before cleaning.
    html = html.replace("\t", "")
    # NOTE(review): the `styles` keyword only exists in older bleach releases;
    # newer ones expect a `css_sanitizer` instead - confirm against the pinned
    # bleach version before upgrading.
    return bleach.clean(
        html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles
    )
|
2019-11-06 15:55:03 +01:00
|
|
|
|
|
|
|
|
|
|
|
def validate_json(json: Any, max_depth: int) -> Any:
    """
    Traverses through the JSON structure (dicts and lists) and runs
    validate_html_strict on every found string value.

    Dict keys are passed through unchanged; only values are validated. Any
    value that is neither dict, list nor str is returned as it is. Dicts and
    lists are rebuilt, the input object is not modified.

    Give max-depth to protect against stack-overflows. This should be the
    maximum nested depth of the object expected. Every level consumes one
    unit of depth, including the leaf values, so a flat scalar needs
    max_depth >= 1 and {"a": "b"} needs max_depth >= 2.

    Raises ValidationError if the depth budget is used up.
    """
    # `<=` instead of `==`: a negative budget must be rejected as well,
    # otherwise the recursion limit would never trigger.
    if max_depth <= 0:
        raise ValidationError({"detail": "The JSON is too nested."})

    if isinstance(json, dict):
        return {key: validate_json(value, max_depth - 1) for key, value in json.items()}
    if isinstance(json, list):
        return [validate_json(item, max_depth - 1) for item in json]
    if isinstance(json, str):
        return validate_html_strict(json)

    return json
|