Merge pull request #5314 from jsangmeister/permissiveHtmlValidation

Adds more permissive html validation for topics and welcome page
This commit is contained in:
Emanuel Schütze 2020-04-16 10:57:43 +02:00 committed by GitHub
commit c26ef8c0bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 62 additions and 31 deletions

View File

@ -17,7 +17,7 @@ from openslides.utils.rest_api import (
from ..utils.auth import has_perm
from ..utils.autoupdate import inform_changed_data
from ..utils.validate import validate_html
from ..utils.validate import validate_html_strict
from .models import (
Assignment,
AssignmentOption,
@ -177,7 +177,7 @@ class AssignmentSerializer(ModelSerializer):
def validate(self, data):
if "description" in data:
data["description"] = validate_html(data["description"])
data["description"] = validate_html_strict(data["description"])
return data
def create(self, validated_data):

View File

@ -7,7 +7,7 @@ from django.core.exceptions import ValidationError as DjangoValidationError
from mypy_extensions import TypedDict
from ..utils.cache import element_cache
from ..utils.validate import validate_html
from ..utils.validate import validate_html_permissive, validate_html_strict
from .exceptions import ConfigError, ConfigNotFound
from .models import ConfigStore
@ -183,7 +183,10 @@ class ConfigHandler:
raise ConfigError(f"{required_entry} has to be a string.")
if config_variable.input_type == "markupText":
value = validate_html(value)
if config_variable.name == "general_event_welcome_text":
value = validate_html_permissive(value)
else:
value = validate_html_strict(value)
# Save the new value to the database.
db_value = ConfigStore.objects.get(key=key)

View File

@ -10,7 +10,7 @@ from ..utils.rest_api import (
SerializerMethodField,
ValidationError,
)
from ..utils.validate import validate_html
from ..utils.validate import validate_html_strict
from .models import (
ConfigStore,
Countdown,
@ -161,7 +161,7 @@ class ProjectorMessageSerializer(ModelSerializer):
def validate(self, data):
if "message" in data:
data["message"] = validate_html(data["message"])
data["message"] = validate_html_strict(data["message"])
return data

View File

@ -24,7 +24,7 @@ from ..utils.rest_api import (
SerializerMethodField,
ValidationError,
)
from ..utils.validate import validate_html
from ..utils.validate import validate_html_strict
from .models import (
Category,
Motion,
@ -292,7 +292,7 @@ class MotionChangeRecommendationSerializer(ModelSerializer):
def validate(self, data):
# Change recommendations for titles are stored as plain-text, thus they don't need to be html-escaped
if "text" in data and not self.is_title_cr(data):
data["text"] = validate_html(data["text"])
data["text"] = validate_html_strict(data["text"])
return data
@ -419,21 +419,21 @@ class MotionSerializer(ModelSerializer):
def validate(self, data):
if "text" in data:
data["text"] = validate_html(data["text"])
data["text"] = validate_html_strict(data["text"])
if "modified_final_version" in data:
data["modified_final_version"] = validate_html(
data["modified_final_version"] = validate_html_strict(
data["modified_final_version"]
)
if "reason" in data:
data["reason"] = validate_html(data["reason"])
data["reason"] = validate_html_strict(data["reason"])
# The motion text is only needed, if it is not a paragraph based amendment.
if data.get("amendment_paragraphs") is not None:
data["amendment_paragraphs"] = list(
map(
lambda entry: validate_html(entry)
lambda entry: validate_html_strict(entry)
if isinstance(entry, str)
else None,
data["amendment_paragraphs"],

View File

@ -1,6 +1,6 @@
from openslides.utils.autoupdate import inform_changed_data
from openslides.utils.rest_api import CharField, IntegerField, ModelSerializer
from openslides.utils.validate import validate_html
from openslides.utils.validate import validate_html_permissive
from .models import Topic
@ -36,7 +36,7 @@ class TopicSerializer(ModelSerializer):
def validate(self, data):
if "text" in data:
data["text"] = validate_html(data["text"])
data["text"] = validate_html_permissive(data["text"])
return data
def create(self, validated_data):

View File

@ -9,7 +9,7 @@ from ..utils.rest_api import (
RelatedField,
ValidationError,
)
from ..utils.validate import validate_html
from ..utils.validate import validate_html_strict
from .models import Group, PersonalNote, User
@ -95,7 +95,7 @@ class UserSerializer(ModelSerializer):
# check the about_me html
if "about_me" in data:
data["about_me"] = validate_html(data["about_me"])
data["about_me"] = validate_html_strict(data["about_me"])
return data

View File

@ -1,11 +1,11 @@
from typing import Any
from typing import Any, List
import bleach
from .rest_api import ValidationError
allowed_tags = [
allowed_tags_strict = [
"a",
"img", # links and images
"br",
@ -37,14 +37,18 @@ allowed_tags = [
"th",
"tr",
"td", # tables
"div",
]
allowed_attributes = {
"*": ["class", "style"],
"img": ["alt", "src", "title"],
"a": ["href", "title"],
"th": ["scope"],
"ol": ["start"],
}
allowed_tags_permissive = allowed_tags_strict + [
"video",
]
def allow_all(tag: str, name: str, value: str) -> bool:
return True
allowed_attributes = allow_all
allowed_styles = [
"color",
"background-color",
@ -52,17 +56,41 @@ allowed_styles = [
"width",
"text-align",
"float",
"padding",
"text-decoration",
"margin",
"padding",
"line-height",
"max-width",
"min-width",
"max-height",
"min-height",
"overflow",
"word-break",
"word-wrap",
]
def validate_html(html: str) -> str:
def validate_html_strict(html: str) -> str:
"""
This method takes a string and escapes all non-whitelisted html entries.
Every field of a model that is loaded trusted in the DOM should be validated.
During copy and paste from Word maybe some tabs are spread over the html. Remove them.
"""
return base_validate_html(html, allowed_tags_strict)
def validate_html_permissive(html: str) -> str:
"""
See validate_html_strict, but allows some more tags, like iframes and videos.
Do not use on validation for normal users, only for admins!
"""
return base_validate_html(html, allowed_tags_permissive)
def base_validate_html(html: str, allowed_tags: List[str]) -> str:
"""
For internal use only.
"""
html = html.replace("\t", "")
return bleach.clean(
html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles
@ -72,7 +100,7 @@ def validate_html(html: str) -> str:
def validate_json(json: Any, max_depth: int) -> Any:
"""
Traverses through the JSON structure (dicts and lists) and runs
validate_html on every found string.
validate_html_strict on every found string.
Give max-depth to protect against stack-overflows. This should be the
maximum nested depth of the object expected.
@ -86,6 +114,6 @@ def validate_json(json: Any, max_depth: int) -> Any:
if isinstance(json, list):
return [validate_json(item, max_depth - 1) for item in json]
if isinstance(json, str):
return validate_html(json)
return validate_html_strict(json)
return json

View File

@ -1,12 +1,12 @@
from unittest import TestCase
from openslides.utils.validate import validate_html
from openslides.utils.validate import validate_html_strict
class ValidatorTest(TestCase):
def test_XSS_protection(self):
data = "tuveegi2Ho<a><p>tuveegi2Ho<script>kekj9(djwk</script></p>Boovai7esu</a>ee4Yaiw0ei"
self.assertEqual(
validate_html(data),
validate_html_strict(data),
"tuveegi2Ho<a><p>tuveegi2Ho&lt;script&gt;kekj9(djwk&lt;/script&gt;</p>Boovai7esu</a>ee4Yaiw0ei",
)