Merge pull request #5314 from jsangmeister/permissiveHtmlValidation

Adds more permissive html validation for topics and welcome page
This commit is contained in:
Emanuel Schütze 2020-04-16 10:57:43 +02:00 committed by GitHub
commit c26ef8c0bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 62 additions and 31 deletions

View File

@ -17,7 +17,7 @@ from openslides.utils.rest_api import (
from ..utils.auth import has_perm from ..utils.auth import has_perm
from ..utils.autoupdate import inform_changed_data from ..utils.autoupdate import inform_changed_data
from ..utils.validate import validate_html from ..utils.validate import validate_html_strict
from .models import ( from .models import (
Assignment, Assignment,
AssignmentOption, AssignmentOption,
@ -177,7 +177,7 @@ class AssignmentSerializer(ModelSerializer):
def validate(self, data): def validate(self, data):
if "description" in data: if "description" in data:
data["description"] = validate_html(data["description"]) data["description"] = validate_html_strict(data["description"])
return data return data
def create(self, validated_data): def create(self, validated_data):

View File

@ -7,7 +7,7 @@ from django.core.exceptions import ValidationError as DjangoValidationError
from mypy_extensions import TypedDict from mypy_extensions import TypedDict
from ..utils.cache import element_cache from ..utils.cache import element_cache
from ..utils.validate import validate_html from ..utils.validate import validate_html_permissive, validate_html_strict
from .exceptions import ConfigError, ConfigNotFound from .exceptions import ConfigError, ConfigNotFound
from .models import ConfigStore from .models import ConfigStore
@ -183,7 +183,10 @@ class ConfigHandler:
raise ConfigError(f"{required_entry} has to be a string.") raise ConfigError(f"{required_entry} has to be a string.")
if config_variable.input_type == "markupText": if config_variable.input_type == "markupText":
value = validate_html(value) if config_variable.name == "general_event_welcome_text":
value = validate_html_permissive(value)
else:
value = validate_html_strict(value)
# Save the new value to the database. # Save the new value to the database.
db_value = ConfigStore.objects.get(key=key) db_value = ConfigStore.objects.get(key=key)

View File

@ -10,7 +10,7 @@ from ..utils.rest_api import (
SerializerMethodField, SerializerMethodField,
ValidationError, ValidationError,
) )
from ..utils.validate import validate_html from ..utils.validate import validate_html_strict
from .models import ( from .models import (
ConfigStore, ConfigStore,
Countdown, Countdown,
@ -161,7 +161,7 @@ class ProjectorMessageSerializer(ModelSerializer):
def validate(self, data): def validate(self, data):
if "message" in data: if "message" in data:
data["message"] = validate_html(data["message"]) data["message"] = validate_html_strict(data["message"])
return data return data

View File

@ -24,7 +24,7 @@ from ..utils.rest_api import (
SerializerMethodField, SerializerMethodField,
ValidationError, ValidationError,
) )
from ..utils.validate import validate_html from ..utils.validate import validate_html_strict
from .models import ( from .models import (
Category, Category,
Motion, Motion,
@ -292,7 +292,7 @@ class MotionChangeRecommendationSerializer(ModelSerializer):
def validate(self, data): def validate(self, data):
# Change recommendations for titles are stored as plain-text, thus they don't need to be html-escaped # Change recommendations for titles are stored as plain-text, thus they don't need to be html-escaped
if "text" in data and not self.is_title_cr(data): if "text" in data and not self.is_title_cr(data):
data["text"] = validate_html(data["text"]) data["text"] = validate_html_strict(data["text"])
return data return data
@ -419,21 +419,21 @@ class MotionSerializer(ModelSerializer):
def validate(self, data): def validate(self, data):
if "text" in data: if "text" in data:
data["text"] = validate_html(data["text"]) data["text"] = validate_html_strict(data["text"])
if "modified_final_version" in data: if "modified_final_version" in data:
data["modified_final_version"] = validate_html( data["modified_final_version"] = validate_html_strict(
data["modified_final_version"] data["modified_final_version"]
) )
if "reason" in data: if "reason" in data:
data["reason"] = validate_html(data["reason"]) data["reason"] = validate_html_strict(data["reason"])
# The motion text is only needed, if it is not a paragraph based amendment. # The motion text is only needed, if it is not a paragraph based amendment.
if data.get("amendment_paragraphs") is not None: if data.get("amendment_paragraphs") is not None:
data["amendment_paragraphs"] = list( data["amendment_paragraphs"] = list(
map( map(
lambda entry: validate_html(entry) lambda entry: validate_html_strict(entry)
if isinstance(entry, str) if isinstance(entry, str)
else None, else None,
data["amendment_paragraphs"], data["amendment_paragraphs"],

View File

@ -1,6 +1,6 @@
from openslides.utils.autoupdate import inform_changed_data from openslides.utils.autoupdate import inform_changed_data
from openslides.utils.rest_api import CharField, IntegerField, ModelSerializer from openslides.utils.rest_api import CharField, IntegerField, ModelSerializer
from openslides.utils.validate import validate_html from openslides.utils.validate import validate_html_permissive
from .models import Topic from .models import Topic
@ -36,7 +36,7 @@ class TopicSerializer(ModelSerializer):
def validate(self, data): def validate(self, data):
if "text" in data: if "text" in data:
data["text"] = validate_html(data["text"]) data["text"] = validate_html_permissive(data["text"])
return data return data
def create(self, validated_data): def create(self, validated_data):

View File

@ -9,7 +9,7 @@ from ..utils.rest_api import (
RelatedField, RelatedField,
ValidationError, ValidationError,
) )
from ..utils.validate import validate_html from ..utils.validate import validate_html_strict
from .models import Group, PersonalNote, User from .models import Group, PersonalNote, User
@ -95,7 +95,7 @@ class UserSerializer(ModelSerializer):
# check the about_me html # check the about_me html
if "about_me" in data: if "about_me" in data:
data["about_me"] = validate_html(data["about_me"]) data["about_me"] = validate_html_strict(data["about_me"])
return data return data

View File

@ -1,11 +1,11 @@
from typing import Any from typing import Any, List
import bleach import bleach
from .rest_api import ValidationError from .rest_api import ValidationError
allowed_tags = [ allowed_tags_strict = [
"a", "a",
"img", # links and images "img", # links and images
"br", "br",
@ -37,14 +37,18 @@ allowed_tags = [
"th", "th",
"tr", "tr",
"td", # tables "td", # tables
"div",
] ]
allowed_attributes = { allowed_tags_permissive = allowed_tags_strict + [
"*": ["class", "style"], "video",
"img": ["alt", "src", "title"], ]
"a": ["href", "title"],
"th": ["scope"],
"ol": ["start"], def allow_all(tag: str, name: str, value: str) -> bool:
} return True
allowed_attributes = allow_all
allowed_styles = [ allowed_styles = [
"color", "color",
"background-color", "background-color",
@ -52,17 +56,41 @@ allowed_styles = [
"width", "width",
"text-align", "text-align",
"float", "float",
"padding",
"text-decoration", "text-decoration",
"margin",
"padding",
"line-height",
"max-width",
"min-width",
"max-height",
"min-height",
"overflow",
"word-break",
"word-wrap",
] ]
def validate_html(html: str) -> str: def validate_html_strict(html: str) -> str:
""" """
This method takes a string and escapes all non-whitelisted html entries. This method takes a string and escapes all non-whitelisted html entries.
Every field of a model that is loaded trusted in the DOM should be validated. Every field of a model that is loaded trusted in the DOM should be validated.
During copy and paste from Word maybe some tabs are spread over the html. Remove them. During copy and paste from Word maybe some tabs are spread over the html. Remove them.
""" """
return base_validate_html(html, allowed_tags_strict)
def validate_html_permissive(html: str) -> str:
"""
See validate_html_strict, but allows some more tags, like iframes and videos.
Do not use on validation for normal users, only for admins!
"""
return base_validate_html(html, allowed_tags_permissive)
def base_validate_html(html: str, allowed_tags: List[str]) -> str:
"""
For internal use only.
"""
html = html.replace("\t", "") html = html.replace("\t", "")
return bleach.clean( return bleach.clean(
html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles
@ -72,7 +100,7 @@ def validate_html(html: str) -> str:
def validate_json(json: Any, max_depth: int) -> Any: def validate_json(json: Any, max_depth: int) -> Any:
""" """
Traverses through the JSON structure (dicts and lists) and runs Traverses through the JSON structure (dicts and lists) and runs
validate_html on every found string. validate_html_strict on every found string.
Give max-depth to protect against stack-overflows. This should be the Give max-depth to protect against stack-overflows. This should be the
maximum nested depth of the object expected. maximum nested depth of the object expected.
@ -86,6 +114,6 @@ def validate_json(json: Any, max_depth: int) -> Any:
if isinstance(json, list): if isinstance(json, list):
return [validate_json(item, max_depth - 1) for item in json] return [validate_json(item, max_depth - 1) for item in json]
if isinstance(json, str): if isinstance(json, str):
return validate_html(json) return validate_html_strict(json)
return json return json

View File

@ -1,12 +1,12 @@
from unittest import TestCase from unittest import TestCase
from openslides.utils.validate import validate_html from openslides.utils.validate import validate_html_strict
class ValidatorTest(TestCase): class ValidatorTest(TestCase):
def test_XSS_protection(self): def test_XSS_protection(self):
data = "tuveegi2Ho<a><p>tuveegi2Ho<script>kekj9(djwk</script></p>Boovai7esu</a>ee4Yaiw0ei" data = "tuveegi2Ho<a><p>tuveegi2Ho<script>kekj9(djwk</script></p>Boovai7esu</a>ee4Yaiw0ei"
self.assertEqual( self.assertEqual(
validate_html(data), validate_html_strict(data),
"tuveegi2Ho<a><p>tuveegi2Ho&lt;script&gt;kekj9(djwk&lt;/script&gt;</p>Boovai7esu</a>ee4Yaiw0ei", "tuveegi2Ho<a><p>tuveegi2Ho&lt;script&gt;kekj9(djwk&lt;/script&gt;</p>Boovai7esu</a>ee4Yaiw0ei",
) )