Add first version of server-side filtering of HTML with bleach

Fix formatting (pep8 and otherwise)

Make CleanHtml a mixin so it can be used more easily, and add a test case

Mark HTML fields as 'safe' in the template

Update list of allowed HTML tags, take special care for reportlab

Add heading tags to white list

Rename get_clean_html to get_clean_html_fields
This commit is contained in:
Roland Geider 2013-03-11 22:38:39 +01:00 committed by Oskar Hahn
parent ce333d4c40
commit 01885c9304
6 changed files with 146 additions and 9 deletions

View File

@ -14,11 +14,12 @@ from django import forms
from django.utils.translation import ugettext as _
from openslides.utils.forms import CssClassMixin
from openslides.utils.forms import CleanHtmlFormMixin
from openslides.utils.person import PersonFormField, MultiplePersonFormField
from .models import Motion, Category
class BaseMotionForm(forms.ModelForm, CssClassMixin):
class BaseMotionForm(CleanHtmlFormMixin, forms.ModelForm, CssClassMixin):
"""Base FormClass for a Motion.
On its own, it appends the version data to the fields.
@ -51,6 +52,12 @@ class BaseMotionForm(forms.ModelForm, CssClassMixin):
self.initial['reason'] = self.motion.reason
super(BaseMotionForm, self).__init__(*args, **kwargs)
def get_clean_html_fields(self):
    """Return the names of the form fields whose content must be cleaned.

    The fields 'text' and 'reason' contain HTML, clean them.
    """
    return ('text', 'reason')
class MotionSubmitterMixin(forms.ModelForm):
"""Mixin to append the submitter field to a MotionForm."""

View File

@ -75,13 +75,13 @@
<!-- Text -->
<h4>{% trans "Motion text" %}:</h4>
{{ motion.version.text|linebreaks }}
{{ motion.version.text|safe }}
<!-- Reason -->
<h4>{% trans "Reason" %}:</h4>
{% if motion.version.reason %}
{{ motion.version.reason|linebreaks }}
{{ motion.version.reason|safe }}
{% else %}
{% endif %}

View File

@ -23,7 +23,7 @@
bodyClass: "ckeditor_html",
'h1 h2 h3 pre blockquote strong em u strike;' +
'h1 h2 h3 pre blockquote b i u strike;' +
// A workaround for the problem described in
// Hopefully, the problem will be solved in the final version of CKEditor 4.1
@ -44,18 +44,21 @@
toolbar_Full: [
{ name: 'document', items : [ 'Source','-','Save','DocProps','Preview','Print','-','Templates' ] },
{ name: 'clipboard', items : [ 'Cut','Copy','Paste','PasteText','PasteFromWord','-','Undo','Redo' ] },
{ name: 'editing', items : [ 'Find','Replace','-','SelectAll','-','SpellChecker', 'Scayt' ] },
{ name: 'editing', items : [ 'Find','Replace','-','SpellChecker', 'Scayt' ] },
{ name: 'forms', items : [ 'Form', 'Checkbox', 'Radio', 'TextField', 'Textarea', 'Select', 'Button', 'ImageButton', 'HiddenField' ] },
{ name: 'basicstyles', items : [ 'Bold','Italic','Underline','Strike','Subscript','Superscript','-','RemoveFormat' ] },
{ name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Outdent','Indent','-','Blockquote','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] },
{ name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] },
{ name: 'links', items : [ 'Link','Unlink','Anchor' ] },
{ name: 'insert', items : [ 'Image','Flash','Table','HorizontalRule','Smiley','SpecialChar','PageBreak' ] },
{ name: 'styles', items : [ 'Format','Font','FontSize' ] },
{ name: 'colors', items : [ 'TextColor','BGColor' ] },
{ name: 'styles', items : [ 'Format','FontSize' ] },
{ name: 'tools', items : [ 'Maximize', 'ShowBlocks','-','About' ] }
toolbar: 'Full'
// Override the tags 'strong' and 'em' so that reportlab can read them
CKEDITOR.config.coreStyles_bold = { element : 'b', overrides : 'strong' };
CKEDITOR.config.coreStyles_italic = { element : 'i', overrides : 'em' };
CKEDITOR.replace('id_text', ck_options);
CKEDITOR.replace('id_reason', ck_options);

View File

@ -10,10 +10,39 @@
:license: GNU GPL, see LICENSE for more details.
import bleach
from django import forms
from django.views.generic.edit import FormMixin
from django.utils.translation import ugettext_lazy as _
# Tags, attributes and styles allowed in textareas edited with a JS
# editor. Everything not in these whitelists is stripped.
'*': ['style'],
'a': ['href'],
HTML_STYLES_WHITELIST = ('text-decoration',)
class CssClassMixin(object):
error_css_class = 'error'
required_css_class = 'required'
@ -35,3 +64,32 @@ class LocalizedModelMultipleChoiceField(forms.ModelMultipleChoiceField):
return c
choices = property(_localized_get_choices, forms.ChoiceField._set_choices)
class CleanHtmlFormMixin(FormMixin):
A form mixin that pre-processes the form, cleaning up the HTML code found
in the fields named by get_clean_html_fields(). All HTML tags, attributes and
styles not in the whitelists are stripped from the output, leaving only the
text content:
<table><tr><td>foo</td></tr></table> simply becomes 'foo'
def get_clean_html_fields(self):
Return the list of fields to strip of potentially malicious HTML
def clean(self):
cleaned_data = super(CleanHtmlFormMixin, self).clean()
for field in self.get_clean_html_fields():
cleaned_data[field] = bleach.clean(cleaned_data[field],
# Needed for reportlab
cleaned_data[field] = cleaned_data[field].replace('<br>', '</br>')
return cleaned_data

View File

@ -10,3 +10,4 @@ Fabric==1.6.0

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
Unit test for OpenSlides
:copyright: 2011, 2012, 2013 by the OpenSlides team, see AUTHORS.
:license: GNU GPL, see LICENSE for more details.
from django.test import TestCase
from django import forms
from django.db import models
from openslides.utils.forms import CleanHtmlFormMixin
from openslides.motion.models import Motion
class HtmlTestForm(CleanHtmlFormMixin, forms.Form):
    """Minimal form used to exercise CleanHtmlFormMixin.

    Both fields are plain character fields, but only 'text' is reported by
    get_clean_html_fields(); 'text2' acts as the control that is never
    passed through the HTML clean-up.
    """
    text = forms.CharField()
    text2 = forms.CharField()

    def get_clean_html_fields(self):
        """The field 'text' contains HTML, clean it."""
        return ('text',)
class CleanHtmlTest(TestCase):
def clean_html(self, dirty='', clean=False):
    """Run *dirty* through HtmlTestForm and check the cleaned result.

    dirty -- the raw HTML submitted to both the 'text' and 'text2' fields.
    clean -- the expected content of 'text' after cleaning. Leave it at
             the default (False) when nothing should be removed, in which
             case 'text' is expected to equal *dirty*.
    """
    form = HtmlTestForm({'text': dirty, 'text2': dirty})
    # cleaned_data is only populated once the form has been validated.
    self.assertTrue(form.is_valid(), form.errors)

    # Compare against the False sentinel explicitly so that an expected
    # result of '' (everything stripped) is still honoured.
    if clean is False:
        # No forbidden HTML-tags, nothing should change
        expected = dirty
    else:
        # Something was removed
        expected = clean
    self.assertEqual(form.cleaned_data['text'], expected)

    # Field text2 has the same content, but is never passed through the
    # HTML-cleanup and should never change
    self.assertEqual(form.cleaned_data['text2'], dirty)
def test_clean_html(self):
Test that the correct HTML tags and attributes are removed
# Forbidden tags and attributes
self.clean_html('<script>do_evil();</script>', 'do_evil();')
self.clean_html('<html>evil</html>', 'evil')
self.clean_html('<a href="">good?</a>', 'good?')
self.clean_html('<p href="">good?</p>', '<p>good?</p>')
self.clean_html('<p onclick="javascript:evil();">Not evil</p>', '<p>Not evil</p>')
self.clean_html('<div style="margin-top: 100000em;">evil</div>', 'evil')
self.clean_html('<p style="font-weight:bold;">bad</p>', '<p style="">bad</p>')
# Allowed tags and attributes
self.clean_html('<p style="text-decoration: underline;">OK</p>')