Add first version of server-side filtering of HTML with bleach

Fix formatting (pep8 and otherwise)

Make the CleanHtml a mixin so it can be used easier, add a test case

Mark HTML fields as 'safe' in the template

Update list of allowed HTML tags, take special care for reportlab

Add heading tags to white list

Rename get_clean_html to get_clean_html_fields
This commit is contained in:
Roland Geider 2013-03-11 22:38:39 +01:00 committed by Oskar Hahn
parent ce333d4c40
commit 01885c9304
6 changed files with 146 additions and 9 deletions

View File

@ -14,11 +14,12 @@ from django import forms
from django.utils.translation import ugettext as _
from openslides.utils.forms import CssClassMixin
from openslides.utils.forms import CleanHtmlFormMixin
from openslides.utils.person import PersonFormField, MultiplePersonFormField
from .models import Motion, Category
class BaseMotionForm(forms.ModelForm, CssClassMixin):
class BaseMotionForm(CleanHtmlFormMixin, forms.ModelForm, CssClassMixin):
"""Base FormClass for a Motion.
For it's own, it append the version data to the fields.
@ -51,6 +52,12 @@ class BaseMotionForm(forms.ModelForm, CssClassMixin):
self.initial['reason'] = self.motion.reason
super(BaseMotionForm, self).__init__(*args, **kwargs)
def get_clean_html_fields(self):
'''
The fields 'text' and 'reason' contain HTML, clean them
'''
return ('text', 'reason',)
class MotionSubmitterMixin(forms.ModelForm):
"""Mixin to append the submitter field to a MotionForm."""

View File

@ -75,13 +75,13 @@
<!-- Text -->
<h4>{% trans "Motion text" %}:</h4>
{{ motion.version.text|linebreaks }}
{{ motion.version.text|safe }}
<br>
<!-- Reason -->
<h4>{% trans "Reason" %}:</h4>
{% if motion.version.reason %}
{{ motion.version.reason|linebreaks }}
{{ motion.version.reason|safe }}
{% else %}
{% endif %}

View File

@ -23,7 +23,7 @@
bodyClass: "ckeditor_html",
allowedContent:
'h1 h2 h3 pre blockquote strong em u strike;' +
'h1 h2 h3 pre blockquote b i u strike;' +
// A workaround for the problem described in http://dev.ckeditor.com/ticket/10192
// Hopefully, the problem will be solved in the final version of CKEditor 4.1
@ -44,18 +44,21 @@
toolbar_Full: [
{ name: 'document', items : [ 'Source','-','Save','DocProps','Preview','Print','-','Templates' ] },
{ name: 'clipboard', items : [ 'Cut','Copy','Paste','PasteText','PasteFromWord','-','Undo','Redo' ] },
{ name: 'editing', items : [ 'Find','Replace','-','SelectAll','-','SpellChecker', 'Scayt' ] },
{ name: 'editing', items : [ 'Find','Replace','-','SpellChecker', 'Scayt' ] },
{ name: 'forms', items : [ 'Form', 'Checkbox', 'Radio', 'TextField', 'Textarea', 'Select', 'Button', 'ImageButton', 'HiddenField' ] },
{ name: 'basicstyles', items : [ 'Bold','Italic','Underline','Strike','Subscript','Superscript','-','RemoveFormat' ] },
{ name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Outdent','Indent','-','Blockquote','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] },
{ name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] },
{ name: 'links', items : [ 'Link','Unlink','Anchor' ] },
{ name: 'insert', items : [ 'Image','Flash','Table','HorizontalRule','Smiley','SpecialChar','PageBreak' ] },
{ name: 'styles', items : [ 'Format','Font','FontSize' ] },
{ name: 'colors', items : [ 'TextColor','BGColor' ] },
{ name: 'styles', items : [ 'Format','FontSize' ] },
{ name: 'tools', items : [ 'Maximize', 'ShowBlocks','-','About' ] }
],
toolbar: 'Full'
};
// Override the tags 'strong' and 'em' so that reportlab can read it
CKEDITOR.config.coreStyles_bold = { element : 'b', overrides : 'strong' };
CKEDITOR.config.coreStyles_italic = { element : 'i', overrides : 'em' };
CKEDITOR.replace('id_text', ck_options);
CKEDITOR.replace('id_reason', ck_options);
});

View File

@ -10,10 +10,39 @@
:license: GNU GPL, see LICENSE for more details.
"""
import bleach
from django import forms
from django.views.generic.edit import FormMixin
from django.utils.translation import ugettext_lazy as _
# Allowed tags, attributes and styles allowed in textareas edited with a JS
# editor. Everything not in these whitelists is stripped.
HTML_TAG_WHITELIST = ('a',
'i',
'b',
'ul',
'ol',
'li',
'p',
'br',
'span',
'strike',
'u',
'pre',
'h1',
'h2',
'h3',)
HTML_ATTRIBUTES_WHITELIST = {
'*': ['style'],
'a': ['href'],
}
HTML_STYLES_WHITELIST = ('text-decoration',)
class CssClassMixin(object):
error_css_class = 'error'
required_css_class = 'required'
@ -35,3 +64,32 @@ class LocalizedModelMultipleChoiceField(forms.ModelMultipleChoiceField):
return c
choices = property(_localized_get_choices, forms.ChoiceField._set_choices)
class CleanHtmlFormMixin(FormMixin):
'''
A form mixin that pre-processes the form, cleaning up the HTML code found
in the fields in clean_html. All HTML tags, attributes and styles not in the
whitelists are stripped from the output, leaving only the text content:
<table><tr><td>foo</td></tr></table> simply becomes 'foo'
'''
def get_clean_html_fields(self):
'''
the list of elements to strip of potential malicious HTML
'''
return()
def clean(self):
cleaned_data = super(CleanHtmlFormMixin, self).clean()
for field in self.get_clean_html_fields():
cleaned_data[field] = bleach.clean(cleaned_data[field],
tags=HTML_TAG_WHITELIST,
attributes=HTML_ATTRIBUTES_WHITELIST,
styles=HTML_STYLES_WHITELIST,
strip=True)
# Needed for reportlab
cleaned_data[field] = cleaned_data[field].replace('<br>', '</br>')
return cleaned_data

View File

@ -10,3 +10,4 @@ Fabric==1.6.0
coverage==3.6
django-discover-runner==0.3
pep8==1.4.5
bleach

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Unit test for OpenSlides __init__.py
:copyright: 2011, 2012, 2013 by the OpenSlides team, see AUTHORS.
:license: GNU GPL, see LICENSE for more details.
"""
from django.test import TestCase
from django import forms
from django.db import models
from openslides.utils.forms import CleanHtmlFormMixin
from openslides.motion.models import Motion
class HtmlTestForm(CleanHtmlFormMixin, forms.Form):
text = forms.CharField()
text2 = forms.CharField()
def get_clean_html_fields(self):
'''
The field 'text' contains HTML, clean it
'''
return ('text', )
class CleanHtmlTest(TestCase):
def clean_html(self, dirty='', clean=False):
form = HtmlTestForm({'text': dirty, 'text2': dirty})
form.is_valid()
# No forbidden HTML-tags, nothing should change
if not clean:
self.assertEqual(form.cleaned_data['text'], dirty)
# Something was removed
else:
self.assertEqual(form.cleaned_data['text'], cleaned)
# Field text2 has the same content, but is never passed through the
# HTML-cleanup and should never change
self.assertEqual(form.cleaned_data['text2'], dirty)
def test_clean_html(self):
'''
Test that the correct HTML tags and attributes are removed
'''
# Forbidden tags and attributes
self.clean_html('<script>do_evil();</script>', 'do_evil();')
self.clean_html('<html>evil</html>', 'evil')
self.clean_html('<a href="evil.com">good?</a>', 'good?')
self.clean_html('<p href="evil.com">good?</p>', '<p>good?</p>')
self.clean_html('<p onclick="javascript:evil();">Not evil</p>', '<p>Not evil</p>')
self.clean_html('<div style="margin-top: 100000em;">evil</div>', 'evil')
self.clean_html('<p style="font-weight:bold;">bad</p>', '<p style="">bad</p>')
# Allowed tags and attributes
self.clean_html('<p>OK</p>')
self.clean_html('<table><tbody><tr><td>OK</td></tr></tbody></table>')
self.clean_html('<p><strong>OK</strong></p>')
self.clean_html('<pre>OK</pre>')
self.clean_html('<blockquote>OK</blockquote>')
self.clean_html('<ul><li>OK</li></ul>')
self.clean_html('<p style="text-decoration: underline;">OK</p>')