From 01885c93047dcb9435c2935bb6f63714e009ac4a Mon Sep 17 00:00:00 2001 From: Roland Geider Date: Mon, 11 Mar 2013 22:38:39 +0100 Subject: [PATCH] Add first version of server-side filtering of HTML with bleach Fix formatting (pep8 and otherwise) Make the CleanHtml a mixin so it can be used easier, add a test case Mark HTML fields as 'safe' in the template Update list of allowed HTML tags, take special care for reportlab Add heading tags to white list Rename get_clean_html to get_clean_html_fields --- openslides/motion/forms.py | 9 ++- .../templates/motion/motion_detail.html | 4 +- .../motion/templates/motion/motion_form.html | 15 ++-- openslides/utils/forms.py | 58 ++++++++++++++++ requirements.txt | 1 + tests/forms/test_clean_html.py | 68 +++++++++++++++++++ 6 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 tests/forms/test_clean_html.py diff --git a/openslides/motion/forms.py b/openslides/motion/forms.py index 2ecb30258..3e3d39294 100644 --- a/openslides/motion/forms.py +++ b/openslides/motion/forms.py @@ -14,11 +14,12 @@ from django import forms from django.utils.translation import ugettext as _ from openslides.utils.forms import CssClassMixin +from openslides.utils.forms import CleanHtmlFormMixin from openslides.utils.person import PersonFormField, MultiplePersonFormField from .models import Motion, Category -class BaseMotionForm(forms.ModelForm, CssClassMixin): +class BaseMotionForm(CleanHtmlFormMixin, forms.ModelForm, CssClassMixin): """Base FormClass for a Motion. For it's own, it append the version data to the fields. @@ -51,6 +52,12 @@ class BaseMotionForm(forms.ModelForm, CssClassMixin): self.initial['reason'] = self.motion.reason super(BaseMotionForm, self).__init__(*args, **kwargs) + def get_clean_html_fields(self): + ''' + The fields 'text' and 'reason' contain HTML, clean them + ''' + return ('text', 'reason',) + class MotionSubmitterMixin(forms.ModelForm): """Mixin to append the submitter field to a MotionForm.""" diff --git a/openslides/motion/templates/motion/motion_detail.html b/openslides/motion/templates/motion/motion_detail.html index cda0ad03b..a03d51300 100644 --- a/openslides/motion/templates/motion/motion_detail.html +++ b/openslides/motion/templates/motion/motion_detail.html @@ -75,13 +75,13 @@

{% trans "Motion text" %}:

- {{ motion.version.text|linebreaks }} + {{ motion.version.text|safe }}

{% trans "Reason" %}:

{% if motion.version.reason %} - {{ motion.version.reason|linebreaks }} + {{ motion.version.reason|safe }} {% else %} – {% endif %} diff --git a/openslides/motion/templates/motion/motion_form.html b/openslides/motion/templates/motion/motion_form.html index 1bb699ed7..6fc7b71ce 100644 --- a/openslides/motion/templates/motion/motion_form.html +++ b/openslides/motion/templates/motion/motion_form.html @@ -23,7 +23,7 @@ bodyClass: "ckeditor_html", allowedContent: - 'h1 h2 h3 pre blockquote strong em u strike;' + + 'h1 h2 h3 pre blockquote b i u strike;' + // A workaround for the problem described in http://dev.ckeditor.com/ticket/10192 // Hopefully, the problem will be solved in the final version of CKEditor 4.1 @@ -44,18 +44,21 @@ toolbar_Full: [ { name: 'document', items : [ 'Source','-','Save','DocProps','Preview','Print','-','Templates' ] }, { name: 'clipboard', items : [ 'Cut','Copy','Paste','PasteText','PasteFromWord','-','Undo','Redo' ] }, - { name: 'editing', items : [ 'Find','Replace','-','SelectAll','-','SpellChecker', 'Scayt' ] }, + { name: 'editing', items : [ 'Find','Replace','-','SpellChecker', 'Scayt' ] }, { name: 'forms', items : [ 'Form', 'Checkbox', 'Radio', 'TextField', 'Textarea', 'Select', 'Button', 'ImageButton', 'HiddenField' ] }, { name: 'basicstyles', items : [ 'Bold','Italic','Underline','Strike','Subscript','Superscript','-','RemoveFormat' ] }, - { name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Outdent','Indent','-','Blockquote','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] }, + { name: 'paragraph', items : [ 'NumberedList','BulletedList','-','Pre','InsertPre','CreateDiv','-','JustifyLeft','JustifyCenter','JustifyRight','JustifyBlock','-','BidiLtr','BidiRtl' ] }, { name: 'links', items : [ 'Link','Unlink','Anchor' ] }, - { name: 'insert', items : [ 'Image','Flash','Table','HorizontalRule','Smiley','SpecialChar','PageBreak' ] }, - { name: 'styles', items : [ 'Format','Font','FontSize' ] }, - { name: 'colors', items : [ 'TextColor','BGColor' ] }, + { name: 'styles', items : [ 'Format','FontSize' ] }, { name: 'tools', items : [ 'Maximize', 'ShowBlocks','-','About' ] } ], toolbar: 'Full' }; + + // Override the tags 'strong' and 'em' so that reportlab can read it + CKEDITOR.config.coreStyles_bold = { element : 'b', overrides : 'strong' }; + CKEDITOR.config.coreStyles_italic = { element : 'i', overrides : 'em' }; + CKEDITOR.replace('id_text', ck_options); CKEDITOR.replace('id_reason', ck_options); }); diff --git a/openslides/utils/forms.py b/openslides/utils/forms.py index 356ca4824..9cad9ae11 100644 --- a/openslides/utils/forms.py +++ b/openslides/utils/forms.py @@ -10,10 +10,39 @@ :license: GNU GPL, see LICENSE for more details. """ +import bleach + from django import forms +from django.views.generic.edit import FormMixin from django.utils.translation import ugettext_lazy as _ +# Allowed tags, attributes and styles allowed in textareas edited with a JS +# editor. Everything not in these whitelists is stripped. +HTML_TAG_WHITELIST = ('a', + 'i', + 'b', + 'ul', + 'ol', + 'li', + 'p', + 'br', + 'span', + 'strike', + 'u', + 'pre', + 'h1', + 'h2', + 'h3',) + +HTML_ATTRIBUTES_WHITELIST = { + '*': ['style'], + 'a': ['href'], +} + +HTML_STYLES_WHITELIST = ('text-decoration',) + + class CssClassMixin(object): error_css_class = 'error' required_css_class = 'required' @@ -35,3 +64,32 @@ class LocalizedModelMultipleChoiceField(forms.ModelMultipleChoiceField): return c choices = property(_localized_get_choices, forms.ChoiceField._set_choices) + + +class CleanHtmlFormMixin(FormMixin): + ''' + A form mixin that pre-processes the form, cleaning up the HTML code found + in the fields in clean_html. All HTML tags, attributes and styles not in the + whitelists are stripped from the output, leaving only the text content: + +
foo
simply becomes 'foo' + ''' + + def get_clean_html_fields(self): + ''' + the list of elements to strip of potential malicious HTML + ''' + return() + + def clean(self): + cleaned_data = super(CleanHtmlFormMixin, self).clean() + for field in self.get_clean_html_fields(): + cleaned_data[field] = bleach.clean(cleaned_data[field], + tags=HTML_TAG_WHITELIST, + attributes=HTML_ATTRIBUTES_WHITELIST, + styles=HTML_STYLES_WHITELIST, + strip=True) + + # Needed for reportlab + cleaned_data[field] = cleaned_data[field].replace('
', '
') + return cleaned_data diff --git a/requirements.txt b/requirements.txt index 6b8b536c5..794e1e3e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ Fabric==1.6.0 coverage==3.6 django-discover-runner==0.3 pep8==1.4.5 +bleach diff --git a/tests/forms/test_clean_html.py b/tests/forms/test_clean_html.py new file mode 100644 index 000000000..d63c4ef91 --- /dev/null +++ b/tests/forms/test_clean_html.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Unit test for OpenSlides __init__.py + + :copyright: 2011, 2012, 2013 by the OpenSlides team, see AUTHORS. + :license: GNU GPL, see LICENSE for more details. +""" + +from django.test import TestCase +from django import forms +from django.db import models + +from openslides.utils.forms import CleanHtmlFormMixin +from openslides.motion.models import Motion + + +class HtmlTestForm(CleanHtmlFormMixin, forms.Form): + text = forms.CharField() + text2 = forms.CharField() + + def get_clean_html_fields(self): + ''' + The field 'text' contains HTML, clean it + ''' + return ('text', ) + + +class CleanHtmlTest(TestCase): + + def clean_html(self, dirty='', clean=False): + form = HtmlTestForm({'text': dirty, 'text2': dirty}) + form.is_valid() + + # No forbidden HTML-tags, nothing should change + if not clean: + self.assertEqual(form.cleaned_data['text'], dirty) + + # Something was removed + else: + self.assertEqual(form.cleaned_data['text'], cleaned) + + # Field text2 has the same content, but is never passed through the + # HTML-cleanup and should never change + self.assertEqual(form.cleaned_data['text2'], dirty) + + def test_clean_html(self): + ''' + Test that the correct HTML tags and attributes are removed + ''' + + # Forbidden tags and attributes + self.clean_html('', 'do_evil();') + self.clean_html('evil', 'evil') + self.clean_html('good?', 'good?') + self.clean_html('

good?

', '

good?

') + self.clean_html('

Not evil

', '

Not evil

') + self.clean_html('
evil
', 'evil') + self.clean_html('

bad

', '

bad

') + + # Allowed tags and attributes + self.clean_html('

OK

') + self.clean_html('
OK
') + self.clean_html('

OK

') + self.clean_html('
OK
') + self.clean_html('
OK
') + self.clean_html('') + self.clean_html('

OK

') \ No newline at end of file