Added html support for motion text.

Use the HTML parser 'beautifulsoup' to replace HTML tags which are not supported by reportlab PDF.
'pip install bs4' required.

Fixed nested ol lists and unicode warning.

New requirement: beautifulsoup4

HTML support for reportlab.

Moved the HTML parsing code into a new function. Added missing HTML support in PDF for motion reason.

Added version number for bleach and beautifulsoup4. Moved to core requirements block.

Added bs4, bleach and html5lib requirements to portable build script. Updated portable readme for 1.4.
This commit is contained in:
Emanuel Schuetze 2013-04-07 19:46:28 +02:00 committed by Oskar Hahn
parent 01885c9304
commit 1d1f5fa218
5 changed files with 142 additions and 7 deletions

View File

@ -4,7 +4,7 @@ How to create a new portable Windows distribution of OpenSlides:
1.) Follow the OpenSlides installation instructions for windows, 1.) Follow the OpenSlides installation instructions for windows,
but add the option "-Z" when executing easy_install, e.g.: but add the option "-Z" when executing easy_install, e.g.:
easy_install -Z django django-mptt reportlab pil easy_install -Z django django-mptt beautifulsoup4 bleach pillow qrcode reportlab tornado
2.) Run in the main directory of the OpenSlides checkout: 2.) Run in the main directory of the OpenSlides checkout:
@ -14,5 +14,5 @@ How to create a new portable Windows distribution of OpenSlides:
in the 'dist' directory in the 'dist' directory
NOTE: Creating the portable Windows distribution of OpenSlides is not possible, NOTE: Creating the portable Windows distribution of OpenSlides is not possible
if Python is installed in 64-bit version. if Python is installed in 64-bit(!) version.

View File

@ -90,6 +90,15 @@ SITE_PACKAGES = {
}, },
"qrcode": { "qrcode": {
"copy": ["qrcode"], "copy": ["qrcode"],
},
"beautifulsoup4": {
"copy": ["bs4"],
},
"bleach": {
"copy": ["bleach"],
},
"html5lib": {
"copy": ["html5lib"],
} }
} }

View File

@ -10,6 +10,9 @@
:license: GNU GPL, see LICENSE for more details. :license: GNU GPL, see LICENSE for more details.
""" """
import random
from bs4 import BeautifulSoup
from reportlab.lib import colors from reportlab.lib import colors
from reportlab.lib.units import cm from reportlab.lib.units import cm
from reportlab.platypus import ( from reportlab.platypus import (
@ -147,14 +150,89 @@ def motion_to_pdf(pdf, motion):
pdf.append(table) pdf.append(table)
pdf.append(Spacer(0, 1 * cm)) pdf.append(Spacer(0, 1 * cm))
# motion title
pdf.append(Paragraph(motion.title, stylesheet['Heading3'])) pdf.append(Paragraph(motion.title, stylesheet['Heading3']))
pdf.append(Paragraph(motion.text.replace('\r\n', '<br/>'), stylesheet['Paragraph']))
# motion text
convert_html_to_reportlab(pdf, motion.text)
pdf.append(Spacer(0, 1 * cm))
# motion reason
if motion.reason: if motion.reason:
pdf.append(Paragraph(_("Reason:"), stylesheet['Heading3'])) pdf.append(Paragraph(_("Reason:"), stylesheet['Heading3']))
pdf.append(Paragraph(motion.reason.replace('\r\n', '<br/>'), stylesheet['Paragraph'])) convert_html_to_reportlab(pdf, motion.reason)
return pdf return pdf
def convert_html_to_reportlab(pdf, text):
    """Append reportlab paragraphs for the HTML fragment ``text`` to ``pdf``.

    The fragment is parsed with BeautifulSoup.  List markup is rewritten
    into reportlab paragraph markup before rendering:

    * ``<ul>`` items become ``<para><bullet>&bull;</bullet>...</para>``
    * ``<ol>`` items become
      ``<para><bullet><seq id="..."></seq>.</bullet>...</para>``

    The ``<ul>``, ``<ol>`` and remaining ``<li>`` tags are then unwrapped,
    and every top-level node is appended to ``pdf`` as a ``Paragraph``
    using one of the ``Inner*`` styles of ``stylesheet``.  ``<pre>`` blocks
    and ordinary paragraphs additionally get a running paragraph number
    as bullet text.

    :param pdf: list-like story object; only ``pdf.append`` is used.
    :param text: HTML fragment as a string.  Empty or ``None`` input
        appends nothing.
    :returns: ``None``; ``pdf`` is modified in place.
    """
    if not text:
        # Nothing to render.  Without this guard an empty document can
        # leave soup.body as None and crash further down.
        return

    # No parser is named here, so bs4 picks the best one that is installed.
    soup = BeautifulSoup(text)

    # One reportlab sequence counter per <ol> tag, keyed by the identity of
    # the list tag.  Sequence counters are looked up by id when the
    # paragraph is rendered, so every list needs an id that no other list
    # uses; otherwise its numbering would continue instead of restarting.
    seq_ids = {}

    for element in soup.find_all('li'):
        try:
            parent_name = element.parent.name
            if parent_name == "ul":
                if element.ul:
                    # Nested ul list: indent with plain spaces (pragmatic
                    # solution).
                    # NOTE(review): insert_before() receives the whole
                    # find_all() result rather than single tags -- confirm
                    # that this produces the intended markup.
                    element.li.insert(0, '&nbsp;&nbsp;&nbsp;&nbsp;')
                    element.insert_before(element.find_all('li'))
                    element.clear()
                else:
                    element.name = "para"
                    bullet_tag = soup.new_tag("bullet")
                    bullet_tag.string = "&bull;"
                    element.insert(0, bullet_tag)
            elif parent_name == "ol":
                if element.ol:
                    # Nested ol list: same pragmatic handling as for ul.
                    element.li.insert(0, '&nbsp;&nbsp;&nbsp;&nbsp;')
                    element.insert_before(element.find_all('li'))
                    element.clear()
                else:
                    list_key = id(element.parent)
                    if list_key not in seq_ids:
                        # 64 random bits make a clash between two lists of
                        # the same document practically impossible.
                        seq_ids[list_key] = 'ol%x' % random.getrandbits(64)
                    element.name = "para"
                    seq_tag = soup.new_tag("seq")
                    seq_tag['id'] = seq_ids[list_key]
                    bullet_tag = soup.new_tag("bullet")
                    bullet_tag.append(seq_tag)
                    bullet_tag.append(".")
                    element.insert(0, bullet_tag)
        except AttributeError:
            # Deliberate best effort: an item without a parent or without
            # the expected nested structure is left untouched.
            continue

    # Remove tags which are not supported by reportlab (each tag is
    # replaced by its children).
    for tag_name in ('ul', 'ol', 'li'):
        for tag in soup.find_all(tag_name):
            tag.unwrap()

    # Some parsers (e.g. html.parser) do not wrap a fragment in <body>;
    # fall back to the top-level nodes of the document in that case.
    container = soup.body if soup.body is not None else soup

    # Print paragraphs with numbers.
    paragraph_number = 1
    for node in container.contents:
        paragraph = str(node)
        # Ignore empty paragraphs (created by newlines/tabs of ckeditor).
        if not paragraph.strip():
            continue
        if "<pre>" in paragraph:
            pdf.append(Paragraph(
                paragraph.replace('\n', '<br/>'),
                stylesheet['InnerMonotypeParagraph'],
                str(paragraph_number)))
            paragraph_number += 1
        elif "<para>" in paragraph:
            # List items carry their own bullet and are not numbered.
            pdf.append(Paragraph(paragraph, stylesheet['InnerListParagraph']))
        elif "<seqreset" in paragraph:
            # Sequence resets produce no output.
            pass
        elif "<h1>" in paragraph:
            pdf.append(Paragraph(paragraph, stylesheet['InnerH1Paragraph']))
        elif "<h2>" in paragraph:
            pdf.append(Paragraph(paragraph, stylesheet['InnerH2Paragraph']))
        elif "<h3>" in paragraph:
            pdf.append(Paragraph(paragraph, stylesheet['InnerH3Paragraph']))
        else:
            pdf.append(Paragraph(
                paragraph,
                stylesheet['InnerParagraph'],
                str(paragraph_number)))
            paragraph_number += 1
def all_motion_cover(pdf, motions): def all_motion_cover(pdf, motions):
"""Create a coverpage for all motions.""" """Create a coverpage for all motions."""
pdf.append(Paragraph(config["motion_pdf_title"], stylesheet['Heading1'])) pdf.append(Paragraph(config["motion_pdf_title"], stylesheet['Heading1']))

View File

@ -13,6 +13,7 @@
from datetime import datetime from datetime import datetime
from os.path import join as path_join from os.path import join as path_join
from reportlab.lib import colors
from reportlab.lib.styles import StyleSheet1, ParagraphStyle from reportlab.lib.styles import StyleSheet1, ParagraphStyle
from reportlab.lib.units import cm from reportlab.lib.units import cm
from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase import pdfmetrics
@ -44,7 +45,7 @@ PAGE_WIDTH = defaultPageSize[0]
stylesheet = StyleSheet1() stylesheet = StyleSheet1()
stylesheet.add(ParagraphStyle( stylesheet.add(ParagraphStyle(
name='Normal', name='Normal',
fontName='Ubuntu', #fontName='Ubuntu',
fontSize=10, fontSize=10,
leading=12, leading=12,
)) ))
@ -54,6 +55,51 @@ stylesheet.add(ParagraphStyle(
leading=14, leading=14,
spaceAfter=15 spaceAfter=15
)) ))
# Styles used by convert_html_to_reportlab() for motion text and reason.
# Each style below names an earlier one as its parent, so the order of
# these registrations matters.

# Ordinary paragraph.  convert_html_to_reportlab() passes the running
# paragraph number as bullet text; the bullet* values here set a small
# grey bullet at a negative indent.
stylesheet.add(ParagraphStyle(
name='InnerParagraph',
parent=stylesheet['Normal'],
leading=14,
spaceBefore=5,
spaceAfter=5,
bulletIndent=-15,
bulletFontSize=8,
bulletColor=colors.grey
))
# List item (<para> produced from <ul>/<ol> items): indented text with a
# black, normal-size bullet.
stylesheet.add(ParagraphStyle(
name='InnerListParagraph',
parent=stylesheet['InnerParagraph'],
bulletIndent=10,
bulletFontSize=10,
bulletColor=colors.black,
leftIndent = 30
))
# Preformatted text (<pre>): same as InnerParagraph but in Courier.
stylesheet.add(ParagraphStyle(
name='InnerMonotypeParagraph',
parent=stylesheet['InnerParagraph'],
fontName='Courier',
))
# Heading level 1 (<h1>) inside motion text.
stylesheet.add(ParagraphStyle(
name='InnerH1Paragraph',
parent=stylesheet['InnerParagraph'],
fontName='Ubuntu-Bold',
fontSize=16,
spaceBefore=20,
spaceAfter=10,
))
# Heading level 2 (<h2>): based on the H1 style with a smaller font size.
stylesheet.add(ParagraphStyle(
name='InnerH2Paragraph',
parent=stylesheet['InnerH1Paragraph'],
fontSize=12,
spaceBefore=20,
spaceAfter=10,
))
# Heading level 3 (<h3>): based on the H2 style with body-size font and
# tighter spacing.
stylesheet.add(ParagraphStyle(
name='InnerH3Paragraph',
parent=stylesheet['InnerH2Paragraph'],
fontSize=10,
spaceBefore=15,
spaceAfter=5,
))
stylesheet.add(ParagraphStyle( stylesheet.add(ParagraphStyle(
name='Small', name='Small',
parent=stylesheet['Normal'], parent=stylesheet['Normal'],

View File

@ -1,5 +1,8 @@
# Requirements for OpenSlides Core
Django==1.5.1 Django==1.5.1
django-mptt==0.5.5 django-mptt==0.5.5
beautifulsoup4==4.1.3
bleach==1.2.1
pillow==2.0.0 pillow==2.0.0
qrcode==2.7 qrcode==2.7
reportlab==2.7 reportlab==2.7
@ -10,4 +13,3 @@ Fabric==1.6.0
coverage==3.6 coverage==3.6
django-discover-runner==0.3 django-discover-runner==0.3
pep8==1.4.5 pep8==1.4.5
bleach