import bleach allowed_tags = [ 'a', 'img', # links and images 'br', 'p', 'span', 'blockquote', # text layout 'strike', 'strong', 'u', 'em', 'sup', 'sub', 'pre', # text formatting 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', # headings 'ol', 'ul', 'li', # lists 'table', 'caption', 'thead', 'tbody', 'th', 'tr', 'td', # tables ] allowed_attributes = { '*': ['class', 'style'], 'img': ['alt', 'src', 'title'], 'a': ['href', 'title'], 'th': ['scope'], 'ol': ['start'], } allowed_styles = [ 'color', 'background-color', 'height', 'width', 'text-align', 'float', 'padding' ] def validate_html(html: str) -> str: """ This method takes a string and escapes all non-whitelisted html entries. Every field of a model that is loaded trusted in the DOM should be validated. During copy and paste from Word maybe some tabs are spread over the html. Remove them. """ html = html.replace('\t', '') return bleach.clean( html, tags=allowed_tags, attributes=allowed_attributes, styles=allowed_styles)