import { Injectable } from '@angular/core';

import { LinenumberingService } from './linenumbering.service';
import { ViewMotion } from '../models/view-motion';
import { ViewUnifiedChange } from '../models/view-unified-change';

// Numeric values compared against `Node.nodeType` throughout this file.
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;
const DOCUMENT_FRAGMENT_NODE = 11;

/**
 * Indicates the type of a modification when comparing ("diff"ing) two versions of a text.
 * - TYPE_INSERTION indicates an insertion. An insertion is when the new version of a text contains a certain string
 *   that did not exist in the original version of the text.
 * - TYPE_DELETION indicates a deletion. A deletion is when the new version of a text does not contain a certain
 *   string contained in the original version of the text anymore.
 * - TYPE_REPLACEMENT indicates both of the above: the new version of the text contains text not present in the
 *   original version, but also removes some parts of that text.
 *
 * This enumeration is used when _automatically_ detecting the change type of an amendment / change recommendation.
 */
export enum ModificationType {
    TYPE_REPLACEMENT,
    TYPE_INSERTION,
    TYPE_DELETION
}

/**
 * This data structure is used when determining the most specific common ancestor of two HTML nodes
 * (`node1` and `node2`) within the same Document Fragment.
 */
interface CommonAncestorData {
    /**
     * The most specific common ancestor node.
     */
    commonAncestor: Node;
    /**
     * The nodes in between `commonAncestor` and `node1` in the DOM hierarchy, listed from the ancestor's side
     * downwards.
     */
    trace1: Node[];
    /**
     * The nodes in between `commonAncestor` and `node2` in the DOM hierarchy, listed from the ancestor's side
     * downwards.
     */
    trace2: Node[];
    /**
     * The depth level of `commonAncestor`, counted from the root node (which has index 0).
     */
    index: number;
}

/**
 * An object produced by `extractRangeByLineNumbers``. It contains both the extracted lines as well as
 * information about the context in which these lines occur.
* This additional information is meant to render the snippet correctly without producing broken HTML
 */
interface ExtractedContent {
    /**
     * The HTML between the two line numbers. Line numbers and automatically set line breaks are stripped.
     * All HTML tags are converted to uppercase.
     */
    html: string;
    /**
     * The most specific DOM element that contains the HTML snippet (e.g. a UL, if several LIs are selected)
     */
    ancestor: Node;
    /**
     * An HTML string that opens all necessary tags to get the browser into the rendering mode
     * of the ancestor element (e.g. the opening tag of the list in the case of multiple LIs)
     */
    outerContextStart: string;
    /**
     * An HTML string that closes all necessary tags from the ancestor element
     * (the counterpart of `outerContextStart`)
     */
    outerContextEnd: string;
    /**
     * A string that opens all necessary tags between the ancestor and the beginning of the selection
     */
    innerContextStart: string;
    /**
     * A string that closes all tags after the end of the selection to the ancestor
     * (the counterpart of `innerContextStart`)
     */
    innerContextEnd: string;
    /**
     * The HTML before the selected area begins (including line numbers)
     */
    previousHtml: string;
    /**
     * An HTML snippet that closes all open tags from `previousHtml`
     */
    previousHtmlEndSnippet: string;
    /**
     * The HTML after the selected area
     */
    followingHtml: string;
    /**
     * An HTML snippet that opens all HTML tags necessary to render `followingHtml`
     */
    followingHtmlStartSnippet: string;
}

/**
 * An object specifying a range of line numbers.
 */
export interface LineRange {
    /**
     * The first line number to be included.
     */
    from: number;
    /**
     * The end line number.
     * HINT: As this object is usually referring to actual line numbers, not lines,
     * the line starting at `to` is not included in the extracted content anymore, only the text between
     * `from` and `to`.
     */
    to: number;
}

/**
 * An object representing a paragraph with some changed lines
 */
export interface DiffLinesInParagraph {
    /**
     * The paragraph number
     */
    paragraphNo: number;
    /**
     * The first line of the paragraph
     */
    paragraphLineFrom: number;
    /**
     * The end line number (after the paragraph)
     */
    paragraphLineTo: number;
    /**
     * The first line number with changes
     */
    diffLineFrom: number;
    /**
     * The line number after the last change
     */
    diffLineTo: number;
    /**
     * The HTML of the unchanged lines before the changed ones
     */
    textPre: string;
    /**
     * The HTML of the changed lines
     */
    text: string;
    /**
     * The HTML of the unchanged lines after the changed ones
     */
    textPost: string;
}

/**
 * Functionality regarding diffing, merging and extracting line ranges.
 *
 * ## Examples
 *
 * Cleaning up a string generated by CKEditor:
 *
 * ```ts
 * this.diffService.removeDuplicateClassesInsertedByCkeditor(motion.text)
 * ```
 *
 * Extracting a range specified by line numbers from a motion text:
 *
 * ```ts
 * const lineLength = 80;
 * const lineNumberedText = this.lineNumbering.insertLineNumbers('

A line

Another line

A list item
Yet another item

', lineLength); * const extractFrom = 2; * const extractUntil = 3; * const extractedData = this.diffService.extractRangeByLineNumbers(lineNumberedText, extractFrom, extractUntil) * ``` * * Creating a valid HTML from such a extracted text, including line numbers: * * ```ts * const extractedHtml = this.diffService.formatDiffWithLineNumbers(extractedData, lineLength, extractFrom); * ``` * * Creating the diff between two html strings: * * ```ts * const before = '

Lorem ipsum dolor sit amet, sed diam voluptua. At

'; * const beforeLineNumbered = this.lineNumbering.insertLineNumbers(before, 80) * const after = '

Lorem ipsum dolor sit amet, sed diam voluptua. At2

'; * const diff = this.diffService.diff(before, after); * ```ts * * Given a (line numbered) diff string, detect the line number range with changes: * * ```ts * this.diffService.detectAffectedLineRange(diff); * ``` * * Given a diff'ed string, apply all changes to receive the new version of the text: * * ```ts * const diffedHtml = '

Test Test 2 Another test ~~Test 3~~

Test 4

'; * const newVersion = this.diffService.diffHtmlToFinalText(diffedHtml); * ``` * * Replace a line number range in a text by new text: * * ```ts * const lineLength = 80; * const lineNumberedText = this.lineNumbering.insertLineNumbers('

A line

Another line

A list item
Yet another item

', lineLength); * const merged = this.diffService.replaceLines(lineNumberedText, '

Replaced paragraph

', 1, 2);
 * ```
 */
@Injectable({
    providedIn: 'root'
})
export class DiffService {
    // @TODO Decide on a more sophisticated implementation
    /**
     * Minimal in-memory key/value cache. `get` returns `null` for keys that were never `put`.
     * The backing store has no prototype, so keys that happen to equal an `Object.prototype` member
     * (e.g. "constructor" or "toString") are not mistaken for cached values.
     */
    private diffCache = {
        _cache: Object.create(null) as { [key: string]: any },
        get: (key: string): any => {
            const cached = this.diffCache._cache[key];
            return cached === undefined ? null : cached;
        },
        put: (key: string, val: any): void => {
            this.diffCache._cache[key] = val;
        }
    };

    /**
     * Creates the DiffService.
     *
     * @param {LinenumberingService} lineNumberingService
     */
    public constructor(private readonly lineNumberingService: LinenumberingService) {}

    /**
     * Searches for the line breaking node within the given Document specified by the given lineNumber.
     * This is performed by using a querySelector, so `null` is returned if no such node exists.
     *
     * @param {DocumentFragment} fragment
     * @param {number} lineNumber
     * @returns {Element}
     */
    public getLineNumberNode(fragment: DocumentFragment, lineNumber: number): Element {
        return fragment.querySelector('os-linebreak.os-line-number.line-number-' + lineNumber);
    }

    /**
     * This returns the first line breaking node within the given node.
     * If the node itself is a line breaking node, it is returned.
     * If none is found (or the node is a text node), `null` is returned.
     *
     * @param {Node} node
     * @returns {Element}
     */
    private getFirstLineNumberNode(node: Node): Element {
        if (node.nodeType === TEXT_NODE) {
            return null;
        }
        // Text nodes are ruled out above; the node is treated as an element from here on,
        // as the query API below is not available on plain `Node`s.
        const element = node as Element;
        if (element.nodeName === 'OS-LINEBREAK') {
            return element;
        }
        // querySelector yields the first match in document order, or null if there is none
        return element.querySelector('OS-LINEBREAK');
    }

    /**
     * This returns the last line breaking node within the given node.
     * If none is found, `null` is returned.
*
     * @param {Node} node
     * @returns {Element}
     */
    private getLastLineNumberNode(node: Node): Element {
        if (node.nodeType === TEXT_NODE) {
            return null;
        }
        // Text nodes are ruled out above; the query API below is not available on plain `Node`s.
        const element = node as Element;
        if (element.nodeName === 'OS-LINEBREAK') {
            return element;
        }
        const found = element.querySelectorAll('OS-LINEBREAK');
        return found.length > 0 ? found.item(found.length - 1) : null;
    }

    /**
     * Given a node, this method returns the path from the topmost ancestor down to the node:
     * the first entry is the root of the tree, the last entry is the given node itself.
     *
     * @param {Node} node
     * @returns {Node[]}
     */
    private getNodeContextTrace(node: Node): Node[] {
        const context: Node[] = [];
        for (let currNode = node; currNode; currNode = currNode.parentNode) {
            context.unshift(currNode);
        }
        return context;
    }

    /**
     * This method checks if the given `child`-Node is the first non-empty child element of the given parent Node
     * called `node`. Hence the name of this method.
     * Text nodes consisting only of whitespace in front of `child` are ignored.
     *
     * @param node
     * @param child
     */
    private isFirstNonemptyChild(node: Node, child: Node): boolean {
        for (let i = 0; i < node.childNodes.length; i++) {
            const current = node.childNodes[i];
            if (current === child) {
                return true;
            }
            // Any element or non-blank text in front of `child` means it is not the first one
            if (current.nodeType !== TEXT_NODE || current.nodeValue.match(/\S/)) {
                return false;
            }
        }
        return false;
    }

    /**
     * Inserts an OS-LINEBREAK marker element in front of every `span.os-line-number` of the given fragment.
     * Each marker carries the `data-line-number` and `class` attributes of the line number span it belongs to.
     * If the span is the first non-empty child of its parent(s), the marker is moved up in front of the
     * outermost such parent. Additionally, one marker with line number 0 is put at the very beginning and one
     * with the line number following the last one at the very end of the fragment.
     *
     * The fragment is modified in place. Calling this method again on the same fragment is a no-op.
     *
     * @param {DocumentFragment} fragment
     */
    public insertInternalLineMarkers(fragment: DocumentFragment): void {
        if (fragment.querySelector('OS-LINEBREAK') !== null) {
            // Prevent duplicate calls
            return;
        }

        const createLineMarker = (lineNumber: string, cssClass: string): Element => {
            const marker = document.createElement('OS-LINEBREAK');
            marker.setAttribute('data-line-number', lineNumber);
            marker.setAttribute('class', cssClass);
            return marker;
        };

        // Holds the number of the line number span visited last; as the spans are visited in
        // document order, this is expected to be the highest line number.
        let maxLineNumber = 0;
        fragment.querySelectorAll('span.os-line-number').forEach((lineNumberElement: Element) => {
            let insertBefore: Node = lineNumberElement;
            while (
                insertBefore.parentNode.nodeType !== DOCUMENT_FRAGMENT_NODE &&
                this.isFirstNonemptyChild(insertBefore.parentNode, insertBefore)
            ) {
                insertBefore = insertBefore.parentNode;
            }
            const lineNumber = lineNumberElement.getAttribute('data-line-number');
            insertBefore.parentNode.insertBefore(
                createLineMarker(lineNumber, lineNumberElement.getAttribute('class')),
                insertBefore
            );
            maxLineNumber = parseInt(lineNumber, 10);
        });

        // Add one more "fake" line number at the end and beginning, so we can select the last line as well
        const afterLastLine = (maxLineNumber + 1).toString(10);
        fragment.appendChild(createLineMarker(afterLastLine, 'os-line-number line-number-' + afterLastLine));
        fragment.insertBefore(createLineMarker('0', 'os-line-number line-number-0'), fragment.firstChild);
    }

    /**
     * An OL element has a number of child LI nodes. Given a `descendantNode` that might be anywhere within
     * the hierarchy of this OL element, this method returns the index (starting with 1) of the LI element
     * that contains this node. If `descendantNode` is not located within `olNode`, `null` is returned.
     *
     * @param olNode
     * @param descendantNode
     */
    private isWithinNthLIOfOL(olNode: Element, descendantNode: Node): number {
        while (descendantNode.parentNode) {
            if (descendantNode.parentNode === olNode) {
                // descendantNode is now the direct child of the OL: count the LIs in front of it
                let lisBefore = 0;
                for (let i = 0; i < olNode.childNodes.length; i++) {
                    if (olNode.childNodes[i] === descendantNode) {
                        break;
                    }
                    if (olNode.childNodes[i].nodeName === 'LI') {
                        lisBefore++;
                    }
                }
                return lisBefore + 1;
            }
            descendantNode = descendantNode.parentNode;
        }
        return null;
    }

    /**
     * Returns information about the common ancestors of two given nodes.
*
     * The returned traces contain the nodes below the common ancestor down to (and including) the
     * respective node. If the nodes do not share any ancestor, `commonAncestor` and `index` are `null`
     * and the traces contain the complete paths of both nodes.
     *
     * @param {Node} node1
     * @param {Node} node2
     * @returns {CommonAncestorData}
     */
    public getCommonAncestor(node1: Node, node2: Node): CommonAncestorData {
        const trace1 = this.getNodeContextTrace(node1);
        const trace2 = this.getNodeContextTrace(node2);

        let commonAncestor: Node = null;
        let commonIndex: number = null;
        // Both traces start at their root; once they diverge, they cannot meet again further down
        const sharedDepth = Math.min(trace1.length, trace2.length);
        for (let i = 0; i < sharedDepth && trace1[i] === trace2[i]; i++) {
            commonAncestor = trace1[i];
            commonIndex = i;
        }

        // Without a common ancestor there is nothing to cut off from the traces
        // (the previous `null + 1` arithmetic silently dropped the first entry of each trace).
        const firstChildIndex = commonIndex === null ? 0 : commonIndex + 1;

        return {
            commonAncestor: commonAncestor,
            trace1: trace1.slice(firstChildIndex),
            trace2: trace2.slice(firstChildIndex),
            index: commonIndex
        };
    }

    /**
     * This converts a HTML Node element into the HTML string of its opening tag, including its attributes
     * (except for the internal `os-li-number` attribute). Children and the closing tag are not part of the result.
     * Nodes that are not elements (e.g. fragments or text nodes) yield an empty string.
     *
     * @param {Node} node
     * @returns {string}
     */
    private serializeTag(node: Node): string {
        if (node.nodeType !== ELEMENT_NODE) {
            // Fragments are only placeholders and do not have an HTML representation
            return '';
        }
        const element = node as Element;
        let html = '<' + element.nodeName;
        for (let i = 0; i < element.attributes.length; i++) {
            const attr = element.attributes[i];
            if (attr.name !== 'os-li-number') {
                // The DOM hands out decoded attribute values; "&" and '"' need to be encoded again,
                // otherwise a quote within the value would terminate the attribute prematurely.
                const escapedValue = attr.value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
                html += ' ' + attr.name + '="' + escapedValue + '"';
            }
        }
        html += '>';
        return html;
    }

    /**
     * This converts the given HTML string into a DOM tree contained by a DocumentFragment, which is returned.
     *
     * NOTE(review): nodes are moved only as long as the helper DIV still contains element children. Text
     * following the last element (and input consisting of text only) therefore does not end up in the
     * fragment - confirm whether callers rely on this before changing the loop condition.
     *
     * @param {string} html
     * @return {DocumentFragment}
     */
    public htmlToFragment(html: string): DocumentFragment {
        const fragment = document.createDocumentFragment();
        const div = document.createElement('DIV');
        div.innerHTML = html;
        while (div.childElementCount) {
            // appendChild moves the node, i.e. it also removes it from the helper DIV
            fragment.appendChild(div.childNodes[0]);
        }
        return fragment;
    }

    /**
     * This performs HTML normalization to prevent the Diff-Algorithm from detecting changes when there are actually
     * none.
Common problems covered by this method are differently ordered Attributes of HTML elements or HTML-encoded * special characters. * Unfortunately, the conversion of HTML-encoded characters to the actual characters is done by a lookup-table for * now, as we haven't figured out a way to decode them automatically. * * @param {string} html * @returns {string} * @private */ public normalizeHtmlForDiff(html: string): string { // Convert all HTML tags to uppercase, but leave the values of attributes unchanged // All attributes and CSS class names are sorted alphabetically // If an attribute is empty, it is removed html = html.replace( /<(\/?[a-z]*)( [^>]*)?>/gi, (_fullHtml: string, tag: string, attributes: string): string => { const tagNormalized = tag.toUpperCase(); if (attributes === undefined) { attributes = ''; } const attributesList = [], attributesMatcher = /( [^"'=]*)(= *((["'])(.*?)\4))?/gi; let match; do { match = attributesMatcher.exec(attributes); if (match) { let attrNormalized = match[1].toUpperCase(), attrValue = match[5]; if (match[2] !== undefined) { if (attrNormalized === ' CLASS') { attrValue = attrValue .split(' ') .sort() .join(' ') .replace(/^\s+/, '') .replace(/\s+$/, ''); } attrNormalized += '=' + match[4] + attrValue + match[4]; } if (attrValue !== '') { attributesList.push(attrNormalized); } } } while (match); attributes = attributesList.sort().join(''); return '<' + tagNormalized + attributes + '>'; } ); const entities = { ' ': ' ', '–': '-', 'ä': 'ä', 'ö': 'ö', 'ü': 'ü', 'Ä': 'Ä', 'Ö': 'Ö', 'Ü': 'Ü', 'ß': 'ß', '„': '„', '“': '“', '•': '•', '§': '§', 'é': 'é', '€': '€' }; html = html .replace(/\s+<\/P>/gi, '

') .replace(/\s+<\/DIV>/gi, '') .replace(/\s+<\/LI>/gi, ''); html = html.replace(/\s+

/gi, '

').replace(/<\/LI>\s+/gi, '

'); html = html.replace(/\u00A0/g, ' '); html = html.replace(/\u2013/g, '-'); Object.keys(entities).forEach(ent => { html = html.replace(new RegExp(ent, 'g'), entities[ent]); }); // Newline characters: after closing block-level-elements, but not after BR (which is inline) html = html.replace(/(
)\n/gi, '$1');
        html = html.replace(/[ \n\t]+/gi, ' ');
        // Restore the newline after closing block-level elements. The closing ">" has to be outside of the
        // alternation, otherwise "</blockquote>" is never matched.
        html = html.replace(/(<\/(div|p|ul|li|blockquote)>) /gi, '$1\n');

        return html;
    }

    /**
     * Get all the siblings of the given node _after_ this node, in the order as they appear in the DOM tree.
     *
     * @param {Node} node
     * @returns {Node[]}
     */
    private getAllNextSiblings(node: Node): Node[] {
        const nodes: Node[] = [];
        for (let sibling = node.nextSibling; sibling; sibling = sibling.nextSibling) {
            nodes.push(sibling);
        }
        return nodes;
    }

    /**
     * Get all the siblings of the given node _before_ this node,
     * with the one closest to the given node first (=> reversed order in regard to the DOM tree order)
     *
     * @param {Node} node
     * @returns {Node[]}
     */
    private getAllPrevSiblingsReversed(node: Node): Node[] {
        const nodes: Node[] = [];
        for (let sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
            nodes.push(sibling);
        }
        return nodes;
    }

    /**
     * Given two strings, this method tries to guess if `htmlNew` can be produced from `htmlOld` by only inserting
     * or only deleting text, or if both is necessary (replacement).
     *
     * Both strings are normalized first. The common prefix is skipped; if the shorter of the two remainders is
     * the end of the longer one, exactly one contiguous piece of text has been inserted or deleted.
     * Identical strings are reported as TYPE_REPLACEMENT.
     *
     * @param {string} htmlOld
     * @param {string} htmlNew
     * @returns {number}
     */
    public detectReplacementType(htmlOld: string, htmlNew: string): ModificationType {
        htmlOld = this.normalizeHtmlForDiff(htmlOld);
        htmlNew = this.normalizeHtmlForDiff(htmlNew);

        if (htmlOld === htmlNew) {
            return ModificationType.TYPE_REPLACEMENT;
        }

        // Index of the first differing character. If one string is a prefix of the other one, this is the
        // length of the shorter string, so that a pure append / truncation leaves an empty remainder.
        const commonLength = Math.min(htmlOld.length, htmlNew.length);
        let firstDiff = 0;
        while (firstDiff < commonLength && htmlOld[firstDiff] === htmlNew[firstDiff]) {
            firstDiff++;
        }

        const remainderOld = htmlOld.substring(firstDiff);
        const remainderNew = htmlNew.substring(firstDiff);

        if (remainderOld.length > remainderNew.length) {
            if (remainderOld.substring(remainderOld.length - remainderNew.length) === remainderNew) {
                return ModificationType.TYPE_DELETION;
            }
        } else if (remainderOld.length < remainderNew.length) {
            if (remainderNew.substring(remainderNew.length - remainderOld.length) === remainderOld) {
                return ModificationType.TYPE_INSERTION;
            }
        }
        return ModificationType.TYPE_REPLACEMENT;
    }

    /**
     * This method adds a CSS class name to a given node, unless the class is already set.
     * Nodes that are not elements are left untouched.
     *
     * @param {Node} node
     * @param {string} className
     */
    public addCSSClass(node: Node, className: string): void {
        if (node.nodeType !== ELEMENT_NODE) {
            return;
        }
        const element = node as Element;
        const classesStr = element.getAttribute('class');
        const classes = classesStr ? classesStr.split(' ') : [];
        if (classes.indexOf(className) === -1) {
            classes.push(className);
        }
        element.setAttribute('class', classes.join(' '));
    }

    /**
     * This method removes a CSS class name from a given node. If no class is left afterwards,
     * the class attribute is removed altogether. Nodes that are not elements are left untouched.
     *
     * @param {Node} node
     * @param {string} className
     */
    public removeCSSClass(node: Node, className: string): void {
        if (node.nodeType !== ELEMENT_NODE) {
            return;
        }
        const element = node as Element;
        const classesStr = element.getAttribute('class');
        const classes = classesStr ? classesStr.split(' ') : [];
        const newClasses = classes.filter((existing: string): boolean => existing !== className);
        if (newClasses.length === 0) {
            element.removeAttribute('class');
        } else {
            element.setAttribute('class', newClasses.join(' '));
        }
    }

    /**
     * Matches the entries of two token arrays against each other. Tokens that could be matched are replaced
     * (in place, in both arrays) by an object `{ text, row }`, with `row` being the index of the counterpart in
     * the other array; tokens without a counterpart remain plain strings.
     *
     * Adapted from http://ejohn.org/projects/javascript-diff-algorithm/
     * by John Resig, MIT License
     * @param {array} oldArr
     * @param {array} newArr
     * @returns {object} `{ o: oldArr, n: newArr }`
     */
    private diffArrays(oldArr: any, newArr: any): any {
        // The tokens themselves are used as keys. The lookup tables must not have a prototype, as tokens like
        // "constructor" or "toString" would otherwise hit the inherited members of Object.prototype.
        const ns = Object.create(null),
            os = Object.create(null);
        let i;

        // Collect the positions of every token in the new and in the old array
        for (i = 0; i < newArr.length; i++) {
            if (ns[newArr[i]] === undefined) {
                ns[newArr[i]] = { rows: [], o: null };
            }
            ns[newArr[i]].rows.push(i);
        }

        for (i = 0; i < oldArr.length; i++) {
            if (os[oldArr[i]] === undefined) {
                os[oldArr[i]] = { rows: [], n: null };
            }
            os[oldArr[i]].rows.push(i);
        }

        // Tokens occurring exactly once on both sides are the anchors of the matching
        for (i in ns) {
            if (ns[i].rows.length === 1 && typeof os[i] !== 'undefined' && os[i].rows.length === 1) {
                newArr[ns[i].rows[0]] = { text: newArr[ns[i].rows[0]], row: os[i].rows[0] };
                oldArr[os[i].rows[0]] = { text: oldArr[os[i].rows[0]], row: ns[i].rows[0] };
            }
        }

        // Extend the matches forwards: identical, yet unmatched tokens following a matched pair
        for (i = 0; i < newArr.length - 1; i++) {
            if (
                newArr[i].text !== null &&
                newArr[i + 1].text === undefined &&
                newArr[i].row + 1 < oldArr.length &&
                oldArr[newArr[i].row + 1].text === undefined &&
                newArr[i + 1] === oldArr[newArr[i].row + 1]
            ) {
                newArr[i + 1] = { text: newArr[i + 1], row: newArr[i].row + 1 };
                oldArr[newArr[i].row + 1] = { text: oldArr[newArr[i].row + 1], row: i + 1 };
            }
        }

        // Extend the matches backwards: identical, yet unmatched tokens preceding a matched pair
        for (i = newArr.length - 1; i > 0; i--) {
            if (
                newArr[i].text !== null &&
                newArr[i - 1].text === undefined &&
                newArr[i].row > 0 &&
                oldArr[newArr[i].row - 1].text === undefined &&
                newArr[i - 1] === oldArr[newArr[i].row - 1]
            ) {
                newArr[i - 1] = { text: newArr[i - 1], row: newArr[i].row - 1 };
                oldArr[newArr[i].row - 1] = { text: oldArr[newArr[i].row - 1], row: i - 1 };
            }
        }

        return { o: oldArr, n: newArr };
    }

    /**
     * This method splits a string into an array of strings, such as that it can be used by the diff method.
     * Mainly it tries to split it into single words, but prevents HTML tags from being split into different elements.
     * The separators " ", ".", ",", "!" and "-" become tokens of their own; empty tokens are dropped.
     *
     * @param {string} str
     * @returns {string[]}
     */
    private tokenizeHtml(str: string): string[] {
        // Splits every entry at `by`, keeping the separator attached to the following (prepend = true)
        // or to the preceding (prepend = false) part
        const splitArrayEntriesEmbedSeparator = (arrIn: string[], by: string, prepend: boolean): string[] => {
            const newArr = [];
            for (let i = 0; i < arrIn.length; i++) {
                if (arrIn[i][0] === '<' && (by === ' ' || by === '\n')) {
                    // Don't split HTML tags
                    newArr.push(arrIn[i]);
                    continue;
                }

                const parts = arrIn[i].split(by);
                if (parts.length === 1) {
                    newArr.push(arrIn[i]);
                } else {
                    let j;
                    if (prepend) {
                        if (parts[0] !== '') {
                            newArr.push(parts[0]);
                        }
                        for (j = 1; j < parts.length; j++) {
                            newArr.push(by + parts[j]);
                        }
                    } else {
                        for (j = 0; j < parts.length - 1; j++) {
                            newArr.push(parts[j] + by);
                        }
                        if (parts[parts.length - 1] !== '') {
                            newArr.push(parts[parts.length - 1]);
                        }
                    }
                }
            }
            return newArr;
        };
        // Splits every entry that is not an HTML tag at `by`, emitting the separator as a token of its own
        const splitArrayEntriesSplitSeparator = (arrIn: string[], by: string): string[] => {
            const newArr = [];
            for (let i = 0; i < arrIn.length; i++) {
                if (arrIn[i][0] === '<') {
                    newArr.push(arrIn[i]);
                    continue;
                }
                const parts = arrIn[i].split(by);
                for (let j = 0; j < parts.length; j++) {
                    if (j > 0) {
                        newArr.push(by);
                    }
                    newArr.push(parts[j]);
                }
            }
            return newArr;
        };

        // First isolate the HTML tags, then split the text in between
        let arr = splitArrayEntriesEmbedSeparator([str], '<', true);
        arr = splitArrayEntriesEmbedSeparator(arr, '>', false);
        arr = splitArrayEntriesSplitSeparator(arr, ' ');
        arr = splitArrayEntriesSplitSeparator(arr, '.');
        arr = splitArrayEntriesSplitSeparator(arr, ',');
        arr = splitArrayEntriesSplitSeparator(arr, '!');
        arr = splitArrayEntriesSplitSeparator(arr, '-');
        arr = splitArrayEntriesEmbedSeparator(arr, '\n', false);

        const arrWithoutEmpties = [];
        for (let i = 0; i < arr.length; i++) {
            if (arr[i] !== '') {
                arrWithoutEmpties.push(arr[i]);
            }
        }

        return arrWithoutEmpties;
    }

    /**
     * Given two strings, this method generates a consolidated new string that indicates the operations necessary
     * to get from `oldStr` to `newStr` by marking the inserted and the deleted parts inline.
     *
     * NOTE(review): the marker literals used in this method ('~~' and '') do not match the `</del>` / `</ins>`
     * tags other methods of this class search for - confirm the literals against version control.
     *
     * @param {string} oldStr
     * @param {string} newStr
     * @returns {string}
     */
    private diffString(oldStr: string, newStr: string): string {
        oldStr = this.normalizeHtmlForDiff(oldStr.replace(/\s+$/, '').replace(/^\s+/, ''));
        newStr = this.normalizeHtmlForDiff(newStr.replace(/\s+$/, '').replace(/^\s+/, ''));

        const out = this.diffArrays(this.tokenizeHtml(oldStr), this.tokenizeHtml(newStr));

        // This fixes the problem tested by "does not lose words when changes are moved X-wise"
        let lastRow = 0;
        for (let z = 0; z < out.n.length; z++) {
            if (out.n[z].row && out.n[z].row > lastRow) {
                lastRow = out.n[z].row;
            }
            if (out.n[z].row && out.n[z].row < lastRow) {
                out.o[out.n[z].row] = out.o[out.n[z].row].text;
                out.n[z] = out.n[z].text;
            }
        }

        let str = '';
        let i;

        if (out.n.length === 0) {
            for (i = 0; i < out.o.length; i++) {
                str += '~~' + out.o[i] + '~~';
            }
        } else {
            if (out.n[0].text === undefined) {
                for (let k = 0; k < out.o.length && out.o[k].text === undefined; k++) {
                    str += '~~' + out.o[k] + '~~';
                }
            }

            let currOldRow = 0;
            for (i = 0; i < out.n.length; i++) {
                if (out.n[i].text === undefined) {
                    if (out.n[i] !== '') {
                        str += '' + out.n[i] +
''; } } else if (out.n[i].row < currOldRow) { str += '' + out.n[i].text + ''; } else { let pre = ''; if (i + 1 < out.n.length && out.n[i + 1].row !== undefined && out.n[i + 1].row > out.n[i].row + 1) { for (let n = out.n[i].row + 1; n < out.n[i + 1].row; n++) { if (out.o[n].text === undefined) { pre += '~~' + out.o[n] + '~~'; } else { pre += '~~' + out.o[n].text + '~~'; } } } else { for (let j = out.n[i].row + 1; j < out.o.length && out.o[j].text === undefined; j++) { pre += '~~' + out.o[j] + '~~'; } } str += out.n[i].text + pre; currOldRow = out.n[i].row; } } } return str .replace(/^\s+/g, '') .replace(/\s+$/g, '') .replace(/ {2,}/g, ' '); } /** * This checks if this string is valid inline HTML. * It does so by leveraging the browser's auto-correction mechanism and coun the number of "<"s (opening and closing * HTML tags) of the original and the cleaned-up string. * This is mainly helpful to decide if a given string can be put into ~~...~~ or ...-Tags without * producing broken HTML. * * @param {string} html * @return {boolean} * @private */ private isValidInlineHtml(html: string): boolean { // If there are no HTML tags, we assume it's valid and skip further checks if (!html.match(/<[^>]*>/)) { return true; } // We check if this is a valid HTML that closes all its tags again using the innerHTML-Hack to correct // the string and check if the number of HTML tags changes by this const doc = document.createElement('div'); doc.innerHTML = html; const tagsBefore = (html.match(/ it was not valid return false; } // If there is any block element inside, we consider it as broken, as this string will be displayed // inside of / tags if (html.match(/<(div|p|ul|li|blockquote)\W/i)) { return false; } return true; } /** * This detects if a given string contains broken HTML. This can happen when the Diff accidentally produces * wrongly nested HTML tags. 
* * @param {string} html * @returns {boolean} * @private */ private diffDetectBrokenDiffHtml(html: string): boolean { // If other HTML tags are contained within INS/DEL (e.g. "Test
"), let's better be cautious // The "!!(found=...)"-construction is only used to make jshint happy :) const findDel = /(.*?)<\/del>/gi, findIns = /(.*?)<\/ins>/gi; let found, inner; while (!!(found = findDel.exec(html))) { inner = found[1].replace(/]*>/gi, ''); if (inner.match(/<[^>]*>/)) { return true; } } while (!!(found = findIns.exec(html))) { inner = found[1].replace(/]*>/gi, ''); if (!this.isValidInlineHtml(inner)) { return true; } } // If non of the conditions up to now is met, we consider the diff as being sane return false; } /** * Adds a CSS class to the first opening HTML tag within the given string. * * @param {string} html * @param {string} className * @returns {string} */ public addCSSClassToFirstTag(html: string, className: string): string { return html.replace( /<[a-z][^>]*>/i, (match: string): string => { if (match.match(/class=["'][a-z0-9 _-]*["']/i)) { return match.replace( /class=["']([a-z0-9 _-]*)["']/i, (match2: string, previousClasses: string): string => { return 'class="' + previousClasses + ' ' + className + '"'; } ); } else { return match.substring(0, match.length - 1) + ' class="' + className + '">'; } } ); } /** * Adds a CSS class to the last opening HTML tag within the given string. 
*
     * More precisely, the class is added to the last top-level element of the given HTML; the existing
     * class names of that element are sorted alphabetically together with the new one.
     * If the HTML does not contain any top-level element, no class is added.
     * The returned string is the browser's serialization of the parsed HTML.
     *
     * @param {string} html
     * @param {string} className
     * @returns {string}
     */
    public addClassToLastNode(html: string, className: string): string {
        const node = document.createElement('div');
        node.innerHTML = html;

        // Text nodes are skipped: only the last child that is an element is of interest
        const lastElement = node.lastElementChild;
        if (lastElement) {
            const existingClasses = lastElement.getAttribute('class');
            const classes = existingClasses ? existingClasses.split(' ') : [];
            classes.push(className);
            lastElement.setAttribute(
                'class',
                classes
                    .sort()
                    .join(' ')
                    .replace(/^\s+/, '')
                    .replace(/\s+$/, '')
            );
        }

        return node.innerHTML;
    }

    /**
     * This function removes color-Attributes from the styles of this node or a descendant,
     * as they interfere with the green/red color in HTML and PDF.
     * Only `color` declarations are removed; other declarations of the style attribute are kept.
     * If no declaration is left, the style attribute is removed altogether.
     *
     * For the moment, it is sufficient to do this only in paragraph diff mode, as we fall back to this mode anyway
     * once we encounter SPANs or other tags inside of INS/DEL-tags
     *
     * @param {Element} node
     * @private
     */
    private removeColorStyles(node: Element): void {
        const styles = node.getAttribute('style');
        if (styles && styles.indexOf('color') > -1) {
            const stylesNew = styles
                .split(';')
                .filter((style: string): boolean => !style.match(/^\s*color\s*:/i))
                .join(';');
            if (stylesNew === '') {
                node.removeAttribute('style');
            } else {
                node.setAttribute('style', stylesNew);
            }
        }

        // Descend into the child elements; text nodes cannot carry styles
        for (let i = 0; i < node.childNodes.length; i++) {
            if (node.childNodes[i].nodeType === ELEMENT_NODE) {
                this.removeColorStyles(node.childNodes[i] as Element);
            }
        }
    }

    /**
     * Add the CSS-class to the existing "class"-attribute, or add one.
* Works on strings, not nodes * * @param {string} tagStr * @param {string} className * @returns {string} */ private addClassToHtmlTag(tagStr: string, className: string): string { return tagStr.replace( /<(\w+)( [^>]*)?>/gi, (whole: string, tag: string, tagArguments: string): string => { tagArguments = tagArguments ? tagArguments : ''; if (tagArguments.match(/class="/gi)) { // class="someclass" => class="someclass insert" tagArguments = tagArguments.replace( /(class\s*=\s*)(["'])([^\2]*)\2/gi, (classWhole: string, attr: string, para: string, content: string): string => { return attr + para + content + ' ' + className + para; } ); } else { tagArguments += ' class="' + className + '"'; } return '<' + tag + tagArguments + '>'; } ); } /** * This fixes a very specific, really weird bug that is tested in the test case "does not a change in a very specific case". * * @param {string}diffStr * @return {string} */ private fixWrongChangeDetection(diffStr: string): string { if (diffStr.indexOf('') === -1 || diffStr.indexOf('') === -1) { return diffStr; } const findDelGroupFinder = /(?:.*?<\/del>)+/gi; let found, returnStr = diffStr; while (!!(found = findDelGroupFinder.exec(diffStr))) { const del = found[0], split = returnStr.split(del); const findInsGroupFinder = /^(?:.*?<\/ins>)+/gi, foundIns = findInsGroupFinder.exec(split[1]); if (foundIns) { const ins = foundIns[0]; let delShortened = del .replace( /((
<\/del>)?(]+os-line-number[^>]+?>)(\s|<\/?del>)*<\/span>)<\/del>/gi, '' ) .replace(/<\/del>/g, ''); const insConv = ins .replace(//g, '~~') .replace(/<\/ins>/g, '~~') .replace(/<\/del>/g, ''); if (delShortened.indexOf(insConv) !== -1) { delShortened = delShortened.replace(insConv, ''); if (delShortened === '') { returnStr = returnStr.replace(del + ins, del.replace(//g, '').replace(/<\/del>/g, '')); } } } } return returnStr; } /** * Converts a given HTML node into HTML string and optionally strips line number nodes from it. * * @param {Node} node * @param {boolean} stripLineNumbers * @returns {string} */ private serializeDom(node: Node, stripLineNumbers: boolean): string { if (node.nodeType === TEXT_NODE) { return node.nodeValue.replace(//g, '>'); } if ( stripLineNumbers && (this.lineNumberingService.isOsLineNumberNode(node) || this.lineNumberingService.isOsLineBreakNode(node)) ) { return ''; } if (node.nodeName === 'OS-LINEBREAK') { return ''; } if (node.nodeName === 'BR') { const element = node; let br = ''; } let html = this.serializeTag(node); for (let i = 0; i < node.childNodes.length; i++) { if (node.childNodes[i].nodeType === TEXT_NODE) { html += node.childNodes[i].nodeValue .replace(/&/g, '&') .replace(//g, '>'); } else if ( !stripLineNumbers || (!this.lineNumberingService.isOsLineNumberNode(node.childNodes[i]) && !this.lineNumberingService.isOsLineBreakNode(node.childNodes[i])) ) { html += this.serializeDom(node.childNodes[i], stripLineNumbers); } } if (node.nodeType !== DOCUMENT_FRAGMENT_NODE) { html += ''; } return html; } /** * When a
<li> with a os-split-before-class (set by extractRangeByLineNumbers) is edited when creating a
     * change recommendation and is split again in CKEditor, the second list item also gets that class.
     * This is not correct however, as the second one actually is a new list item. So we need to remove it again.
     *
     * @param {string} html
     * @returns {string}
     */
    public removeDuplicateClassesInsertedByCkeditor(html: string): string {
        const fragment = this.htmlToFragment(html);
        const items = fragment.querySelectorAll('li.os-split-before');
        for (let i = 0; i < items.length; i++) {
            // Only the first non-empty child of a list may keep the marker class.
            if (!this.isFirstNonemptyChild(items[i].parentNode, items[i])) {
                this.removeCSSClass(items[i], 'os-split-before');
            }
        }
        return this.serializeDom(fragment, false);
    }

    /**
     * Given a DOM tree and a specific node within that tree, this method returns the HTML string from the beginning
     * of this tree up to this node.
     * The returned string in itself is not renderable, as it stops in the middle of the complete HTML, with opened tags.
     *
     * Implementation hint: the first element of "toChildTrace" array needs to be a child element of "node".
     * Note that the trace array is consumed (shifted) while descending, i.e. the caller's array is modified.
     *
     * @param {Node} node
     * @param {Node[]} toChildTrace
     * @param {boolean} stripLineNumbers
     * @returns {string}
     * @throws {Error} if the first trace element is not a child of `node`
     */
    public serializePartialDomToChild(node: Node, toChildTrace: Node[], stripLineNumbers: boolean): string {
        if (this.lineNumberingService.isOsLineNumberNode(node) || this.lineNumberingService.isOsLineBreakNode(node)) {
            return '';
        }
        if (node.nodeName === 'OS-LINEBREAK') {
            return '';
        }

        let html = this.serializeTag(node),
            found = false;

        // Everything before the traced child is serialized completely; the traced child itself only partially.
        for (let i = 0; i < node.childNodes.length && !found; i++) {
            if (node.childNodes[i] === toChildTrace[0]) {
                found = true;
                const childElement = node.childNodes[i];
                const remainingTrace = toChildTrace;
                remainingTrace.shift();
                if (!this.lineNumberingService.isOsLineNumberNode(childElement)) {
                    html += this.serializePartialDomToChild(childElement, remainingTrace, stripLineNumbers);
                }
            } else if (node.childNodes[i].nodeType === TEXT_NODE) {
                html += node.childNodes[i].nodeValue;
            } else {
                const childElement = node.childNodes[i];
                if (
                    !stripLineNumbers ||
                    (!this.lineNumberingService.isOsLineNumberNode(childElement) &&
                        !this.lineNumberingService.isOsLineBreakNode(childElement))
                ) {
                    html += this.serializeDom(childElement, stripLineNumbers);
                }
            }
        }
        if (!found) {
            throw new Error('Inconsistency or invalid call of this function detected (to)');
        }
        // Deliberately no closing tag: the output ends inside the still opened elements.
        return html;
    }

    /**
     * Given a DOM tree and a specific node within that tree, this method returns the HTML string beginning after this
     * node to the end of the tree.
     * The returned string in itself is not renderable, as it starts in the middle of the complete HTML, with opened tags.
     *
     * Implementation hint: the first element of "fromChildTrace" array needs to be a child element of "node".
     * Note that the trace array is consumed (shifted) while descending, i.e. the caller's array is modified.
     *
     * @param {Node} node
     * @param {Node[]} fromChildTrace
     * @param {boolean} stripLineNumbers
     * @returns {string}
     * @throws {Error} if the first trace element is not a child of `node`
     */
    public serializePartialDomFromChild(node: Node, fromChildTrace: Node[], stripLineNumbers: boolean): string {
        if (this.lineNumberingService.isOsLineNumberNode(node) || this.lineNumberingService.isOsLineBreakNode(node)) {
            return '';
        }
        if (node.nodeName === 'OS-LINEBREAK') {
            return '';
        }

        let html = '',
            found = false;

        // Children before the traced child are skipped; everything after it is serialized completely.
        for (let i = 0; i < node.childNodes.length; i++) {
            if (node.childNodes[i] === fromChildTrace[0]) {
                found = true;
                const childElement = node.childNodes[i];
                const remainingTrace = fromChildTrace;
                remainingTrace.shift();
                if (!this.lineNumberingService.isOsLineNumberNode(childElement)) {
                    html += this.serializePartialDomFromChild(childElement, remainingTrace, stripLineNumbers);
                }
            } else if (found) {
                if (node.childNodes[i].nodeType === TEXT_NODE) {
                    html += node.childNodes[i].nodeValue;
                } else {
                    const childElement = node.childNodes[i];
                    if (
                        !stripLineNumbers ||
                        (!this.lineNumberingService.isOsLineNumberNode(childElement) &&
                            !this.lineNumberingService.isOsLineBreakNode(childElement))
                    ) {
                        html += this.serializeDom(childElement, stripLineNumbers);
                    }
                }
            }
        }
        if (!found) {
            throw new Error('Inconsistency or invalid call of this function detected (from)');
        }
        // The opening tag lies before the cut, so only the closing tag is emitted here.
        // A document fragment has no tag of its own.
        if (node.nodeType !== DOCUMENT_FRAGMENT_NODE) {
            html += '</' + node.nodeName + '>';
        }
        return html;
    }

    /**
     * Returns the HTML snippet between two given line numbers.
     * extractRangeByLineNumbers
     * Hint:
     * - The last line (toLine) is not included anymore, as the number refers to the line breaking element at the end of the line
     * - if toLine === null, then everything from fromLine to the end of the fragment is returned
     *
     * In addition to the HTML snippet, additional information is provided regarding the most specific DOM element
     * that contains the whole section specified by the line numbers (like a P-element if only one paragraph is selected
     * or the most outer DIV, if multiple sections selected).
     *
     * This additional information is meant to render the snippet correctly without producing broken HTML
     *
     * In some cases, the returned HTML tags receive additional CSS classes, providing information both for
     * rendering it and for merging it again correctly.
     * - os-split-*: These classes are set for all HTML Tags that have been split into two by this process,
     *   e.g. if the fromLine- or toLine-line-break was somewhere in the middle of this tag.
     *   If a tag is split, the first one receives "os-split-after", and the second one "os-split-before".
     * For example, for the following string
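     * <p>Line 1<br>Line 2<br>Line 3</p>:
     * - extracting line 1 to 2 results in <p class="os-split-after">Line 1</p>
     * - extracting line 2 to 3 results in <p class="os-split-after os-split-before">Line 2</p>
     * - extracting line 3 to null/4 results in <p class="os-split-before">Line 3</p>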
*
     * @param {string} htmlIn
     * @param {number} fromLine
     * @param {number} toLine
     * @returns {ExtractedContent}
     * @throws {Error} if `htmlIn` is not a string
     */
    public extractRangeByLineNumbers(htmlIn: string, fromLine: number, toLine: number): ExtractedContent {
        if (typeof htmlIn !== 'string') {
            throw new Error('Invalid call - extractRangeByLineNumbers expects a string as first argument');
        }

        const cacheKey = fromLine + '-' + toLine + '-' + this.lineNumberingService.djb2hash(htmlIn),
            cached = this.diffCache.get(cacheKey);

        if (cached) {
            return cached;
        }

        const fragment = this.htmlToFragment(htmlIn);

        this.insertInternalLineMarkers(fragment);
        if (toLine === null) {
            // No explicit end: use the line number of the last internal marker of the fragment.
            const internalLineMarkers = fragment.querySelectorAll('OS-LINEBREAK'),
                lastMarker = internalLineMarkers[internalLineMarkers.length - 1];
            toLine = parseInt(lastMarker.getAttribute('data-line-number'), 10);
        }

        const fromLineNode = this.getLineNumberNode(fragment, fromLine),
            toLineNode = toLine ? this.getLineNumberNode(fragment, toLine) : null,
            ancestorData = this.getCommonAncestor(fromLineNode, toLineNode);

        const fromChildTraceRel = ancestorData.trace1,
            fromChildTraceAbs = this.getNodeContextTrace(fromLineNode),
            toChildTraceRel = ancestorData.trace2,
            toChildTraceAbs = this.getNodeContextTrace(toLineNode),
            ancestor = ancestorData.commonAncestor;
        let htmlOut = '',
            outerContextStart = '',
            outerContextEnd = '',
            innerContextStart = '',
            innerContextEnd = '',
            previousHtmlEndSnippet = '',
            followingHtmlStartSnippet = '',
            fakeOl: Element,
            offset: number;

        // The absolute traces start with the fragment itself, which is passed separately as the root.
        fromChildTraceAbs.shift();
        const previousHtml = this.serializePartialDomToChild(fragment, fromChildTraceAbs, false);
        toChildTraceAbs.shift();
        const followingHtml = this.serializePartialDomFromChild(fragment, toChildTraceAbs, false);

        // Walk up from the first line marker: mark split elements and collect the closing tags
        // needed to terminate `previousHtml`.
        let currNode: Node = fromLineNode,
            isSplit = false;
        while (currNode.parentNode) {
            if (!this.isFirstNonemptyChild(currNode.parentNode, currNode)) {
                isSplit = true;
            }
            if (isSplit) {
                this.addCSSClass(currNode.parentNode, 'os-split-before');
            }
            if (currNode.nodeName !== 'OS-LINEBREAK') {
                previousHtmlEndSnippet += '</' + currNode.nodeName + '>';
            }
            currNode = currNode.parentNode;
        }

        // Walk up from the last line marker: mark split elements and collect the opening tags
        // needed to start `followingHtml`.
        currNode = toLineNode;
        isSplit = false;
        while (currNode.parentNode) {
            if (!this.isFirstNonemptyChild(currNode.parentNode, currNode)) {
                isSplit = true;
            }
            if (isSplit) {
                this.addCSSClass(currNode.parentNode, 'os-split-after');
            }
            if (currNode.parentNode.nodeName === 'OL') {
                // Re-open ordered lists with an adjusted "start" attribute so the numbering continues.
                const parentElement = currNode.parentNode as Element;
                fakeOl = parentElement.cloneNode(false) as Element;
                offset = parentElement.getAttribute('start')
                    ? parseInt(parentElement.getAttribute('start'), 10) - 1
                    : 0;
                fakeOl.setAttribute('start', (this.isWithinNthLIOfOL(parentElement, toLineNode) + offset).toString());
                followingHtmlStartSnippet = this.serializeTag(fakeOl) + followingHtmlStartSnippet;
            } else {
                followingHtmlStartSnippet = this.serializeTag(currNode.parentNode) + followingHtmlStartSnippet;
            }
            currNode = currNode.parentNode;
        }

        // Opening tags between the common ancestor and the first line marker.
        let found = false;
        isSplit = false;
        for (let i = 0; i < fromChildTraceRel.length && !found; i++) {
            if (fromChildTraceRel[i].nodeName === 'OS-LINEBREAK') {
                found = true;
            } else {
                if (!this.isFirstNonemptyChild(fromChildTraceRel[i], fromChildTraceRel[i + 1])) {
                    isSplit = true;
                }
                if (fromChildTraceRel[i].nodeName === 'OL') {
                    const element = fromChildTraceRel[i] as Element;
                    fakeOl = element.cloneNode(false) as Element;
                    offset = element.getAttribute('start') ? parseInt(element.getAttribute('start'), 10) - 1 : 0;
                    fakeOl.setAttribute('start', (offset + this.isWithinNthLIOfOL(element, fromLineNode)).toString());
                    innerContextStart += this.serializeTag(fakeOl);
                } else {
                    if (i < fromChildTraceRel.length - 1 && isSplit) {
                        this.addCSSClass(fromChildTraceRel[i], 'os-split-before');
                    }
                    innerContextStart += this.serializeTag(fromChildTraceRel[i]);
                }
            }
        }

        // Closing tags between the last line marker and the common ancestor (innermost first).
        found = false;
        for (let i = 0; i < toChildTraceRel.length && !found; i++) {
            if (toChildTraceRel[i].nodeName === 'OS-LINEBREAK') {
                found = true;
            } else {
                innerContextEnd = '</' + toChildTraceRel[i].nodeName + '>' + innerContextEnd;
            }
        }

        // The actual content: partial first child, complete children in between, partial last child.
        found = false;
        for (let i = 0; i < ancestor.childNodes.length; i++) {
            if (ancestor.childNodes[i] === fromChildTraceRel[0]) {
                found = true;
                fromChildTraceRel.shift();
                htmlOut += this.serializePartialDomFromChild(ancestor.childNodes[i], fromChildTraceRel, true);
            } else if (ancestor.childNodes[i] === toChildTraceRel[0]) {
                found = false;
                toChildTraceRel.shift();
                htmlOut += this.serializePartialDomToChild(ancestor.childNodes[i], toChildTraceRel, true);
            } else if (found === true) {
                htmlOut += this.serializeDom(ancestor.childNodes[i], true);
            }
        }

        // Opening and closing tags of the common ancestor and everything above it.
        currNode = ancestor;
        while (currNode.parentNode) {
            if (currNode.nodeName === 'OL') {
                const currElement = currNode as Element;
                fakeOl = currElement.cloneNode(false) as Element;
                offset = currElement.getAttribute('start') ? parseInt(currElement.getAttribute('start'), 10) - 1 : 0;
                fakeOl.setAttribute('start', (this.isWithinNthLIOfOL(currElement, fromLineNode) + offset).toString());
                outerContextStart = this.serializeTag(fakeOl) + outerContextStart;
            } else {
                outerContextStart = this.serializeTag(currNode) + outerContextStart;
            }
            outerContextEnd += '</' + currNode.nodeName + '>';
            currNode = currNode.parentNode;
        }

        const ret = {
            html: htmlOut,
            ancestor: ancestor,
            outerContextStart: outerContextStart,
            outerContextEnd: outerContextEnd,
            innerContextStart: innerContextStart,
            innerContextEnd: innerContextEnd,
            previousHtml: previousHtml,
            previousHtmlEndSnippet: previousHtmlEndSnippet,
            followingHtml: followingHtml,
            followingHtmlStartSnippet: followingHtmlStartSnippet
        };

        this.diffCache.put(cacheKey, ret);
        return ret;
    }

    /**
     * Convenience method that takes the html-attribute from an extractRangeByLineNumbers()-method and
     * wraps it with the context.
     *
     * @param {ExtractedContent} diff
     * @returns {string}
     */
    public formatDiff(diff: ExtractedContent): string {
        return (
            diff.outerContextStart + diff.innerContextStart + diff.html + diff.innerContextEnd + diff.outerContextEnd
        );
    }

    /**
     * Convenience method that takes the html-attribute from an extractRangeByLineNumbers()-method,
     * wraps it with the context and adds line numbers.
     *
     * @param {ExtractedContent} diff
     * @param {number} lineLength
     * @param {number} firstLine
     * @returns {string}
     */
    public formatDiffWithLineNumbers(diff: ExtractedContent, lineLength: number, firstLine: number): string {
        let text = this.formatDiff(diff);
        text = this.lineNumberingService.insertLineNumbers(text, lineLength, null, null, firstLine);
        return text;
    }

    /**
     * This is a workaround to prevent the last word of the inserted text from accidentally being merged with the
     * first word of the following line.
     *
     * This happens as trailing spaces in the change recommendation's text are frequently stripped,
     * which is pretty nasty if the original text goes on after the affected line. So we insert a space
     * if the original line ends with one.
*
     * @param {Element|DocumentFragment} element
     */
    private insertDanglingSpace(element: Element | DocumentFragment): void {
        if (element.childNodes.length === 0) {
            return;
        }

        let lastChild: Node = element.childNodes[element.childNodes.length - 1];
        if (
            lastChild.nodeType === TEXT_NODE &&
            !lastChild.nodeValue.match(/[\S]/) &&
            element.childNodes.length > 1
        ) {
            // If the text node only contains whitespaces, chances are high it's just space between block elements,
            // like a line break between </p> and <p>. In that case the node before it is the relevant one.
            lastChild = element.childNodes[element.childNodes.length - 2];
        }

        if (lastChild.nodeType === TEXT_NODE) {
            if (lastChild.nodeValue === '' || lastChild.nodeValue.substr(-1) !== ' ') {
                lastChild.nodeValue += ' ';
            }
        } else {
            // Not a text node: descend into it and look at its last child instead.
            this.insertDanglingSpace(lastChild as Element);
        }
    }

    /**
     * This function merges two arrays of nodes. The last element of nodes1 and the first element of nodes2
     * are merged, if they are of the same type.
     *
     * This is done recursively until a TEMPLATE-Tag is found, which was inserted in this.replaceLines.
     * Using a TEMPLATE-Tag is a rather dirty hack, as it is allowed inside of any other element, including <ul>
.
     *
     * The child lists handed to the recursive call are snapshots (plain arrays). A live `childNodes` list
     * shrinks while its nodes are appended to the merged element, which would skip every other child
     * whenever the recursion returns one of its inputs unchanged.
     *
     * @param {Node[]} nodes1
     * @param {Node[]} nodes2
     * @returns {Node[]}
     */
    public replaceLinesMergeNodeArrays(nodes1: Node[], nodes2: Node[]): Node[] {
        if (nodes1.length === 0) {
            return nodes2;
        }
        if (nodes2.length === 0) {
            return nodes1;
        }

        // All but the last node of nodes1 are taken over unchanged.
        const out: Node[] = [];
        for (let i = 0; i < nodes1.length - 1; i++) {
            out.push(nodes1[i]);
        }

        const lastNode = nodes1[nodes1.length - 1],
            firstNode = nodes2[0];
        if (lastNode.nodeType === TEXT_NODE && firstNode.nodeType === TEXT_NODE) {
            const newTextNode = lastNode.ownerDocument.createTextNode(lastNode.nodeValue + firstNode.nodeValue);
            out.push(newTextNode);
        } else if (lastNode.nodeName === firstNode.nodeName) {
            // Same tag on both sides of the seam: build one element carrying the attributes of the first.
            const lastElement = lastNode as Element,
                newNode = lastNode.ownerDocument.createElement(lastNode.nodeName);
            for (let i = 0; i < lastElement.attributes.length; i++) {
                const attr = lastElement.attributes[i];
                newNode.setAttribute(attr.name, attr.value);
            }

            // Remove #text nodes inside of List elements (OL/UL), as they are confusing
            const isList = lastElement.nodeName === 'OL' || lastElement.nodeName === 'UL';
            const snapshotChildren = (parent: Node): Node[] => {
                const children: Node[] = Array.prototype.slice.call(parent.childNodes);
                return isList ? children.filter((child: Node) => child.nodeType === ELEMENT_NODE) : children;
            };
            const lastChildren = snapshotChildren(lastElement),
                firstChildren = snapshotChildren(firstNode);

            const children = this.replaceLinesMergeNodeArrays(lastChildren, firstChildren);
            for (let i = 0; i < children.length; i++) {
                newNode.appendChild(children[i]);
            }

            out.push(newNode);
        } else {
            // Different node types are not merged; the TEMPLATE markers themselves are dropped.
            if (lastNode.nodeName !== 'TEMPLATE') {
                out.push(lastNode);
            }
            if (firstNode.nodeName !== 'TEMPLATE') {
                out.push(firstNode);
            }
        }

        // All but the first node of nodes2 are taken over unchanged.
        for (let i = 1; i < nodes2.length; i++) {
            out.push(nodes2[i]);
        }

        return out;
    }

    /**
     * This returns the line number range in which changes (insertions,
deletions) are encountered.
     * As in extractRangeByLineNumbers(), "to" refers to the line breaking element at the end, i.e. the start of the following line.
     *
     * @param {string} diffHtml
     * @returns {LineRange} the affected range, or null if the HTML contains no change markers
     */
    public detectAffectedLineRange(diffHtml: string): LineRange {
        const cacheKey = this.lineNumberingService.djb2hash(diffHtml);
        const cachedRange = this.diffCache.get(cacheKey);
        if (cachedRange) {
            return cachedRange;
        }

        const fragment = this.htmlToFragment(diffHtml);
        this.insertInternalLineMarkers(fragment);

        const changedNodes = fragment.querySelectorAll('ins, del, .insert, .delete');
        const firstChange = changedNodes.item(0);
        const lastChange = changedNodes.item(changedNodes.length - 1);
        if (!firstChange || !lastChange) {
            // There are no changes
            return null;
        }

        // Search backwards from the first change, starting at the innermost level of its context trace.
        let lineBefore = null;
        const pathToFirst = this.getNodeContextTrace(firstChange);
        let depth = pathToFirst.length - 1;
        while (depth >= 0 && lineBefore === null) {
            const preceding = this.getAllPrevSiblingsReversed(pathToFirst[depth]);
            let idx = 0;
            while (idx < preceding.length && lineBefore === null) {
                lineBefore = this.getLastLineNumberNode(preceding[idx]);
                idx++;
            }
            depth--;
        }

        // Search forwards from the last change in the same manner.
        let lineAfter = null;
        const pathToLast = this.getNodeContextTrace(lastChange);
        depth = pathToLast.length - 1;
        while (depth >= 0 && lineAfter === null) {
            const following = this.getAllNextSiblings(pathToLast[depth]);
            let idx = 0;
            while (idx < following.length && lineAfter === null) {
                lineAfter = this.getFirstLineNumberNode(following[idx]);
                idx++;
            }
            depth--;
        }

        const range = {
            from: parseInt(lineBefore.getAttribute('data-line-number'), 10),
            to: parseInt(lineAfter.getAttribute('data-line-number'), 10)
        };

        this.diffCache.put(cacheKey, range);
        return range;
    }

    /**
     * Removes .delete-nodes and <del>-Tags (including content)
     * Removes the .insert-classes and the wrapping <ins>-Tags (while maintaining content)
     *
     * @param {string} html
     * @returns {string}
     */
    public diffHtmlToFinalText(html: string): string {
        const fragment = this.htmlToFragment(html);

        // Deleted content is dropped entirely.
        const deletions = fragment.querySelectorAll('.delete, del');
        for (let d = 0; d < deletions.length; d++) {
            const deleted = deletions[d];
            deleted.parentNode.removeChild(deleted);
        }

        // Inserted inline content stays; only the wrapping tag is removed.
        const wrappers = fragment.querySelectorAll('ins');
        for (let w = 0; w < wrappers.length; w++) {
            const wrapper = wrappers[w];
            while (wrapper.firstChild) {
                // insertBefore() detaches the child from the wrapper before re-inserting it.
                wrapper.parentNode.insertBefore(wrapper.firstChild, wrapper);
            }
            wrapper.parentNode.removeChild(wrapper);
        }

        // Inserted block elements stay; only the marker class is removed.
        const markedBlocks = fragment.querySelectorAll('.insert');
        for (let b = 0; b < markedBlocks.length; b++) {
            this.removeCSSClass(markedBlocks[b], 'insert');
        }

        return this.serializeDom(fragment, false);
    }

    /**
     * Given a line numbered string (`oldHtml`), this method removes the text between `fromLine` and `toLine`
     * and replaces it by the string given by `newHTML`.
     * While replacing it, it also merges HTML tags that have been split to create the `newHTML` fragment,
     * indicated by the CSS classes .os-split-before and .os-split-after.
     *
     * This is used for creating the consolidated version of motions.
*
     * @param {string} oldHtml
     * @param {string} newHTML
     * @param {number} fromLine
     * @param {number} toLine
     * @returns {string}
     */
    public replaceLines(oldHtml: string, newHTML: string, fromLine: number, toLine: number): string {
        // The TEMPLATE elements mark the seams at which replaceLinesMergeNodeArrays() stops merging.
        const data = this.extractRangeByLineNumbers(oldHtml, fromLine, toLine),
            previousHtml = data.previousHtml + '<TEMPLATE></TEMPLATE>' + data.previousHtmlEndSnippet,
            previousFragment = this.htmlToFragment(previousHtml),
            followingHtml = data.followingHtmlStartSnippet + '<TEMPLATE></TEMPLATE>' + data.followingHtml,
            followingFragment = this.htmlToFragment(followingHtml),
            newFragment = this.htmlToFragment(newHTML);

        // Keep the trailing space of the replaced text, so the following word is not glued to the new text.
        if (data.html.length > 0 && data.html.substr(-1) === ' ') {
            this.insertDanglingSpace(newFragment);
        }

        let merged = this.replaceLinesMergeNodeArrays(
            Array.prototype.slice.call(previousFragment.childNodes),
            Array.prototype.slice.call(newFragment.childNodes)
        );
        merged = this.replaceLinesMergeNodeArrays(merged, Array.prototype.slice.call(followingFragment.childNodes));

        const mergedFragment = document.createDocumentFragment();
        for (let i = 0; i < merged.length; i++) {
            mergedFragment.appendChild(merged[i]);
        }

        // Remove seam markers that survived the merge.
        const forgottenTemplates = mergedFragment.querySelectorAll('TEMPLATE');
        for (let i = 0; i < forgottenTemplates.length; i++) {
            const el = forgottenTemplates[i];
            el.parentNode.removeChild(el);
        }

        // The split classes are only needed for merging and must not end up in the result.
        const forgottenSplitClasses = mergedFragment.querySelectorAll('.os-split-before, .os-split-after');
        for (let i = 0; i < forgottenSplitClasses.length; i++) {
            this.removeCSSClass(forgottenSplitClasses[i], 'os-split-before');
            this.removeCSSClass(forgottenSplitClasses[i], 'os-split-after');
        }

        return this.serializeDom(mergedFragment, true);
    }

    /**
     * If the inline diff does not work, we fall back to showing the diff on a paragraph base, i.e. deleting the old
     * paragraph (adding the "delete"-class) and adding the new one (adding the "insert" class).
     * If the provided Text is not wrapped in HTML elements but inline text, the returned text is using
     * <ins>/<del>-tags instead of adding CSS-classes to the wrapping element.
* * @param {string} oldText * @param {string} newText * @param {number|null} lineLength * @param {number|null} firstLineNumber * @returns {string} */ private diffParagraphs(oldText: string, newText: string, lineLength: number, firstLineNumber: number): string { let oldTextWithBreaks, newTextWithBreaks, currChild; if (lineLength !== null) { oldTextWithBreaks = this.lineNumberingService.insertLineNumbersNode( oldText, lineLength, null, firstLineNumber ); newText = this.lineNumberingService.insertLineBreaksWithoutNumbers(newText, lineLength); } else { oldTextWithBreaks = document.createElement('div'); oldTextWithBreaks.innerHTML = oldText; } newText = newText.replace(/^\s+/g, '').replace(/\s+$/g, ''); newTextWithBreaks = document.createElement('div'); newTextWithBreaks.innerHTML = newText; for (let i = 0; i < oldTextWithBreaks.childNodes.length; i++) { currChild = oldTextWithBreaks.childNodes[i]; if (currChild.nodeType === TEXT_NODE) { const wrapDel = document.createElement('del'); oldTextWithBreaks.insertBefore(wrapDel, currChild); oldTextWithBreaks.removeChild(currChild); wrapDel.appendChild(currChild); } else { this.addCSSClass(currChild, 'delete'); this.removeColorStyles(currChild); } } for (let i = 0; i < newTextWithBreaks.childNodes.length; i++) { currChild = newTextWithBreaks.childNodes[i]; if (currChild.nodeType === TEXT_NODE) { const wrapIns = document.createElement('ins'); newTextWithBreaks.insertBefore(wrapIns, currChild); newTextWithBreaks.removeChild(currChild); wrapIns.appendChild(currChild); } else { this.addCSSClass(currChild, 'insert'); this.removeColorStyles(currChild); } } const mergedFragment = document.createDocumentFragment(); let el; while (oldTextWithBreaks.firstChild) { el = oldTextWithBreaks.firstChild; oldTextWithBreaks.removeChild(el); mergedFragment.appendChild(el); } while (newTextWithBreaks.firstChild) { el = newTextWithBreaks.firstChild; newTextWithBreaks.removeChild(el); mergedFragment.appendChild(el); } return 
this.serializeDom(mergedFragment, false); } /** * This function calculates the diff between two strings and tries to fix problems with the resulting HTML. * If lineLength and firstLineNumber is given, line numbers will be returned es well * * @param {string} htmlOld * @param {string} htmlNew * @param {number} lineLength - optional * @param {number} firstLineNumber - optional * @returns {string} */ public diff(htmlOld: string, htmlNew: string, lineLength: number = null, firstLineNumber: number = null): string { const cacheKey = lineLength + ' ' + firstLineNumber + ' ' + this.lineNumberingService.djb2hash(htmlOld) + this.lineNumberingService.djb2hash(htmlNew), cached = this.diffCache.get(cacheKey); if (cached) { return cached; } // This fixes a really strange artefact with the diff that occures under the following conditions: // - The first tag of the two texts is identical, e.g.
// - A change happens in the next tag, e.g. inserted text // - The first tag occures a second time in the text, e.g. another
// In this condition, the first tag is deleted first and inserted afterwards again // Test case: "does not break when an insertion followes a beginning tag occuring twice" // The work around inserts to tags at the beginning and removes them afterwards again, // to make sure this situation does not happen (and uses invisible pseudo-tags in case something goes wrong) const workaroundPrepend = ''; // os-split-after should not be considered for detecting changes in paragraphs, so we strip it here // and add it afterwards. // We only do this for P for now, as for more complex types like UL/LI that tend to be nestend, // information would get lost by this that we will need to recursively merge it again later on. let oldIsSplitAfter = false, newIsSplitAfter = false; htmlOld = htmlOld.replace( /(\s*]+class\s*=\s*["'][^"']*)os-split-after/gi, (match: string, beginning: string): string => { oldIsSplitAfter = true; return beginning; } ); htmlNew = htmlNew.replace( /(\s*]+class\s*=\s*["'][^"']*)os-split-after/gi, (match: string, beginning: string): string => { newIsSplitAfter = true; return beginning; } ); // Performing the actual diff const str = this.diffString(workaroundPrepend + htmlOld, workaroundPrepend + htmlNew); let diffUnnormalized = str .replace(/^\s+/g, '') .replace(/\s+$/g, '') .replace(/ {2,}/g, ' '); diffUnnormalized = this.fixWrongChangeDetection(diffUnnormalized); // Remove ~~tags that only delete line numbers // We need to do this before removing~~ as done in one of the next statements diffUnnormalized = diffUnnormalized.replace( /((
<\/del>)?(]+os-line-number[^>]+?>)(\s|<\/?del>)*<\/span>)<\/del>/gi, (found: string, tag: string, br: string, span: string): string => { return (br !== undefined ? br : '') + span + ' '; } ); // Merging individual insert/delete statements into bigger blocks diffUnnormalized = diffUnnormalized.replace(/<\/ins>/gi, '').replace(/<\/del>/gi, ''); // If only a few characters of a word have changed, don't display this as a replacement of the whole word, // but only of these specific characters diffUnnormalized = diffUnnormalized.replace( /([a-z0-9,_-]* ?)<\/del>([a-z0-9,_-]* ?)<\/ins>/gi, (found: string, oldText: string, newText: string): string => { let foundDiff = false, commonStart = '', commonEnd = '', remainderOld = oldText, remainderNew = newText; while (remainderOld.length > 0 && remainderNew.length > 0 && !foundDiff) { if (remainderOld[0] === remainderNew[0]) { commonStart += remainderOld[0]; remainderOld = remainderOld.substr(1); remainderNew = remainderNew.substr(1); } else { foundDiff = true; } } foundDiff = false; while (remainderOld.length > 0 && remainderNew.length > 0 && !foundDiff) { if (remainderOld[remainderOld.length - 1] === remainderNew[remainderNew.length - 1]) { commonEnd = remainderOld[remainderOld.length - 1] + commonEnd; remainderNew = remainderNew.substr(0, remainderNew.length - 1); remainderOld = remainderOld.substr(0, remainderOld.length - 1); } else { foundDiff = true; } } let out = commonStart; if (remainderOld !== '') { out += '~~' + remainderOld + '~~'; } if (remainderNew !== '') { out += '' + remainderNew + ''; } out += commonEnd; return out; } ); // Replace spaces in line numbers by diffUnnormalized = diffUnnormalized.replace( /]+os-line-number[^>]+?>\s*<\/span>/gi, (found: string): string => { return found.toLowerCase().replace(/> <\/span/gi, '> elements and "insert"-class-based block elements. // ...
...
... => ...
...
... diffUnnormalized = diffUnnormalized.replace( /<(ins|del)>([\s\S]*?)<\/\1>/gi, (whole: string, insDel: string): string => { const modificationClass = insDel.toLowerCase() === 'ins' ? 'insert' : 'delete'; return whole.replace( /(<(p|div|blockquote|li)[^>]*>)([\s\S]*?)(<\/\2>)/gi, (whole2: string, opening: string, blockTag: string, content: string, closing: string): string => { const modifiedTag = this.addClassToHtmlTag(opening, modificationClass); return '' + modifiedTag + content + closing + '<' + insDel + '>'; } ); } ); // Cleanup leftovers from the operation above, when -tags ore -tags are left // around block tags. It should be safe to remove them and just leave the whitespaces. diffUnnormalized = diffUnnormalized.replace( /<(ins|del)>(\s*)<\/\1>/gi, (whole: string, insDel: string, space: string): string => space ); //
Added text
-> Added text
diffUnnormalized = diffUnnormalized.replace( /<\/(p|div|blockquote|li)><\/del>([\s\S]*?)<\/\1>(\s*)<\/ins>/gi, (whole: string, blockTag: string, content: string, space: string): string => { return '' + content + '' + space; } ); //
->
diffUnnormalized = diffUnnormalized.replace( /(<\/(p|div|blockquote|li)>)(\s*)<\/(ins|del)>/gi, (whole: string, ending: string, blockTag: string, space: string, insdel: string): string => { return '' + ending + space; } ); if (diffUnnormalized.substr(0, workaroundPrepend.length) === workaroundPrepend) { diffUnnormalized = diffUnnormalized.substring(workaroundPrepend.length); } let diff: string; if (this.diffDetectBrokenDiffHtml(diffUnnormalized)) { diff = this.diffParagraphs(htmlOld, htmlNew, lineLength, firstLineNumber); } else { let node: Element = document.createElement('div'); node.innerHTML = diffUnnormalized; diff = node.innerHTML; if (lineLength !== null && firstLineNumber !== null) { node = this.lineNumberingService.insertLineNumbersNode(diff, lineLength, null, firstLineNumber); diff = node.innerHTML; } } if (oldIsSplitAfter || newIsSplitAfter) { diff = this.addClassToLastNode(diff, 'os-split-after'); } this.diffCache.put(cacheKey, diff); return diff; } /** * Applies all given changes to the motion and returns the (line-numbered) text * * @param {ViewMotion} motion * @param {ViewUnifiedChange[]} changes * @param {number} lineLength * @param {number} highlightLine */ public getTextWithChanges( motion: ViewMotion, changes: ViewUnifiedChange[], lineLength: number, highlightLine: number ): string { let html = motion.text; // Changes need to be applied from the bottom up, to prevent conflicts with changing line numbers. 
changes.sort((change1: ViewUnifiedChange, change2: ViewUnifiedChange) => { if (change1.getLineFrom() < change2.getLineFrom()) { return 1; } else if (change1.getLineFrom() > change2.getLineFrom()) { return -1; } else { return 0; } }); changes.forEach((change: ViewUnifiedChange) => { html = this.lineNumberingService.insertLineNumbers(html, lineLength, null, null, 1); html = this.replaceLines(html, change.getChangeNewText(), change.getLineFrom(), change.getLineTo()); }); html = this.lineNumberingService.insertLineNumbers(html, lineLength, highlightLine, null, 1); return html; } /** * This is used to extract affected lines of a paragraph with the possibility to show the context (lines before * and after) the changed lines and displaying the line numbers. * * @param {number} paragraphNo The paragraph number * @param {string} origText The original text - needs to be line-numbered * @param {string} newText The changed text * @param {number} lineLength the line length * @return {DiffLinesInParagraph|null} */ public getAmendmentParagraphsLinesByMode( paragraphNo: number, origText: string, newText: string, lineLength: number ): DiffLinesInParagraph { const paragraph_line_range = this.lineNumberingService.getLineNumberRange(origText), diff = this.diff(origText, newText), affected_lines = this.detectAffectedLineRange(diff); if (affected_lines === null) { return null; } let textPre = ''; let textPost = ''; if (affected_lines.from > paragraph_line_range.from) { textPre = this.formatDiffWithLineNumbers( this.extractRangeByLineNumbers(diff, paragraph_line_range.from, affected_lines.from), lineLength, paragraph_line_range.from ); } if (paragraph_line_range.to > affected_lines.to) { textPost = this.formatDiffWithLineNumbers( this.extractRangeByLineNumbers(diff, affected_lines.to, paragraph_line_range.to), lineLength, affected_lines.to ); } const text = this.formatDiffWithLineNumbers( this.extractRangeByLineNumbers(diff, affected_lines.from, affected_lines.to), lineLength, 
affected_lines.from ); return { paragraphNo: paragraphNo, paragraphLineFrom: paragraph_line_range.from, paragraphLineTo: paragraph_line_range.to, diffLineFrom: affected_lines.from, diffLineTo: affected_lines.to, textPre: textPre, text: text, textPost: textPost } as DiffLinesInParagraph; } }