import { Injectable } from '@angular/core'; import { LinenumberingService } from './linenumbering.service'; import { ViewMotion } from '../models/view-motion'; import { ViewUnifiedChange } from '../models/view-unified-change'; const ELEMENT_NODE = 1; const TEXT_NODE = 3; const DOCUMENT_FRAGMENT_NODE = 11; /** * Indicates the type of a modification when comparing ("diff"ing) two versions of a text. * - TYPE_INSERTION indicates an insertion. An insertion is when the new version of a text contains a certain string * that did not exist in the original version of the text. * - TYPE_DELETION indicates a deletion. A deletion is when the new version of a text does not contain a certain * string contained in the original version of the text anymore. * - TYPE_REPLACEMENT indicates both of the above: the new version of the text contains text not present in the original * version, but also removes some parts of that text. * * This enumeration is used when _automatically_ detecting the change type of an amendment / change recommendation. */ export enum ModificationType { TYPE_REPLACEMENT, TYPE_INSERTION, TYPE_DELETION } /** * This data structure is used when determining the most specific common ancestor of two HTML nodes (`node1` and `node2`) * within the same Document Fragment. */ interface CommonAncestorData { /** * The most specific common ancestor node. */ commonAncestor: Node; /** * The nodes in between `commonAncestor` and the `node1` in the DOM hierarchy. Empty, if node1 is a direct descendant. */ trace1: Node[]; /** * The nodes in between `commonAncestor` and the `node2` in the DOM hierarchy. Empty, if node2 is a direct descendant. */ trace2: Node[]; /** * Starting from the root node, this indicates the depth level of the `commonAncestor`. */ index: number; } /** * An object produced by `extractRangeByLineNumbers``. It contains both the extracted lines as well as * information about the context in which these lines occur. 
* This additional information is meant to render the snippet correctly without producing broken HTML */ interface ExtractedContent { /** * The HTML between the two line numbers. Line numbers and automatically set line breaks are stripped. * All HTML tags are converted to uppercase * (e.g. Line 2
A line
Another line
Lorem ipsum dolor sit amet, sed diam voluptua. At
'; * const beforeLineNumbered = this.lineNumbering.insertLineNumbers(before, 80) * const after = 'Lorem ipsum dolor sit amet, sed diam voluptua. At2
'; * const diff = this.diffService.diff(before, after); * ```ts * * Given a (line numbered) diff string, detect the line number range with changes: * * ```ts * this.diffService.detectAffectedLineRange(diff); * ``` * * Given a diff'ed string, apply all changes to receive the new version of the text: * * ```ts * const diffedHtml = 'Test Test 2 Another test Test 3
Test 4
'; * const newVersion = this.diffService.diffHtmlToFinalText(diffedHtml); * ``` * * Replace a line number range in a text by new text: * * ```ts * const lineLength = 80; * const lineNumberedText = this.lineNumbering.insertLineNumbers('A line
Another line
Replaced paragraph
', 1, 2); * ``` */ @Injectable({ providedIn: 'root' }) export class DiffService { // @TODO Decide on a more sophisticated implementation private diffCache = { _cache: {}, get: (key: string): any => { return this.diffCache._cache[key] === undefined ? null : this.diffCache._cache[key]; }, put: (key: string, val: any): void => { this.diffCache._cache[key] = val; } }; /** * Creates the DiffService. * * @param {LinenumberingService} lineNumberingService */ public constructor(private readonly lineNumberingService: LinenumberingService) {} /** * Searches for the line breaking node within the given Document specified by the given lineNumber. * This is performed by using a querySelector. * * @param {DocumentFragment} fragment * @param {number} lineNumber * @returns {Element} */ public getLineNumberNode(fragment: DocumentFragment, lineNumber: number): Element { return fragment.querySelector('os-linebreak.os-line-number.line-number-' + lineNumber); } /** * This returns the first line breaking node within the given node. * If none is found, `null` is returned. * * @param {Node} node * @returns {Element} */ private getFirstLineNumberNode(node: Node): Element { if (node.nodeType === TEXT_NODE) { return null; } const element =Line 1
Line 2
Line 3
Line 1
* - extracting line 2 to 3 results inLine 2
* - extracting line 3 to null/4 results inLine 3
* * @param {string} htmlIn * @param {number} fromLine * @param {number} toLine * @returns {ExtractedContent} */ public extractRangeByLineNumbers(htmlIn: string, fromLine: number, toLine: number): ExtractedContent { if (typeof htmlIn !== 'string') { throw new Error('Invalid call - extractRangeByLineNumbers expects a string as first argument'); } const cacheKey = fromLine + '-' + toLine + '-' + this.lineNumberingService.djb2hash(htmlIn), cached = this.diffCache.get(cacheKey); if (cached) { return cached; } const fragment = this.htmlToFragment(htmlIn); this.insertInternalLineMarkers(fragment); if (toLine === null) { const internalLineMarkers = fragment.querySelectorAll('OS-LINEBREAK'), lastMarker =// - A change happens in the next tag, e.g. inserted text // - The first tag occures a second time in the text, e.g. another
// In this condition, the first tag is deleted first and inserted afterwards again
// Test case: "does not break when an insertion followes a beginning tag occuring twice"
// The workaround inserts two tags at the beginning and removes them afterwards again,
// to make sure this situation does not happen (and uses invisible pseudo-tags in case something goes wrong)
const workaroundPrepend = ' ]+class\s*=\s*["'][^"']*)os-split-after/gi,
(match: string, beginning: string): string => {
oldIsSplitAfter = true;
return beginning;
}
);
htmlNew = htmlNew.replace(
/(\s* ]+class\s*=\s*["'][^"']*)os-split-after/gi,
(match: string, beginning: string): string => {
newIsSplitAfter = true;
return beginning;
}
);
// Performing the actual diff
const str = this.diffString(workaroundPrepend + htmlOld, workaroundPrepend + htmlNew);
let diffUnnormalized = str
.replace(/^\s+/g, '')
.replace(/\s+$/g, '')
.replace(/ {2,}/g, ' ');
diffUnnormalized = this.fixWrongChangeDetection(diffUnnormalized);
// Remove <del> tags that only delete line numbers
// We need to do this before merging adjacent </del><del> pairs, as done in one of the next statements
diffUnnormalized = diffUnnormalized.replace(
/((
<\/del>)?(]+os-line-number[^>]+?>)(\s|<\/?del>)*<\/span>)<\/del>/gi,
(found: string, tag: string, br: string, span: string): string => {
return (br !== undefined ? br : '') + span + ' ';
}
);
// Merging individual insert/delete statements into bigger blocks
diffUnnormalized = diffUnnormalized.replace(/<\/ins>/gi, '').replace(/<\/del>/gi, '');
// If only a few characters of a word have changed, don't display this as a replacement of the whole word,
// but only of these specific characters
diffUnnormalized = diffUnnormalized.replace(
/([a-z0-9,_-]* ?)<\/del>([a-z0-9,_-]* ?)<\/ins>/gi,
(found: string, oldText: string, newText: string): string => {
let foundDiff = false,
commonStart = '',
commonEnd = '',
remainderOld = oldText,
remainderNew = newText;
while (remainderOld.length > 0 && remainderNew.length > 0 && !foundDiff) {
if (remainderOld[0] === remainderNew[0]) {
commonStart += remainderOld[0];
remainderOld = remainderOld.substr(1);
remainderNew = remainderNew.substr(1);
} else {
foundDiff = true;
}
}
foundDiff = false;
while (remainderOld.length > 0 && remainderNew.length > 0 && !foundDiff) {
if (remainderOld[remainderOld.length - 1] === remainderNew[remainderNew.length - 1]) {
commonEnd = remainderOld[remainderOld.length - 1] + commonEnd;
remainderNew = remainderNew.substr(0, remainderNew.length - 1);
remainderOld = remainderOld.substr(0, remainderOld.length - 1);
} else {
foundDiff = true;
}
}
let out = commonStart;
if (remainderOld !== '') {
out += '' + remainderOld + '';
}
if (remainderNew !== '') {
out += '' + remainderNew + '';
}
out += commonEnd;
return out;
}
);
// Replace spaces in line numbers by non-breaking spaces (&nbsp;)
diffUnnormalized = diffUnnormalized.replace(
/]+os-line-number[^>]+?>\s*<\/span>/gi,
(found: string): string => {
return found.toLowerCase().replace(/> <\/span/gi, '> elements and "insert"-class-based block elements.
// ...