diff --git a/bower.json b/bower.json new file mode 100644 index 0000000..dd4da06 --- /dev/null +++ b/bower.json @@ -0,0 +1,35 @@ +{ + "name": "htmldiff", + "version": "0.0.4", + "description": "HTML Diffing in JavaScript (CoffeeScript)", + "main": "dist/htmldiff.js", + "license": "MIT (Expat)", + "authors": [ + "The Network Inc.", + "https://github.com/brendanjerwin", + "https://github.com/tnwinc", + "https://github.com/myobie/htmldiff" + ], + "repository": { + "type": "git", + "url": "git://github.com/benjamine/jsondiffpatch.git" + }, + "keywords": [ + "html", + "diff", + "split" + ], + "ignore": [ + "gulpfile.js" + ], + "homepage": "https://github.com/enkodellc/htmldiff", + "_release": "0.0.4", + "_resolution": { + "type": "version", + "tag": "v0.0.4", + "commit": "26d252d94bdd7dad22df8ac273990266aa677cd6" + }, + "_source": "https://github.com/enkodellc/htmldiff.js.git", + "_target": "0.0.4", + "_originalSource": "htmldiff" +} diff --git a/dist/htmldiff.js b/dist/htmldiff.js new file mode 100644 index 0000000..ac30265 --- /dev/null +++ b/dist/htmldiff.js @@ -0,0 +1,476 @@ +/* + * htmldiff.js is a library that compares HTML content. It creates a diff between two + * HTML documents by combining the two documents and wrapping the differences with + * and tags. Here is a high-level overview of how the diff works. + * + * 1. Tokenize the before and after HTML with html_to_tokens. + * 2. Generate a list of operations that convert the before list of tokens to the after + * list of tokens with calculate_operations, which does the following: + * a. Find all the matching blocks of tokens between the before and after lists of + * tokens with find_matching_blocks. This is done by finding the single longest + * matching block with find_match, then recursively finding the next longest + * matching block that precede and follow the longest matching block with + * recursively_find_matching_blocks. + * b. Determine insertions, deletions, and replacements from the matching blocks. 
+ * This is done in calculate_operations. + * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where + * appropriate with render_operations. + * + * Example usage: + * + * htmldiff = require 'htmldiff.js' + * + * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>' + * == '<p>this is some <ins>more </ins>text</p>' + * + * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class' + * == '<p>this is some <ins class="diff-class">more </ins>text</p>
/* ' <- closes the last usage example above; the marker ending this line also terminates the header comment opened earlier in the file. */

var Match, calculate_operations, consecutive_where, create_index, diff, diff_dual_pane, ends_in_end_script_tag, find_match, find_matching_blocks, html_to_tokens, inclusive_slice, is_end_of_tag, is_script_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, render_operations_dual_pane, return_dual_pane, wrap;

/**
 * Slice `tokens` from index `first` through index `last`, both inclusive.
 *
 * When `last + 1` is falsy (0 or NaN, e.g. `last` is undefined) the slice runs
 * to the end of the array. That quirk is kept on purpose so operations passed
 * in by external callers are rendered exactly as before.
 *
 * @param {Array} tokens
 * @param {number} first
 * @param {number} last
 * @returns {Array} a new array
 */
inclusive_slice = function(tokens, first, last) {
  return tokens.slice(first, +last + 1 || 9e9);
};

/** @returns {boolean} true when `char` is the character that closes a tag. */
is_end_of_tag = function(char) {
  return char === '>';
};

/** @returns {boolean} true when `char` is the character that opens a tag. */
is_start_of_tag = function(char) {
  return char === '<';
};

/** @returns {boolean} true when `char` consists only of whitespace. */
is_whitespace = function(char) {
  return /^\s+$/.test(char);
};

/** @returns {boolean} true when `token` is a single tag, optionally padded with whitespace. */
is_tag = function(token) {
  return /^\s*<[^>]+>\s*$/.test(token);
};

/** @returns {boolean} the negation of is_tag. */
isnt_tag = function(token) {
  return !is_tag(token);
};

/**
 * True when the partially read tag is the start of a script element.
 * The tokenizer calls this with the characters accumulated so far, so the
 * comparison is against the text read right before the tag name ends.
 */
is_script_tag = function(token) {
  return token === '<script';
};

/** @returns {boolean} true when `current_word` ends with a closing script tag. */
ends_in_end_script_tag = function(current_word) {
  var closing_tag = '</script>';
  return current_word.slice(-closing_tag.length) === closing_tag;
};

/**
 * A run of `length` identical tokens found at `start_in_before` in the before
 * list and at `start_in_after` in the after list. The `end_*` fields are
 * inclusive indexes (start + length - 1).
 */
Match = (function() {
  function Match(start_in_before, start_in_after, length) {
    this.start_in_before = start_in_before;
    this.start_in_after = start_in_after;
    this.length = length;
    this.end_in_before = (this.start_in_before + this.length) - 1;
    this.end_in_after = (this.start_in_after + this.length) - 1;
  }

  return Match;

})();

/** Bundle the two rendered panes into the object returned by diff_dual_pane. */
return_dual_pane = function(before, after) {
  return {
    before: before,
    after: after
  };
};

/**
 * Split an HTML string into tokens: whole tags, whole script elements, runs of
 * word characters, runs of whitespace, and single other characters.
 *
 * @param {string} html
 * @returns {string[]} the tokens in document order
 * @throws {Error} if the internal state machine reaches an unknown mode
 */
html_to_tokens = function(html) {
  var char, current_word, i, len, mode, words;
  mode = 'char';
  current_word = '';
  words = [];
  for (i = 0, len = html.length; i < len; i++) {
    char = html[i];
    switch (mode) {
      case 'script':
        // Inside a script element a '>' only ends the token when it completes
        // the closing script tag; any other '>' is part of the script text.
        if (is_end_of_tag(char)) {
          current_word += '>';
          if (ends_in_end_script_tag(current_word)) {
            words.push(current_word);
            current_word = '';
            mode = 'char';
          }
        } else {
          current_word += char;
        }
        break;
      case 'tag':
        if (is_script_tag(current_word)) {
          mode = 'script';
          current_word += char;
        } else if (is_end_of_tag(char)) {
          current_word += '>';
          words.push(current_word);
          current_word = '';
          mode = 'char';
        } else {
          current_word += char;
        }
        break;
      case 'char':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (/\s/.test(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'whitespace';
        } else if (/[\w\#@]+/i.test(char)) {
          current_word += char;
        } else {
          // Any other character ends the current word and starts a new one.
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
        }
        break;
      case 'whitespace':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (is_whitespace(char)) {
          current_word += char;
        } else {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'char';
        }
        break;
      default:
        throw new Error('Unknown mode ' + mode);
    }
  }
  if (current_word) {
    words.push(current_word);
  }
  return words;
};

/**
 * Find the longest run of tokens shared by before_tokens[start_in_before, end_in_before)
 * and after_tokens[start_in_after, end_in_after). Both end indexes are exclusive.
 * When several runs share the maximum length, the first one found while
 * scanning the before range in ascending order is returned.
 *
 * @param {string[]} before_tokens
 * @param {string[]} after_tokens - unused here; kept for the existing signature
 * @param {Object} index_of_before_locations_in_after_tokens - as built by create_index
 * @returns {Match|undefined} undefined when the ranges share no token (or are empty)
 */
find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
  var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, j, locations_in_after, match, match_length_at, new_match_length, new_match_length_at;
  best_match_in_before = start_in_before;
  best_match_in_after = start_in_after;
  best_match_length = 0;
  // match_length_at[k] is the length of the common run that ends at after index k
  // and at the previously visited before index.
  match_length_at = {};
  for (index_in_before = start_in_before; index_in_before < end_in_before; index_in_before++) {
    new_match_length_at = {};
    locations_in_after = index_of_before_locations_in_after_tokens[before_tokens[index_in_before]];
    for (j = 0; j < locations_in_after.length; j++) {
      index_in_after = locations_in_after[j];
      if (index_in_after < start_in_after) {
        continue;
      }
      if (index_in_after >= end_in_after) {
        // Locations are stored in ascending order, so nothing further is in range.
        break;
      }
      new_match_length = (match_length_at[index_in_after - 1] || 0) + 1;
      new_match_length_at[index_in_after] = new_match_length;
      if (new_match_length > best_match_length) {
        best_match_in_before = index_in_before - new_match_length + 1;
        best_match_in_after = index_in_after - new_match_length + 1;
        best_match_length = new_match_length;
      }
    }
    match_length_at = new_match_length_at;
  }
  if (best_match_length !== 0) {
    match = new Match(best_match_in_before, best_match_in_after, best_match_length);
  }
  return match;
};

/**
 * Collect matching blocks for the given ranges into `matching_blocks`, in
 * document order: the blocks before the longest match, then the longest match,
 * then the blocks after it.
 *
 * @returns {Match[]} the same `matching_blocks` array that was passed in
 */
recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
  var match;
  match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
  if (match == null) {
    return matching_blocks;
  }
  if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) {
    recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks);
  }
  matching_blocks.push(match);
  if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) {
    recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks);
  }
  return matching_blocks;
};

/**
 * Build a lookup from every token in `p.find_these` to the ascending list of
 * positions at which that token occurs in `p.in_these` (an empty list when it
 * does not occur).
 *
 * @param {{find_these: string[], in_these: string[]}} p
 * @returns {Object}
 * @throws {Error} when either key is missing
 */
create_index = function(p) {
  var i, idx, index, positions, token;
  if (p.find_these == null) {
    throw new Error('params must have find_these key');
  }
  if (p.in_these == null) {
    throw new Error('params must have in_these key');
  }
  index = {};
  for (i = 0; i < p.find_these.length; i++) {
    token = p.find_these[i];
    // A repeated token would yield the same list again; skip the rescan.
    if (Object.prototype.hasOwnProperty.call(index, token)) {
      continue;
    }
    positions = [];
    idx = p.in_these.indexOf(token);
    while (idx !== -1) {
      positions.push(idx);
      idx = p.in_these.indexOf(token, idx + 1);
    }
    index[token] = positions;
  }
  return index;
};

/**
 * Find all matching blocks between the two token lists.
 *
 * @returns {Match[]} matches in document order
 */
find_matching_blocks = function(before_tokens, after_tokens) {
  var index_of_before_locations_in_after_tokens;
  index_of_before_locations_in_after_tokens = create_index({
    find_these: before_tokens,
    in_these: after_tokens
  });
  return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, []);
};

/**
 * Turn the matching blocks into a list of operations. Each operation has an
 * `action` ('equal', 'insert', 'delete' or 'replace') and inclusive
 * start/end indexes into the before and after token lists.
 *
 * @param {string[]} before_tokens
 * @param {string[]} after_tokens
 * @returns {Object[]} the operations, in document order
 * @throws {Error} when either token list is null or undefined
 */
calculate_operations = function(before_tokens, after_tokens) {
  var action, action_map, i, is_single_whitespace, last_op, match, matches, op, operations, position_in_after, position_in_before, post_processed, starts_here_in_after, starts_here_in_before;
  if (before_tokens == null) {
    throw new Error('before_tokens?');
  }
  if (after_tokens == null) {
    throw new Error('after_tokens?');
  }
  position_in_before = 0;
  position_in_after = 0;
  operations = [];
  // Keyed by "<match starts at current before position>,<... after position>".
  action_map = {
    'false,false': 'replace',
    'true,false': 'insert',
    'false,true': 'delete',
    'true,true': 'none'
  };
  matches = find_matching_blocks(before_tokens, after_tokens);
  // Zero-length sentinel so tokens after the last real match are handled too.
  matches.push(new Match(before_tokens.length, after_tokens.length, 0));
  for (i = 0; i < matches.length; i++) {
    match = matches[i];
    starts_here_in_before = position_in_before === match.start_in_before;
    starts_here_in_after = position_in_after === match.start_in_after;
    action = action_map[starts_here_in_before + ',' + starts_here_in_after];
    if (action !== 'none') {
      operations.push({
        action: action,
        start_in_before: position_in_before,
        end_in_before: (action !== 'insert' ? match.start_in_before - 1 : void 0),
        start_in_after: position_in_after,
        end_in_after: (action !== 'delete' ? match.start_in_after - 1 : void 0)
      });
    }
    if (match.length !== 0) {
      operations.push({
        action: 'equal',
        start_in_before: match.start_in_before,
        end_in_before: match.end_in_before,
        start_in_after: match.start_in_after,
        end_in_after: match.end_in_after
      });
    }
    position_in_before = match.end_in_before + 1;
    position_in_after = match.end_in_after + 1;
  }

  is_single_whitespace = function(candidate) {
    if (candidate.action !== 'equal') {
      return false;
    }
    if (candidate.end_in_before - candidate.start_in_before !== 0) {
      return false;
    }
    return /^\s$/.test(before_tokens[candidate.start_in_before]);
  };

  // Fold a replace that follows another replace, or a lone whitespace character
  // that follows a replace, into the preceding replace by extending its end
  // indexes.
  post_processed = [];
  last_op = {
    action: 'none'
  };
  for (i = 0; i < operations.length; i++) {
    op = operations[i];
    if ((is_single_whitespace(op) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) {
      last_op.end_in_before = op.end_in_before;
      last_op.end_in_after = op.end_in_after;
    } else {
      post_processed.push(op);
      last_op = op;
    }
  }
  return post_processed;
};

/**
 * Return the tokens of `content`, starting at index `start`, up to and
 * including the last one for which `predicate` returned exactly `true` before
 * it first returned exactly `false`.
 *
 * @returns {Array} a new array, empty when nothing matched
 */
consecutive_where = function(start, content, predicate) {
  var answer, index, last_matching_index, remaining;
  remaining = content.slice(start);
  last_matching_index = void 0;
  for (index = 0; index < remaining.length; index++) {
    answer = predicate(remaining[index]);
    if (answer === true) {
      last_matching_index = index;
    }
    if (answer === false) {
      break;
    }
  }
  if (last_matching_index != null) {
    return remaining.slice(0, last_matching_index + 1);
  }
  return [];
};

/**
 * Wrap every run of non-tag tokens in `content` in an opening and closing
 * `tag` element, and emit tag tokens unwrapped between those runs.
 *
 * @param {string} tag - element name, e.g. 'ins' or 'del'
 * @param {string[]} content - tokens to render
 * @returns {string}
 */
wrap = function(tag, content) {
  var length, non_tags, position, rendering, tags;
  rendering = '';
  position = 0;
  length = content.length;
  while (position < length) {
    non_tags = consecutive_where(position, content, isnt_tag);
    position += non_tags.length;
    if (non_tags.length !== 0) {
      rendering += '<' + tag + '>' + non_tags.join('') + '</' + tag + '>';
    }
    if (position >= length) {
      break;
    }
    tags = consecutive_where(position, content, is_tag);
    position += tags.length;
    rendering += tags.join('');
  }
  return rendering;
};

/**
 * Renderers keyed by operation action. 'equal', 'insert' and 'delete' return a
 * string; 'replace' returns a two-element array [deleted, inserted].
 */
op_map = {
  equal: function(op, before_tokens, after_tokens) {
    return inclusive_slice(before_tokens, op.start_in_before, op.end_in_before).join('');
  },
  insert: function(op, before_tokens, after_tokens) {
    return wrap('ins', inclusive_slice(after_tokens, op.start_in_after, op.end_in_after));
  },
  'delete': function(op, before_tokens, after_tokens) {
    return wrap('del', inclusive_slice(before_tokens, op.start_in_before, op.end_in_before));
  }
};

op_map.replace = function(op, before_tokens, after_tokens) {
  return [op_map['delete'](op, before_tokens, after_tokens), op_map.insert(op, before_tokens, after_tokens)];
};

/**
 * Render the operations as one HTML string; for a replace the deleted text is
 * emitted before the inserted text.
 *
 * @returns {string}
 */
render_operations = function(before_tokens, after_tokens, operations) {
  var i, op, rendering, result;
  rendering = '';
  for (i = 0; i < operations.length; i++) {
    op = operations[i];
    result = op_map[op.action](op, before_tokens, after_tokens);
    if (op.action === 'replace') {
      rendering += result[0] + result[1];
    } else {
      rendering += result;
    }
  }
  return rendering;
};

/**
 * Render the operations as two HTML strings: deletions go to the `before`
 * pane, insertions to the `after` pane, equal text to both.
 *
 * @returns {{before: string, after: string}}
 */
render_operations_dual_pane = function(before_tokens, after_tokens, operations) {
  var after_render, before_render, i, next_block, op;
  before_render = '';
  after_render = '';
  for (i = 0; i < operations.length; i++) {
    op = operations[i];
    next_block = op_map[op.action](op, before_tokens, after_tokens);
    switch (op.action) {
      case 'equal':
        before_render += next_block;
        after_render += next_block;
        break;
      case 'insert':
        after_render += next_block;
        break;
      case 'delete':
        before_render += next_block;
        break;
      case 'replace':
        before_render += next_block[0];
        after_render += next_block[1];
    }
  }
  return return_dual_pane(before_render, after_render);
};

/**
 * Diff two HTML strings into separate before/after panes.
 *
 * @param {string} before
 * @param {string} after
 * @returns {{before: string, after: string}} the inputs unchanged when they are identical
 */
diff_dual_pane = function(before, after) {
  var after_tokens, before_tokens, ops;
  if (before === after) {
    return return_dual_pane(before, after);
  }
  before_tokens = html_to_tokens(before);
  after_tokens = html_to_tokens(after);
  ops = calculate_operations(before_tokens, after_tokens);
  return render_operations_dual_pane(before_tokens, after_tokens, ops);
};

/**
 * Diff two HTML strings into a single HTML string in which removed text is
 * wrapped in del elements and added text in ins elements.
 *
 * @param {string} before
 * @param {string} after
 * @returns {string} `before` unchanged when the inputs are identical
 */
diff = function(before, after) {
  var after_tokens, before_tokens, ops;
  if (before === after) {
    return before;
  }
  before_tokens = html_to_tokens(before);
  after_tokens = html_to_tokens(after);
  ops = calculate_operations(before_tokens, after_tokens);
  return render_operations(before_tokens, after_tokens, ops);
};

diff.html_to_tokens = html_to_tokens;

diff.find_matching_blocks = find_matching_blocks;

find_matching_blocks.find_match = find_match;

find_matching_blocks.create_index = create_index;

diff.calculate_operations = calculate_operations;

diff.render_operations = render_operations;

diff.render_operations_dual_pane = render_operations_dual_pane;

diff.diff_dual_pane = diff_dual_pane;

// Export: a `define` function if present, then CommonJS `module`, then a global.
// Top-level `this` is undefined in ES modules and other strict wrappers, so the
// global fallback must not assume it is an object.
if (typeof define === 'function') {
  define([], function() {
    return diff;
  });
} else if (typeof module !== 'undefined' && module !== null) {
  module.exports = diff;
} else if (typeof this !== 'undefined' && this !== null) {
  this.htmldiff = diff;
} else if (typeof globalThis !== 'undefined') {
  globalThis.htmldiff = diff;
}