diff --git a/.gitignore b/.gitignore index 51f142db..53fdf6b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ build /browser.js emsdk-portable package-lock.json + +benchmark/*.csv diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 95106bb3..1fca4037 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -1,56 +1,53 @@ -const http = require('http') -const fs = require('fs') -const unzip = require('unzip') const { TextBuffer } = require('..') -const unzipper = unzip.Parse() - -const getText = () => { - return new Promise(resolve => { - console.log('fetching text file...') - const req = http.get({ - hostname: 'www.acleddata.com', - port: 80, - // 51 MB text file - path: '/wp-content/uploads/2017/01/ACLED-Version-7-All-Africa-1997-2016_csv_dyadic-file.zip', - agent: false - }, res => { - res - .pipe(unzipper) - .on('entry', entry => { - let data = ''; - entry.on('data', chunk => data += chunk); - entry.on('end', () => { - resolve(data) - }); - }) - }) - - req.end() - }) +const fs = require('fs') +const {promisify} = require('util') +const readFile = promisify(fs.readFile) +const path = require('path') +const download = require('download') +const {performance} = require("perf_hooks") + +async function getText() { + const filePath = path.join(__dirname, '1000000 Sales Records.csv') + if (!fs.existsSync(filePath)) { + // 122MB file + await download( + 'http://eforexcel.com/wp/wp-content/uploads/2017/07/1000000%20Sales%20Records.zip', + __dirname, + {extract: true} + ) + } + return await readFile(filePath) } -const timer = size => `Time to find "cat" in ${size} file` - -getText().then(txt => { +getText().then(async (txt) => { const buffer = new TextBuffer() - console.log('running findWordsWithSubsequence tests...') + console.log('\n running large-text-buffer tests... \n') - const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000]] + const sizes = [ ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] - const test = size => { - const _timer = timer(size[0]) - buffer.setText(txt.slice(0, size[1])) - console.time(_timer) - return buffer.findWordsWithSubsequence('cat', '', 100).then(sugs => { - console.timeEnd(_timer) - }) + const test = async (word, size) => { + const ti2 = performance.now() + await buffer.findWordsWithSubsequence(word, '', 100) + const tf2 = performance.now() + console.log(`For ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf2-ti2).toFixed(5)} ms`) } + for (const size of sizes) { - return sizes.reduce((promise, size) => { - return promise.then(() => test(size)) - }, Promise.resolve()) + const bufferText = txt.slice(0, size[1]) + + // benchmark buffer.setText + const ti1 = performance.now() + buffer.setText(bufferText) + const tf1 = performance.now() + console.log(`For ${size[0]} file, buffer.setText took ${' '.repeat(51-size[0].length)} ${(tf1-ti1).toFixed(5)} ms`) + + for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { + await test(word, size) + } + console.log('\n') + } }).then(() => { - console.log('finished') + console.log(' large-text-buffer finished \n') }) diff --git a/benchmark/marker-index.benchmark.js b/benchmark/marker-index.benchmark.js index c6e2ac20..5a1058c3 100644 --- a/benchmark/marker-index.benchmark.js +++ b/benchmark/marker-index.benchmark.js @@ -1,6 +1,10 @@ 'use strict'; +console.log(' running marker-index tests... \n') + const Random = require('random-seed') +const {performance} = require("perf_hooks") + const {MarkerIndex} = require('..') const {traverse, traversalDistance, compare} = require('../test/js/helpers/point-helpers') @@ -41,12 +45,13 @@ function runBenchmark () { } function profileOperations (name, operations) { - console.time(name) + const ti1 = performance.now() for (let i = 0, n = operations.length; i < n; i++) { const operation = operations[i] markerIndex[operation[0]].apply(markerIndex, operation[1]) } - console.timeEnd(name) + const tf1 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf1-ti1).toFixed(3)} ms`) } function enqueueSequentialInsert () { @@ -118,3 +123,5 @@ function getSplice () { } runBenchmark() + +console.log(' \n marker-index finished \n') diff --git a/benchmark/text-buffer.benchmark.js b/benchmark/text-buffer.benchmark.js index 36152b1f..21b63147 100644 --- a/benchmark/text-buffer.benchmark.js +++ b/benchmark/text-buffer.benchmark.js @@ -1,4 +1,8 @@ +console.log(' running text-buffer tests... \n') + const assert = require('assert') +const {performance} = require("perf_hooks") + const {TextBuffer} = require('..') const text = 'abc def ghi jkl\n'.repeat(1024 * 1024) @@ -8,14 +12,15 @@ const trialCount = 10 function benchmarkSearch(description, pattern, expectedPosition) { let name = `Search for ${description} - TextBuffer` - console.time(name) + const ti1 = performance.now() for (let i = 0; i < trialCount; i++) { - assert.deepEqual(buffer.searchSync(pattern), expectedPosition) + assert.deepEqual(buffer.findSync(pattern), expectedPosition) } - console.timeEnd(name) + const tf1 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf1-ti1).toFixed(3)} ms`) name = `Search for ${description} - lines array` - console.time(name) + const ti2 = performance.now() const regex = new RegExp(pattern) for (let i = 0; i < trialCount; i++) { for (let row = 0, rowCount = lines.length; row < rowCount; row++) { @@ -32,11 +37,14 @@ function benchmarkSearch(description, pattern, expectedPosition) { } } } - console.timeEnd(name) - console.log() + const tf2 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf2-ti2).toFixed(3)} ms`) } benchmarkSearch('simple non-existent pattern', '\t', null) benchmarkSearch('complex non-existent pattern', '123|456|789', null) benchmarkSearch('simple existing pattern', 'jkl', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) -benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) \ No newline at end of file +benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) + + +console.log('\n text-buffer finished \n') \ No newline at end of file diff --git a/package.json b/package.json index 9ea8aa1b..c5ad7dcb 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "test:node": "mocha test/js/*.js", "test:browser": "SUPERSTRING_USE_BROWSER_VERSION=1 mocha test/js/*.js", "test": "npm run test:node && npm run test:browser", - "benchmark": "node benchmark/marker-index.benchmark.js", + "benchmark": "node benchmark/text-buffer.benchmark.js && node benchmark/marker-index.benchmark.js && node benchmark/large-text-buffer.benchmark.js", "prepublishOnly": "git submodule update --init --recursive && npm run build:browser", "standard": "standard --recursive src test" }, @@ -35,11 +35,11 @@ }, "devDependencies": { "chai": "^2.0.0", + "download": "^8.0.0", "mocha": "^2.3.4", "random-seed": "^0.2.0", "standard": "^4.5.4", - "temp": "^0.8.3", - "unzip": "^0.1.11" + "temp": "^0.8.3" }, "standard": { "global": [