From 238ff97c92d22bcc38f8a85ddeb3deac25b7bfdd Mon Sep 17 00:00:00 2001 From: Dongbo Wang Date: Thu, 21 Sep 2023 14:50:56 -0700 Subject: [PATCH 1/3] Handle large history file properly by reading lines in the streaming way --- PSReadLine/History.cs | 51 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/PSReadLine/History.cs b/PSReadLine/History.cs index 1d501e1a3..ba4f08a1f 100644 --- a/PSReadLine/History.cs +++ b/PSReadLine/History.cs @@ -457,12 +457,61 @@ private void ReadHistoryFile() { WithHistoryFileMutexDo(1000, () => { - var historyLines = File.ReadAllLines(Options.HistorySavePath); + var historyLines = ReadHistoryLinesImpl(Options.HistorySavePath, Options.MaximumHistoryCount); UpdateHistoryFromFile(historyLines, fromDifferentSession: false, fromInitialRead: true); var fileInfo = new FileInfo(Options.HistorySavePath); _historyFileLastSavedSize = fileInfo.Length; }); } + + static IEnumerable ReadHistoryLinesImpl(string path, int historyCount) + { + const long offset_1mb = 1048576; + const long offset_05mb = 524288; + + // 1mb content contains more than 34,000 history lines for a typical usage, which should be + // more than enough to cover 25,000 history records (a history record could be a multi-line + // command). Similarly, 0.5mb content should be enough to cover 10,000 history records. + // We optimize the file reading when the history count falls in those ranges. If the history + // count is even larger, which should be very rare, we just read all lines. + long offset = historyCount switch + { + <= 10000 => offset_05mb, + <= 25000 => offset_1mb, + _ => 0, + }; + + using var fs = new FileStream(path, FileMode.Open); + using var sr = new StreamReader(fs); + + if (offset > 0 && fs.Length > offset) + { + // When the file size is larger than 1mb, we only read the last 1mb content from the end. + int? 
b1 = null, b2 = null; + fs.Seek(-offset, SeekOrigin.End); + + // After seeking, the current position may point at the middle of a history record, or even + // a byte within a unicode char. So, we need to find the start of the next history record. + while ((b2 = fs.ReadByte()) is not -1) + { + // Read bytes until we find the first newline ('\n' == 0xA) that is not right after a backtick ('`' == 0x60). + // It means a separate full history record will start from the next byte. + if (b2 is 0xA && b1.HasValue && b1 is not 0x60) + { + break; + } + + b1 = b2; + } + } + + // Read lines in the streaming way, so it won't consume too much memory even if we have to + // read all lines from a large history file. + while (!sr.EndOfStream) + { + yield return sr.ReadLine(); + } + } } void UpdateHistoryFromFile(IEnumerable historyLines, bool fromDifferentSession, bool fromInitialRead) From 59d558719ebeb1b6b5c4d79248e4a544bd0f6e10 Mon Sep 17 00:00:00 2001 From: Dongbo Wang Date: Tue, 26 Sep 2023 12:44:54 -0700 Subject: [PATCH 2/3] Read lines instead of bytes --- PSReadLine/History.cs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/PSReadLine/History.cs b/PSReadLine/History.cs index ba4f08a1f..30905413f 100644 --- a/PSReadLine/History.cs +++ b/PSReadLine/History.cs @@ -470,14 +470,14 @@ static IEnumerable ReadHistoryLinesImpl(string path, int historyCount) const long offset_05mb = 524288; // 1mb content contains more than 34,000 history lines for a typical usage, which should be - // more than enough to cover 25,000 history records (a history record could be a multi-line + // more than enough to cover 20,000 history records (a history record could be a multi-line // command). Similarly, 0.5mb content should be enough to cover 10,000 history records. // We optimize the file reading when the history count falls in those ranges. If the history // count is even larger, which should be very rare, we just read all lines. 
long offset = historyCount switch { <= 10000 => offset_05mb, - <= 25000 => offset_1mb, + <= 20000 => offset_1mb, _ => 0, }; @@ -486,22 +486,22 @@ static IEnumerable ReadHistoryLinesImpl(string path, int historyCount) if (offset > 0 && fs.Length > offset) { - // When the file size is larger than 1mb, we only read the last 1mb content from the end. - int? b1 = null, b2 = null; + // When the file size is larger than the offset, we only read that amount of content from the end. fs.Seek(-offset, SeekOrigin.End); - // After seeking, the current position may point at the middle of a history record, or even - // a byte within a unicode char. So, we need to find the start of the next history record. - while ((b2 = fs.ReadByte()) is not -1) + // After seeking, the current position may point at the middle of a history record, or even at a + // byte within a UTF-8 character (history file is saved with UTF-8 encoding). So, let's ignore the + // first line read from that position. + sr.ReadLine(); + + string line; + while ((line = sr.ReadLine()) is not null) { - // Read bytes until we find the first newline ('\n' == 0xA) that is not right after a backtick ('`' == 0x60). - // It means a separate full history record will start from the next byte. - if (b2 is 0xA && b1.HasValue && b1 is not 0x60) + if (!line.EndsWith('`')) { + // A complete history record is guaranteed to start from the next line. 
break; } - - b1 = b2; } } From d293db1fc10f93d581c30fc205e5cf25a0f4bca3 Mon Sep 17 00:00:00 2001 From: Dongbo Wang Date: Tue, 26 Sep 2023 12:52:31 -0700 Subject: [PATCH 3/3] Fix for .net462 --- PSReadLine/History.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PSReadLine/History.cs b/PSReadLine/History.cs index 30905413f..c1490a230 100644 --- a/PSReadLine/History.cs +++ b/PSReadLine/History.cs @@ -497,7 +497,7 @@ static IEnumerable ReadHistoryLinesImpl(string path, int historyCount) string line; while ((line = sr.ReadLine()) is not null) { - if (!line.EndsWith('`')) + if (!line.EndsWith("`", StringComparison.Ordinal)) { // A complete history record is guaranteed to start from the next line. break;