Skip to content

Commit e2b3aa1

Browse files
authored
Merge pull request #1149 from kll/feature/preserve_encoding
Preserve existing file encoding if it can be easily determined otherwise use UTF8 with BOM.
2 parents d5bbf0b + 1ce6a5a commit e2b3aa1

File tree

8 files changed

+208
-17
lines changed

8 files changed

+208
-17
lines changed

src/GitVersionCore.Tests/GitVersionCore.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
<Compile Include="ConfigProviderTests.cs" />
9999
<Compile Include="GitVersionContextTests.cs" />
100100
<Compile Include="Helpers\DirectoryHelper.cs" />
101+
<Compile Include="IntegrationTests\FileSystemTests.cs" />
101102
<Compile Include="IntegrationTests\MainlineDevelopmentMode.cs" />
102103
<Compile Include="LogMessages.cs" />
103104
<Compile Include="Mocks\MockThreadSleep.cs" />
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
using System.IO;
2+
using System.Text;
3+
4+
using GitVersion.Helpers;
5+
6+
using NUnit.Framework;
7+
8+
using Shouldly;
9+
10+
/// <summary>
/// Integration tests that exercise FileSystem.WriteAllText(string, string) against real files on disk
/// and check which encoding preamble (byte order mark) ends up at the start of the written file.
/// </summary>
[TestFixture]
public class FileSystemTests
{
    /// <summary>Path of the scratch file used by the current test.</summary>
    public string TempFilePath { get; set; }

    [SetUp]
    public void CreateTempFile()
    {
        // Path.GetTempFileName creates a zero-byte file on disk and returns its path.
        TempFilePath = Path.GetTempFileName();
    }

    [TearDown]
    public void Cleanup()
    {
        // File.Delete does not throw when the file is already gone, so this is safe
        // for tests that remove the file themselves.
        File.Delete(TempFilePath);
    }

    [TestCase("utf-32")]
    [TestCase("utf-32BE")]
    [TestCase("utf-16")]
    [TestCase("utf-16BE")]
    [TestCase("utf-8")]
    public void WhenFileExistsWithEncodingPreamble_EncodingIsPreservedAfterWriteAll(string encodingName)
    {
        var encoding = Encoding.GetEncoding(encodingName);

        File.WriteAllText(TempFilePath, "(-‸ლ)", encoding);

        var fileSystem = new FileSystem();
        fileSystem.WriteAllText(TempFilePath, @"¯\(◉◡◔)/¯");

        var preamble = encoding.GetPreamble();
        ReadLeadingBytes(TempFilePath, preamble.Length).ShouldBe(preamble);
    }

    [Test]
    public void WhenFileDoesNotExist_CreateWithUTF8WithPreamble()
    {
        var encoding = Encoding.UTF8;

        // SetUp leaves an empty file behind; remove it so the file really does not exist
        // when WriteAllText is called.
        File.Delete(TempFilePath);

        var fileSystem = new FileSystem();
        fileSystem.WriteAllText(TempFilePath, "╚(ಠ_ಠ)=┐");

        var preamble = encoding.GetPreamble();
        ReadLeadingBytes(TempFilePath, preamble.Length).ShouldBe(preamble);
    }

    [Test]
    public void WhenFileExistsButIsEmpty_WriteWithUTF8WithPreamble()
    {
        var encoding = Encoding.UTF8;

        // The zero-byte file created in SetUp has no preamble to detect.
        var fileSystem = new FileSystem();
        fileSystem.WriteAllText(TempFilePath, "╚(ಠ_ಠ)=┐");

        var preamble = encoding.GetPreamble();
        ReadLeadingBytes(TempFilePath, preamble.Length).ShouldBe(preamble);
    }

    /// <summary>
    /// Reads the first <paramref name="count"/> bytes of a file.
    /// </summary>
    /// <param name="path">The file to read from.</param>
    /// <param name="count">The number of leading bytes requested.</param>
    /// <returns>
    /// A buffer of exactly <paramref name="count"/> bytes; positions that could not be read stay zero,
    /// which makes a comparison against a real preamble fail.
    /// </returns>
    static byte[] ReadLeadingBytes(string path, int count)
    {
        using (var stream = File.OpenRead(path))
        {
            var bytes = new byte[count];
            stream.Read(bytes, 0, count);
            return bytes;
        }
    }
}

src/GitVersionCore.Tests/TestFileSystem.cs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
public class TestFileSystem : IFileSystem
99
{
10-
Dictionary<string, string> fileSystem = new Dictionary<string, string>();
10+
Dictionary<string, byte[]> fileSystem = new Dictionary<string, byte[]>();
1111

1212
public void Copy(string @from, string to, bool overwrite)
1313
{
@@ -19,7 +19,7 @@ public void Copy(string @from, string to, bool overwrite)
1919
throw new IOException("File already exists");
2020
}
2121

22-
string source;
22+
byte[] source;
2323
if (!fileSystem.TryGetValue(from, out source))
2424
throw new FileNotFoundException(string.Format("The source file '{0}' was not found", from), from);
2525

@@ -44,19 +44,25 @@ public void Delete(string path)
4444

4545
/// <summary>
/// Returns the text stored under <paramref name="path"/>, decoded with the encoding whose preamble
/// starts the stored bytes, or with UTF8 when no preamble is recognised.
/// </summary>
/// <param name="path">The key of the in-memory file to read.</param>
/// <returns>The decoded text without the encoding preamble.</returns>
/// <exception cref="FileNotFoundException">No content is stored under <paramref name="path"/>.</exception>
public string ReadAllText(string path)
{
    byte[] content;
    if (!fileSystem.TryGetValue(path, out content))
        throw new FileNotFoundException(string.Format("The file '{0}' was not found", path), path);

    var detected = EncodingHelper.DetectEncoding(content);
    if (detected == null)
    {
        return Encoding.UTF8.GetString(content);
    }

    // Skip the preamble so the byte order mark does not show up as a leading character in the text.
    var preambleLength = detected.GetPreamble().Length;
    return detected.GetString(content, preambleLength, content.Length - preambleLength);
}

/// <summary>
/// Stores <paramref name="fileContents"/> under <paramref name="file"/>, reusing the encoding detected
/// from the preamble of any existing content and falling back to UTF8 otherwise.
/// </summary>
/// <param name="file">The key of the in-memory file to write.</param>
/// <param name="fileContents">The text to store.</param>
public void WriteAllText(string file, string fileContents)
{
    Encoding encoding = null;

    byte[] existing;
    if (fileSystem.TryGetValue(file, out existing))
    {
        encoding = EncodingHelper.DetectEncoding(existing);
    }

    WriteAllText(file, fileContents, encoding ?? Encoding.UTF8);
}

/// <summary>
/// Stores <paramref name="fileContents"/> under <paramref name="file"/> as the encoding's preamble
/// followed by the encoded text.
/// </summary>
/// <param name="file">The key of the in-memory file to write.</param>
/// <param name="fileContents">The text to store.</param>
/// <param name="encoding">The encoding used to produce the stored bytes.</param>
public void WriteAllText(string file, string fileContents, Encoding encoding)
{
    // Encoding.GetBytes never emits the preamble. Without it the stored bytes carry no marker,
    // so a later DetectEncoding on this content could not recover the encoding.
    var preamble = encoding.GetPreamble();
    var text = encoding.GetBytes(fileContents);

    var bytes = new byte[preamble.Length + text.Length];
    preamble.CopyTo(bytes, 0);
    text.CopyTo(bytes, preamble.Length);

    fileSystem[file] = bytes;
}
6167

6268
public IEnumerable<string> DirectoryGetFiles(string directory, string searchPattern, SearchOption searchOption)
@@ -74,7 +80,7 @@ public Stream OpenRead(string path)
7480
if (fileSystem.ContainsKey(path))
7581
{
7682
var content = fileSystem[path];
77-
return new MemoryStream(Encoding.UTF8.GetBytes(content));
83+
return new MemoryStream(content);
7884
}
7985

8086
throw new FileNotFoundException("File not found.", path);
@@ -84,11 +90,11 @@ public void CreateDirectory(string path)
8490
{
8591
if (fileSystem.ContainsKey(path))
8692
{
87-
fileSystem[path] = "";
93+
fileSystem[path] = new byte[0];
8894
}
8995
else
9096
{
91-
fileSystem.Add(path, "");
97+
fileSystem.Add(path, new byte[0]);
9298
}
9399
}
94100

src/GitVersionCore/GitVersionCore.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
<Compile Include="GitVersionCache.cs" />
127127
<Compile Include="GitVersionCacheKey.cs" />
128128
<Compile Include="GitVersionCacheKeyFactory.cs" />
129+
<Compile Include="Helpers\EncodingHelper.cs" />
129130
<Compile Include="Helpers\FileSystem.cs" />
130131
<Compile Include="Helpers\IFileSystem.cs" />
131132
<Compile Include="Helpers\IThreadSleep.cs" />
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
namespace GitVersion.Helpers
{
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;

    /// <summary>
    /// Detects a text encoding by matching the encoding preamble (byte order mark) at the start of the data.
    /// </summary>
    public static class EncodingHelper
    {
        // Encodings that declare a non-empty preamble, longest preamble first. Populated on first use.
        private static IList<Encoding> EncodingsWithPreambles;

        // Length in bytes of the longest preamble in EncodingsWithPreambles.
        private static int MaxPreambleLength;

        /// <summary>
        /// Detects the encoding of a file if and only if it includes a preamble.
        /// </summary>
        /// <param name="filename">The file name to check the encoding of.</param>
        /// <returns>
        /// The encoding of the file if it has a preamble; otherwise null. Null is also returned when the
        /// file does not exist or is empty.
        /// </returns>
        public static Encoding DetectEncoding(string filename)
        {
            if (!File.Exists(filename))
            {
                return null;
            }

            if (EncodingsWithPreambles == null)
            {
                ScanEncodings();
            }

            using (var stream = File.OpenRead(filename))
            {
                // No bytes? No encoding!
                if (stream.Length == 0)
                {
                    return null;
                }

                // Read the minimum amount necessary.
                var length = stream.Length > MaxPreambleLength ? MaxPreambleLength : (int)stream.Length;
                var bytes = new byte[length];

                // Stream.Read may return fewer bytes than requested, so keep reading until the
                // buffer is full or the stream ends.
                var total = 0;
                while (total < length)
                {
                    var read = stream.Read(bytes, total, length - total);
                    if (read == 0)
                    {
                        break;
                    }

                    total += read;
                }

                // Only hand over bytes that were actually read; unread zero bytes must not take part in matching.
                return DetectEncoding(total == length ? bytes : bytes.Take(total).ToArray());
            }
        }

        /// <summary>
        /// Returns the first encoding where all the preamble bytes match exactly.
        /// </summary>
        /// <param name="bytes">The bytes to check for a matching preamble.</param>
        /// <returns>The encoding that has a matching preamble or null if one was not found.</returns>
        public static Encoding DetectEncoding(IList<byte> bytes)
        {
            if (bytes == null || bytes.Count == 0)
            {
                return null;
            }

            if (EncodingsWithPreambles == null)
            {
                ScanEncodings();
            }

            return EncodingsWithPreambles.FirstOrDefault(encoding => PreambleMatches(encoding, bytes));
        }

        /// <summary>
        /// Populates the list of encodings that have preambles, ordered by the length of the
        /// preamble longest to shortest, and records the longest preamble length. The ordering
        /// prevents a short preamble masking a longer one later in the list.
        /// </summary>
        private static void ScanEncodings()
        {
            var encodings = (from info in Encoding.GetEncodings()
                             let encoding = info.GetEncoding()
                             let preamble = encoding.GetPreamble()
                             where preamble.Length > 0
                             orderby preamble.Length descending
                             select encoding).ToList();

            // Set the length before the list: the null check on the list is what gates the scan,
            // so the list should only become visible once the length is in place.
            MaxPreambleLength = encodings.Count == 0 ? 0 : encodings[0].GetPreamble().Length;
            EncodingsWithPreambles = encodings;
        }

        /// <summary>
        /// Verifies that all bytes of an encoding's preamble are present at the beginning of some sample data.
        /// </summary>
        /// <param name="encoding">The encoding to check against.</param>
        /// <param name="data">The data to test.</param>
        /// <returns>A boolean indicating if a preamble match was found.</returns>
        private static bool PreambleMatches(Encoding encoding, IList<byte> data)
        {
            var preamble = encoding.GetPreamble();
            if (preamble.Length > data.Count)
            {
                return false;
            }

            for (var index = 0; index < preamble.Length; index++)
            {
                if (data[index] != preamble[index])
                {
                    return false;
                }
            }

            return true;
        }
    }
}

src/GitVersionCore/Helpers/FileSystem.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ namespace GitVersion.Helpers
44
using System.Collections.Generic;
55
using System.IO;
66
using System.Linq;
7+
using System.Text;
78

89
public class FileSystem : IFileSystem
910
{
@@ -36,7 +37,15 @@ public string ReadAllText(string path)
3637

3738
public void WriteAllText(string file, string fileContents)
3839
{
39-
File.WriteAllText(file, fileContents);
40+
// Opinionated decision to use UTF8 with BOM when creating new files or when the existing
41+
// encoding was not easily detected due to the file not having an encoding preamble.
42+
var encoding = EncodingHelper.DetectEncoding(file) ?? Encoding.UTF8;
43+
WriteAllText(file, fileContents, encoding);
44+
}
45+
46+
public void WriteAllText(string file, string fileContents, Encoding encoding)
47+
{
48+
File.WriteAllText(file, fileContents, encoding);
4049
}
4150

4251
public IEnumerable<string> DirectoryGetFiles(string directory, string searchPattern, SearchOption searchOption)

src/GitVersionCore/Helpers/IFileSystem.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ namespace GitVersion.Helpers
22
{
33
using System.Collections.Generic;
44
using System.IO;
5+
using System.Text;
56

67
public interface IFileSystem
78
{
@@ -11,6 +12,7 @@ public interface IFileSystem
1112
void Delete(string path);
1213
string ReadAllText(string path);
1314
void WriteAllText(string file, string fileContents);
15+
void WriteAllText(string file, string fileContents, Encoding encoding);
1416
IEnumerable<string> DirectoryGetFiles(string directory, string searchPattern, SearchOption searchOption);
1517
Stream OpenWrite(string path);
1618
Stream OpenRead(string path);

src/GitVersionTask/AssemblyInfoBuilder/UpdateAssemblyInfo.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Text;
77

88
using GitVersion;
9+
using GitVersion.Helpers;
910

1011
using Microsoft.Build.Framework;
1112

@@ -95,14 +96,15 @@ void CreateTempAssemblyInfo(VersionVariables versionVariables)
9596
}
9697

9798
var assemblyInfo = assemblyInfoBuilder.GetAssemblyInfoText(versionVariables, RootNamespace).Trim();
99+
var encoding = EncodingHelper.DetectEncoding(AssemblyInfoTempFilePath) ?? Encoding.UTF8;
98100

99101
// We need to try to read the existing text first if the file exists and see if it's the same
100102
// This is to avoid writing when there's no differences and causing a rebuild
101103
try
102104
{
103105
if (File.Exists(AssemblyInfoTempFilePath))
104106
{
105-
var content = File.ReadAllText(AssemblyInfoTempFilePath, Encoding.UTF8).Trim();
107+
var content = File.ReadAllText(AssemblyInfoTempFilePath, encoding).Trim();
106108
if (string.Equals(assemblyInfo, content, StringComparison.Ordinal))
107109
{
108110
return; // nothing to do as the file matches what we'd create
@@ -114,7 +116,7 @@ void CreateTempAssemblyInfo(VersionVariables versionVariables)
114116
// Something happened reading the file, try to overwrite anyway
115117
}
116118

117-
File.WriteAllText(AssemblyInfoTempFilePath, assemblyInfo, Encoding.UTF8);
119+
File.WriteAllText(AssemblyInfoTempFilePath, assemblyInfo, encoding);
118120
}
119121
}
120122
}

0 commit comments

Comments
 (0)