Skip to content
This repository was archived by the owner on Nov 20, 2018. It is now read-only.

Update WebEncoders from Unicode 7.0 to Unicode 8.0 #388

Merged
merged 1 commit into from
Sep 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/Microsoft.Framework.WebEncoders.Core/UnicodeHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ internal unsafe static class UnicodeHelpers

/// <summary>
/// Helper method which creates a bitmap of all characters which are
/// defined per version 7.0.0 of the Unicode specification.
/// defined per version 8.0 of the Unicode specification.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static uint[] CreateDefinedCharacterBitmap()
{
// The stream should be exactly 8KB in size.
var assembly = typeof(UnicodeHelpers).GetTypeInfo().Assembly;
var resourceName = assembly.GetName().Name + ".compiler.resources.unicode-7.0.0-defined-characters.bin";
var resourceName = assembly.GetName().Name + ".compiler.resources.unicode-defined-chars.bin";

var stream = assembly.GetManifestResourceStream(resourceName);
if (stream.Length != 8 * 1024)
Expand Down Expand Up @@ -72,7 +72,7 @@ private static uint[] CreateDefinedCharacterBitmap()
}

/// <summary>
/// Returns a bitmap of all characters which are defined per version 7.0.0
/// Returns a bitmap of all characters which are defined per version 8.0
/// of the Unicode specification.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down Expand Up @@ -204,7 +204,7 @@ internal static int GetUtf8RepresentationForScalarValue(uint scalar)
}

/// <summary>
/// Returns a value stating whether a character is defined per version 7.0.0
/// Returns a value stating whether a character is defined per version 8.0
/// of the Unicode specification. Certain classes of characters (control chars,
/// private use, surrogates, some whitespace) are considered "undefined" for
/// our purposes.
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.Framework.WebEncoders.Core/UnicodeRanges.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Microsoft.Framework.WebEncoders
{
/// <summary>
/// Contains predefined <see cref="UnicodeRange"/> instances which correspond to blocks
/// from the Unicode 7.0 specification.
/// from the Unicode 8.0 specification.
/// </summary>
public static partial class UnicodeRanges
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,15 @@ public static partial class UnicodeRanges
public static UnicodeRange LatinExtendedE
{
    get
    {
        // Prefer the instance already stored in the backing field; only when the
        // field is still null is CreateRange asked to supply one for U+AB30..U+AB6F.
        var existing = Volatile.Read(ref _latinExtendedE);
        return existing ?? CreateRange(ref _latinExtendedE, first: '\uAB30', last: '\uAB6F');
    }
}
private static UnicodeRange _latinExtendedE;

/// <summary>
/// Gets the <see cref="UnicodeRange"/> that spans the 'Cherokee Supplement' Unicode block (U+AB70..U+ABBF).
/// </summary>
/// <remarks>
/// The code chart listing every character in this block is published at http://www.unicode.org/charts/PDF/UAB70.pdf.
/// </remarks>
public static UnicodeRange CherokeeSupplement
{
    get
    {
        // Prefer the instance already stored in the backing field; only when the
        // field is still null is CreateRange asked to supply one for U+AB70..U+ABBF.
        var existing = Volatile.Read(ref _cherokeeSupplement);
        return existing ?? CreateRange(ref _cherokeeSupplement, first: '\uAB70', last: '\uABBF');
    }
}
private static UnicodeRange _cherokeeSupplement;

/// <summary>
/// A <see cref="UnicodeRange"/> corresponding to the 'Meetei Mayek' Unicode block (U+ABC0..U+ABFF).
/// </summary>
Expand All @@ -1303,7 +1312,7 @@ public static partial class UnicodeRanges
/// </remarks>
public static UnicodeRange HangulJamoExtendedB
{
    get
    {
        // Prefer the instance already stored in the backing field; only when the
        // field is still null is CreateRange asked to supply one for U+D7B0..U+D7FF.
        var existing = Volatile.Read(ref _hangulJamoExtendedB);
        return existing ?? CreateRange(ref _hangulJamoExtendedB, first: '\uD7B0', last: '\uD7FF');
    }
}
private static UnicodeRange _hangulJamoExtendedB;

/// <summary>
/// A <see cref="UnicodeRange"/> corresponding to the 'CJK Compatibility Ideographs' Unicode block (U+F900..U+FAFF).
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ private static bool[] ReadListOfDefinedCharacters()
}
}

// Handle known spans from Unicode 7.0.0's UnicodeData.txt
// Handle known spans from Unicode 8.0's UnicodeData.txt

// CJK Ideograph Extension A
for (int i = '\u3400'; i <= '\u4DB5'; i++)
{
retVal[i] = true;
}
// CJK Ideograph
for (int i = '\u4E00'; i <= '\u9FCC'; i++)
for (int i = '\u4E00'; i <= '\u9FD5'; i++)
{
retVal[i] = true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ public void Range_All()
[InlineData('\uAAE0', '\uAAFF', nameof(UnicodeRanges.MeeteiMayekExtensions))]
[InlineData('\uAB00', '\uAB2F', nameof(UnicodeRanges.EthiopicExtendedA))]
[InlineData('\uAB30', '\uAB6F', nameof(UnicodeRanges.LatinExtendedE))]
[InlineData('\uAB70', '\uABBF', nameof(UnicodeRanges.CherokeeSupplement))]
[InlineData('\uABC0', '\uABFF', nameof(UnicodeRanges.MeeteiMayek))]
[InlineData('\uAC00', '\uD7AF', nameof(UnicodeRanges.HangulSyllables))]
[InlineData('\uD7B0', '\uD7FF', nameof(UnicodeRanges.HangulJamoExtendedB))]
Expand Down
25 changes: 20 additions & 5 deletions unicode/Blocks.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
# Blocks-7.0.0.txt
# Date: 2014-04-03, 23:23:00 GMT [RP, KW]
# Blocks-8.0.0.txt
# Date: 2014-11-10, 23:04:00 GMT [KW]
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Note: The casing of block names is not normative.
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
#
# Format:
# Start Code..End Code; Block Name

Expand All @@ -20,6 +17,14 @@
# For more information on the comparison of property values,
# see UAX #44: http://www.unicode.org/reports/tr44/
#
# All block ranges start with a value where (cp MOD 16) = 0,
# and end with a value where (cp MOD 16) = 15. In other words,
# the last hexadecimal digit of the start of range is ...0
# and the last hexadecimal digit of the end of range is ...F.
# This constraint on block ranges guarantees that allocations
# are done in terms of whole columns, and that code chart display
# never involves splitting columns in the charts.
#
# All code points not explicitly listed for Block
# have the value No_Block.

Expand Down Expand Up @@ -168,6 +173,7 @@ AA80..AADF; Tai Viet
AAE0..AAFF; Meetei Mayek Extensions
AB00..AB2F; Ethiopic Extended-A
AB30..AB6F; Latin Extended-E
AB70..ABBF; Cherokee Supplement
ABC0..ABFF; Meetei Mayek
AC00..D7AF; Hangul Syllables
D7B0..D7FF; Hangul Jamo Extended-B
Expand Down Expand Up @@ -210,6 +216,7 @@ FFF0..FFFF; Specials
10840..1085F; Imperial Aramaic
10860..1087F; Palmyrene
10880..108AF; Nabataean
108E0..108FF; Hatran
10900..1091F; Phoenician
10920..1093F; Lydian
10980..1099F; Meroitic Hieroglyphs
Expand All @@ -223,6 +230,7 @@ FFF0..FFFF; Specials
10B60..10B7F; Inscriptional Pahlavi
10B80..10BAF; Psalter Pahlavi
10C00..10C4F; Old Turkic
10C80..10CFF; Old Hungarian
10E60..10E7F; Rumi Numeral Symbols
11000..1107F; Brahmi
11080..110CF; Kaithi
Expand All @@ -232,17 +240,21 @@ FFF0..FFFF; Specials
11180..111DF; Sharada
111E0..111FF; Sinhala Archaic Numbers
11200..1124F; Khojki
11280..112AF; Multani
112B0..112FF; Khudawadi
11300..1137F; Grantha
11480..114DF; Tirhuta
11580..115FF; Siddham
11600..1165F; Modi
11680..116CF; Takri
11700..1173F; Ahom
118A0..118FF; Warang Citi
11AC0..11AFF; Pau Cin Hau
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
13000..1342F; Egyptian Hieroglyphs
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
16AD0..16AFF; Bassa Vah
Expand All @@ -257,6 +269,7 @@ FFF0..FFFF; Specials
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1E800..1E8DF; Mende Kikakui
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
Expand All @@ -271,9 +284,11 @@ FFF0..FFFF; Specials
1F700..1F77F; Alchemical Symbols
1F780..1F7FF; Geometric Shapes Extended
1F800..1F8FF; Supplemental Arrows-C
1F900..1F9FF; Supplemental Symbols and Pictographs
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2F800..2FA1F; CJK Compatibility Ideographs Supplement
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
Expand Down
Loading