def load_special_casing(f):
    """Load the unconditional full case mappings from SpecialCasing.txt.

    Each data line of the file has the form
    ``code; lower; title; upper; [condition_list;] # comment``.
    Only lines WITHOUT a condition list are returned; conditional entries
    (for example the ``Final_Sigma`` mapping of U+03A3) depend on context
    that a per-character lookup table cannot express, so emitting them
    unconditionally would produce wrong results.  Everything from the
    "# Language-Sensitive Mappings" header onwards is ignored as well.

    Args:
        f: Name of the SpecialCasing data file; it is passed to ``fetch``
            first and then read line by line.

    Returns:
        A list of ``(code, lower, upper)`` tuples ordered by code point.
        ``code`` is a quoted Rust char literal and ``lower`` / ``upper``
        are double-quoted Rust string literals built from the mapped code
        points.  The title-case column is parsed but not returned.
    """
    fetch(f)

    def escape(cp):
        # Four hex digits fit a \u escape; anything above the BMP needs the
        # eight-digit \U form (the same form used for '\U00010400' in the
        # generated range tables).
        if cp <= 0xffff:
            return "\\u%4.4x" % cp
        return "\\U%8.8x" % cp

    def string_literal(text):
        # str.split() with no argument tolerates repeated blanks between
        # the code points of a multi-character mapping.
        return '"' + "".join([escape(int(x, 16)) for x in text.split()]) + '"'

    marker = "# Language-Sensitive Mappings"
    entries = []
    language_sensitive = False
    for line in fileinput.input(f):
        # Keep consuming lines instead of breaking out of the loop so the
        # module-level fileinput state is fully drained for the next reader.
        if line.startswith(marker):
            language_sensitive = True
        if language_sensitive:
            continue

        # Drop the trailing comment, then split the data part into fields.
        data = line.split("#", 1)[0]
        fields = [x.strip() for x in data.split(";")]
        if len(fields) < 4:
            # Blank line or pure comment line.
            continue
        code, lower, _title, upper = fields[:4]
        if any(fields[4:]):
            # A non-empty fifth field is a condition list: skip the entry.
            continue

        entries.append((int(code, 16), string_literal(lower),
                        string_literal(upper)))

    # Order by numeric code point so the emitted table can be binary searched.
    entries.sort()
    return [("'" + escape(cp) + "'", low, up) for (cp, low, up) in entries]
def emit_case_module(f, mod, tbl, spec):
    """Write a Rust module with case-offset tables and lookup functions.

    For every key ``cat`` of ``tbl`` (processed in sorted order) this emits
    a static ``<cat>_table`` of ``(lo, hi, offset)`` ranges and a public
    ``<cat>_offset(c)`` function that binary-searches it, returning 0 when
    ``c`` is in no range.  It then emits ``special_table`` built from
    ``spec`` together with ``case_special``, ``upcase_special`` and
    ``lowcase_special``, which return the multi-character mapping of a
    char or the empty string when there is none.

    Args:
        f: Writable text stream receiving the generated Rust source.
        mod: Name of the Rust module to emit.
        tbl: Mapping from category name to a list of
            ``(lo, hi, offset)`` tuples; ``lo`` and ``hi`` are passed
            through ``escape_char`` and ``offset`` through ``str``.
        spec: Sequence of ``(code, lower, upper)`` tuples whose elements
            are already-escaped Rust literals and are written verbatim.
    """
    f.write("pub mod %s {\n" % mod)
    f.write("    use option::{Some, None};\n")
    f.write("    use vec::ImmutableVector;\n")
    f.write("""
    fn bsearch_range_value_table(c: char, r: &'static [(char, char, i32)]) -> i32 {
        use cmp::{Equal, Less, Greater};
        match r.bsearch(|&(lo, hi, _)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }) {
            Some(idx) => {
                let (_, _, result) = r[idx];
                result
            }
            None => 0
        }
    }\n\n
""")

    # sorted() works on both Python 2 and 3, unlike tbl.keys().sort().
    for cat in sorted(tbl):
        f.write("    static %s_table : &'static [(char,char,i32)] = &[\n" % cat)
        for ix, tup in enumerate(tbl[cat]):
            f.write(ch_prefix(ix))
            f.write("(%s, %s, %s)" % (escape_char(tup[0]), escape_char(tup[1]), str(tup[2])))
        f.write("\n    ];\n\n")

        f.write("    pub fn %s(c: char) -> i32 {\n" % (cat + "_offset"))
        f.write("        bsearch_range_value_table(c, %s_table)\n" % cat)
        f.write("    }\n\n")

    f.write("    static %s_table : &'static [(char, &'static str,&'static str)] = &[\n" % "special")
    ix = 0
    # Iterate the `spec` argument; the table must not depend on whatever a
    # module-level variable happens to hold at call time.
    for tup in spec:
        f.write(ch_prefix(ix))
        f.write("(%s, %s, %s)" % (tup[0], tup[1], tup[2]))
        # NOTE(review): the step of 2 is kept from the original; presumably it
        # makes ch_prefix start a new line for every entry - confirm against
        # ch_prefix.
        ix += 2
    f.write("\n    ];\n\n")

    # In the generated function, case == 0 selects the lowercase column and
    # any other value selects the uppercase column.
    f.write("""
    pub fn case_special(c:char, case:u8) -> &'static str {
        use cmp::{Equal, Less, Greater};
        match special_table.bsearch(|&(code, _, _)| {
            if c==code { Equal }
            else if code < c { Less }
            else { Greater }
        }) {
            Some(idx) => {
                if case==0 {
                    let (_, result, _) = special_table[idx];
                    result
                }
                else {
                    let (_, _, result) = special_table[idx];
                    result
                }
            }
            None => ""
        }
    }\n\n
""")

    f.write("    pub fn upcase_special(c:char) -> &'static str {\n")
    f.write("        case_special(c, 1)\n")
    f.write("    }\n\n")

    f.write("    pub fn lowcase_special(c:char) -> &'static str {\n")
    f.write("        case_special(c, 0)\n")
    f.write("    }\n\n")

    f.write("}\n")
+pub fn to_lower_full_default(u: char) -> ~str { + case_changes::lowcase_special(u).to_owned() +} + +/// Returns the uppercase form of a given unicode character. +/// Makes a best-effort attempt without checking locale. +pub fn to_upper_default(u: char) -> char { + let off = case_changes::upcase_offset(u); + from_u32( (((u as u32) as i32) + off) as u32 ).unwrap() +} + +/// Returns the uppercase form of a given unicode character +/// using full mapping, may map to multiple char. +/// Makes a best-effort attempt without checking locale. +pub fn to_upper_full_default(u: char) -> ~str { + case_changes::upcase_special(u).to_owned() +} + +//FIXME #9363: implement to_upper and to_lower which take into acount locale + // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior static S_BASE: uint = 0xAC00; static L_BASE: uint = 0x1100; @@ -367,6 +397,11 @@ pub trait Char { /// /// This will then return the number of characters written to the slice. fn encode_utf8(&self, dst: &mut [u8]) -> uint; + + fn to_lower_default(&self) -> char; + fn to_upper_default(&self) -> char; + fn to_lower_full_default(&self) -> ~str; + fn to_upper_full_default(&self) -> ~str; } impl Char for char { @@ -420,6 +455,12 @@ impl Char for char { return 4; } } + + fn to_lower_default(&self) -> char { to_lower_default(*self) } + fn to_upper_default(&self) -> char { to_upper_default(*self) } + fn to_lower_full_default(&self) -> ~str { to_lower_full_default(*self) } + fn to_upper_full_default(&self) -> ~str { to_upper_full_default(*self) } + } #[cfg(not(test))] @@ -546,3 +587,15 @@ fn test_to_str() { let s = 't'.to_str(); assert_eq!(s, ~"t"); } + +#[test] +fn test_to_lower_default() { + assert_eq!('ŗ'.to_lower_default(), 'ŗ'); + assert_eq!('Ʋ'.to_lower_default(), 'ʋ'); +} + +#[test] +fn test_to_upper_default() { + assert_eq!('ŗ'.to_upper_default(), 'Ŗ'); + assert_eq!('Ʋ'.to_upper_default(), 'Ʋ'); +} diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 9707d592a2e85..b498e4c1fd49b 100644 
--- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -1410,6 +1410,11 @@ pub trait StrSlice<'self> { fn subslice_offset(&self, inner: &str) -> uint; fn as_imm_buf(&self, f: &fn(*u8, uint) -> T) -> T; + + fn to_lower_default(&self) -> ~str; + fn to_upper_default(&self) -> ~str; + fn to_lower_full_default(&self) -> ~str; + fn to_upper_full_default(&self) -> ~str; } /// Extension methods for strings @@ -2190,6 +2195,50 @@ impl<'self> StrSlice<'self> for &'self str { let v: &[u8] = unsafe { cast::transmute(*self) }; v.as_imm_buf(f) } + + /// Convert all characters in the string to lowercase + fn to_lower_default(&self) -> ~str { + let mut out = with_capacity(self.len()); + for c in self.iter() { + out.push_char(c.to_lower_default()); + } + out + } + + /// Convert all characters in the string to lowercase using + /// full mapping, may increase string length + fn to_lower_full_default(&self) -> ~str { + let mut out = with_capacity(self.len()); + for c in self.iter() { + let low = c.to_lower_full_default(); + if low.is_empty(){ out.push_char(c.to_lower_default()); } + else { out.push_str(low); } + } + out + } + + /// Convert all characters in the string to uppercase + fn to_upper_default(&self) -> ~str { + let mut out = with_capacity(self.len()); + for c in self.iter() { + out.push_char(c.to_upper_default()); + } + out + } + + /// Convert all characters in the string to uppercase using + /// full mapping, may increase string length + fn to_upper_full_default(&self) -> ~str { + let mut out = with_capacity(self.len()); + for c in self.iter() { + let up = c.to_upper_full_default(); + if up.is_empty(){ out.push_char(c.to_upper_default()); } + else { out.push_str(up); } + } + out + } + + //FIXME #9363: implement to_lower and to_upper which take into acount locale } #[allow(missing_doc)] @@ -3741,6 +3790,27 @@ mod tests { assert_eq!("abcde".to_send_str(), SendStrStatic("abcde")); assert_eq!("abcde".to_send_str(), SendStrOwned(~"abcde")); } + + #[test] + fn test_to_lower_default() 
{ + assert_eq!("ŗƲΣꓔ!".to_lower_default(), ~"ŗʋσꓔ!" ) + } + + #[test] + fn test_to_upper_default() { + assert_eq!("ŗƲΣꓔ!".to_upper_default(), ~"ŖƲΣꓔ!" ) + } + + #[test] + fn test_to_lower_full_default() { + assert_eq!("İA".to_lower_full_default(), ~"i̇a") + } + + #[test] + fn test_to_upper_full_default() { + assert_eq!("ßa".to_upper_full_default(), ~"SSA") + } + } #[cfg(test)] diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs index d7f84a6abfbf3..1f9b7e71fea23 100644 --- a/src/libstd/unicode.rs +++ b/src/libstd/unicode.rs @@ -1439,6 +1439,795 @@ pub mod general_category { bsearch_range_table(c, Zs_table) } +} +pub mod case_changes { + use option::{Some, None}; + use vec::ImmutableVector; + + fn bsearch_range_value_table(c: char, r: &'static [(char, char, i32)]) -> i32 { + use cmp::{Equal, Less, Greater}; + match r.bsearch(|&(lo, hi, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Some(idx) => { + let (_, _, result) = r[idx]; + result + } + None => 0 + } + } + + + static lowcase_table : &'static [(char,char,i32)] = &[ + ('\x41', '\x5a', 32), ('\xc0', '\xd6', 32), + ('\xd8', '\xde', 32), ('\u0100', '\u0100', 1), + ('\u0102', '\u0102', 1), ('\u0104', '\u0104', 1), + ('\u0106', '\u0106', 1), ('\u0108', '\u0108', 1), + ('\u010a', '\u010a', 1), ('\u010c', '\u010c', 1), + ('\u010e', '\u010e', 1), ('\u0110', '\u0110', 1), + ('\u0112', '\u0112', 1), ('\u0114', '\u0114', 1), + ('\u0116', '\u0116', 1), ('\u0118', '\u0118', 1), + ('\u011a', '\u011a', 1), ('\u011c', '\u011c', 1), + ('\u011e', '\u011e', 1), ('\u0120', '\u0120', 1), + ('\u0122', '\u0122', 1), ('\u0124', '\u0124', 1), + ('\u0126', '\u0126', 1), ('\u0128', '\u0128', 1), + ('\u012a', '\u012a', 1), ('\u012c', '\u012c', 1), + ('\u012e', '\u012e', 1), ('\u0130', '\u0130', -199), + ('\u0132', '\u0132', 1), ('\u0134', '\u0134', 1), + ('\u0136', '\u0136', 1), ('\u0139', '\u0139', 1), + ('\u013b', '\u013b', 1), ('\u013d', '\u013d', 1), + ('\u013f', '\u013f', 1), 
('\u0141', '\u0141', 1), + ('\u0143', '\u0143', 1), ('\u0145', '\u0145', 1), + ('\u0147', '\u0147', 1), ('\u014a', '\u014a', 1), + ('\u014c', '\u014c', 1), ('\u014e', '\u014e', 1), + ('\u0150', '\u0150', 1), ('\u0152', '\u0152', 1), + ('\u0154', '\u0154', 1), ('\u0156', '\u0156', 1), + ('\u0158', '\u0158', 1), ('\u015a', '\u015a', 1), + ('\u015c', '\u015c', 1), ('\u015e', '\u015e', 1), + ('\u0160', '\u0160', 1), ('\u0162', '\u0162', 1), + ('\u0164', '\u0164', 1), ('\u0166', '\u0166', 1), + ('\u0168', '\u0168', 1), ('\u016a', '\u016a', 1), + ('\u016c', '\u016c', 1), ('\u016e', '\u016e', 1), + ('\u0170', '\u0170', 1), ('\u0172', '\u0172', 1), + ('\u0174', '\u0174', 1), ('\u0176', '\u0176', 1), + ('\u0178', '\u0178', -121), ('\u0179', '\u0179', 1), + ('\u017b', '\u017b', 1), ('\u017d', '\u017d', 1), + ('\u0181', '\u0181', 210), ('\u0182', '\u0182', 1), + ('\u0184', '\u0184', 1), ('\u0186', '\u0186', 206), + ('\u0187', '\u0187', 1), ('\u0189', '\u018a', 205), + ('\u018b', '\u018b', 1), ('\u018e', '\u018e', 79), + ('\u018f', '\u018f', 202), ('\u0190', '\u0190', 203), + ('\u0191', '\u0191', 1), ('\u0193', '\u0193', 205), + ('\u0194', '\u0194', 207), ('\u0196', '\u0196', 211), + ('\u0197', '\u0197', 209), ('\u0198', '\u0198', 1), + ('\u019c', '\u019c', 211), ('\u019d', '\u019d', 213), + ('\u019f', '\u019f', 214), ('\u01a0', '\u01a0', 1), + ('\u01a2', '\u01a2', 1), ('\u01a4', '\u01a4', 1), + ('\u01a6', '\u01a6', 218), ('\u01a7', '\u01a7', 1), + ('\u01a9', '\u01a9', 218), ('\u01ac', '\u01ac', 1), + ('\u01ae', '\u01ae', 218), ('\u01af', '\u01af', 1), + ('\u01b1', '\u01b2', 217), ('\u01b3', '\u01b3', 1), + ('\u01b5', '\u01b5', 1), ('\u01b7', '\u01b7', 219), + ('\u01b8', '\u01b8', 1), ('\u01bc', '\u01bc', 1), + ('\u01c4', '\u01c4', 2), ('\u01c7', '\u01c7', 2), + ('\u01ca', '\u01ca', 2), ('\u01cd', '\u01cd', 1), + ('\u01cf', '\u01cf', 1), ('\u01d1', '\u01d1', 1), + ('\u01d3', '\u01d3', 1), ('\u01d5', '\u01d5', 1), + ('\u01d7', '\u01d7', 1), ('\u01d9', '\u01d9', 1), + ('\u01db', 
'\u01db', 1), ('\u01de', '\u01de', 1), + ('\u01e0', '\u01e0', 1), ('\u01e2', '\u01e2', 1), + ('\u01e4', '\u01e4', 1), ('\u01e6', '\u01e6', 1), + ('\u01e8', '\u01e8', 1), ('\u01ea', '\u01ea', 1), + ('\u01ec', '\u01ec', 1), ('\u01ee', '\u01ee', 1), + ('\u01f1', '\u01f1', 2), ('\u01f4', '\u01f4', 1), + ('\u01f6', '\u01f6', -97), ('\u01f7', '\u01f7', -56), + ('\u01f8', '\u01f8', 1), ('\u01fa', '\u01fa', 1), + ('\u01fc', '\u01fc', 1), ('\u01fe', '\u01fe', 1), + ('\u0200', '\u0200', 1), ('\u0202', '\u0202', 1), + ('\u0204', '\u0204', 1), ('\u0206', '\u0206', 1), + ('\u0208', '\u0208', 1), ('\u020a', '\u020a', 1), + ('\u020c', '\u020c', 1), ('\u020e', '\u020e', 1), + ('\u0210', '\u0210', 1), ('\u0212', '\u0212', 1), + ('\u0214', '\u0214', 1), ('\u0216', '\u0216', 1), + ('\u0218', '\u0218', 1), ('\u021a', '\u021a', 1), + ('\u021c', '\u021c', 1), ('\u021e', '\u021e', 1), + ('\u0220', '\u0220', -130), ('\u0222', '\u0222', 1), + ('\u0224', '\u0224', 1), ('\u0226', '\u0226', 1), + ('\u0228', '\u0228', 1), ('\u022a', '\u022a', 1), + ('\u022c', '\u022c', 1), ('\u022e', '\u022e', 1), + ('\u0230', '\u0230', 1), ('\u0232', '\u0232', 1), + ('\u023a', '\u023a', 10795), ('\u023b', '\u023b', 1), + ('\u023d', '\u023d', -163), ('\u023e', '\u023e', 10792), + ('\u0241', '\u0241', 1), ('\u0243', '\u0243', -195), + ('\u0244', '\u0244', 69), ('\u0245', '\u0245', 71), + ('\u0246', '\u0246', 1), ('\u0248', '\u0248', 1), + ('\u024a', '\u024a', 1), ('\u024c', '\u024c', 1), + ('\u024e', '\u024e', 1), ('\u0370', '\u0370', 1), + ('\u0372', '\u0372', 1), ('\u0376', '\u0376', 1), + ('\u0386', '\u0386', 38), ('\u0388', '\u038a', 37), + ('\u038c', '\u038c', 64), ('\u038e', '\u038f', 63), + ('\u0391', '\u03ab', 32), ('\u03cf', '\u03cf', 8), + ('\u03d8', '\u03d8', 1), ('\u03da', '\u03da', 1), + ('\u03dc', '\u03dc', 1), ('\u03de', '\u03de', 1), + ('\u03e0', '\u03e0', 1), ('\u03e2', '\u03e2', 1), + ('\u03e4', '\u03e4', 1), ('\u03e6', '\u03e6', 1), + ('\u03e8', '\u03e8', 1), ('\u03ea', '\u03ea', 1), + 
('\u03ec', '\u03ec', 1), ('\u03ee', '\u03ee', 1), + ('\u03f4', '\u03f4', -60), ('\u03f7', '\u03f7', 1), + ('\u03f9', '\u03f9', -7), ('\u03fa', '\u03fa', 1), + ('\u03fd', '\u03ff', -130), ('\u0400', '\u040f', 80), + ('\u0410', '\u042f', 32), ('\u0460', '\u0460', 1), + ('\u0462', '\u0462', 1), ('\u0464', '\u0464', 1), + ('\u0466', '\u0466', 1), ('\u0468', '\u0468', 1), + ('\u046a', '\u046a', 1), ('\u046c', '\u046c', 1), + ('\u046e', '\u046e', 1), ('\u0470', '\u0470', 1), + ('\u0472', '\u0472', 1), ('\u0474', '\u0474', 1), + ('\u0476', '\u0476', 1), ('\u0478', '\u0478', 1), + ('\u047a', '\u047a', 1), ('\u047c', '\u047c', 1), + ('\u047e', '\u047e', 1), ('\u0480', '\u0480', 1), + ('\u048a', '\u048a', 1), ('\u048c', '\u048c', 1), + ('\u048e', '\u048e', 1), ('\u0490', '\u0490', 1), + ('\u0492', '\u0492', 1), ('\u0494', '\u0494', 1), + ('\u0496', '\u0496', 1), ('\u0498', '\u0498', 1), + ('\u049a', '\u049a', 1), ('\u049c', '\u049c', 1), + ('\u049e', '\u049e', 1), ('\u04a0', '\u04a0', 1), + ('\u04a2', '\u04a2', 1), ('\u04a4', '\u04a4', 1), + ('\u04a6', '\u04a6', 1), ('\u04a8', '\u04a8', 1), + ('\u04aa', '\u04aa', 1), ('\u04ac', '\u04ac', 1), + ('\u04ae', '\u04ae', 1), ('\u04b0', '\u04b0', 1), + ('\u04b2', '\u04b2', 1), ('\u04b4', '\u04b4', 1), + ('\u04b6', '\u04b6', 1), ('\u04b8', '\u04b8', 1), + ('\u04ba', '\u04ba', 1), ('\u04bc', '\u04bc', 1), + ('\u04be', '\u04be', 1), ('\u04c0', '\u04c0', 15), + ('\u04c1', '\u04c1', 1), ('\u04c3', '\u04c3', 1), + ('\u04c5', '\u04c5', 1), ('\u04c7', '\u04c7', 1), + ('\u04c9', '\u04c9', 1), ('\u04cb', '\u04cb', 1), + ('\u04cd', '\u04cd', 1), ('\u04d0', '\u04d0', 1), + ('\u04d2', '\u04d2', 1), ('\u04d4', '\u04d4', 1), + ('\u04d6', '\u04d6', 1), ('\u04d8', '\u04d8', 1), + ('\u04da', '\u04da', 1), ('\u04dc', '\u04dc', 1), + ('\u04de', '\u04de', 1), ('\u04e0', '\u04e0', 1), + ('\u04e2', '\u04e2', 1), ('\u04e4', '\u04e4', 1), + ('\u04e6', '\u04e6', 1), ('\u04e8', '\u04e8', 1), + ('\u04ea', '\u04ea', 1), ('\u04ec', '\u04ec', 1), + ('\u04ee', 
'\u04ee', 1), ('\u04f0', '\u04f0', 1), + ('\u04f2', '\u04f2', 1), ('\u04f4', '\u04f4', 1), + ('\u04f6', '\u04f6', 1), ('\u04f8', '\u04f8', 1), + ('\u04fa', '\u04fa', 1), ('\u04fc', '\u04fc', 1), + ('\u04fe', '\u04fe', 1), ('\u0500', '\u0500', 1), + ('\u0502', '\u0502', 1), ('\u0504', '\u0504', 1), + ('\u0506', '\u0506', 1), ('\u0508', '\u0508', 1), + ('\u050a', '\u050a', 1), ('\u050c', '\u050c', 1), + ('\u050e', '\u050e', 1), ('\u0510', '\u0510', 1), + ('\u0512', '\u0512', 1), ('\u0514', '\u0514', 1), + ('\u0516', '\u0516', 1), ('\u0518', '\u0518', 1), + ('\u051a', '\u051a', 1), ('\u051c', '\u051c', 1), + ('\u051e', '\u051e', 1), ('\u0520', '\u0520', 1), + ('\u0522', '\u0522', 1), ('\u0524', '\u0524', 1), + ('\u0526', '\u0526', 1), ('\u0531', '\u0556', 48), + ('\u10a0', '\u10cd', 7264), ('\u1e00', '\u1e00', 1), + ('\u1e02', '\u1e02', 1), ('\u1e04', '\u1e04', 1), + ('\u1e06', '\u1e06', 1), ('\u1e08', '\u1e08', 1), + ('\u1e0a', '\u1e0a', 1), ('\u1e0c', '\u1e0c', 1), + ('\u1e0e', '\u1e0e', 1), ('\u1e10', '\u1e10', 1), + ('\u1e12', '\u1e12', 1), ('\u1e14', '\u1e14', 1), + ('\u1e16', '\u1e16', 1), ('\u1e18', '\u1e18', 1), + ('\u1e1a', '\u1e1a', 1), ('\u1e1c', '\u1e1c', 1), + ('\u1e1e', '\u1e1e', 1), ('\u1e20', '\u1e20', 1), + ('\u1e22', '\u1e22', 1), ('\u1e24', '\u1e24', 1), + ('\u1e26', '\u1e26', 1), ('\u1e28', '\u1e28', 1), + ('\u1e2a', '\u1e2a', 1), ('\u1e2c', '\u1e2c', 1), + ('\u1e2e', '\u1e2e', 1), ('\u1e30', '\u1e30', 1), + ('\u1e32', '\u1e32', 1), ('\u1e34', '\u1e34', 1), + ('\u1e36', '\u1e36', 1), ('\u1e38', '\u1e38', 1), + ('\u1e3a', '\u1e3a', 1), ('\u1e3c', '\u1e3c', 1), + ('\u1e3e', '\u1e3e', 1), ('\u1e40', '\u1e40', 1), + ('\u1e42', '\u1e42', 1), ('\u1e44', '\u1e44', 1), + ('\u1e46', '\u1e46', 1), ('\u1e48', '\u1e48', 1), + ('\u1e4a', '\u1e4a', 1), ('\u1e4c', '\u1e4c', 1), + ('\u1e4e', '\u1e4e', 1), ('\u1e50', '\u1e50', 1), + ('\u1e52', '\u1e52', 1), ('\u1e54', '\u1e54', 1), + ('\u1e56', '\u1e56', 1), ('\u1e58', '\u1e58', 1), + ('\u1e5a', '\u1e5a', 1), 
('\u1e5c', '\u1e5c', 1), + ('\u1e5e', '\u1e5e', 1), ('\u1e60', '\u1e60', 1), + ('\u1e62', '\u1e62', 1), ('\u1e64', '\u1e64', 1), + ('\u1e66', '\u1e66', 1), ('\u1e68', '\u1e68', 1), + ('\u1e6a', '\u1e6a', 1), ('\u1e6c', '\u1e6c', 1), + ('\u1e6e', '\u1e6e', 1), ('\u1e70', '\u1e70', 1), + ('\u1e72', '\u1e72', 1), ('\u1e74', '\u1e74', 1), + ('\u1e76', '\u1e76', 1), ('\u1e78', '\u1e78', 1), + ('\u1e7a', '\u1e7a', 1), ('\u1e7c', '\u1e7c', 1), + ('\u1e7e', '\u1e7e', 1), ('\u1e80', '\u1e80', 1), + ('\u1e82', '\u1e82', 1), ('\u1e84', '\u1e84', 1), + ('\u1e86', '\u1e86', 1), ('\u1e88', '\u1e88', 1), + ('\u1e8a', '\u1e8a', 1), ('\u1e8c', '\u1e8c', 1), + ('\u1e8e', '\u1e8e', 1), ('\u1e90', '\u1e90', 1), + ('\u1e92', '\u1e92', 1), ('\u1e94', '\u1e94', 1), + ('\u1e9e', '\u1e9e', -7615), ('\u1ea0', '\u1ea0', 1), + ('\u1ea2', '\u1ea2', 1), ('\u1ea4', '\u1ea4', 1), + ('\u1ea6', '\u1ea6', 1), ('\u1ea8', '\u1ea8', 1), + ('\u1eaa', '\u1eaa', 1), ('\u1eac', '\u1eac', 1), + ('\u1eae', '\u1eae', 1), ('\u1eb0', '\u1eb0', 1), + ('\u1eb2', '\u1eb2', 1), ('\u1eb4', '\u1eb4', 1), + ('\u1eb6', '\u1eb6', 1), ('\u1eb8', '\u1eb8', 1), + ('\u1eba', '\u1eba', 1), ('\u1ebc', '\u1ebc', 1), + ('\u1ebe', '\u1ebe', 1), ('\u1ec0', '\u1ec0', 1), + ('\u1ec2', '\u1ec2', 1), ('\u1ec4', '\u1ec4', 1), + ('\u1ec6', '\u1ec6', 1), ('\u1ec8', '\u1ec8', 1), + ('\u1eca', '\u1eca', 1), ('\u1ecc', '\u1ecc', 1), + ('\u1ece', '\u1ece', 1), ('\u1ed0', '\u1ed0', 1), + ('\u1ed2', '\u1ed2', 1), ('\u1ed4', '\u1ed4', 1), + ('\u1ed6', '\u1ed6', 1), ('\u1ed8', '\u1ed8', 1), + ('\u1eda', '\u1eda', 1), ('\u1edc', '\u1edc', 1), + ('\u1ede', '\u1ede', 1), ('\u1ee0', '\u1ee0', 1), + ('\u1ee2', '\u1ee2', 1), ('\u1ee4', '\u1ee4', 1), + ('\u1ee6', '\u1ee6', 1), ('\u1ee8', '\u1ee8', 1), + ('\u1eea', '\u1eea', 1), ('\u1eec', '\u1eec', 1), + ('\u1eee', '\u1eee', 1), ('\u1ef0', '\u1ef0', 1), + ('\u1ef2', '\u1ef2', 1), ('\u1ef4', '\u1ef4', 1), + ('\u1ef6', '\u1ef6', 1), ('\u1ef8', '\u1ef8', 1), + ('\u1efa', '\u1efa', 1), ('\u1efc', 
'\u1efc', 1), + ('\u1efe', '\u1efe', 1), ('\u1f08', '\u1f0f', -8), + ('\u1f18', '\u1f1d', -8), ('\u1f28', '\u1f2f', -8), + ('\u1f38', '\u1f3f', -8), ('\u1f48', '\u1f4d', -8), + ('\u1f59', '\u1f5f', -8), ('\u1f68', '\u1f6f', -8), + ('\u1f88', '\u1f8f', -8), ('\u1f98', '\u1f9f', -8), + ('\u1fa8', '\u1faf', -8), ('\u1fb8', '\u1fb9', -8), + ('\u1fba', '\u1fbb', -74), ('\u1fbc', '\u1fbc', -9), + ('\u1fc8', '\u1fcb', -86), ('\u1fcc', '\u1fcc', -9), + ('\u1fd8', '\u1fd9', -8), ('\u1fda', '\u1fdb', -100), + ('\u1fe8', '\u1fe9', -8), ('\u1fea', '\u1feb', -112), + ('\u1fec', '\u1fec', -7), ('\u1ff8', '\u1ff9', -128), + ('\u1ffa', '\u1ffb', -126), ('\u1ffc', '\u1ffc', -9), + ('\u2126', '\u2126', -7517), ('\u212a', '\u212a', -8383), + ('\u212b', '\u212b', -8262), ('\u2132', '\u2132', 28), + ('\u2160', '\u216f', 16), ('\u2183', '\u2183', 1), + ('\u24b6', '\u24cf', 26), ('\u2c00', '\u2c2e', 48), + ('\u2c60', '\u2c60', 1), ('\u2c62', '\u2c62', -10743), + ('\u2c63', '\u2c63', -3814), ('\u2c64', '\u2c64', -10727), + ('\u2c67', '\u2c67', 1), ('\u2c69', '\u2c69', 1), + ('\u2c6b', '\u2c6b', 1), ('\u2c6d', '\u2c6d', -10780), + ('\u2c6e', '\u2c6e', -10749), ('\u2c6f', '\u2c6f', -10783), + ('\u2c70', '\u2c70', -10782), ('\u2c72', '\u2c72', 1), + ('\u2c75', '\u2c75', 1), ('\u2c7e', '\u2c7f', -10815), + ('\u2c80', '\u2c80', 1), ('\u2c82', '\u2c82', 1), + ('\u2c84', '\u2c84', 1), ('\u2c86', '\u2c86', 1), + ('\u2c88', '\u2c88', 1), ('\u2c8a', '\u2c8a', 1), + ('\u2c8c', '\u2c8c', 1), ('\u2c8e', '\u2c8e', 1), + ('\u2c90', '\u2c90', 1), ('\u2c92', '\u2c92', 1), + ('\u2c94', '\u2c94', 1), ('\u2c96', '\u2c96', 1), + ('\u2c98', '\u2c98', 1), ('\u2c9a', '\u2c9a', 1), + ('\u2c9c', '\u2c9c', 1), ('\u2c9e', '\u2c9e', 1), + ('\u2ca0', '\u2ca0', 1), ('\u2ca2', '\u2ca2', 1), + ('\u2ca4', '\u2ca4', 1), ('\u2ca6', '\u2ca6', 1), + ('\u2ca8', '\u2ca8', 1), ('\u2caa', '\u2caa', 1), + ('\u2cac', '\u2cac', 1), ('\u2cae', '\u2cae', 1), + ('\u2cb0', '\u2cb0', 1), ('\u2cb2', '\u2cb2', 1), + ('\u2cb4', '\u2cb4', 
1), ('\u2cb6', '\u2cb6', 1), + ('\u2cb8', '\u2cb8', 1), ('\u2cba', '\u2cba', 1), + ('\u2cbc', '\u2cbc', 1), ('\u2cbe', '\u2cbe', 1), + ('\u2cc0', '\u2cc0', 1), ('\u2cc2', '\u2cc2', 1), + ('\u2cc4', '\u2cc4', 1), ('\u2cc6', '\u2cc6', 1), + ('\u2cc8', '\u2cc8', 1), ('\u2cca', '\u2cca', 1), + ('\u2ccc', '\u2ccc', 1), ('\u2cce', '\u2cce', 1), + ('\u2cd0', '\u2cd0', 1), ('\u2cd2', '\u2cd2', 1), + ('\u2cd4', '\u2cd4', 1), ('\u2cd6', '\u2cd6', 1), + ('\u2cd8', '\u2cd8', 1), ('\u2cda', '\u2cda', 1), + ('\u2cdc', '\u2cdc', 1), ('\u2cde', '\u2cde', 1), + ('\u2ce0', '\u2ce0', 1), ('\u2ce2', '\u2ce2', 1), + ('\u2ceb', '\u2ceb', 1), ('\u2ced', '\u2ced', 1), + ('\u2cf2', '\u2cf2', 1), ('\ua640', '\ua640', 1), + ('\ua642', '\ua642', 1), ('\ua644', '\ua644', 1), + ('\ua646', '\ua646', 1), ('\ua648', '\ua648', 1), + ('\ua64a', '\ua64a', 1), ('\ua64c', '\ua64c', 1), + ('\ua64e', '\ua64e', 1), ('\ua650', '\ua650', 1), + ('\ua652', '\ua652', 1), ('\ua654', '\ua654', 1), + ('\ua656', '\ua656', 1), ('\ua658', '\ua658', 1), + ('\ua65a', '\ua65a', 1), ('\ua65c', '\ua65c', 1), + ('\ua65e', '\ua65e', 1), ('\ua660', '\ua660', 1), + ('\ua662', '\ua662', 1), ('\ua664', '\ua664', 1), + ('\ua666', '\ua666', 1), ('\ua668', '\ua668', 1), + ('\ua66a', '\ua66a', 1), ('\ua66c', '\ua66c', 1), + ('\ua680', '\ua680', 1), ('\ua682', '\ua682', 1), + ('\ua684', '\ua684', 1), ('\ua686', '\ua686', 1), + ('\ua688', '\ua688', 1), ('\ua68a', '\ua68a', 1), + ('\ua68c', '\ua68c', 1), ('\ua68e', '\ua68e', 1), + ('\ua690', '\ua690', 1), ('\ua692', '\ua692', 1), + ('\ua694', '\ua694', 1), ('\ua696', '\ua696', 1), + ('\ua722', '\ua722', 1), ('\ua724', '\ua724', 1), + ('\ua726', '\ua726', 1), ('\ua728', '\ua728', 1), + ('\ua72a', '\ua72a', 1), ('\ua72c', '\ua72c', 1), + ('\ua72e', '\ua72e', 1), ('\ua732', '\ua732', 1), + ('\ua734', '\ua734', 1), ('\ua736', '\ua736', 1), + ('\ua738', '\ua738', 1), ('\ua73a', '\ua73a', 1), + ('\ua73c', '\ua73c', 1), ('\ua73e', '\ua73e', 1), + ('\ua740', '\ua740', 1), ('\ua742', 
'\ua742', 1), + ('\ua744', '\ua744', 1), ('\ua746', '\ua746', 1), + ('\ua748', '\ua748', 1), ('\ua74a', '\ua74a', 1), + ('\ua74c', '\ua74c', 1), ('\ua74e', '\ua74e', 1), + ('\ua750', '\ua750', 1), ('\ua752', '\ua752', 1), + ('\ua754', '\ua754', 1), ('\ua756', '\ua756', 1), + ('\ua758', '\ua758', 1), ('\ua75a', '\ua75a', 1), + ('\ua75c', '\ua75c', 1), ('\ua75e', '\ua75e', 1), + ('\ua760', '\ua760', 1), ('\ua762', '\ua762', 1), + ('\ua764', '\ua764', 1), ('\ua766', '\ua766', 1), + ('\ua768', '\ua768', 1), ('\ua76a', '\ua76a', 1), + ('\ua76c', '\ua76c', 1), ('\ua76e', '\ua76e', 1), + ('\ua779', '\ua779', 1), ('\ua77b', '\ua77b', 1), + ('\ua77d', '\ua77d', -35332), ('\ua77e', '\ua77e', 1), + ('\ua780', '\ua780', 1), ('\ua782', '\ua782', 1), + ('\ua784', '\ua784', 1), ('\ua786', '\ua786', 1), + ('\ua78b', '\ua78b', 1), ('\ua78d', '\ua78d', -42280), + ('\ua790', '\ua790', 1), ('\ua792', '\ua792', 1), + ('\ua7a0', '\ua7a0', 1), ('\ua7a2', '\ua7a2', 1), + ('\ua7a4', '\ua7a4', 1), ('\ua7a6', '\ua7a6', 1), + ('\ua7a8', '\ua7a8', 1), ('\ua7aa', '\ua7aa', -42308), + ('\uff21', '\uff3a', 32), ('\U00010400', '\U00010427', 40) + ]; + + pub fn lowcase_offset(c: char) -> i32 { + bsearch_range_value_table(c, lowcase_table) + } + + static upcase_table : &'static [(char,char,i32)] = &[ + ('\x61', '\x7a', -32), ('\xb5', '\xb5', 743), + ('\xe0', '\xf6', -32), ('\xf8', '\xfe', -32), + ('\xff', '\xff', 121), ('\u0101', '\u0101', -1), + ('\u0103', '\u0103', -1), ('\u0105', '\u0105', -1), + ('\u0107', '\u0107', -1), ('\u0109', '\u0109', -1), + ('\u010b', '\u010b', -1), ('\u010d', '\u010d', -1), + ('\u010f', '\u010f', -1), ('\u0111', '\u0111', -1), + ('\u0113', '\u0113', -1), ('\u0115', '\u0115', -1), + ('\u0117', '\u0117', -1), ('\u0119', '\u0119', -1), + ('\u011b', '\u011b', -1), ('\u011d', '\u011d', -1), + ('\u011f', '\u011f', -1), ('\u0121', '\u0121', -1), + ('\u0123', '\u0123', -1), ('\u0125', '\u0125', -1), + ('\u0127', '\u0127', -1), ('\u0129', '\u0129', -1), + ('\u012b', '\u012b', 
-1), ('\u012d', '\u012d', -1), + ('\u012f', '\u012f', -1), ('\u0131', '\u0131', -232), + ('\u0133', '\u0133', -1), ('\u0135', '\u0135', -1), + ('\u0137', '\u0137', -1), ('\u013a', '\u013a', -1), + ('\u013c', '\u013c', -1), ('\u013e', '\u013e', -1), + ('\u0140', '\u0140', -1), ('\u0142', '\u0142', -1), + ('\u0144', '\u0144', -1), ('\u0146', '\u0146', -1), + ('\u0148', '\u0148', -1), ('\u014b', '\u014b', -1), + ('\u014d', '\u014d', -1), ('\u014f', '\u014f', -1), + ('\u0151', '\u0151', -1), ('\u0153', '\u0153', -1), + ('\u0155', '\u0155', -1), ('\u0157', '\u0157', -1), + ('\u0159', '\u0159', -1), ('\u015b', '\u015b', -1), + ('\u015d', '\u015d', -1), ('\u015f', '\u015f', -1), + ('\u0161', '\u0161', -1), ('\u0163', '\u0163', -1), + ('\u0165', '\u0165', -1), ('\u0167', '\u0167', -1), + ('\u0169', '\u0169', -1), ('\u016b', '\u016b', -1), + ('\u016d', '\u016d', -1), ('\u016f', '\u016f', -1), + ('\u0171', '\u0171', -1), ('\u0173', '\u0173', -1), + ('\u0175', '\u0175', -1), ('\u0177', '\u0177', -1), + ('\u017a', '\u017a', -1), ('\u017c', '\u017c', -1), + ('\u017e', '\u017e', -1), ('\u017f', '\u017f', -300), + ('\u0180', '\u0180', 195), ('\u0183', '\u0183', -1), + ('\u0185', '\u0185', -1), ('\u0188', '\u0188', -1), + ('\u018c', '\u018c', -1), ('\u0192', '\u0192', -1), + ('\u0195', '\u0195', 97), ('\u0199', '\u0199', -1), + ('\u019a', '\u019a', 163), ('\u019e', '\u019e', 130), + ('\u01a1', '\u01a1', -1), ('\u01a3', '\u01a3', -1), + ('\u01a5', '\u01a5', -1), ('\u01a8', '\u01a8', -1), + ('\u01ad', '\u01ad', -1), ('\u01b0', '\u01b0', -1), + ('\u01b4', '\u01b4', -1), ('\u01b6', '\u01b6', -1), + ('\u01b9', '\u01b9', -1), ('\u01bd', '\u01bd', -1), + ('\u01bf', '\u01bf', 56), ('\u01c5', '\u01c5', -1), + ('\u01c6', '\u01c6', -2), ('\u01c8', '\u01c8', -1), + ('\u01c9', '\u01c9', -2), ('\u01cb', '\u01cb', -1), + ('\u01cc', '\u01cc', -2), ('\u01ce', '\u01ce', -1), + ('\u01d0', '\u01d0', -1), ('\u01d2', '\u01d2', -1), + ('\u01d4', '\u01d4', -1), ('\u01d6', '\u01d6', -1), + ('\u01d8', 
'\u01d8', -1), ('\u01da', '\u01da', -1), + ('\u01dc', '\u01dc', -1), ('\u01dd', '\u01dd', -79), + ('\u01df', '\u01df', -1), ('\u01e1', '\u01e1', -1), + ('\u01e3', '\u01e3', -1), ('\u01e5', '\u01e5', -1), + ('\u01e7', '\u01e7', -1), ('\u01e9', '\u01e9', -1), + ('\u01eb', '\u01eb', -1), ('\u01ed', '\u01ed', -1), + ('\u01ef', '\u01ef', -1), ('\u01f2', '\u01f2', -1), + ('\u01f3', '\u01f3', -2), ('\u01f5', '\u01f5', -1), + ('\u01f9', '\u01f9', -1), ('\u01fb', '\u01fb', -1), + ('\u01fd', '\u01fd', -1), ('\u01ff', '\u01ff', -1), + ('\u0201', '\u0201', -1), ('\u0203', '\u0203', -1), + ('\u0205', '\u0205', -1), ('\u0207', '\u0207', -1), + ('\u0209', '\u0209', -1), ('\u020b', '\u020b', -1), + ('\u020d', '\u020d', -1), ('\u020f', '\u020f', -1), + ('\u0211', '\u0211', -1), ('\u0213', '\u0213', -1), + ('\u0215', '\u0215', -1), ('\u0217', '\u0217', -1), + ('\u0219', '\u0219', -1), ('\u021b', '\u021b', -1), + ('\u021d', '\u021d', -1), ('\u021f', '\u021f', -1), + ('\u0223', '\u0223', -1), ('\u0225', '\u0225', -1), + ('\u0227', '\u0227', -1), ('\u0229', '\u0229', -1), + ('\u022b', '\u022b', -1), ('\u022d', '\u022d', -1), + ('\u022f', '\u022f', -1), ('\u0231', '\u0231', -1), + ('\u0233', '\u0233', -1), ('\u023c', '\u023c', -1), + ('\u023f', '\u0240', 10815), ('\u0242', '\u0242', -1), + ('\u0247', '\u0247', -1), ('\u0249', '\u0249', -1), + ('\u024b', '\u024b', -1), ('\u024d', '\u024d', -1), + ('\u024f', '\u024f', -1), ('\u0250', '\u0250', 10783), + ('\u0251', '\u0251', 10780), ('\u0252', '\u0252', 10782), + ('\u0253', '\u0253', -210), ('\u0254', '\u0254', -206), + ('\u0256', '\u0257', -205), ('\u0259', '\u0259', -202), + ('\u025b', '\u025b', -203), ('\u0260', '\u0260', -205), + ('\u0263', '\u0263', -207), ('\u0265', '\u0265', 42280), + ('\u0266', '\u0266', 42308), ('\u0268', '\u0268', -209), + ('\u0269', '\u0269', -211), ('\u026b', '\u026b', 10743), + ('\u026f', '\u026f', -211), ('\u0271', '\u0271', 10749), + ('\u0272', '\u0272', -213), ('\u0275', '\u0275', -214), + ('\u027d', 
'\u027d', 10727), ('\u0280', '\u0280', -218), + ('\u0283', '\u0283', -218), ('\u0288', '\u0288', -218), + ('\u0289', '\u0289', -69), ('\u028a', '\u028b', -217), + ('\u028c', '\u028c', -71), ('\u0292', '\u0292', -219), + ('\u0345', '\u0345', 84), ('\u0371', '\u0371', -1), + ('\u0373', '\u0373', -1), ('\u0377', '\u0377', -1), + ('\u037b', '\u037d', 130), ('\u03ac', '\u03ac', -38), + ('\u03ad', '\u03af', -37), ('\u03b1', '\u03c1', -32), + ('\u03c2', '\u03c2', -31), ('\u03c3', '\u03cb', -32), + ('\u03cc', '\u03cc', -64), ('\u03cd', '\u03ce', -63), + ('\u03d0', '\u03d0', -62), ('\u03d1', '\u03d1', -57), + ('\u03d5', '\u03d5', -47), ('\u03d6', '\u03d6', -54), + ('\u03d7', '\u03d7', -8), ('\u03d9', '\u03d9', -1), + ('\u03db', '\u03db', -1), ('\u03dd', '\u03dd', -1), + ('\u03df', '\u03df', -1), ('\u03e1', '\u03e1', -1), + ('\u03e3', '\u03e3', -1), ('\u03e5', '\u03e5', -1), + ('\u03e7', '\u03e7', -1), ('\u03e9', '\u03e9', -1), + ('\u03eb', '\u03eb', -1), ('\u03ed', '\u03ed', -1), + ('\u03ef', '\u03ef', -1), ('\u03f0', '\u03f0', -86), + ('\u03f1', '\u03f1', -80), ('\u03f2', '\u03f2', 7), + ('\u03f5', '\u03f5', -96), ('\u03f8', '\u03f8', -1), + ('\u03fb', '\u03fb', -1), ('\u0430', '\u044f', -32), + ('\u0450', '\u045f', -80), ('\u0461', '\u0461', -1), + ('\u0463', '\u0463', -1), ('\u0465', '\u0465', -1), + ('\u0467', '\u0467', -1), ('\u0469', '\u0469', -1), + ('\u046b', '\u046b', -1), ('\u046d', '\u046d', -1), + ('\u046f', '\u046f', -1), ('\u0471', '\u0471', -1), + ('\u0473', '\u0473', -1), ('\u0475', '\u0475', -1), + ('\u0477', '\u0477', -1), ('\u0479', '\u0479', -1), + ('\u047b', '\u047b', -1), ('\u047d', '\u047d', -1), + ('\u047f', '\u047f', -1), ('\u0481', '\u0481', -1), + ('\u048b', '\u048b', -1), ('\u048d', '\u048d', -1), + ('\u048f', '\u048f', -1), ('\u0491', '\u0491', -1), + ('\u0493', '\u0493', -1), ('\u0495', '\u0495', -1), + ('\u0497', '\u0497', -1), ('\u0499', '\u0499', -1), + ('\u049b', '\u049b', -1), ('\u049d', '\u049d', -1), + ('\u049f', '\u049f', -1), 
('\u04a1', '\u04a1', -1), + ('\u04a3', '\u04a3', -1), ('\u04a5', '\u04a5', -1), + ('\u04a7', '\u04a7', -1), ('\u04a9', '\u04a9', -1), + ('\u04ab', '\u04ab', -1), ('\u04ad', '\u04ad', -1), + ('\u04af', '\u04af', -1), ('\u04b1', '\u04b1', -1), + ('\u04b3', '\u04b3', -1), ('\u04b5', '\u04b5', -1), + ('\u04b7', '\u04b7', -1), ('\u04b9', '\u04b9', -1), + ('\u04bb', '\u04bb', -1), ('\u04bd', '\u04bd', -1), + ('\u04bf', '\u04bf', -1), ('\u04c2', '\u04c2', -1), + ('\u04c4', '\u04c4', -1), ('\u04c6', '\u04c6', -1), + ('\u04c8', '\u04c8', -1), ('\u04ca', '\u04ca', -1), + ('\u04cc', '\u04cc', -1), ('\u04ce', '\u04ce', -1), + ('\u04cf', '\u04cf', -15), ('\u04d1', '\u04d1', -1), + ('\u04d3', '\u04d3', -1), ('\u04d5', '\u04d5', -1), + ('\u04d7', '\u04d7', -1), ('\u04d9', '\u04d9', -1), + ('\u04db', '\u04db', -1), ('\u04dd', '\u04dd', -1), + ('\u04df', '\u04df', -1), ('\u04e1', '\u04e1', -1), + ('\u04e3', '\u04e3', -1), ('\u04e5', '\u04e5', -1), + ('\u04e7', '\u04e7', -1), ('\u04e9', '\u04e9', -1), + ('\u04eb', '\u04eb', -1), ('\u04ed', '\u04ed', -1), + ('\u04ef', '\u04ef', -1), ('\u04f1', '\u04f1', -1), + ('\u04f3', '\u04f3', -1), ('\u04f5', '\u04f5', -1), + ('\u04f7', '\u04f7', -1), ('\u04f9', '\u04f9', -1), + ('\u04fb', '\u04fb', -1), ('\u04fd', '\u04fd', -1), + ('\u04ff', '\u04ff', -1), ('\u0501', '\u0501', -1), + ('\u0503', '\u0503', -1), ('\u0505', '\u0505', -1), + ('\u0507', '\u0507', -1), ('\u0509', '\u0509', -1), + ('\u050b', '\u050b', -1), ('\u050d', '\u050d', -1), + ('\u050f', '\u050f', -1), ('\u0511', '\u0511', -1), + ('\u0513', '\u0513', -1), ('\u0515', '\u0515', -1), + ('\u0517', '\u0517', -1), ('\u0519', '\u0519', -1), + ('\u051b', '\u051b', -1), ('\u051d', '\u051d', -1), + ('\u051f', '\u051f', -1), ('\u0521', '\u0521', -1), + ('\u0523', '\u0523', -1), ('\u0525', '\u0525', -1), + ('\u0527', '\u0527', -1), ('\u0561', '\u0586', -48), + ('\u1d79', '\u1d79', 35332), ('\u1d7d', '\u1d7d', 3814), + ('\u1e01', '\u1e01', -1), ('\u1e03', '\u1e03', -1), + ('\u1e05', '\u1e05', 
-1), ('\u1e07', '\u1e07', -1), + ('\u1e09', '\u1e09', -1), ('\u1e0b', '\u1e0b', -1), + ('\u1e0d', '\u1e0d', -1), ('\u1e0f', '\u1e0f', -1), + ('\u1e11', '\u1e11', -1), ('\u1e13', '\u1e13', -1), + ('\u1e15', '\u1e15', -1), ('\u1e17', '\u1e17', -1), + ('\u1e19', '\u1e19', -1), ('\u1e1b', '\u1e1b', -1), + ('\u1e1d', '\u1e1d', -1), ('\u1e1f', '\u1e1f', -1), + ('\u1e21', '\u1e21', -1), ('\u1e23', '\u1e23', -1), + ('\u1e25', '\u1e25', -1), ('\u1e27', '\u1e27', -1), + ('\u1e29', '\u1e29', -1), ('\u1e2b', '\u1e2b', -1), + ('\u1e2d', '\u1e2d', -1), ('\u1e2f', '\u1e2f', -1), + ('\u1e31', '\u1e31', -1), ('\u1e33', '\u1e33', -1), + ('\u1e35', '\u1e35', -1), ('\u1e37', '\u1e37', -1), + ('\u1e39', '\u1e39', -1), ('\u1e3b', '\u1e3b', -1), + ('\u1e3d', '\u1e3d', -1), ('\u1e3f', '\u1e3f', -1), + ('\u1e41', '\u1e41', -1), ('\u1e43', '\u1e43', -1), + ('\u1e45', '\u1e45', -1), ('\u1e47', '\u1e47', -1), + ('\u1e49', '\u1e49', -1), ('\u1e4b', '\u1e4b', -1), + ('\u1e4d', '\u1e4d', -1), ('\u1e4f', '\u1e4f', -1), + ('\u1e51', '\u1e51', -1), ('\u1e53', '\u1e53', -1), + ('\u1e55', '\u1e55', -1), ('\u1e57', '\u1e57', -1), + ('\u1e59', '\u1e59', -1), ('\u1e5b', '\u1e5b', -1), + ('\u1e5d', '\u1e5d', -1), ('\u1e5f', '\u1e5f', -1), + ('\u1e61', '\u1e61', -1), ('\u1e63', '\u1e63', -1), + ('\u1e65', '\u1e65', -1), ('\u1e67', '\u1e67', -1), + ('\u1e69', '\u1e69', -1), ('\u1e6b', '\u1e6b', -1), + ('\u1e6d', '\u1e6d', -1), ('\u1e6f', '\u1e6f', -1), + ('\u1e71', '\u1e71', -1), ('\u1e73', '\u1e73', -1), + ('\u1e75', '\u1e75', -1), ('\u1e77', '\u1e77', -1), + ('\u1e79', '\u1e79', -1), ('\u1e7b', '\u1e7b', -1), + ('\u1e7d', '\u1e7d', -1), ('\u1e7f', '\u1e7f', -1), + ('\u1e81', '\u1e81', -1), ('\u1e83', '\u1e83', -1), + ('\u1e85', '\u1e85', -1), ('\u1e87', '\u1e87', -1), + ('\u1e89', '\u1e89', -1), ('\u1e8b', '\u1e8b', -1), + ('\u1e8d', '\u1e8d', -1), ('\u1e8f', '\u1e8f', -1), + ('\u1e91', '\u1e91', -1), ('\u1e93', '\u1e93', -1), + ('\u1e95', '\u1e95', -1), ('\u1e9b', '\u1e9b', -59), + ('\u1ea1', '\u1ea1', 
-1), ('\u1ea3', '\u1ea3', -1), + ('\u1ea5', '\u1ea5', -1), ('\u1ea7', '\u1ea7', -1), + ('\u1ea9', '\u1ea9', -1), ('\u1eab', '\u1eab', -1), + ('\u1ead', '\u1ead', -1), ('\u1eaf', '\u1eaf', -1), + ('\u1eb1', '\u1eb1', -1), ('\u1eb3', '\u1eb3', -1), + ('\u1eb5', '\u1eb5', -1), ('\u1eb7', '\u1eb7', -1), + ('\u1eb9', '\u1eb9', -1), ('\u1ebb', '\u1ebb', -1), + ('\u1ebd', '\u1ebd', -1), ('\u1ebf', '\u1ebf', -1), + ('\u1ec1', '\u1ec1', -1), ('\u1ec3', '\u1ec3', -1), + ('\u1ec5', '\u1ec5', -1), ('\u1ec7', '\u1ec7', -1), + ('\u1ec9', '\u1ec9', -1), ('\u1ecb', '\u1ecb', -1), + ('\u1ecd', '\u1ecd', -1), ('\u1ecf', '\u1ecf', -1), + ('\u1ed1', '\u1ed1', -1), ('\u1ed3', '\u1ed3', -1), + ('\u1ed5', '\u1ed5', -1), ('\u1ed7', '\u1ed7', -1), + ('\u1ed9', '\u1ed9', -1), ('\u1edb', '\u1edb', -1), + ('\u1edd', '\u1edd', -1), ('\u1edf', '\u1edf', -1), + ('\u1ee1', '\u1ee1', -1), ('\u1ee3', '\u1ee3', -1), + ('\u1ee5', '\u1ee5', -1), ('\u1ee7', '\u1ee7', -1), + ('\u1ee9', '\u1ee9', -1), ('\u1eeb', '\u1eeb', -1), + ('\u1eed', '\u1eed', -1), ('\u1eef', '\u1eef', -1), + ('\u1ef1', '\u1ef1', -1), ('\u1ef3', '\u1ef3', -1), + ('\u1ef5', '\u1ef5', -1), ('\u1ef7', '\u1ef7', -1), + ('\u1ef9', '\u1ef9', -1), ('\u1efb', '\u1efb', -1), + ('\u1efd', '\u1efd', -1), ('\u1eff', '\u1eff', -1), + ('\u1f00', '\u1f07', 8), ('\u1f10', '\u1f15', 8), + ('\u1f20', '\u1f27', 8), ('\u1f30', '\u1f37', 8), + ('\u1f40', '\u1f45', 8), ('\u1f51', '\u1f51', 8), + ('\u1f53', '\u1f53', 8), ('\u1f55', '\u1f55', 8), + ('\u1f57', '\u1f57', 8), ('\u1f60', '\u1f67', 8), + ('\u1f70', '\u1f71', 74), ('\u1f72', '\u1f75', 86), + ('\u1f76', '\u1f77', 100), ('\u1f78', '\u1f79', 128), + ('\u1f7a', '\u1f7b', 112), ('\u1f7c', '\u1f7d', 126), + ('\u1f80', '\u1f87', 8), ('\u1f90', '\u1f97', 8), + ('\u1fa0', '\u1fa7', 8), ('\u1fb0', '\u1fb1', 8), + ('\u1fb3', '\u1fb3', 9), ('\u1fbe', '\u1fbe', -7205), + ('\u1fc3', '\u1fc3', 9), ('\u1fd0', '\u1fd1', 8), + ('\u1fe0', '\u1fe1', 8), ('\u1fe5', '\u1fe5', 7), + ('\u1ff3', '\u1ff3', 9), 
('\u214e', '\u214e', -28), + ('\u2170', '\u217f', -16), ('\u2184', '\u2184', -1), + ('\u24d0', '\u24e9', -26), ('\u2c30', '\u2c5e', -48), + ('\u2c61', '\u2c61', -1), ('\u2c65', '\u2c65', -10795), + ('\u2c66', '\u2c66', -10792), ('\u2c68', '\u2c68', -1), + ('\u2c6a', '\u2c6a', -1), ('\u2c6c', '\u2c6c', -1), + ('\u2c73', '\u2c73', -1), ('\u2c76', '\u2c76', -1), + ('\u2c81', '\u2c81', -1), ('\u2c83', '\u2c83', -1), + ('\u2c85', '\u2c85', -1), ('\u2c87', '\u2c87', -1), + ('\u2c89', '\u2c89', -1), ('\u2c8b', '\u2c8b', -1), + ('\u2c8d', '\u2c8d', -1), ('\u2c8f', '\u2c8f', -1), + ('\u2c91', '\u2c91', -1), ('\u2c93', '\u2c93', -1), + ('\u2c95', '\u2c95', -1), ('\u2c97', '\u2c97', -1), + ('\u2c99', '\u2c99', -1), ('\u2c9b', '\u2c9b', -1), + ('\u2c9d', '\u2c9d', -1), ('\u2c9f', '\u2c9f', -1), + ('\u2ca1', '\u2ca1', -1), ('\u2ca3', '\u2ca3', -1), + ('\u2ca5', '\u2ca5', -1), ('\u2ca7', '\u2ca7', -1), + ('\u2ca9', '\u2ca9', -1), ('\u2cab', '\u2cab', -1), + ('\u2cad', '\u2cad', -1), ('\u2caf', '\u2caf', -1), + ('\u2cb1', '\u2cb1', -1), ('\u2cb3', '\u2cb3', -1), + ('\u2cb5', '\u2cb5', -1), ('\u2cb7', '\u2cb7', -1), + ('\u2cb9', '\u2cb9', -1), ('\u2cbb', '\u2cbb', -1), + ('\u2cbd', '\u2cbd', -1), ('\u2cbf', '\u2cbf', -1), + ('\u2cc1', '\u2cc1', -1), ('\u2cc3', '\u2cc3', -1), + ('\u2cc5', '\u2cc5', -1), ('\u2cc7', '\u2cc7', -1), + ('\u2cc9', '\u2cc9', -1), ('\u2ccb', '\u2ccb', -1), + ('\u2ccd', '\u2ccd', -1), ('\u2ccf', '\u2ccf', -1), + ('\u2cd1', '\u2cd1', -1), ('\u2cd3', '\u2cd3', -1), + ('\u2cd5', '\u2cd5', -1), ('\u2cd7', '\u2cd7', -1), + ('\u2cd9', '\u2cd9', -1), ('\u2cdb', '\u2cdb', -1), + ('\u2cdd', '\u2cdd', -1), ('\u2cdf', '\u2cdf', -1), + ('\u2ce1', '\u2ce1', -1), ('\u2ce3', '\u2ce3', -1), + ('\u2cec', '\u2cec', -1), ('\u2cee', '\u2cee', -1), + ('\u2cf3', '\u2cf3', -1), ('\u2d00', '\u2d2d', -7264), + ('\ua641', '\ua641', -1), ('\ua643', '\ua643', -1), + ('\ua645', '\ua645', -1), ('\ua647', '\ua647', -1), + ('\ua649', '\ua649', -1), ('\ua64b', '\ua64b', -1), + ('\ua64d', 
'\ua64d', -1), ('\ua64f', '\ua64f', -1), + ('\ua651', '\ua651', -1), ('\ua653', '\ua653', -1), + ('\ua655', '\ua655', -1), ('\ua657', '\ua657', -1), + ('\ua659', '\ua659', -1), ('\ua65b', '\ua65b', -1), + ('\ua65d', '\ua65d', -1), ('\ua65f', '\ua65f', -1), + ('\ua661', '\ua661', -1), ('\ua663', '\ua663', -1), + ('\ua665', '\ua665', -1), ('\ua667', '\ua667', -1), + ('\ua669', '\ua669', -1), ('\ua66b', '\ua66b', -1), + ('\ua66d', '\ua66d', -1), ('\ua681', '\ua681', -1), + ('\ua683', '\ua683', -1), ('\ua685', '\ua685', -1), + ('\ua687', '\ua687', -1), ('\ua689', '\ua689', -1), + ('\ua68b', '\ua68b', -1), ('\ua68d', '\ua68d', -1), + ('\ua68f', '\ua68f', -1), ('\ua691', '\ua691', -1), + ('\ua693', '\ua693', -1), ('\ua695', '\ua695', -1), + ('\ua697', '\ua697', -1), ('\ua723', '\ua723', -1), + ('\ua725', '\ua725', -1), ('\ua727', '\ua727', -1), + ('\ua729', '\ua729', -1), ('\ua72b', '\ua72b', -1), + ('\ua72d', '\ua72d', -1), ('\ua72f', '\ua72f', -1), + ('\ua733', '\ua733', -1), ('\ua735', '\ua735', -1), + ('\ua737', '\ua737', -1), ('\ua739', '\ua739', -1), + ('\ua73b', '\ua73b', -1), ('\ua73d', '\ua73d', -1), + ('\ua73f', '\ua73f', -1), ('\ua741', '\ua741', -1), + ('\ua743', '\ua743', -1), ('\ua745', '\ua745', -1), + ('\ua747', '\ua747', -1), ('\ua749', '\ua749', -1), + ('\ua74b', '\ua74b', -1), ('\ua74d', '\ua74d', -1), + ('\ua74f', '\ua74f', -1), ('\ua751', '\ua751', -1), + ('\ua753', '\ua753', -1), ('\ua755', '\ua755', -1), + ('\ua757', '\ua757', -1), ('\ua759', '\ua759', -1), + ('\ua75b', '\ua75b', -1), ('\ua75d', '\ua75d', -1), + ('\ua75f', '\ua75f', -1), ('\ua761', '\ua761', -1), + ('\ua763', '\ua763', -1), ('\ua765', '\ua765', -1), + ('\ua767', '\ua767', -1), ('\ua769', '\ua769', -1), + ('\ua76b', '\ua76b', -1), ('\ua76d', '\ua76d', -1), + ('\ua76f', '\ua76f', -1), ('\ua77a', '\ua77a', -1), + ('\ua77c', '\ua77c', -1), ('\ua77f', '\ua77f', -1), + ('\ua781', '\ua781', -1), ('\ua783', '\ua783', -1), + ('\ua785', '\ua785', -1), ('\ua787', '\ua787', -1), + ('\ua78c', 
'\ua78c', -1), ('\ua791', '\ua791', -1), + ('\ua793', '\ua793', -1), ('\ua7a1', '\ua7a1', -1), + ('\ua7a3', '\ua7a3', -1), ('\ua7a5', '\ua7a5', -1), + ('\ua7a7', '\ua7a7', -1), ('\ua7a9', '\ua7a9', -1), + ('\uff41', '\uff5a', -32), ('\U00010428', '\U0001044f', -40) + ]; + + /* Returns whatever bsearch_range_value_table yields for c over upcase_table, as an i32. Table rows are (char, char, signed int) triples; the generator in src/etc/unicode.py builds them as (first code point of run, last code point of run, uppercase code point minus code point). NOTE(review): bsearch_range_value_table is defined outside this view, so the value returned for a char that falls in no range should be confirmed there. */ pub fn upcase_offset(c: char) -> i32 { + bsearch_range_value_table(c, upcase_table) + } + + /* Full (possibly multi-character) case mappings. The generator (load_special_casing in src/etc/unicode.py) emits each row as (code point, lowercase string, uppercase string), skips the language-sensitive section of its input, and sorts the rows; case_special below binary-searches this table on the first field. */ static special_table : &'static [(char, &'static str,&'static str)] = &[ + ('\u00df', "\u00df", "\u0053\u0053"), + ('\u0130', "\u0069\u0307", "\u0130"), + ('\u0149', "\u0149", "\u02bc\u004e"), + ('\u01f0', "\u01f0", "\u004a\u030c"), + ('\u0390', "\u0390", "\u0399\u0308\u0301"), + ('\u03a3', "\u03c2", "\u03a3"), + ('\u03b0', "\u03b0", "\u03a5\u0308\u0301"), + ('\u0587', "\u0587", "\u0535\u0552"), + ('\u1e96', "\u1e96", "\u0048\u0331"), + ('\u1e97', "\u1e97", "\u0054\u0308"), + ('\u1e98', "\u1e98", "\u0057\u030a"), + ('\u1e99', "\u1e99", "\u0059\u030a"), + ('\u1e9a', "\u1e9a", "\u0041\u02be"), + ('\u1f50', "\u1f50", "\u03a5\u0313"), + ('\u1f52', "\u1f52", "\u03a5\u0313\u0300"), + ('\u1f54', "\u1f54", "\u03a5\u0313\u0301"), + ('\u1f56', "\u1f56", "\u03a5\u0313\u0342"), + ('\u1f80', "\u1f80", "\u1f08\u0399"), + ('\u1f81', "\u1f81", "\u1f09\u0399"), + ('\u1f82', "\u1f82", "\u1f0a\u0399"), + ('\u1f83', "\u1f83", "\u1f0b\u0399"), + ('\u1f84', "\u1f84", "\u1f0c\u0399"), + ('\u1f85', "\u1f85", "\u1f0d\u0399"), + ('\u1f86', "\u1f86", "\u1f0e\u0399"), + ('\u1f87', "\u1f87", "\u1f0f\u0399"), + ('\u1f88', "\u1f80", "\u1f08\u0399"), + ('\u1f89', "\u1f81", "\u1f09\u0399"), + ('\u1f8a', "\u1f82", "\u1f0a\u0399"), + ('\u1f8b', "\u1f83", "\u1f0b\u0399"), + ('\u1f8c', "\u1f84", "\u1f0c\u0399"), + ('\u1f8d', "\u1f85", "\u1f0d\u0399"), + ('\u1f8e', "\u1f86", "\u1f0e\u0399"), + ('\u1f8f', "\u1f87", "\u1f0f\u0399"), + ('\u1f90', "\u1f90", "\u1f28\u0399"), + ('\u1f91', "\u1f91", "\u1f29\u0399"), + ('\u1f92', "\u1f92", "\u1f2a\u0399"), + ('\u1f93', "\u1f93", "\u1f2b\u0399"), + ('\u1f94', "\u1f94", "\u1f2c\u0399"), + ('\u1f95', 
"\u1f95", "\u1f2d\u0399"), + ('\u1f96', "\u1f96", "\u1f2e\u0399"), + ('\u1f97', "\u1f97", "\u1f2f\u0399"), + ('\u1f98', "\u1f90", "\u1f28\u0399"), + ('\u1f99', "\u1f91", "\u1f29\u0399"), + ('\u1f9a', "\u1f92", "\u1f2a\u0399"), + ('\u1f9b', "\u1f93", "\u1f2b\u0399"), + ('\u1f9c', "\u1f94", "\u1f2c\u0399"), + ('\u1f9d', "\u1f95", "\u1f2d\u0399"), + ('\u1f9e', "\u1f96", "\u1f2e\u0399"), + ('\u1f9f', "\u1f97", "\u1f2f\u0399"), + ('\u1fa0', "\u1fa0", "\u1f68\u0399"), + ('\u1fa1', "\u1fa1", "\u1f69\u0399"), + ('\u1fa2', "\u1fa2", "\u1f6a\u0399"), + ('\u1fa3', "\u1fa3", "\u1f6b\u0399"), + ('\u1fa4', "\u1fa4", "\u1f6c\u0399"), + ('\u1fa5', "\u1fa5", "\u1f6d\u0399"), + ('\u1fa6', "\u1fa6", "\u1f6e\u0399"), + ('\u1fa7', "\u1fa7", "\u1f6f\u0399"), + ('\u1fa8', "\u1fa0", "\u1f68\u0399"), + ('\u1fa9', "\u1fa1", "\u1f69\u0399"), + ('\u1faa', "\u1fa2", "\u1f6a\u0399"), + ('\u1fab', "\u1fa3", "\u1f6b\u0399"), + ('\u1fac', "\u1fa4", "\u1f6c\u0399"), + ('\u1fad', "\u1fa5", "\u1f6d\u0399"), + ('\u1fae', "\u1fa6", "\u1f6e\u0399"), + ('\u1faf', "\u1fa7", "\u1f6f\u0399"), + ('\u1fb2', "\u1fb2", "\u1fba\u0399"), + ('\u1fb3', "\u1fb3", "\u0391\u0399"), + ('\u1fb4', "\u1fb4", "\u0386\u0399"), + ('\u1fb6', "\u1fb6", "\u0391\u0342"), + ('\u1fb7', "\u1fb7", "\u0391\u0342\u0399"), + ('\u1fbc', "\u1fb3", "\u0391\u0399"), + ('\u1fc2', "\u1fc2", "\u1fca\u0399"), + ('\u1fc3', "\u1fc3", "\u0397\u0399"), + ('\u1fc4', "\u1fc4", "\u0389\u0399"), + ('\u1fc6', "\u1fc6", "\u0397\u0342"), + ('\u1fc7', "\u1fc7", "\u0397\u0342\u0399"), + ('\u1fcc', "\u1fc3", "\u0397\u0399"), + ('\u1fd2', "\u1fd2", "\u0399\u0308\u0300"), + ('\u1fd3', "\u1fd3", "\u0399\u0308\u0301"), + ('\u1fd6', "\u1fd6", "\u0399\u0342"), + ('\u1fd7', "\u1fd7", "\u0399\u0308\u0342"), + ('\u1fe2', "\u1fe2", "\u03a5\u0308\u0300"), + ('\u1fe3', "\u1fe3", "\u03a5\u0308\u0301"), + ('\u1fe4', "\u1fe4", "\u03a1\u0313"), + ('\u1fe6', "\u1fe6", "\u03a5\u0342"), + ('\u1fe7', "\u1fe7", "\u03a5\u0308\u0342"), + ('\u1ff2', "\u1ff2", "\u1ffa\u0399"), + 
('\u1ff3', "\u1ff3", "\u03a9\u0399"), + ('\u1ff4', "\u1ff4", "\u038f\u0399"), + ('\u1ff6', "\u1ff6", "\u03a9\u0342"), + ('\u1ff7', "\u1ff7", "\u03a9\u0342\u0399"), + ('\u1ffc', "\u1ff3", "\u03a9\u0399"), + ('\ufb00', "\ufb00", "\u0046\u0046"), + ('\ufb01', "\ufb01", "\u0046\u0049"), + ('\ufb02', "\ufb02", "\u0046\u004c"), + ('\ufb03', "\ufb03", "\u0046\u0046\u0049"), + ('\ufb04', "\ufb04", "\u0046\u0046\u004c"), + ('\ufb05', "\ufb05", "\u0053\u0054"), + ('\ufb06', "\ufb06", "\u0053\u0054"), + ('\ufb13', "\ufb13", "\u0544\u0546"), + ('\ufb14', "\ufb14", "\u0544\u0535"), + ('\ufb15', "\ufb15", "\u0544\u053b"), + ('\ufb16', "\ufb16", "\u054e\u0546"), + ('\ufb17', "\ufb17", "\u0544\u053d") + ]; + + + /* Binary-searches special_table for the row whose first field equals c. On a hit, returns the row's second field (the lowercase string) when case is 0, and the third field (the uppercase string) for any other value of case. On a miss, returns the empty string. */ pub fn case_special(c:char, case:u8) -> &'static str { + use cmp::{Equal, Less, Greater}; + /* The closure reports how each row's code point orders relative to c, which relies on the table being in ascending code-point order. */ match special_table.bsearch(|&(code, _, _)| { + if c==code { Equal } + else if code < c { Less } + else { Greater } + }) { + Some(idx) => { + if case==0 { + let (_, result, _) = special_table[idx]; + result + } + else { + let (_, _, result) = special_table[idx]; + result + } + } + None => "" /* c has no row in special_table */ + } + } + + + /* Uppercase string for c from special_table, or the empty string if c has no row (calls case_special with case 1). */ pub fn upcase_special(c:char) -> &'static str { + case_special(c, 1) + } + + /* Lowercase string for c from special_table, or the empty string if c has no row (calls case_special with case 0). */ pub fn lowcase_special(c:char) -> &'static str { + case_special(c, 0) + } + } pub mod decompose { use option::Option;