19
19
# programs". It is not meant to be a complete implementation of Unicode.
20
20
# For that we recommend you use a proper binding to libicu.
21
21
22
- import fileinput , re , os , sys
22
+ import fileinput , re , os , sys , operator
23
23
24
24
25
25
def fetch (f ):
@@ -35,6 +35,8 @@ def fetch(f):
35
35
def load_unicode_data (f ):
36
36
fetch (f )
37
37
gencats = {}
38
+ upperlower = {}
39
+ lowerupper = {}
38
40
combines = []
39
41
canon_decomp = {}
40
42
compat_decomp = {}
@@ -44,6 +46,7 @@ def load_unicode_data(f):
44
46
c_hi = 0
45
47
com_lo = 0
46
48
com_hi = 0
49
+
47
50
for line in fileinput .input (f ):
48
51
fields = line .split (";" )
49
52
if len (fields ) != 15 :
@@ -52,7 +55,17 @@ def load_unicode_data(f):
52
55
decomp , deci , digit , num , mirror ,
53
56
old , iso , upcase , lowcase , titlecase ] = fields
54
57
55
- code = int (code , 16 )
58
+ code_org = code
59
+ code = int (code , 16 )
60
+
61
+ # generate char to char direct common and simple conversions
62
+ # uppercase to lowercase
63
+ if gencat == "Lu" and lowcase != "" and code_org != lowcase :
64
+ upperlower [code ] = int (lowcase , 16 )
65
+
66
+ # lowercase to uppercase
67
+ if gencat == "Ll" and upcase != "" and code_org != upcase :
68
+ lowerupper [code ] = int (upcase , 16 )
56
69
57
70
if decomp != "" :
58
71
if decomp .startswith ('<' ):
@@ -96,7 +109,7 @@ def load_unicode_data(f):
96
109
com_lo = code
97
110
com_hi = code
98
111
99
- return (canon_decomp , compat_decomp , gencats , combines )
112
+ return (canon_decomp , compat_decomp , gencats , combines , lowerupper , upperlower )
100
113
101
114
def load_properties (f , interestingprops ):
102
115
fetch (f )
@@ -147,25 +160,28 @@ def ch_prefix(ix):
147
160
148
161
# Write the Rust helper function `bsearch_range_table` to the file object `f`.
# The emitted Rust function takes a char `c` and a static table of (lo, hi)
# char pairs, binary-searches the table treating each pair as an inclusive
# range (`lo <= c && c <= hi`), and returns true when some range contains `c`.
# The removed and added template lines below carry the same Rust text.
def emit_bsearch_range_table (f ):
149
162
f .write ("""
150
- fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
151
- use cmp::{Equal, Less, Greater};
152
- use vec::ImmutableVector;
153
- use option::None;
154
- r.bsearch(|&(lo,hi)| {
155
- if lo <= c && c <= hi { Equal }
156
- else if hi < c { Less }
157
- else { Greater }
158
- }) != None
159
- }\n \n
163
+ fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
164
+ use cmp::{Equal, Less, Greater};
165
+ use vec::ImmutableVector;
166
+ use option::None;
167
+ r.bsearch(|&(lo,hi)| {
168
+ if lo <= c && c <= hi { Equal }
169
+ else if hi < c { Less }
170
+ else { Greater }
171
+ }) != None
172
+ }\n \n
160
173
""" );
161
174
162
175
def emit_property_module (f , mod , tbl ):
163
176
f .write ("pub mod %s {\n " % mod )
164
177
keys = tbl .keys ()
165
178
keys .sort ()
166
- emit_bsearch_range_table ( f );
179
+
167
180
for cat in keys :
168
- if cat == "Cs" : continue
181
+ if cat not in ["Nd" , "Nl" , "No" , "Cc" ,
182
+ "XID_Start" , "XID_Continue" , "Alphabetic" ,
183
+ "Lowercase" , "Uppercase" , "White_Space" ]:
184
+ continue
169
185
f .write (" static %s_table : &'static [(char,char)] = &[\n " % cat )
170
186
ix = 0
171
187
for pair in tbl [cat ]:
@@ -175,35 +191,55 @@ def emit_property_module(f, mod, tbl):
175
191
f .write ("\n ];\n \n " )
176
192
177
193
f .write (" pub fn %s(c: char) -> bool {\n " % cat )
178
- f .write (" bsearch_range_table(c, %s_table)\n " % cat )
194
+ f .write (" super:: bsearch_range_table(c, %s_table)\n " % cat )
179
195
f .write (" }\n \n " )
180
196
f .write ("}\n " )
181
197
182
198
183
- def emit_property_module_old (f , mod , tbl ):
184
- f .write ("mod %s {\n " % mod )
185
- keys = tbl .keys ()
186
- keys .sort ()
187
- for cat in keys :
188
- f .write (" fn %s(c: char) -> bool {\n " % cat )
189
- f .write (" ret alt c {\n " )
190
- prefix = ' '
191
- for pair in tbl [cat ]:
192
- if pair [0 ] == pair [1 ]:
193
- f .write (" %c %s\n " %
194
- (prefix , escape_char (pair [0 ])))
195
- else :
196
- f .write (" %c %s to %s\n " %
197
- (prefix ,
198
- escape_char (pair [0 ]),
199
- escape_char (pair [1 ])))
200
- prefix = '|'
201
- f .write (" { true }\n " )
202
- f .write (" _ { false }\n " )
203
- f .write (" };\n " )
204
- f .write (" }\n \n " )
199
# Write the Rust `conversions` module to the file object `f`.
# Steps, in order: open `pub mod conversions {`; write a fixed Rust template
# defining `to_lower`, `to_upper` and their shared `bsearch_case_table`
# helper; emit the `LuLl` table from `upperlower` and the `LlLu` table from
# `lowerupper` via emit_caseconversion_table; close the module with `}`.
# In the emitted Rust, `to_lower`/`to_upper` return `c` unchanged when the
# binary search finds no entry for it (`None => c`).
+ def emit_conversions_module (f , lowerupper , upperlower ):
200
+ f .write ("pub mod conversions {\n " )
201
+ f .write ("""
202
+ use cmp::{Equal, Less, Greater};
203
+ use vec::ImmutableVector;
204
+ use tuple::Tuple2;
205
+ use option::{Option, Some, None};
206
+
207
+ pub fn to_lower(c: char) -> char {
208
+ match bsearch_case_table(c, LuLl_table) {
209
+ None => c,
210
+ Some(index) => LuLl_table[index].val1()
211
+ }
212
+ }
213
+
214
+ pub fn to_upper(c: char) -> char {
215
+ match bsearch_case_table(c, LlLu_table) {
216
+ None => c,
217
+ Some(index) => LlLu_table[index].val1()
218
+ }
219
+ }
220
+
221
+ fn bsearch_case_table(c: char, table: &'static [(char, char)]) -> Option<uint> {
222
+ table.bsearch(|&(key, _)| {
223
+ if c == key { Equal }
224
+ else if key < c { Less }
225
+ else { Greater }
226
+ })
227
+ }
228
+ """ );
229
+ emit_caseconversion_table (f , "LuLl" , upperlower )
230
+ emit_caseconversion_table (f , "LlLu" , lowerupper )
205
231
f .write ("}\n " )
206
232
233
def emit_caseconversion_table(f, name, table):
    """Write one Rust case-conversion lookup table to ``f``.

    Emits a ``static <name>_table`` slice of ``(char, char)`` pairs, one
    pair per entry of ``table``, in ascending key order.  The generated
    ``bsearch_case_table`` helper binary-searches these tables by key, so
    the ordering is required.

    f     -- writable file-like object receiving the generated Rust source.
    name  -- prefix of the emitted table name (this file passes "LuLl"
             and "LlLu").
    table -- dict mapping a source code point to its converted code point.
    """
    f.write(" static %s_table : &'static [(char, char)] = &[\n " % name)
    # Dict keys are unique, so sorting the (key, value) pairs orders them by
    # key alone -- the value is never compared.  items() replaces the
    # Python-2-only iteritems() so the script runs on Python 2 and 3 alike.
    for ix, (key, value) in enumerate(sorted(table.items())):
        # ch_prefix supplies the separator/line-break text for entry ix.
        f.write(ch_prefix(ix))
        f.write("(%s, %s)" % (escape_char(key), escape_char(value)))
    f.write("\n ];\n \n ")
242
+
207
243
def format_table_content (f , content , indent ):
208
244
line = " " * indent
209
245
first = True
@@ -359,7 +395,8 @@ def emit_decomp_module(f, canon, compat, combine):
359
395
os .remove (i );
360
396
rf = open (r , "w" )
361
397
362
- (canon_decomp , compat_decomp , gencats , combines ) = load_unicode_data ("UnicodeData.txt" )
398
+ (canon_decomp , compat_decomp , gencats ,
399
+ combines , lowerupper , upperlower ) = load_unicode_data ("UnicodeData.txt" )
363
400
364
401
# Preamble
365
402
rf .write ('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
@@ -379,13 +416,16 @@ def emit_decomp_module(f, canon, compat, combine):
379
416
380
417
''' )
381
418
419
+ emit_bsearch_range_table (rf );
382
420
emit_property_module (rf , "general_category" , gencats )
383
421
384
422
emit_decomp_module (rf , canon_decomp , compat_decomp , combines )
385
423
386
424
derived = load_properties ("DerivedCoreProperties.txt" ,
387
425
["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ])
426
+
388
427
emit_property_module (rf , "derived_property" , derived )
389
428
390
429
props = load_properties ("PropList.txt" , ["White_Space" ])
391
430
emit_property_module (rf , "property" , props )
431
+ emit_conversions_module (rf , lowerupper , upperlower )
0 commit comments