Skip to content

Commit 40ad877

Browse files
Add support code for new unicode_data module
1 parent 064f888 commit 40ad877

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

src/libcore/unicode/mod.rs

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,59 @@
11
#![unstable(feature = "unicode_internals", issue = "none")]
22
#![allow(missing_docs)]
33

4-
mod bool_trie;
54
pub(crate) mod printable;
6-
pub(crate) mod tables;
5+
mod unicode_data;
76
pub(crate) mod version;
87

8+
use version::UnicodeVersion;
9+
10+
/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
11+
/// `char` and `str` methods are based on.
///
/// The `major`, `minor` and `micro` fields are filled from the three
/// components of the `unicode_data::UNICODE_VERSION` tuple, in that order.
12+
#[unstable(feature = "unicode_version", issue = "49726")]
13+
pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
14+
major: unicode_data::UNICODE_VERSION.0,
15+
minor: unicode_data::UNICODE_VERSION.1,
16+
micro: unicode_data::UNICODE_VERSION.2,
17+
// NOTE(review): presumably a private marker field that keeps `UnicodeVersion`
// from being constructed outside this crate -- confirm in `version`.
_priv: (),
18+
};
19+
920
// For use in liballoc, not re-exported in libstd.
// Exposes the `Case_Ignorable` and `Cased` lookups under the
// `derived_property` path.
1021
pub mod derived_property {
11-
pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
22+
pub use super::{Case_Ignorable, Cased};
1223
}
13-
pub mod conversions {
14-
pub use crate::unicode::tables::conversions::{to_lower, to_upper};
24+
25+
pub use unicode_data::alphabetic::lookup as Alphabetic;
26+
pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
27+
pub use unicode_data::cased::lookup as Cased;
28+
pub use unicode_data::cc::lookup as Cc;
29+
pub use unicode_data::conversions;
30+
pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
31+
pub use unicode_data::lowercase::lookup as Lowercase;
32+
pub use unicode_data::n::lookup as N;
33+
pub use unicode_data::uppercase::lookup as Uppercase;
34+
pub use unicode_data::white_space::lookup as White_Space;
35+
36+
/// Tests whether `needle` is marked in a two-level, chunked bitset.
///
/// The lookup proceeds in three steps:
///
/// 1. `needle / 64` selects a 64-bit word (a "bucket"); buckets are grouped
///    into chunks of 16.
/// 2. The chunk number is translated through `chunk_idx_map` into a row of
///    `bitset_chunk_idx`. A chunk number at or past the end of the map
///    (`>= N`) is either the single extra chunk described by
///    `(last_chunk_idx, last_chunk_mapping)` or is not in the set at all.
/// 3. The row entry at the bucket's position within its chunk indexes
///    `bitset`, and bit `needle % 64` of that word is the answer.
///
/// # Panics
///
/// Panics if a value read from the tables indexes past the end of
/// `bitset_chunk_idx` or `bitset`.
#[inline(always)]
fn range_search<const N: usize, const N1: usize, const N2: usize>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; 16]; N1],
    bitset: &[u64; N2],
) -> bool {
    let bucket_idx = (needle / 64) as usize;
    let chunk_map_idx = bucket_idx / 16;
    let chunk_piece = bucket_idx % 16;

    let chunk_idx = if chunk_map_idx < N {
        chunk_idx_map[chunk_map_idx]
    } else if chunk_map_idx == usize::from(last_chunk_idx) {
        // The one chunk that lives outside `chunk_idx_map`.
        last_chunk_mapping
    } else {
        // Beyond every chunk the tables describe: not a member.
        return false;
    };

    let idx = bitset_chunk_idx[usize::from(chunk_idx)][chunk_piece];
    let word = bitset[usize::from(idx)];

    // Give the mask an explicit type so the shift does not rely on a cast
    // that only looks like it applies to the literal.
    let mask = 1u64 << (needle % 64);
    word & mask != 0
}

0 commit comments

Comments
 (0)