Skip to content

Commit 9952769

Browse files
committed
deprecate Unicode functions that will be moved to crates.io
This patch

1. renames libunicode to librustc_unicode,
2. deprecates several pieces of libunicode (see below), and
3. removes references to deprecated functions from librustc_driver and libsyntax. This may change pretty-printed output from these modules in cases involving wide or combining characters used in filenames, identifiers, etc.

The following functions are marked deprecated:

1. char.width() and str.width():
   --> use unicode-width crate
2. str.graphemes() and str.grapheme_indices():
   --> use unicode-segmentation crate
3. str.nfd_chars(), str.nfkd_chars(), str.nfc_chars(), str.nfkc_chars(), char.compose(), char.decompose_canonical(), char.decompose_compatible(), char.canonical_combining_class():
   --> use unicode-normalization crate
1 parent a691f1e commit 9952769

File tree

19 files changed

+99
-44
lines changed

19 files changed

+99
-44
lines changed

mk/crates.mk

+6-6
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
TARGET_CRATES := libc std flate arena term \
5353
serialize getopts collections test rand \
5454
log graphviz core rbml alloc \
55-
unicode rustc_bitflags
55+
rustc_unicode rustc_bitflags
5656
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
5757
rustc_trans rustc_back rustc_llvm rustc_privacy rustc_lint
5858
HOST_CRATES := syntax $(RUSTC_CRATES) rustdoc fmt_macros
@@ -61,9 +61,9 @@ TOOLS := compiletest rustdoc rustc rustbook
6161

6262
DEPS_core :=
6363
DEPS_libc := core
64-
DEPS_unicode := core
64+
DEPS_rustc_unicode := core
6565
DEPS_alloc := core libc native:jemalloc
66-
DEPS_std := core libc rand alloc collections unicode \
66+
DEPS_std := core libc rand alloc collections rustc_unicode \
6767
native:rust_builtin native:backtrace native:rustrt_native \
6868
rustc_bitflags
6969
DEPS_graphviz := std
@@ -94,7 +94,7 @@ DEPS_serialize := std log
9494
DEPS_rbml := std log serialize
9595
DEPS_term := std log
9696
DEPS_getopts := std
97-
DEPS_collections := core alloc unicode
97+
DEPS_collections := core alloc rustc_unicode
9898
DEPS_num := std
9999
DEPS_test := std getopts serialize rbml term native:rust_test_helpers
100100
DEPS_rand := core
@@ -115,11 +115,11 @@ ONLY_RLIB_libc := 1
115115
ONLY_RLIB_alloc := 1
116116
ONLY_RLIB_rand := 1
117117
ONLY_RLIB_collections := 1
118-
ONLY_RLIB_unicode := 1
118+
ONLY_RLIB_rustc_unicode := 1
119119
ONLY_RLIB_rustc_bitflags := 1
120120

121121
# Documented-by-default crates
122-
DOC_CRATES := std alloc collections core libc unicode
122+
DOC_CRATES := std alloc collections core libc rustc_unicode
123123

124124
################################################################################
125125
# You should not need to edit below this line

src/etc/unicode.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -518,11 +518,14 @@ def comp_pfun(char):
518518
emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
519519
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
520520

521-
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
522-
+ " bsearch_range_value_table(c, combining_class_table)\n"
523-
+ " }\n")
521+
f.write(""" #[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
522+
since = "1.0.0")]
523+
#[unstable(feature = "unicode",
524+
reason = "this functionality will be moved to crates.io")]
525+
pub fn canonical_combining_class(c: char) -> u8 {
526+
bsearch_range_value_table(c, combining_class_table)
527+
}
524528
525-
f.write("""
526529
}
527530
528531
""")

src/libcollections/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
#[macro_use]
5151
extern crate core;
5252

53-
extern crate unicode;
53+
extern crate rustc_unicode;
5454
extern crate alloc;
5555

5656
#[cfg(test)] #[macro_use] extern crate std;

src/libcollections/str.rs

+35-9
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ use core::result::Result;
5959
use core::str as core_str;
6060
use core::str::pattern::Pattern;
6161
use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
62-
use unicode::str::{UnicodeStr, Utf16Encoder};
62+
use rustc_unicode::str::{UnicodeStr, Utf16Encoder};
6363

6464
use core::convert::AsRef;
6565
use vec_deque::VecDeque;
6666
use borrow::{Borrow, ToOwned};
6767
use string::String;
68-
use unicode;
68+
use rustc_unicode;
6969
use vec::Vec;
7070
use slice::SliceConcatExt;
7171

@@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
7878
pub use core::str::{MatchIndices, RMatchIndices};
7979
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
8080
pub use core::str::{from_utf8_unchecked, ParseBoolError};
81-
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
81+
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
8282
pub use core::str::pattern;
8383

8484
/*
@@ -161,6 +161,9 @@ enum DecompositionType {
161161
/// External iterator for a string decomposition's characters.
162162
///
163163
/// For use with the `std::iter` module.
164+
#[allow(deprecated)]
165+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
166+
since = "1.0.0")]
164167
#[derive(Clone)]
165168
#[unstable(feature = "unicode",
166169
reason = "this functionality may be replaced with a more generic \
@@ -172,6 +175,7 @@ pub struct Decompositions<'a> {
172175
sorted: bool
173176
}
174177

178+
#[allow(deprecated)]
175179
#[stable(feature = "rust1", since = "1.0.0")]
176180
impl<'a> Iterator for Decompositions<'a> {
177181
type Item = char;
@@ -198,7 +202,7 @@ impl<'a> Iterator for Decompositions<'a> {
198202
{
199203
let callback = |d| {
200204
let class =
201-
unicode::char::canonical_combining_class(d);
205+
rustc_unicode::char::canonical_combining_class(d);
202206
if class == 0 && !*sorted {
203207
canonical_sort(buffer);
204208
*sorted = true;
@@ -207,10 +211,10 @@ impl<'a> Iterator for Decompositions<'a> {
207211
};
208212
match self.kind {
209213
Canonical => {
210-
unicode::char::decompose_canonical(ch, callback)
214+
rustc_unicode::char::decompose_canonical(ch, callback)
211215
}
212216
Compatible => {
213-
unicode::char::decompose_compatible(ch, callback)
217+
rustc_unicode::char::decompose_compatible(ch, callback)
214218
}
215219
}
216220
}
@@ -254,6 +258,9 @@ enum RecompositionState {
254258
/// External iterator for a string recomposition's characters.
255259
///
256260
/// For use with the `std::iter` module.
261+
#[allow(deprecated)]
262+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
263+
since = "1.0.0")]
257264
#[derive(Clone)]
258265
#[unstable(feature = "unicode",
259266
reason = "this functionality may be replaced with a more generic \
@@ -266,6 +273,7 @@ pub struct Recompositions<'a> {
266273
last_ccc: Option<u8>
267274
}
268275

276+
#[allow(deprecated)]
269277
#[stable(feature = "rust1", since = "1.0.0")]
270278
impl<'a> Iterator for Recompositions<'a> {
271279
type Item = char;
@@ -276,7 +284,7 @@ impl<'a> Iterator for Recompositions<'a> {
276284
match self.state {
277285
Composing => {
278286
for ch in self.iter.by_ref() {
279-
let ch_class = unicode::char::canonical_combining_class(ch);
287+
let ch_class = rustc_unicode::char::canonical_combining_class(ch);
280288
if self.composee.is_none() {
281289
if ch_class != 0 {
282290
return Some(ch);
@@ -288,7 +296,7 @@ impl<'a> Iterator for Recompositions<'a> {
288296

289297
match self.last_ccc {
290298
None => {
291-
match unicode::char::compose(k, ch) {
299+
match rustc_unicode::char::compose(k, ch) {
292300
Some(r) => {
293301
self.composee = Some(r);
294302
continue;
@@ -316,7 +324,7 @@ impl<'a> Iterator for Recompositions<'a> {
316324
self.last_ccc = Some(ch_class);
317325
continue;
318326
}
319-
match unicode::char::compose(k, ch) {
327+
match rustc_unicode::char::compose(k, ch) {
320328
Some(r) => {
321329
self.composee = Some(r);
322330
continue;
@@ -465,6 +473,9 @@ impl str {
465473

466474
/// Returns an iterator over the string in Unicode Normalization Form D
467475
/// (canonical decomposition).
476+
#[allow(deprecated)]
477+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
478+
since = "1.0.0")]
468479
#[inline]
469480
#[unstable(feature = "unicode",
470481
reason = "this functionality may be replaced with a more generic \
@@ -480,6 +491,9 @@ impl str {
480491

481492
/// Returns an iterator over the string in Unicode Normalization Form KD
482493
/// (compatibility decomposition).
494+
#[allow(deprecated)]
495+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
496+
since = "1.0.0")]
483497
#[inline]
484498
#[unstable(feature = "unicode",
485499
reason = "this functionality may be replaced with a more generic \
@@ -495,6 +509,9 @@ impl str {
495509

496510
/// An Iterator over the string in Unicode Normalization Form C
497511
/// (canonical decomposition followed by canonical composition).
512+
#[allow(deprecated)]
513+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
514+
since = "1.0.0")]
498515
#[inline]
499516
#[unstable(feature = "unicode",
500517
reason = "this functionality may be replaced with a more generic \
@@ -511,6 +528,9 @@ impl str {
511528

512529
/// An Iterator over the string in Unicode Normalization Form KC
513530
/// (compatibility decomposition followed by canonical composition).
531+
#[allow(deprecated)]
532+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
533+
since = "1.0.0")]
514534
#[inline]
515535
#[unstable(feature = "unicode",
516536
reason = "this functionality may be replaced with a more generic \
@@ -1690,6 +1710,8 @@ impl str {
16901710
///
16911711
/// assert_eq!(&gr2[..], b);
16921712
/// ```
1713+
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1714+
since = "1.0.0")]
16931715
#[unstable(feature = "unicode",
16941716
reason = "this functionality may only be provided by libunicode")]
16951717
pub fn graphemes(&self, is_extended: bool) -> Graphemes {
@@ -1709,6 +1731,8 @@ impl str {
17091731
///
17101732
/// assert_eq!(&gr_inds[..], b);
17111733
/// ```
1734+
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1735+
since = "1.0.0")]
17121736
#[unstable(feature = "unicode",
17131737
reason = "this functionality may only be provided by libunicode")]
17141738
pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
@@ -1749,6 +1773,8 @@ impl str {
17491773
/// recommends that these
17501774
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the
17511775
/// locale is unknown.
1776+
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
1777+
since = "1.0.0")]
17521778
#[unstable(feature = "unicode",
17531779
reason = "this functionality may only be provided by libunicode")]
17541780
pub fn width(&self, is_cjk: bool) -> usize {

src/libcollections/string.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ use core::ops::{self, Deref, Add, Index};
2525
use core::ptr;
2626
use core::slice;
2727
use core::str::pattern::Pattern;
28-
use unicode::str as unicode_str;
29-
use unicode::str::Utf16Item;
28+
use rustc_unicode::str as unicode_str;
29+
use rustc_unicode::str::Utf16Item;
3030

3131
use borrow::{Cow, IntoCow};
3232
use str::{self, FromStr, Utf8Error};

src/libcollectionstest/str.rs

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fn test_le() {
1919
assert!("foo" != "bar");
2020
}
2121

22+
#[allow(deprecated)]
2223
#[test]
2324
fn test_len() {
2425
assert_eq!("".len(), 0);
@@ -944,6 +945,7 @@ fn test_words() {
944945
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
945946
}
946947

948+
#[allow(deprecated)]
947949
#[test]
948950
fn test_nfd_chars() {
949951
macro_rules! t {
@@ -963,6 +965,7 @@ fn test_nfd_chars() {
963965
t!("\u{ac1c}", "\u{1100}\u{1162}");
964966
}
965967

968+
#[allow(deprecated)]
966969
#[test]
967970
fn test_nfkd_chars() {
968971
macro_rules! t {
@@ -982,6 +985,7 @@ fn test_nfkd_chars() {
982985
t!("\u{ac1c}", "\u{1100}\u{1162}");
983986
}
984987

988+
#[allow(deprecated)]
985989
#[test]
986990
fn test_nfc_chars() {
987991
macro_rules! t {
@@ -1002,6 +1006,7 @@ fn test_nfc_chars() {
10021006
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
10031007
}
10041008

1009+
#[allow(deprecated)]
10051010
#[test]
10061011
fn test_nfkc_chars() {
10071012
macro_rules! t {
@@ -1033,6 +1038,7 @@ fn test_lines() {
10331038
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
10341039
}
10351040

1041+
#[allow(deprecated)]
10361042
#[test]
10371043
fn test_graphemes() {
10381044
use std::iter::order;

src/libcoretest/char.rs

+1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ fn test_len_utf16() {
210210
assert!('\u{1f4a9}'.len_utf16() == 2);
211211
}
212212

213+
#[allow(deprecated)]
213214
#[test]
214215
fn test_width() {
215216
assert_eq!('\x00'.width(false),Some(0));

src/librustc_driver/lib.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ Available lint options:
574574
let builtin_groups = sort_lint_groups(builtin_groups);
575575

576576
let max_name_len = plugin.iter().chain(builtin.iter())
577-
.map(|&s| s.name.width(true))
577+
.map(|&s| s.name.chars().count())
578578
.max().unwrap_or(0);
579579
let padded = |x: &str| {
580580
let mut s = repeat(" ").take(max_name_len - x.chars().count())
@@ -601,7 +601,7 @@ Available lint options:
601601

602602

603603
let max_name_len = plugin_groups.iter().chain(builtin_groups.iter())
604-
.map(|&(s, _)| s.width(true))
604+
.map(|&(s, _)| s.chars().count())
605605
.max().unwrap_or(0);
606606
let padded = |x: &str| {
607607
let mut s = repeat(" ").take(max_name_len - x.chars().count())
@@ -790,7 +790,6 @@ fn parse_crate_attrs(sess: &Session, input: &Input) ->
790790
///
791791
/// The diagnostic emitter yielded to the procedure should be used for reporting
792792
/// errors of the compiler.
793-
#[allow(deprecated)]
794793
pub fn monitor<F:FnOnce()+Send+'static>(f: F) {
795794
const STACK_SIZE: usize = 8 * 1024 * 1024; // 8MB
796795

src/libunicode/char.rs renamed to src/librustc_unicode/char.rs

+4
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@ use tables::{derived_property, property, general_category, conversions, charwidt
3737
pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
3838

3939
// unstable reexports
40+
#[allow(deprecated)]
4041
pub use normalize::{decompose_canonical, decompose_compatible, compose};
42+
#[allow(deprecated)]
4143
pub use tables::normalization::canonical_combining_class;
4244
pub use tables::UNICODE_VERSION;
4345

@@ -445,6 +447,8 @@ impl char {
445447
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
446448
/// recommends that these characters be treated as 1 column (i.e.,
447449
/// `is_cjk` = `false`) if the context cannot be reliably determined.
450+
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
451+
since = "1.0.0")]
448452
#[unstable(feature = "unicode",
449453
reason = "needs expert opinion. is_cjk flag stands out as ugly")]
450454
pub fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }

src/libunicode/lib.rs renamed to src/librustc_unicode/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
2323
// Do not remove on snapshot creation. Needed for bootstrap. (Issue #22364)
2424
#![cfg_attr(stage0, feature(custom_attribute))]
25-
#![crate_name = "unicode"]
25+
#![crate_name = "rustc_unicode"]
2626
#![unstable(feature = "unicode")]
2727
#![feature(lang_items)]
2828
#![feature(staged_api)]

src/libunicode/normalize.rs renamed to src/librustc_unicode/normalize.rs

+12
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,17 @@ fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'sta
3333
}
3434

3535
/// Compute canonical Unicode decomposition for character
36+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
37+
since = "1.0.0")]
38+
#[unstable(feature = "unicode",
39+
reason = "this functionality will be moved to crates.io")]
3640
pub fn decompose_canonical<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, false); }
3741

3842
/// Compute canonical or compatible Unicode decomposition for character
43+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
44+
since = "1.0.0")]
45+
#[unstable(feature = "unicode",
46+
reason = "this functionality will be moved to crates.io")]
3947
pub fn decompose_compatible<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, true); }
4048

4149
// FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F`
@@ -78,6 +86,10 @@ fn d<F>(c: char, i: &mut F, k: bool) where F: FnMut(char) {
7886
(*i)(c);
7987
}
8088

89+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
90+
since = "1.0.0")]
91+
#[unstable(feature = "unicode",
92+
reason = "this functionality will be moved to crates.io")]
8193
pub fn compose(a: char, b: char) -> Option<char> {
8294
compose_hangul(a, b).or_else(|| {
8395
match bsearch_table(a, composition_table) {

0 commit comments

Comments
 (0)