Skip to content

Commit 7b0556e

Browse files
author
Thomas Bahn
committed
Merge pull request #14 from tormol/enum
Convert `Ascii` into an enum

Allows static initialization, nicer pattern matches and comparisons.

Naming conventions:
* alphabetic: just use the letter itself. This breaks the CamelCase convention, but is worth it for simplicity.
* digits: prepend `_` to make them valid identifiers, but still short.
* non-alphanumeric but visible: use Wikipedia names CamelCased, but remove -Mark endings.
* control codes without escape code: use the uppercase acronym to deter use.
* control codes with escape code: expand and CamelCase the acronym to preserve meaning, e.g. `LineFeed`, not `NewLine` or `LineBreak`.
2 parents f94d00a + c290db0 commit 7b0556e

File tree

1 file changed

+201
-31
lines changed

1 file changed

+201
-31
lines changed

src/ascii.rs

+201-31
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,183 @@
1+
use std::mem::transmute;
12
use std::fmt;
23
#[cfg(feature="unstable")]
34
use std::ascii::AsciiExt;
45

56
use AsciiCast;
67

8+
#[allow(non_camel_case_types)]
79
/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
810
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash, Copy)]
9-
pub struct Ascii { chr: u8 }
11+
#[repr(u8)]
12+
pub enum Ascii {
13+
/**`'\0'`*/Null = 0,
14+
/** [Start Of Heading](http://en.wikipedia.org/wiki/Start_of_Heading)
15+
*/ SOH = 1,
16+
/** [Start Of teXt](http://en.wikipedia.org/wiki/Start_of_Text)
17+
*/ SOX = 2,
18+
/** [End Of teXt](http://en.wikipedia.org/wiki/End-of-Text_character)
19+
*/ ETX = 3,
20+
/** [End Of Transmission](http://en.wikipedia.org/wiki/End-of-Transmission_character)
21+
*/ EOT = 4,
22+
/** [Enquiry](http://en.wikipedia.org/wiki/Enquiry_character)
23+
*/ ENQ = 5,
24+
/** [Acknowledgement](http://en.wikipedia.org/wiki/Acknowledge_character)
25+
*/ ACK = 6,
26+
/** [bell / alarm / audible](http://en.wikipedia.org/wiki/Bell_character)
27+
28+
'\a' is not supported by Rust.
29+
*/ Bell = 7,
30+
/** [Backspace character](http://en.wikipedia.org/wiki/Backspace)
31+
32+
'\b' is not supported by Rust.
33+
*/ BackSpace = 8,
34+
/**`'\t'`*/Tab = 9,
35+
/**`'\n'`*/LineFeed = 10,
36+
/** [Vertical tab](http://en.wikipedia.org/wiki/Vertical_Tab)
37+
38+
'\v' is not supported by Rust.
39+
*/ VerticalTab = 11,
40+
/** [Form Feed](http://en.wikipedia.org/wiki/Form_Feed)
41+
42+
'\f' is not supported by Rust.
43+
*/ FormFeed = 12,
44+
/**`'\r'`*/CarriageReturn = 13,
45+
/** [Shift In](http://en.wikipedia.org/wiki/Shift_Out_and_Shift_In_characters)
46+
*/ SI = 14,
47+
/** [Shift Out](http://en.wikipedia.org/wiki/Shift_Out_and_Shift_In_characters)
48+
*/ SO = 15,
49+
/** [Data Link Escape](http://en.wikipedia.org/wiki/Data_Link_Escape)
50+
*/ DLE = 16,
51+
/** [Device control 1, often XON](http://en.wikipedia.org/wiki/Device_Control_1)
52+
*/ DC1 = 17,
53+
/** Device control 2
54+
*/ DC2 = 18,
55+
/** Device control 3, Often XOFF
56+
*/ DC3 = 19,
57+
/** Device control 4
58+
*/ DC4 = 20,
59+
/** [Negative Acknowledgement](http://en.wikipedia.org/wiki/Negative-acknowledge_character)
60+
*/ NAK = 21,
61+
/** [Synchronous idle](http://en.wikipedia.org/wiki/Synchronous_Idle)
62+
*/ SYN = 22,
63+
/** [End of Transmission Block](http://en.wikipedia.org/wiki/End-of-Transmission-Block_character)
64+
*/ ETB = 23,
65+
/** [Cancel](http://en.wikipedia.org/wiki/Cancel_character)
66+
*/ CAN = 24,
67+
/** [End of Medium](http://en.wikipedia.org/wiki/End_of_Medium)
68+
*/ EM = 25,
69+
/** [Substitute](http://en.wikipedia.org/wiki/Substitute_character)
70+
*/ SUB = 26,
71+
/** [Escape](http://en.wikipedia.org/wiki/Escape_character)
72+
73+
'\e' is not supported in Rust.
74+
*/ Escape = 27,
75+
/** [File Separator](http://en.wikipedia.org/wiki/File_separator)
76+
*/ FS = 28,
77+
/** [Group Separator](http://en.wikipedia.org/wiki/Group_separator)
78+
*/ GS = 29,
79+
/** [Record Separator](http://en.wikipedia.org/wiki/Record_separator)
80+
*/ RS = 30,
81+
/** [Unit Separator](http://en.wikipedia.org/wiki/Unit_separator)
82+
*/ US = 31,
83+
/**`' '`*/ Space = 32,
84+
/**`'!'`*/ Exclamation = 33,
85+
/**`'"'`*/ Quotation = 34,
86+
/**`'#'`*/ Hash = 35,
87+
/**`'$'`*/ Dollar = 36,
88+
/**`'%'`*/ Percent = 37,
89+
/**`'&'`*/ Ampersand = 38,
90+
/**`'''`*/ Apostrophe = 39,
91+
/**`'('`*/ ParenOpen = 40,
92+
/**`')'`*/ ParenClose = 41,
93+
/**`'*'`*/ Asterisk = 42,
94+
/**`'+'`*/ Plus = 43,
95+
/**`','`*/ Comma = 44,
96+
/**`'-'`*/ Minus = 45,
97+
/**`'.'`*/ Dot = 46,
98+
/**`'/'`*/ Slash = 47,
99+
/**`'0'`*/ _0 = 48,
100+
/**`'1'`*/ _1 = 49,
101+
/**`'2'`*/ _2 = 50,
102+
/**`'3'`*/ _3 = 51,
103+
/**`'4'`*/ _4 = 52,
104+
/**`'5'`*/ _5 = 53,
105+
/**`'6'`*/ _6 = 54,
106+
/**`'7'`*/ _7 = 55,
107+
/**`'8'`*/ _8 = 56,
108+
/**`'9'`*/ _9 = 57,
109+
/**`':'`*/ Colon = 58,
110+
/**`';'`*/ SemiColon = 59,
111+
/**`'<'`*/ LessThan = 60,
112+
/**`'='`*/ Equal = 61,
113+
/**`'>'`*/ GreaterThan = 62,
114+
/**`'?'`*/ Question = 63,
115+
/**`'@'`*/ At = 64,
116+
/**`'A'`*/ A = 65,
117+
/**`'B'`*/ B = 66,
118+
/**`'C'`*/ C = 67,
119+
/**`'D'`*/ D = 68,
120+
/**`'E'`*/ E = 69,
121+
/**`'F'`*/ F = 70,
122+
/**`'G'`*/ G = 71,
123+
/**`'H'`*/ H = 72,
124+
/**`'I'`*/ I = 73,
125+
/**`'J'`*/ J = 74,
126+
/**`'K'`*/ K = 75,
127+
/**`'L'`*/ L = 76,
128+
/**`'M'`*/ M = 77,
129+
/**`'N'`*/ N = 78,
130+
/**`'O'`*/ O = 79,
131+
/**`'P'`*/ P = 80,
132+
/**`'Q'`*/ Q = 81,
133+
/**`'R'`*/ R = 82,
134+
/**`'S'`*/ S = 83,
135+
/**`'T'`*/ T = 84,
136+
/**`'U'`*/ U = 85,
137+
/**`'V'`*/ V = 86,
138+
/**`'W'`*/ W = 87,
139+
/**`'X'`*/ X = 88,
140+
/**`'Y'`*/ Y = 89,
141+
/**`'Z'`*/ Z = 90,
142+
/**`'['`*/ BracketOpen = 91,
143+
/**`'\'`*/ BackSlash = 92,
144+
/**`']'`*/ BracketClose = 93,
145+
/**`'^'`*/ Caret = 94,
146+
/**`'_'`*/ UnderScore = 95,
147+
/**`'`'`*/ Grave = 96,
148+
/**`'a'`*/ a = 97,
149+
/**`'b'`*/ b = 98,
150+
/**`'c'`*/ c = 99,
151+
/**`'d'`*/ d = 100,
152+
/**`'e'`*/ e = 101,
153+
/**`'f'`*/ f = 102,
154+
/**`'g'`*/ g = 103,
155+
/**`'h'`*/ h = 104,
156+
/**`'i'`*/ i = 105,
157+
/**`'j'`*/ j = 106,
158+
/**`'k'`*/ k = 107,
159+
/**`'l'`*/ l = 108,
160+
/**`'m'`*/ m = 109,
161+
/**`'n'`*/ n = 110,
162+
/**`'o'`*/ o = 111,
163+
/**`'p'`*/ p = 112,
164+
/**`'q'`*/ q = 113,
165+
/**`'r'`*/ r = 114,
166+
/**`'s'`*/ s = 115,
167+
/**`'t'`*/ t = 116,
168+
/**`'u'`*/ u = 117,
169+
/**`'v'`*/ v = 118,
170+
/**`'w'`*/ w = 119,
171+
/**`'x'`*/ x = 120,
172+
/**`'y'`*/ y = 121,
173+
/**`'z'`*/ z = 122,
174+
/**`'{'`*/ CurlyBraceOpen = 123,
175+
/**`'|'`*/ VerticalBar = 124,
176+
/**`'}'`*/ CurlyBraceClose = 125,
177+
/**`'~'`*/ Tilde = 126,
178+
/** [Delete](http://en.wikipedia.org/wiki/Delete_character)
179+
*/ DEL = 127,
180+
}
10181

11182
impl Ascii {
12183
/// Constructs an Ascii character from a `char`.
@@ -23,9 +194,9 @@ impl Ascii {
23194
/// ```
24195
#[inline]
25196
pub fn from(ch: char) -> Result<Ascii, ()> {
26-
if ch as u32 <= 0x7F {
27-
return Ok( Ascii { chr: ch as u8 });
28-
}
197+
unsafe{if ch as u32 <= 0x7F {
198+
return Ok(ch.to_ascii_nocheck());
199+
}}
29200
Err(())
30201
}
31202

@@ -44,36 +215,37 @@ impl Ascii {
44215
/// ```
45216
#[inline]
46217
pub fn from_byte(ch: u8) -> Result<Ascii, ()> {
47-
if ch <= 0x7F {
48-
return Ok( Ascii { chr: ch });
49-
}
218+
unsafe{if ch <= 0x7F {
219+
return Ok(ch.to_ascii_nocheck());
220+
}}
50221
Err(())
51222
}
52223

53224
/// Converts an ascii character into a `u8`.
54225
#[inline]
55226
pub fn as_byte(&self) -> u8 {
56-
self.chr
227+
*self as u8
57228
}
58229

59230
/// Converts an ascii character into a `char`.
60231
#[inline]
61232
pub fn as_char(&self) -> char {
62-
self.chr as char
233+
self.as_byte() as char
63234
}
64235

65236
// the following methods are like ctype, and the implementation is inspired by musl
66237

67238
/// Check if the character is a letter (a-z, A-Z)
68239
#[inline]
69240
pub fn is_alphabetic(&self) -> bool {
70-
(self.chr >= 0x41 && self.chr <= 0x5A) || (self.chr >= 0x61 && self.chr <= 0x7A)
241+
let c = self.as_byte() | 0b010_0000;// Turns uppercase into lowercase.
242+
c >= b'a' && c <= b'z'
71243
}
72244

73245
/// Check if the character is a number (0-9)
74246
#[inline]
75247
pub fn is_digit(&self) -> bool {
76-
self.chr >= 0x30 && self.chr <= 0x39
248+
self >= &Ascii::_0 && self <= &Ascii::_9
77249
}
78250

79251
/// Check if the character is a letter or number
@@ -85,7 +257,7 @@ impl Ascii {
85257
/// Check if the character is a space or horizontal tab
86258
#[inline]
87259
pub fn is_blank(&self) -> bool {
88-
self.chr == b' ' || self.chr == b'\t'
260+
*self == Ascii::Space || *self == Ascii::Tab
89261
}
90262

91263
/// Check if the character is a control character
@@ -101,7 +273,7 @@ impl Ascii {
101273
/// ```
102274
#[inline]
103275
pub fn is_control(&self) -> bool {
104-
self.chr < 0x20 || self.chr == 0x7F
276+
self.as_byte() < 0x20 || *self == Ascii::DEL
105277
}
106278

107279
/// Checks if the character is printable (except space)
@@ -116,7 +288,7 @@ impl Ascii {
116288
/// ```
117289
#[inline]
118290
pub fn is_graph(&self) -> bool {
119-
self.chr.wrapping_sub(0x21) < 0x5E
291+
self.as_byte().wrapping_sub(0x21) < 0x5E
120292
}
121293

122294
/// Checks if the character is printable (including space)
@@ -131,7 +303,7 @@ impl Ascii {
131303
/// ```
132304
#[inline]
133305
pub fn is_print(&self) -> bool {
134-
self.chr.wrapping_sub(0x20) < 0x5F
306+
self.as_byte().wrapping_sub(0x20) < 0x5F
135307
}
136308

137309
/// Checks if the character is alphabetic and lowercase
@@ -146,7 +318,7 @@ impl Ascii {
146318
/// ```
147319
#[inline]
148320
pub fn is_lowercase(&self) -> bool {
149-
self.chr.wrapping_sub(b'a') < 26
321+
self.as_byte().wrapping_sub(b'a') < 26
150322
}
151323

152324
/// Checks if the character is alphabetic and uppercase
@@ -161,7 +333,7 @@ impl Ascii {
161333
/// ```
162334
#[inline]
163335
pub fn is_uppercase(&self) -> bool {
164-
self.chr.wrapping_sub(b'A') < 26
336+
self.as_byte().wrapping_sub(b'A') < 26
165337
}
166338

167339
/// Checks if the character is punctuation
@@ -193,7 +365,7 @@ impl Ascii {
193365
/// ```
194366
#[inline]
195367
pub fn is_hex(&self) -> bool {
196-
self.is_digit() || (self.chr | 32u8).wrapping_sub(b'a') < 6
368+
self.is_digit() || (self.as_byte() | 32u8).wrapping_sub(b'a') < 6
197369
}
198370
}
199371

@@ -219,25 +391,25 @@ impl AsciiExt for Ascii {
219391
}
220392

221393
fn to_ascii_uppercase(&self) -> Ascii {
222-
Ascii{chr: self.chr.to_ascii_uppercase()}
394+
unsafe{ self.as_byte().to_ascii_uppercase().to_ascii_nocheck() }
223395
}
224396

225397
fn to_ascii_lowercase(&self) -> Ascii {
226-
Ascii{chr: self.chr.to_ascii_lowercase()}
398+
unsafe{ self.as_byte().to_ascii_lowercase().to_ascii_nocheck() } // SAFETY: lowercasing an ASCII byte always yields an ASCII byte (<= 0x7F).
227399
}
228400

229401
fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
230-
self.chr.eq_ignore_ascii_case(&other.chr)
402+
self.as_byte().eq_ignore_ascii_case(&other.as_byte())
231403
}
232404

233405
#[inline]
234406
fn make_ascii_uppercase(&mut self) {
235-
self.chr.make_ascii_uppercase()
407+
*self = self.to_ascii_uppercase();
236408
}
237409

238410
#[inline]
239411
fn make_ascii_lowercase(&mut self) {
240-
self.chr.make_ascii_lowercase()
412+
*self = self.to_ascii_lowercase();
241413
}
242414
}
243415

@@ -246,7 +418,7 @@ impl<'a> AsciiCast<'a> for u8 {
246418

247419
#[inline]
248420
unsafe fn to_ascii_nocheck(&self) -> Ascii {
249-
Ascii{ chr: *self }
421+
transmute(*self)
250422
}
251423
}
252424

@@ -255,7 +427,7 @@ impl<'a> AsciiCast<'a> for char {
255427

256428
#[inline]
257429
unsafe fn to_ascii_nocheck(&self) -> Ascii {
258-
Ascii{ chr: *self as u8 }
430+
(*self as u8).to_ascii_nocheck()
259431
}
260432
}
261433

@@ -266,10 +438,10 @@ mod tests {
266438

267439
#[test]
268440
fn to_ascii() {
269-
assert_eq!(65_u8.to_ascii(), Ok(Ascii { chr: 65_u8 }));
441+
assert_eq!(65_u8.to_ascii(), Ok(Ascii::A));
270442
assert_eq!(255_u8.to_ascii(), Err(()));
271443

272-
assert_eq!('A'.to_ascii(), Ok(Ascii { chr: 65_u8 }));
444+
assert_eq!('A'.to_ascii(), Ok(Ascii::A));
273445
assert_eq!('λ'.to_ascii(), Err(()));
274446
}
275447

@@ -302,13 +474,11 @@ mod tests {
302474

303475
#[test]
304476
fn fmt_display_ascii() {
305-
let s = Ascii { chr: b't' };
306-
assert_eq!(format!("{}", s), "t".to_string());
477+
assert_eq!(format!("{}", Ascii::t), "t".to_string());
307478
}
308479

309480
#[test]
310481
fn fmt_debug_ascii() {
311-
let c = Ascii { chr: b't' };
312-
assert_eq!(format!("{:?}", c), "'t'".to_string());
482+
assert_eq!(format!("{:?}", Ascii::t), "'t'".to_string());
313483
}
314484
}

0 commit comments

Comments
 (0)