Skip to content

Commit e8c821f

Browse files
committed
Convert Ascii into an enum.
Allows static initialization, nicer pattern matches and comparisons.

Naming conventions:
* alphabetic: just use the letter itself. This breaks the CamelCase convention, but is worth it for simplicity.
* digits: prepend `_` to make them valid identifiers while keeping them short.
* non-alphanumeric but visible: use the Wikipedia names in CamelCase, but remove "-Mark" endings.
* rarely used control codes: use the uppercase acronym to deter use.
* more commonly used control codes: expand and CamelCase the acronym to preserve meaning, e.g. LineFeed, not NewLine or LineBreak.

I'm not certain which control codes should be expanded; currently BEL and DEL are, but should they be? I'm not sure whether this is a breaking change.
1 parent c575fdf commit e8c821f

File tree

1 file changed

+158
-27
lines changed

1 file changed

+158
-27
lines changed

src/ascii.rs

Lines changed: 158 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,143 @@
1+
use std::mem::transmute;
12
use std::fmt;
23
use std::ascii::AsciiExt;
34

45
use AsciiCast;
56

7+
#[allow(non_camel_case_types)]
68
/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
79
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash, Copy)]
8-
pub struct Ascii { chr: u8 }
10+
#[repr(u8)]
11+
pub enum Ascii {
    // Each discriminant is the character's 7-bit ASCII code, so every value
    // in 0..=127 corresponds to exactly one variant.

    // --- Control codes (0..=31) ---
    /// Null.
    NUL = 0,
    /// Start of heading.
    SOH = 1,
    /// Start of text (the standard abbreviation is STX; this name is kept as-is).
    SOX = 2,
    /// End of text.
    ETX = 3,
    /// End of transmission.
    EOT = 4,
    /// Enquiry.
    ENQ = 5,
    /// Acknowledge.
    ACK = 6,
    /// Bell (BEL).
    Bell = 7,
    /// Backspace (BS).
    BackSpace = 8,
    /// Horizontal tab (HT).
    Tab = 9,
    /// Line feed (LF).
    LineFeed = 10,
    /// Vertical tab.
    VT = 11,
    /// Form feed.
    FF = 12,
    /// Carriage return (CR).
    CarriageReturn = 13,
    /// Shift out (0x0E).
    SO = 14,
    /// Shift in (0x0F).
    SI = 15,
    /// Data link escape.
    DLE = 16,
    /// Device control 1 (often XON).
    DC1 = 17,
    /// Device control 2.
    DC2 = 18,
    /// Device control 3 (often XOFF).
    DC3 = 19,
    /// Device control 4.
    DC4 = 20,
    /// Negative acknowledge.
    NAK = 21,
    /// Synchronous idle.
    SYN = 22,
    /// End of transmission block.
    ETB = 23,
    /// Cancel.
    CAN = 24,
    /// End of medium.
    EM = 25,
    /// Substitute.
    SUB = 26,
    /// Escape (ESC).
    Escape = 27,
    /// File separator.
    FS = 28,
    /// Group separator.
    GS = 29,
    /// Record separator.
    RS = 30,
    /// Unit separator.
    US = 31,

    // --- Space and punctuation (32..=47) ---
    Space = 32,
    Exclamation = 33,
    Quotation = 34,
    Hash = 35,
    Dollar = 36,
    Percent = 37,
    Ampersand = 38,
    Apostrophe = 39,
    ParenOpen = 40,
    ParenClose = 41,
    Asterisk = 42,
    Plus = 43,
    Comma = 44,
    Minus = 45,
    Dot = 46,
    Slash = 47,

    // --- Digits: prefixed with `_` to form valid identifiers ---
    _0 = 48,
    _1 = 49,
    _2 = 50,
    _3 = 51,
    _4 = 52,
    _5 = 53,
    _6 = 54,
    _7 = 55,
    _8 = 56,
    _9 = 57,

    // --- Punctuation (58..=64) ---
    Colon = 58,
    SemiColon = 59,
    LessThan = 60,
    Equal = 61,
    GreaterThan = 62,
    Question = 63,
    At = 64,

    // --- Uppercase letters ---
    A = 65,
    B = 66,
    C = 67,
    D = 68,
    E = 69,
    F = 70,
    G = 71,
    H = 72,
    I = 73,
    J = 74,
    K = 75,
    L = 76,
    M = 77,
    N = 78,
    O = 79,
    P = 80,
    Q = 81,
    R = 82,
    S = 83,
    T = 84,
    U = 85,
    V = 86,
    W = 87,
    X = 88,
    Y = 89,
    Z = 90,

    // --- Punctuation (91..=96) ---
    BracketOpen = 91,
    BackSlash = 92,
    BracketClose = 93,
    Caret = 94,
    UnderScore = 95,
    Grave = 96,

    // --- Lowercase letters ---
    a = 97,
    b = 98,
    c = 99,
    d = 100,
    e = 101,
    f = 102,
    g = 103,
    h = 104,
    i = 105,
    j = 106,
    k = 107,
    l = 108,
    m = 109,
    n = 110,
    o = 111,
    p = 112,
    q = 113,
    r = 114,
    s = 115,
    t = 116,
    u = 117,
    v = 118,
    w = 119,
    x = 120,
    y = 121,
    z = 122,

    // --- Punctuation (123..=126) and DEL ---
    CurlyBraceOpen = 123,
    VerticalBar = 124,
    CurlyBraceClose = 125,
    Tilde = 126,
    /// Delete (DEL), the last ASCII code.
    Delete = 127,
}
9141

10142
impl Ascii {
11143
/// Constructs an Ascii character from a `char`.
@@ -23,7 +155,7 @@ impl Ascii {
23155
#[inline]
24156
pub fn from(ch: char) -> Result<Ascii, ()> {
25157
if ch as u32 <= 0x7F {
26-
return Ok( Ascii { chr: ch as u8 });
158+
return Ok(unsafe{ ch.to_ascii_nocheck() }); // SAFETY: ch <= 0x7F was checked just above, so it maps to a valid variant
27159
}
28160
Err(())
29161
}
@@ -44,35 +176,36 @@ impl Ascii {
44176
#[inline]
45177
pub fn from_byte(ch: u8) -> Result<Ascii, ()> {
46178
if ch <= 0x7F {
47-
return Ok( Ascii { chr: ch });
179+
return Ok(unsafe{ ch.to_ascii_nocheck() }); // SAFETY: ch <= 0x7F was checked just above, so it maps to a valid variant
48180
}
49181
Err(())
50182
}
51183

52184
/// Converts an ascii character into a `u8`.
53185
#[inline]
54186
pub fn as_byte(&self) -> u8 {
55-
self.chr
187+
*self as u8 // `Ascii` is a fieldless #[repr(u8)] enum, so a plain cast yields its byte value without `unsafe`
56188
}
57189

58190
/// Converts an ascii character into a `char`.
59191
#[inline]
60192
pub fn as_char(&self) -> char {
61-
self.chr as char
193+
self.as_byte() as char
62194
}
63195

64196
// the following methods are like ctype, and the implementation is inspired by musl
65197

66198
/// Check if the character is a letter (a-z, A-Z)
67199
#[inline]
68200
pub fn is_alphabetic(&self) -> bool {
69-
(self.chr >= 0x41 && self.chr <= 0x5A) || (self.chr >= 0x61 && self.chr <= 0x7A)
201+
(self >= &Ascii::a && self <= &Ascii::z) ||
202+
(self >= &Ascii::A && self <= &Ascii::Z)
70203
}
71204

72205
/// Check if the character is a number (0-9)
73206
#[inline]
74207
pub fn is_digit(&self) -> bool {
75-
self.chr >= 0x30 && self.chr <= 0x39
208+
self >= &Ascii::_0 && self <= &Ascii::_9
76209
}
77210

78211
/// Check if the character is a letter or number
@@ -84,37 +217,37 @@ impl Ascii {
84217
/// Check if the character is a space or horizontal tab
85218
#[inline]
86219
pub fn is_blank(&self) -> bool {
87-
self.chr == b' ' || self.chr == b'\t'
220+
*self == Ascii::Space || *self == Ascii::Tab
88221
}
89222

90223
/// Check if the character is a control character
91224
#[inline]
92225
pub fn is_control(&self) -> bool {
93-
self.chr < 0x20 || self.chr == 0x7F
226+
self.as_byte() < 0x20 || *self == Ascii::Delete
94227
}
95228

96229
/// Checks if the character is printable (except space)
97230
#[inline]
98231
pub fn is_graph(&self) -> bool {
99-
(self.chr - 0x21) < 0x5E
232+
self.as_byte().wrapping_sub(0x21) < 0x5E // wrapping_sub: bytes below 0x21 wrap to a large value (and fail the test) instead of panicking on overflow in debug builds
100233
}
101234

102235
/// Checks if the character is printable (including space)
103236
#[inline]
104237
pub fn is_print(&self) -> bool {
105-
(self.chr - 0x20) < 0x5F
238+
self.as_byte().wrapping_sub(0x20) < 0x5F // wrapping_sub: control codes below 0x20 wrap around and fail the test instead of panicking on overflow in debug builds
106239
}
107240

108241
/// Checks if the character is alphabetic and lowercase
109242
#[inline]
110243
pub fn is_lowercase(&self) -> bool {
111-
(self.chr - b'a') < 26
244+
self.as_byte().wrapping_sub(b'a') < 26 // wrapping_sub: bytes below b'a' wrap around and fail the test instead of panicking on overflow in debug builds
112245
}
113246

114247
/// Checks if the character is alphabetic and uppercase
115248
#[inline]
116249
pub fn is_uppercase(&self) -> bool {
117-
(self.chr - b'A') < 26
250+
self.as_byte().wrapping_sub(b'A') < 26 // wrapping_sub: bytes below b'A' wrap around and fail the test instead of panicking on overflow in debug builds
118251
}
119252

120253
/// Checks if the character is punctuation
@@ -126,7 +259,7 @@ impl Ascii {
126259
/// Checks if the character is a valid hex digit
127260
#[inline]
128261
pub fn is_hex(&self) -> bool {
129-
self.is_digit() || ((self.chr | 32u8) - b'a') < 6
262+
self.is_digit() || (self.as_byte() | 32u8).wrapping_sub(b'a') < 6 // `| 32` folds case; wrapping_sub keeps non-letters such as '@' from panicking on overflow in debug builds
130263
}
131264
}
132265

@@ -152,25 +285,25 @@ impl AsciiExt for Ascii {
152285
}
153286

154287
fn to_ascii_uppercase(&self) -> Ascii {
155-
Ascii{chr: self.chr.to_ascii_uppercase()}
288+
unsafe{ self.as_byte().to_ascii_uppercase().to_ascii_nocheck() } // SAFETY: upper-casing an ASCII byte only maps a-z to A-Z, so the result stays <= 0x7F
156289
}
157290

158291
fn to_ascii_lowercase(&self) -> Ascii {
159-
Ascii{chr: self.chr.to_ascii_lowercase()}
292+
unsafe{ self.as_byte().to_ascii_lowercase().to_ascii_nocheck() } // SAFETY: lower-casing an ASCII byte only maps A-Z to a-z, so the result stays <= 0x7F
160293
}
161294

162295
fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
163-
self.chr.eq_ignore_ascii_case(&other.chr)
296+
self.as_byte().eq_ignore_ascii_case(&other.as_byte())
164297
}
165298

166299
#[inline]
167300
fn make_ascii_uppercase(&mut self) {
168-
self.chr.make_ascii_uppercase()
301+
*self = self.to_ascii_uppercase();
169302
}
170303

171304
#[inline]
172305
fn make_ascii_lowercase(&mut self) {
173-
self.chr.make_ascii_lowercase()
306+
*self = self.to_ascii_lowercase();
174307
}
175308
}
176309

@@ -179,7 +312,7 @@ impl<'a> AsciiCast<'a> for u8 {
179312

180313
#[inline]
181314
unsafe fn to_ascii_nocheck(&self) -> Ascii {
182-
Ascii{ chr: *self }
315+
transmute(*self) // caller must guarantee *self <= 0x7F; larger bytes have no `Ascii` variant, so transmuting them is undefined behaviour
183316
}
184317
}
185318

@@ -188,7 +321,7 @@ impl<'a> AsciiCast<'a> for char {
188321

189322
#[inline]
190323
unsafe fn to_ascii_nocheck(&self) -> Ascii {
191-
Ascii{ chr: *self as u8 }
324+
(*self as u8).to_ascii_nocheck() // `as u8` truncates the char to its low byte; caller must guarantee the char is <= 0x7F
192325
}
193326
}
194327

@@ -199,10 +332,10 @@ mod tests {
199332

200333
#[test]
201334
fn to_ascii() {
202-
assert_eq!(65_u8.to_ascii(), Ok(Ascii { chr: 65_u8 }));
335+
assert_eq!(65_u8.to_ascii(), Ok(Ascii::A));
203336
assert_eq!(255_u8.to_ascii(), Err(()));
204337

205-
assert_eq!('A'.to_ascii(), Ok(Ascii { chr: 65_u8 }));
338+
assert_eq!('A'.to_ascii(), Ok(Ascii::A));
206339
assert_eq!('λ'.to_ascii(), Err(()));
207340
}
208341

@@ -235,13 +368,11 @@ mod tests {
235368

236369
#[test]
237370
fn fmt_display_ascii() {
238-
let s = Ascii { chr: b't' };
239-
assert_eq!(format!("{}", s), "t".to_string());
371+
assert_eq!(format!("{}", Ascii::t), "t".to_string());
240372
}
241373

242374
#[test]
243375
fn fmt_debug_ascii() {
244-
let c = Ascii { chr: b't' };
245-
assert_eq!(format!("{:?}", c), "'t'".to_string());
376+
assert_eq!(format!("{:?}", Ascii::t), "'t'".to_string());
246377
}
247378
}

0 commit comments

Comments
 (0)