Skip to content

Commit 125440a

Browse files
feat(uri): make Authority/PathAndQuery::from_static const
1 parent 918bbc3 commit 125440a

File tree

2 files changed

+197
-95
lines changed

2 files changed

+197
-95
lines changed

src/uri/authority.rs

Lines changed: 115 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ use bytes::Bytes;
88
use super::{ErrorKind, InvalidUri, Port, URI_CHARS};
99
use crate::byte_str::ByteStr;
1010

11+
/// Reasons `validate_authority_bytes` can reject its input.
///
/// This is a plain, `Copy` enum so that it can be produced inside a
/// `const fn`; `Authority::parse` translates it into an `ErrorKind`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum AuthorityError {
    /// The input slice had no bytes at all.
    Empty,
    /// A byte that `URI_CHARS` maps to `0` was found and it was not `%`.
    InvalidUriChar,
    /// More than one `:` was still counted once scanning finished
    /// (for example `localhost:8080:3030`).
    InvalidAuthority,
    /// Another `:` was seen after the running colon count had already
    /// reached the validator's `MAX_COLONS` limit.
    TooManyColons,
    /// Exactly one of `[` and `]` was seen.
    MismatchedBrackets,
    /// A `[` appeared after a pending `%` or after an earlier `[`, or a `]`
    /// appeared without a preceding `[` or after an earlier `]`.
    InvalidBracketUsage,
    /// The last `@` was the final byte of the authority.
    EmptyAfterAt,
    /// A `%` was seen and not cleared by a later `]` or `@`.
    InvalidPercent,
}
23+
1124
/// Represents the authority component of a URI.
1225
#[derive(Clone)]
1326
pub struct Authority {
@@ -45,9 +58,20 @@ impl Authority {
4558
/// let authority = Authority::from_static("example.com");
4659
/// assert_eq!(authority.host(), "example.com");
4760
/// ```
48-
pub fn from_static(src: &'static str) -> Self {
49-
Authority::from_shared(Bytes::from_static(src.as_bytes()))
50-
.expect("static str is not valid authority")
61+
#[inline]
62+
pub const fn from_static(src: &'static str) -> Self {
63+
match validate_authority_bytes(src.as_bytes()) {
64+
Ok(_) => Authority {
65+
data: ByteStr::from_static(src),
66+
},
67+
Err(_) => {
68+
#[allow(unconditional_panic, clippy::no_effect, clippy::out_of_bounds_indexing)]
69+
{
70+
([] as [u8; 0])[0];
71+
}
72+
loop {}
73+
}
74+
}
5175
}
5276

5377
/// Attempt to convert a `Bytes` buffer to a `Authority`.
@@ -69,95 +93,19 @@ impl Authority {
6993
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
7094
// ret is the return value.
7195
pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
72-
let mut colon_cnt = 0u32;
73-
let mut start_bracket = false;
74-
let mut end_bracket = false;
75-
let mut has_percent = false;
76-
let mut end = s.len();
77-
let mut at_sign_pos = None;
78-
const MAX_COLONS: u32 = 8; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
79-
80-
// Among other things, this loop checks that every byte in s up to the
81-
// first '/', '?', or '#' is a valid URI character (or in some contexts,
82-
// a '%'). This means that each such byte is a valid single-byte UTF-8
83-
// code point.
84-
for (i, &b) in s.iter().enumerate() {
85-
match URI_CHARS[b as usize] {
86-
b'/' | b'?' | b'#' => {
87-
end = i;
88-
break;
89-
}
90-
b':' => {
91-
if colon_cnt >= MAX_COLONS {
92-
return Err(ErrorKind::InvalidAuthority.into());
93-
}
94-
colon_cnt += 1;
95-
}
96-
b'[' => {
97-
if has_percent || start_bracket {
98-
// Something other than the userinfo has a `%`, so reject it.
99-
return Err(ErrorKind::InvalidAuthority.into());
100-
}
101-
start_bracket = true;
102-
}
103-
b']' => {
104-
if (!start_bracket) || end_bracket {
105-
return Err(ErrorKind::InvalidAuthority.into());
106-
}
107-
end_bracket = true;
108-
109-
// Those were part of an IPv6 hostname, so forget them...
110-
colon_cnt = 0;
111-
has_percent = false;
112-
}
113-
b'@' => {
114-
at_sign_pos = Some(i);
115-
116-
// Those weren't a port colon, but part of the
117-
// userinfo, so it needs to be forgotten.
118-
colon_cnt = 0;
119-
has_percent = false;
120-
}
121-
0 if b == b'%' => {
122-
// Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
123-
// https://url.spec.whatwg.org/#authority-state
124-
// the userinfo can have a percent-encoded username and password,
125-
// so record that a `%` was found. If this turns out to be
126-
// part of the userinfo, this flag will be cleared.
127-
// Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
128-
// be used to indicate a zone identifier.
129-
// If the flag hasn't been cleared at the end, that means this
130-
// was part of the hostname (and not part of an IPv6 address), and
131-
// will fail with an error.
132-
has_percent = true;
133-
}
134-
0 => {
135-
return Err(ErrorKind::InvalidUriChar.into());
136-
}
137-
_ => {}
96+
validate_authority_bytes(s).map_err(|e| {
97+
match e {
98+
AuthorityError::Empty => ErrorKind::Empty,
99+
AuthorityError::InvalidUriChar => ErrorKind::InvalidUriChar,
100+
AuthorityError::InvalidAuthority
101+
| AuthorityError::MismatchedBrackets
102+
| AuthorityError::InvalidBracketUsage
103+
| AuthorityError::EmptyAfterAt
104+
| AuthorityError::InvalidPercent
105+
| AuthorityError::TooManyColons => ErrorKind::InvalidAuthority,
138106
}
139-
}
140-
141-
if start_bracket ^ end_bracket {
142-
return Err(ErrorKind::InvalidAuthority.into());
143-
}
144-
145-
if colon_cnt > 1 {
146-
// Things like 'localhost:8080:3030' are rejected.
147-
return Err(ErrorKind::InvalidAuthority.into());
148-
}
149-
150-
if end > 0 && at_sign_pos == Some(end - 1) {
151-
// If there's nothing after an `@`, this is bonkers.
152-
return Err(ErrorKind::InvalidAuthority.into());
153-
}
154-
155-
if has_percent {
156-
// Something after the userinfo has a `%`, so reject it.
157-
return Err(ErrorKind::InvalidAuthority.into());
158-
}
159-
160-
Ok(end)
107+
.into()
108+
})
161109
}
162110

163111
// Parse bytes as an Authority, not allowing an empty string.
@@ -528,6 +476,82 @@ where
528476
})
529477
}
530478

479+
/// Shared validation logic for authority bytes.
480+
/// Returns the end position of valid authority bytes, or an error.
481+
const fn validate_authority_bytes(s: &[u8]) -> Result<usize, AuthorityError> {
482+
if s.is_empty() {
483+
return Err(AuthorityError::Empty);
484+
}
485+
486+
let mut colon_cnt: u32 = 0;
487+
let mut start_bracket = false;
488+
let mut end_bracket = false;
489+
let mut has_percent = false;
490+
let mut end = s.len();
491+
let mut at_sign_pos: usize = s.len();
492+
const MAX_COLONS: u32 = 8;
493+
494+
let mut i = 0;
495+
while i < s.len() {
496+
let b = s[i];
497+
let ch = URI_CHARS[b as usize];
498+
499+
if ch == b'/' || ch == b'?' || ch == b'#' {
500+
end = i;
501+
break;
502+
}
503+
504+
if ch == 0 {
505+
if b == b'%' {
506+
has_percent = true;
507+
} else {
508+
return Err(AuthorityError::InvalidUriChar);
509+
}
510+
} else if ch == b':' {
511+
if colon_cnt >= MAX_COLONS {
512+
return Err(AuthorityError::TooManyColons);
513+
}
514+
colon_cnt += 1;
515+
} else if ch == b'[' {
516+
if has_percent || start_bracket {
517+
return Err(AuthorityError::InvalidBracketUsage);
518+
}
519+
start_bracket = true;
520+
} else if ch == b']' {
521+
if !start_bracket || end_bracket {
522+
return Err(AuthorityError::InvalidBracketUsage);
523+
}
524+
end_bracket = true;
525+
colon_cnt = 0;
526+
has_percent = false;
527+
} else if ch == b'@' {
528+
at_sign_pos = i;
529+
colon_cnt = 0;
530+
has_percent = false;
531+
}
532+
533+
i += 1;
534+
}
535+
536+
if start_bracket != end_bracket {
537+
return Err(AuthorityError::MismatchedBrackets);
538+
}
539+
540+
if colon_cnt > 1 {
541+
return Err(AuthorityError::InvalidAuthority);
542+
}
543+
544+
if end > 0 && at_sign_pos == end - 1 {
545+
return Err(AuthorityError::EmptyAfterAt);
546+
}
547+
548+
if has_percent {
549+
return Err(AuthorityError::InvalidPercent);
550+
}
551+
552+
Ok(end)
553+
}
554+
531555
#[cfg(test)]
532556
mod tests {
533557
use super::*;

src/uri/path.rs

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ use bytes::Bytes;
77
use super::{ErrorKind, InvalidUri};
88
use crate::byte_str::ByteStr;
99

10+
/// Reasons `validate_path_and_query_bytes` can reject its input.
///
/// Kept as a plain, `Copy` enum so that it can be produced inside a
/// `const fn`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PathAndQueryError {
    /// A byte before the first `?` is not in the set accepted for paths.
    InvalidPathChar,
    /// A byte after the first `?` is not in the set accepted for queries.
    InvalidQueryChar,
    /// A `#` was found in either the path or the query.
    FragmentNotAllowed,
}
17+
1018
/// Represents the path component of a URI
1119
#[derive(Clone)]
1220
pub struct PathAndQuery {
@@ -138,10 +146,20 @@ impl PathAndQuery {
138146
/// assert_eq!(v.query(), Some("world"));
139147
/// ```
140148
#[inline]
141-
pub fn from_static(src: &'static str) -> Self {
142-
let src = Bytes::from_static(src.as_bytes());
143-
144-
PathAndQuery::from_shared(src).unwrap()
149+
pub const fn from_static(src: &'static str) -> Self {
150+
match validate_path_and_query_bytes(src.as_bytes()) {
151+
Ok(query) => PathAndQuery {
152+
data: ByteStr::from_static(src),
153+
query,
154+
},
155+
Err(_) => {
156+
#[allow(unconditional_panic, clippy::no_effect, clippy::out_of_bounds_indexing)]
157+
{
158+
([] as [u8; 0])[0];
159+
}
160+
loop {}
161+
}
162+
}
145163
}
146164

147165
/// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
@@ -467,6 +485,66 @@ impl PartialOrd<PathAndQuery> for String {
467485
}
468486
}
469487

488+
/// Shared validation logic for path and query bytes.
489+
/// Returns the query position (or NONE), or an error.
490+
const fn validate_path_and_query_bytes(bytes: &[u8]) -> Result<u16, PathAndQueryError> {
491+
let mut query: u16 = NONE;
492+
let mut i: usize = 0;
493+
494+
// path ...
495+
while i < bytes.len() {
496+
let b = bytes[i];
497+
if b == b'?' {
498+
query = i as u16;
499+
i += 1;
500+
break;
501+
} else if b == b'#' {
502+
return Err(PathAndQueryError::FragmentNotAllowed);
503+
} else {
504+
let allowed = b == 0x21
505+
|| (b >= 0x24 && b <= 0x3B)
506+
|| b == 0x3D
507+
|| (b >= 0x40 && b <= 0x5F)
508+
|| (b >= 0x61 && b <= 0x7A)
509+
|| b == 0x7C
510+
|| b == 0x7E
511+
|| b == b'"'
512+
|| b == b'{'
513+
|| b == b'}'
514+
|| (b >= 0x7F);
515+
516+
if !allowed {
517+
return Err(PathAndQueryError::InvalidPathChar);
518+
}
519+
}
520+
i += 1;
521+
}
522+
523+
// query ...
524+
if query != NONE {
525+
while i < bytes.len() {
526+
let b = bytes[i];
527+
if b == b'#' {
528+
return Err(PathAndQueryError::FragmentNotAllowed);
529+
}
530+
531+
let allowed = b == 0x21
532+
|| (b >= 0x24 && b <= 0x3B)
533+
|| b == 0x3D
534+
|| (b >= 0x3F && b <= 0x7E)
535+
|| (b >= 0x7F);
536+
537+
if !allowed {
538+
return Err(PathAndQueryError::InvalidQueryChar);
539+
}
540+
541+
i += 1;
542+
}
543+
}
544+
545+
Ok(query)
546+
}
547+
470548
#[cfg(test)]
471549
mod tests {
472550
use super::*;

0 commit comments

Comments
 (0)