Skip to content

Commit 3e2e0b5

Browse files
committed
Treat url(<string>) as a normal function, per spec change.
Only unquoted URLs are special tokens now. Use `Parser::expect_url`. This is a [breaking-change]. The version number was incremented accordingly. This change will help with servo/servo#7767
1 parent 920c23a commit 3e2e0b5

File tree

5 files changed

+98
-64
lines changed

5 files changed

+98
-64
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "cssparser"
4-
version = "0.3.9"
4+
version = "0.4.0"
55
authors = [ "Simon Sapin <[email protected]>" ]
66

77
description = "Rust implementation of CSS Syntax Level 3"

src/parser.rs

+13-7
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> {
331331
/// This can help tell e.g. `color: green;` from `color: green 4px;`
332332
#[inline]
333333
pub fn parse_entirely<F, T>(&mut self, parse: F) -> Result<T, ()>
334-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
334+
where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ()> {
335335
let result = parse(self);
336336
try!(self.expect_exhausted());
337337
result
@@ -374,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> {
374374
/// The result is overridden to `Err(())` if the closure leaves some input before that point.
375375
#[inline]
376376
pub fn parse_nested_block<F, T>(&mut self, parse: F) -> Result <T, ()>
377-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
377+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
378378
let block_type = self.at_start_of.take().expect("\
379379
A nested parser can only be created when a Function, \
380380
ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
@@ -412,7 +412,7 @@ impl<'i, 't> Parser<'i, 't> {
412412
#[inline]
413413
pub fn parse_until_before<F, T>(&mut self, delimiters: Delimiters, parse: F)
414414
-> Result <T, ()>
415-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
415+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
416416
let delimiters = self.stop_before | delimiters;
417417
let result;
418418
// Introduce a new scope to limit duration of nested_parser’s borrow
@@ -451,7 +451,7 @@ impl<'i, 't> Parser<'i, 't> {
451451
#[inline]
452452
pub fn parse_until_after<F, T>(&mut self, delimiters: Delimiters, parse: F)
453453
-> Result <T, ()>
454-
where F: FnOnce(&mut Parser) -> Result<T, ()> {
454+
where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
455455
let result = self.parse_until_before(delimiters, parse);
456456
let next_byte = self.tokenizer.next_byte();
457457
if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) {
@@ -481,7 +481,7 @@ impl<'i, 't> Parser<'i, 't> {
481481

482482
/// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
483483
#[inline]
484-
pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> {
484+
pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), ()> {
485485
match try!(self.next()) {
486486
Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
487487
_ => Err(())
@@ -511,7 +511,10 @@ impl<'i, 't> Parser<'i, 't> {
511511
#[inline]
512512
pub fn expect_url(&mut self) -> Result<Cow<'i, str>, ()> {
513513
match try!(self.next()) {
514-
Token::Url(value) => Ok(value),
514+
Token::UnquotedUrl(value) => Ok(value),
515+
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
516+
self.parse_nested_block(|input| input.expect_string())
517+
},
515518
_ => Err(())
516519
}
517520
}
@@ -520,8 +523,11 @@ impl<'i, 't> Parser<'i, 't> {
520523
#[inline]
521524
pub fn expect_url_or_string(&mut self) -> Result<Cow<'i, str>, ()> {
522525
match try!(self.next()) {
523-
Token::Url(value) => Ok(value),
526+
Token::UnquotedUrl(value) => Ok(value),
524527
Token::QuotedString(value) => Ok(value),
528+
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
529+
self.parse_nested_block(|input| input.expect_string())
530+
},
525531
_ => Err(())
526532
}
527533
}

src/serializer.rs

+23-3
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ impl<'a> ToCss for Token<'a> {
8181
try!(serialize_identifier(&**value, dest));
8282
}
8383
Token::QuotedString(ref value) => try!(serialize_string(&**value, dest)),
84-
Token::Url(ref value) => {
84+
Token::UnquotedUrl(ref value) => {
8585
try!(dest.write_str("url("));
86-
try!(serialize_string(&**value, dest));
86+
try!(serialize_unquoted_url(&**value, dest));
8787
try!(dest.write_str(")"));
8888
},
8989
Token::Delim(value) => try!(write!(dest, "{}", value)),
@@ -213,6 +213,26 @@ fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Writ
213213
}
214214

215215

216+
fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
217+
let mut chunk_start = 0;
218+
for (i, b) in value.bytes().enumerate() {
219+
let hex = match b {
220+
b'\0' ... b' ' | b'\x7F' => true,
221+
b'(' | b')' | b'"' | b'\'' | b'\\' => false,
222+
_ => continue
223+
};
224+
try!(dest.write_str(&value[chunk_start..i]));
225+
if hex {
226+
try!(write!(dest, "\\{:X} ", b));
227+
} else {
228+
try!(write!(dest, "\\{}", b as char));
229+
}
230+
chunk_start = i + 1;
231+
}
232+
dest.write_str(&value[chunk_start..])
233+
}
234+
235+
216236
/// Write a double-quoted CSS string token, escaping content as necessary.
217237
pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result where W: fmt::Write {
218238
try!(dest.write_str("\""));
@@ -382,7 +402,7 @@ impl<'a> Token<'a> {
382402
TokenSerializationType(match *self {
383403
Token::Ident(_) => Ident,
384404
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
385-
Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
405+
Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
386406
Token::Delim('#') => DelimHash,
387407
Token::Delim('@') => DelimAt,
388408
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,

src/tests.rs

+40-31
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
* License, v. 2.0. If a copy of the MPL was not distributed with this
33
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44

5-
use std::borrow::Cow::Borrowed;
5+
use std::borrow::Cow::{self, Borrowed};
66
use std::fs::File;
77
use std::io::{self, Write};
88
use std::path::Path;
99
use std::process::Command;
10-
use std::mem;
1110
use rustc_serialize::json::{self, Json, ToJson};
1211
use tempdir::TempDir;
1312

@@ -74,14 +73,8 @@ fn almost_equals(a: &Json, b: &Json) -> bool {
7473
fn normalize(json: &mut Json) {
7574
match *json {
7675
Json::Array(ref mut list) => {
77-
match find_url(list) {
78-
Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)],
79-
Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()],
80-
None => {
81-
for item in list.iter_mut() {
82-
normalize(item)
83-
}
84-
}
76+
for item in list.iter_mut() {
77+
normalize(item)
8578
}
8679
}
8780
Json::String(ref mut s) => {
@@ -93,26 +86,6 @@ fn normalize(json: &mut Json) {
9386
}
9487
}
9588

96-
fn find_url(list: &mut [Json]) -> Option<Result<String, ()>> {
97-
if list.len() < 2 ||
98-
list[0].as_string() != Some("function") ||
99-
list[1].as_string() != Some("url") {
100-
return None
101-
}
102-
103-
let mut args = list[2..].iter_mut().filter(|a| a.as_string() != Some(" "));
104-
if let (Some(&mut Json::Array(ref mut arg)), None) = (args.next(), args.next()) {
105-
if arg.len() == 2 && arg[0].as_string() == Some("string") {
106-
if let &mut Json::String(ref mut value) = &mut arg[1] {
107-
return Some(Ok(mem::replace(value, String::new())))
108-
}
109-
}
110-
}
111-
112-
Some(Err(()))
113-
}
114-
115-
11689
fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) {
11790
normalize(&mut expected);
11891
if !almost_equals(&results, &expected) {
@@ -281,6 +254,42 @@ fn outer_block_end_consumed() {
281254
assert_eq!(input.next(), Err(()));
282255
}
283256

257+
#[test]
258+
fn unquoted_url_escaping() {
259+
let token = Token::UnquotedUrl("\
260+
\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
261+
\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
262+
!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
263+
^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
264+
".into());
265+
let serialized = token.to_css_string();
266+
assert_eq!(serialized, "\
267+
url(\
268+
\\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\A \\B \\C \\D \\E \\F \\10 \
269+
\\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1A \\1B \\1C \\1D \\1E \\1F \\20 \
270+
!\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
271+
^_`abcdefghijklmnopqrstuvwxyz{|}~\\7F é\
272+
)\
273+
");
274+
assert_eq!(Parser::new(&serialized).next(), Ok(token))
275+
}
276+
277+
#[test]
278+
fn test_expect_url() {
279+
fn parse(s: &str) -> Result<Cow<str>, ()> {
280+
Parser::new(s).expect_url()
281+
}
282+
assert_eq!(parse("url()").unwrap(), "");
283+
assert_eq!(parse("url( ").unwrap(), "");
284+
assert_eq!(parse("url( abc").unwrap(), "abc");
285+
assert_eq!(parse("url( abc \t)").unwrap(), "abc");
286+
assert_eq!(parse("url( 'abc' \t)").unwrap(), "abc");
287+
assert_eq!(parse("url(abc more stuff)"), Err(()));
288+
// The grammar at https://drafts.csswg.org/css-values/#urls plans for `<url-modifier>*`
289+
// at the position of "more stuff", but no such modifier is defined yet.
290+
assert_eq!(parse("url('abc' more stuff)"), Err(()));
291+
}
292+
284293

285294
fn run_color_tests<F: Fn(Result<Color, ()>) -> Json>(json_data: &str, to_json: F) {
286295
run_json_tests(json_data, |input| {
@@ -606,7 +615,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
606615
Token::Hash(value) => JArray!["hash", value, "unrestricted"],
607616
Token::IDHash(value) => JArray!["hash", value, "id"],
608617
Token::QuotedString(value) => JArray!["string", value],
609-
Token::Url(value) => JArray!["url", value],
618+
Token::UnquotedUrl(value) => JArray!["url", value],
610619
Token::Delim('\\') => "\\".to_json(),
611620
Token::Delim(value) => value.to_string().to_json(),
612621

src/tokenizer.rs

+21-22
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pub enum Token<'a> {
4848
/// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( <string-token> )` function
4949
///
5050
/// The value does not include the `url(` `)` markers or the quotes.
51-
Url(Cow<'a, str>),
51+
UnquotedUrl(Cow<'a, str>),
5252

5353
/// A `<delim-token>`
5454
Delim(char),
@@ -628,7 +628,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
628628
if !tokenizer.is_eof() && tokenizer.next_char() == '(' {
629629
tokenizer.advance(1);
630630
if value.eq_ignore_ascii_case("url") {
631-
consume_url(tokenizer)
631+
consume_unquoted_url(tokenizer).unwrap_or(Function(value))
632632
} else {
633633
if tokenizer.var_functions == VarFunctions::LookingForThem &&
634634
value.eq_ignore_ascii_case("var") {
@@ -791,31 +791,30 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
791791
}
792792

793793

794-
fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
795-
while !tokenizer.is_eof() {
796-
match tokenizer.next_char() {
797-
' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
798-
'"' => return consume_quoted_url(tokenizer, false),
799-
'\'' => return consume_quoted_url(tokenizer, true),
800-
')' => { tokenizer.advance(1); break },
801-
_ => return consume_unquoted_url(tokenizer),
802-
}
803-
}
804-
return Url(Borrowed(""));
805-
806-
fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
807-
match consume_quoted_string(tokenizer, single_quote) {
808-
Ok(value) => consume_url_end(tokenizer, value),
809-
Err(()) => consume_bad_url(tokenizer),
794+
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
795+
for (offset, c) in tokenizer.input[tokenizer.position..].char_indices() {
796+
match c {
797+
' ' | '\t' | '\n' | '\r' | '\x0C' => {},
798+
'"' | '\'' => return Err(()), // Do not advance
799+
')' => {
800+
tokenizer.advance(offset + 1);
801+
return Ok(UnquotedUrl(Borrowed("")));
802+
}
803+
_ => {
804+
tokenizer.advance(offset);
805+
return Ok(consume_unquoted_url(tokenizer))
806+
}
810807
}
811808
}
809+
tokenizer.position = tokenizer.input.len();
810+
return Ok(UnquotedUrl(Borrowed("")));
812811

813812
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
814813
let start_pos = tokenizer.position();
815814
let mut string;
816815
loop {
817816
if tokenizer.is_eof() {
818-
return Url(Borrowed(tokenizer.slice_from(start_pos)))
817+
return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos)))
819818
}
820819
match tokenizer.next_char() {
821820
' ' | '\t' | '\n' | '\r' | '\x0C' => {
@@ -826,7 +825,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
826825
')' => {
827826
let value = tokenizer.slice_from(start_pos);
828827
tokenizer.advance(1);
829-
return Url(Borrowed(value))
828+
return UnquotedUrl(Borrowed(value))
830829
}
831830
'\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable
832831
| '"' | '\'' | '(' => {
@@ -861,7 +860,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
861860
};
862861
string.push(next_char)
863862
}
864-
Url(Owned(string))
863+
UnquotedUrl(Owned(string))
865864
}
866865

867866
fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> {
@@ -872,7 +871,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
872871
_ => return consume_bad_url(tokenizer)
873872
}
874873
}
875-
Url(string)
874+
UnquotedUrl(string)
876875
}
877876

878877
fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {

0 commit comments

Comments
 (0)