diff --git a/Cargo.toml b/Cargo.toml
index 24989321..ea0f06c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "cssparser"
-version = "0.3.8"
+version = "0.3.9"
 authors = [ "Simon Sapin " ]
 
 description = "Rust implementation of CSS Syntax Level 3"
diff --git a/src/lib.rs b/src/lib.rs
index cb0459e4..c7deb4d4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -84,7 +84,7 @@ pub use rules_and_declarations::{AtRuleType, QualifiedRuleParser, AtRuleParser};
 pub use from_bytes::decode_stylesheet_bytes;
 pub use color::{RGBA, Color, parse_color_keyword};
 pub use nth::parse_nth;
-pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string};
+pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
 pub use parser::{Parser, Delimiter, Delimiters, SourcePosition};
 
 
diff --git a/src/serializer.rs b/src/serializer.rs
index 8d5f582b..f94903e6 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -290,3 +290,127 @@ impl_tocss_for_number!(i32);
 impl_tocss_for_number!(u32);
 impl_tocss_for_number!(i64);
 impl_tocss_for_number!(u64);
+
+
+/// A category of token. See the `needs_separator_when_before` method.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
+pub struct TokenSerializationType(TokenSerializationTypeVariants);
+
+impl TokenSerializationType {
+    /// Return a value that represents the absence of a token, e.g. before the start of the input.
+    pub fn nothing() -> TokenSerializationType {
+        TokenSerializationType(TokenSerializationTypeVariants::Nothing)
+    }
+
+    /// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
+    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
+        if self.0 == TokenSerializationTypeVariants::Nothing {
+            self.0 = new_value.0
+        }
+    }
+
+    /// Return true if, when a token of category `self` is serialized just before
+    /// a token of category `other` with no whitespace in between,
+    /// an empty comment `/**/` needs to be inserted between them
+    /// so that they are not re-parsed as a single token.
+    ///
+    /// See https://drafts.csswg.org/css-syntax/#serialization
+    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
+        use self::TokenSerializationTypeVariants::*;
+        match self.0 {
+            Ident => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange | CDC | OpenParen),
+            AtKeywordOrHash | Dimension => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange | CDC),
+            DelimHash | DelimMinus | Number => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | Number | Percentage | Dimension |
+                UnicodeRange),
+            DelimAt => matches!(other.0,
+                Ident | Function | UrlOrBadUrl | DelimMinus | UnicodeRange),
+            UnicodeRange => matches!(other.0,
+                Ident | Function | Number | Percentage | Dimension | DelimQuestion),
+            DelimDotOrPlus => matches!(other.0, Number | Percentage | Dimension),
+            DelimAssorted | DelimAsterisk => matches!(other.0, DelimEquals),
+            DelimBar => matches!(other.0, DelimEquals | DelimBar | DashMatch),
+            DelimSlash => matches!(other.0, DelimAsterisk | SubstringMatch),
+            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen |
+            DashMatch | SubstringMatch | DelimQuestion | DelimEquals | Other => false,
+        }
+    }
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
+enum TokenSerializationTypeVariants {
+    Nothing,
+    WhiteSpace,
+    AtKeywordOrHash,
+    Number,
+    Dimension,
+    Percentage,
+    UnicodeRange,
+    UrlOrBadUrl,
+    Function,
+    Ident,
+    CDC,
+    DashMatch,
+    SubstringMatch,
+    OpenParen,       // '('
+    DelimHash,       // '#'
+    DelimAt,         // '@'
+    DelimDotOrPlus,  // '.', '+'
+    DelimMinus,      // '-'
+    DelimQuestion,   // '?'
+    DelimAssorted,   // '$', '^', '~'
+    DelimEquals,     // '='
+    DelimBar,        // '|'
+    DelimSlash,      // '/'
+    DelimAsterisk,   // '*'
+    Other,           // anything else
+}
+
+impl<'a> Token<'a> {
+    /// Categorize a token into a type that determines when `/**/` needs to be inserted
+    /// between two tokens when serialized next to each other without whitespace in between.
+    ///
+    /// See the `TokenSerializationType::needs_separator_when_before` method.
+    pub fn serialization_type(&self) -> TokenSerializationType {
+        use self::TokenSerializationTypeVariants::*;
+        TokenSerializationType(match *self {
+            Token::Ident(_) => Ident,
+            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
+            Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
+            Token::Delim('#') => DelimHash,
+            Token::Delim('@') => DelimAt,
+            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
+            Token::Delim('-') => DelimMinus,
+            Token::Delim('?') => DelimQuestion,
+            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
+            Token::Delim('=') => DelimEquals,
+            Token::Delim('|') => DelimBar,
+            Token::Delim('/') => DelimSlash,
+            Token::Delim('*') => DelimAsterisk,
+            Token::Number(_) => Number,
+            Token::Percentage(_) => Percentage,
+            Token::Dimension(..) => Dimension,
+            Token::UnicodeRange(..) => UnicodeRange,
+            Token::WhiteSpace(_) => WhiteSpace,
+            Token::Comment(_) => DelimSlash,
+            Token::DashMatch => DashMatch,
+            Token::SubstringMatch => SubstringMatch,
+            Token::Column => DelimBar,
+            Token::CDC => CDC,
+            Token::Function(_) => Function,
+            Token::ParenthesisBlock => OpenParen,
+            Token::SquareBracketBlock | Token::CurlyBracketBlock |
+            Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
+            Token::QuotedString(_) | Token::BadString |
+            Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
+            Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
+            => Other,
+        })
+    }
+}
diff --git a/src/tests.rs b/src/tests.rs
index 0068e989..3e2f70e1 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -18,6 +18,7 @@ use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocat
             AtRuleType, AtRuleParser, QualifiedRuleParser,
             parse_one_declaration, parse_one_rule, parse_important,
             decode_stylesheet_bytes,
+            TokenSerializationType,
             Color, RGBA, parse_nth, ToCss};
 
 
@@ -323,10 +324,31 @@ fn nth() {
 
 
 #[test]
-fn serializer() {
+fn serializer_not_preserving_comments() {
+    serializer(false)
+}
+
+#[test]
+fn serializer_preserving_comments() {
+    serializer(true)
+}
+
+fn serializer(preserve_comments: bool) {
     run_json_tests(include_str!("css-parsing-tests/component_value_list.json"), |input| {
-        fn write_to(input: &mut Parser, string: &mut String) {
-            while let Ok(token) = input.next_including_whitespace_and_comments() {
+        fn write_to(mut previous_token: TokenSerializationType,
+                    input: &mut Parser,
+                    string: &mut String,
+                    preserve_comments: bool) {
+            while let Ok(token) = if preserve_comments {
+                input.next_including_whitespace_and_comments()
+            } else {
+                input.next_including_whitespace()
+            } {
+                let token_type = token.serialization_type();
+                if !preserve_comments && previous_token.needs_separator_when_before(token_type) {
+                    string.push_str("/**/")
+                }
+                previous_token = token_type;
                 token.to_css(string).unwrap();
                 let closing_token = match token {
                     Token::Function(_) | Token::ParenthesisBlock => Some(Token::CloseParenthesis),
@@ -336,7 +358,7 @@ fn serializer() {
                 };
                 if let Some(closing_token) = closing_token {
                     input.parse_nested_block(|input| {
-                        write_to(input, string);
+                        write_to(previous_token, input, string, preserve_comments);
                         Ok(())
                     }).unwrap();
                     closing_token.to_css(string).unwrap();
@@ -344,7 +366,7 @@ fn serializer() {
             }
         }
         let mut serialized = String::new();
-        write_to(input, &mut serialized);
+        write_to(TokenSerializationType::nothing(), input, &mut serialized, preserve_comments);
         let parser = &mut Parser::new(&serialized);
         Json::Array(component_values_to_json(parser))
     });