Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(formatter): improve string normalization #3564

Merged
merged 1 commit into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 36 additions & 88 deletions crates/biome_css_formatter/src/utils/string_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ struct StringInformation {
/// This is the quote that is calculated and eventually used inside the string.
/// It could be different from the one inside the formatter options
preferred_quote: QuoteStyle,
/// It flags if the raw content has quotes (single or double). The raw content is the
/// content of a string literal without the quotes
raw_content_has_quotes: bool,
}

impl FormatLiteralStringToken<'_> {
Expand Down Expand Up @@ -153,35 +150,32 @@ impl FormatLiteralStringToken<'_> {
// preferred quote style without having to check the content.
if !matches!(self.token().kind(), CSS_STRING_LITERAL) {
return StringInformation {
raw_content_has_quotes: false,
preferred_quote: chosen_quote,
};
}

let literal = self.token().text_trimmed();
let alternate = chosen_quote.other();

let char_count = literal.chars().count();

let (preferred_quotes_count, alternate_quotes_count) = literal.chars().enumerate().fold(
(0, 0),
|(preferred_quotes_counter, alternate_quotes_counter), (index, current_character)| {
if index == 0 || index == char_count - 1 {
(preferred_quotes_counter, alternate_quotes_counter)
} else if current_character == chosen_quote.as_char() {
(preferred_quotes_counter + 1, alternate_quotes_counter)
} else if current_character == alternate.as_char() {
(preferred_quotes_counter, alternate_quotes_counter + 1)
let alternate_quote = chosen_quote.other();
let chosen_quote_byte = chosen_quote.as_byte();
let alternate_quote_byte = alternate_quote.as_byte();

let quoteless = &literal[1..literal.len() - 1];
let (chosen_quote_count, alternate_quote_count) = quoteless.bytes().fold(
(0u32, 0u32),
|(chosen_quote_count, alternate_quote_count), current_character| {
if current_character == chosen_quote_byte {
(chosen_quote_count + 1, alternate_quote_count)
} else if current_character == alternate_quote_byte {
(chosen_quote_count, alternate_quote_count + 1)
} else {
(preferred_quotes_counter, alternate_quotes_counter)
(chosen_quote_count, alternate_quote_count)
}
},
);

StringInformation {
raw_content_has_quotes: preferred_quotes_count > 0 || alternate_quotes_count > 0,
preferred_quote: if preferred_quotes_count > alternate_quotes_count {
alternate
preferred_quote: if chosen_quote_count > alternate_quote_count {
alternate_quote
} else {
chosen_quote
},
Expand Down Expand Up @@ -214,74 +208,32 @@ impl<'token> LiteralStringNormaliser<'token> {
.token
.compute_string_information(self.chosen_quote_style);

match self.token.token.kind() {
CSS_STRING_LITERAL => self.normalise_string_literal(string_information),
_ => self.normalise_non_string_token(string_information),
}
// Normalize string token and non-string token.
//
// Add the chosen quotes to any non-string tokens to normalize them into strings.
//
// CSS has various places where "string-like" tokens can be used without quotes, but the
// semantics aren't affected by whether they are present or not. This function lets those
// tokens become string literals by safely adding quotes around them.
self.normalise_tokens(string_information)
}

fn get_token(&self) -> &'token CssSyntaxToken {
self.token.token()
}

fn normalise_string_literal(&self, string_information: StringInformation) -> Cow<'token, str> {
let preferred_quote = string_information.preferred_quote;
let polished_raw_content = self.normalize_string(&string_information);

match polished_raw_content {
Cow::Borrowed(raw_content) => {
let final_content = self.swap_quotes(raw_content, &string_information);
match final_content {
Cow::Borrowed(final_content) => Cow::Borrowed(final_content),
Cow::Owned(final_content) => Cow::Owned(final_content),
}
}
Cow::Owned(s) => {
// content is owned, meaning we allocated a new string,
// so we force replacing quotes, regardless
let final_content = std::format!(
"{}{}{}",
preferred_quote.as_char(),
s.as_str(),
preferred_quote.as_char()
);

Cow::Owned(final_content)
}
}
}

/// Add the chosen quotes to any other kind of token to normalize it into a string.
///
/// CSS has various places where "string-like" tokens can be used without quotes, but the
/// semantics aren't affected by whether they are present or not. This function lets those
/// tokens become string literals by safely adding quotes around them.
fn normalise_non_string_token(
&self,
string_information: StringInformation,
) -> Cow<'token, str> {
fn normalise_tokens(&self, string_information: StringInformation) -> Cow<'token, str> {
let preferred_quote = string_information.preferred_quote;
let polished_raw_content = self.normalize_string(&string_information);

match polished_raw_content {
Cow::Borrowed(raw_content) => {
let final_content = self.swap_quotes(raw_content, &string_information);
match final_content {
Cow::Borrowed(final_content) => Cow::Borrowed(final_content),
Cow::Owned(final_content) => Cow::Owned(final_content),
}
}
Cow::Owned(s) => {
Cow::Borrowed(raw_content) => self.swap_quotes(raw_content, &string_information),
Cow::Owned(mut s) => {
// content is owned, meaning we allocated a new string,
// so we force replacing quotes, regardless
let final_content = std::format!(
"{}{}{}",
preferred_quote.as_char(),
s.as_str(),
preferred_quote.as_char()
);

Cow::Owned(final_content)
s.insert(0, preferred_quote.as_char());
s.push(preferred_quote.as_char());
Cow::Owned(s)
}
}
}
Expand All @@ -308,22 +260,18 @@ impl<'token> LiteralStringNormaliser<'token> {
content_to_use: &'token str,
string_information: &StringInformation,
) -> Cow<'token, str> {
let original_content = self.get_token().text_trimmed();
let preferred_quote = string_information.preferred_quote;

let raw_content_has_quotes = string_information.raw_content_has_quotes;
let preferred_quote = string_information.preferred_quote.as_char();
let original = self.get_token().text_trimmed();

if raw_content_has_quotes {
Cow::Borrowed(original_content)
} else if !original_content.starts_with(preferred_quote.as_char()) {
if original.starts_with(preferred_quote) {
Cow::Borrowed(original)
} else {
Cow::Owned(std::format!(
"{}{}{}",
preferred_quote.as_char(),
preferred_quote,
content_to_use,
preferred_quote.as_char()
preferred_quote,
))
} else {
Cow::Borrowed(original_content)
}
}
}
17 changes: 1 addition & 16 deletions crates/biome_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,22 +496,7 @@ impl QuoteStyle {
}
}

pub fn as_string(&self) -> &str {
match self {
QuoteStyle::Double => "\"",
QuoteStyle::Single => "'",
}
}

/// Returns the quote, prepended with a backslash (escaped)
pub fn as_escaped(&self) -> &str {
match self {
QuoteStyle::Double => "\\\"",
QuoteStyle::Single => "\\'",
}
}

pub fn as_bytes(&self) -> u8 {
pub fn as_byte(&self) -> u8 {
self.as_char() as u8
}

Expand Down
Loading
Loading