Skip to content

Commit

Permalink
refactor(parser/html): refactor comments to be nodes in the tree (#4056)
Browse files Browse the repository at this point in the history
  • Loading branch information
dyc3 authored Sep 24, 2024
1 parent b357094 commit ecf1f16
Show file tree
Hide file tree
Showing 20 changed files with 359 additions and 31 deletions.
14 changes: 14 additions & 0 deletions crates/biome_html_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions crates/biome_html_factory/src/generated/syntax_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions crates/biome_html_formatter/src/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,46 @@ impl IntoFormat<HtmlFormatContext> for biome_html_syntax::HtmlClosingElement {
)
}
}
// Registers `FormatHtmlComment` as the formatting rule for `HtmlComment` nodes.
impl FormatRule<biome_html_syntax::HtmlComment>
for crate::html::auxiliary::comment::FormatHtmlComment
{
type Context = HtmlFormatContext;
#[inline(always)]
fn fmt(
&self,
node: &biome_html_syntax::HtmlComment,
f: &mut HtmlFormatter,
) -> FormatResult<()> {
// Forward to the `FormatNodeRule` implementation provided for this node type.
FormatNodeRule::<biome_html_syntax::HtmlComment>::fmt(self, node, f)
}
}
// Lets a borrowed `HtmlComment` be formatted: pairs `&self` with a default-constructed
// `FormatHtmlComment` rule.
impl AsFormat<HtmlFormatContext> for biome_html_syntax::HtmlComment {
type Format<'a> = FormatRefWithRule<
'a,
biome_html_syntax::HtmlComment,
crate::html::auxiliary::comment::FormatHtmlComment,
>;
fn format(&self) -> Self::Format<'_> {
// The rule is a unit struct; `::default()` is kept (and the lint silenced) here.
#![allow(clippy::default_constructed_unit_structs)]
FormatRefWithRule::new(
self,
crate::html::auxiliary::comment::FormatHtmlComment::default(),
)
}
}
// Lets an owned `HtmlComment` be formatted: moves `self` into a wrapper together with a
// default-constructed `FormatHtmlComment` rule.
impl IntoFormat<HtmlFormatContext> for biome_html_syntax::HtmlComment {
type Format = FormatOwnedWithRule<
biome_html_syntax::HtmlComment,
crate::html::auxiliary::comment::FormatHtmlComment,
>;
fn into_format(self) -> Self::Format {
// The rule is a unit struct; `::default()` is kept (and the lint silenced) here.
#![allow(clippy::default_constructed_unit_structs)]
FormatOwnedWithRule::new(
self,
crate::html::auxiliary::comment::FormatHtmlComment::default(),
)
}
}
impl FormatRule<biome_html_syntax::HtmlContent>
for crate::html::auxiliary::content::FormatHtmlContent
{
Expand Down
1 change: 1 addition & 0 deletions crates/biome_html_formatter/src/html/any/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ impl FormatRule<AnyHtmlElement> for FormatAnyHtmlElement {
fn fmt(&self, node: &AnyHtmlElement, f: &mut HtmlFormatter) -> FormatResult<()> {
match node {
AnyHtmlElement::HtmlBogusElement(node) => node.format().fmt(f),
AnyHtmlElement::HtmlComment(node) => node.format().fmt(f),
AnyHtmlElement::HtmlContent(node) => node.format().fmt(f),
AnyHtmlElement::HtmlElement(node) => node.format().fmt(f),
AnyHtmlElement::HtmlSelfClosingElement(node) => node.format().fmt(f),
Expand Down
10 changes: 10 additions & 0 deletions crates/biome_html_formatter/src/html/auxiliary/comment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use crate::prelude::*;
use biome_html_syntax::HtmlComment;
use biome_rowan::AstNode;
/// Formatting rule for [HtmlComment] nodes.
#[derive(Debug, Clone, Default)]
pub(crate) struct FormatHtmlComment;

impl FormatNodeRule<HtmlComment> for FormatHtmlComment {
    /// Formats the comment by handing its whole syntax node to `format_verbatim_node`,
    /// so no field of the comment is formatted individually here.
    fn fmt_fields(&self, node: &HtmlComment, f: &mut HtmlFormatter) -> FormatResult<()> {
        let verbatim = format_verbatim_node(node.syntax());
        verbatim.fmt(f)
    }
}
1 change: 1 addition & 0 deletions crates/biome_html_formatter/src/html/auxiliary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub(crate) mod attribute;
pub(crate) mod attribute_initializer_clause;
pub(crate) mod closing_element;
pub(crate) mod comment;
pub(crate) mod content;
pub(crate) mod directive;
pub(crate) mod element;
Expand Down
55 changes: 36 additions & 19 deletions crates/biome_html_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ mod tests;

use crate::token_source::{HtmlEmbededLanguage, HtmlLexContext};
use biome_html_syntax::HtmlSyntaxKind::{
COMMENT, DOCTYPE_KW, EOF, ERROR_TOKEN, HTML_KW, HTML_LITERAL, HTML_STRING_LITERAL, NEWLINE,
TOMBSTONE, UNICODE_BOM, WHITESPACE,
DOCTYPE_KW, EOF, ERROR_TOKEN, HTML_KW, HTML_LITERAL, HTML_STRING_LITERAL, NEWLINE, TOMBSTONE,
UNICODE_BOM, WHITESPACE,
};
use biome_html_syntax::{HtmlSyntaxKind, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
Expand Down Expand Up @@ -137,6 +137,24 @@ impl<'src> HtmlLexer<'src> {
}
}

/// Lexes one token while in the [HtmlLexContext::Comment] context.
///
/// - `<!--` at the current position yields the comment-start token.
/// - `-->` at the current position yields the comment-end token.
/// - Anything else is consumed as a single `HTML_LITERAL` that runs up to (but does not
///   include) the next `-->`, or to the end of input when no `-->` follows.
fn consume_inside_comment(&mut self, current: u8) -> HtmlSyntaxKind {
    if current == b'<' && self.at_start_comment() {
        return self.consume_comment_start();
    }
    if current == b'-' && self.at_end_comment() {
        return self.consume_comment_end();
    }

    while let Some(byte) = self.current_byte() {
        // Stop right before `-->`; the terminator is lexed as its own token on the
        // next call, it is not consumed here.
        if self.at_end_comment() {
            break;
        }
        self.advance_byte_or_char(byte);
    }
    HTML_LITERAL
}

/// Bumps the current byte and creates a lexed token of the passed in kind.
#[inline]
fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind {
Expand Down Expand Up @@ -309,28 +327,12 @@ impl<'src> HtmlLexer<'src> {
self.assert_byte(b'<');

if self.at_start_comment() {
self.consume_comment()
self.consume_comment_start()
} else {
self.consume_byte(T![<])
}
}

/// Lexes an entire `<!-- ... -->` comment as one `COMMENT` token.
///
/// Must be called with the lexer positioned at `<!--`. If the input ends before a
/// closing `-->` is found, everything up to the end of input is still returned as
/// `COMMENT`.
fn consume_comment(&mut self) -> HtmlSyntaxKind {
    // Skip the opening `<!--`.
    self.advance(4);

    loop {
        match self.current_byte() {
            // Found the terminator: consume `-->` and finish the token.
            Some(_) if self.at_end_comment() => {
                self.advance(3);
                break;
            }
            Some(byte) => {
                self.advance_byte_or_char(byte);
            }
            // Unterminated comment: stop at end of input.
            None => break,
        }
    }

    COMMENT
}

fn at_start_comment(&mut self) -> bool {
self.current_byte() == Some(b'<')
&& self.byte_at(1) == Some(b'!')
Expand All @@ -344,6 +346,20 @@ impl<'src> HtmlLexer<'src> {
&& self.byte_at(2) == Some(b'>')
}

/// Consumes the comment opener `<!--` and returns its token kind.
///
/// The lexer must be positioned at `<!--`; this is checked in debug builds only.
fn consume_comment_start(&mut self) -> HtmlSyntaxKind {
    debug_assert!(self.at_start_comment());

    // Step over the whole opener (4 bytes) at once.
    self.advance("<!--".len());
    T![<!--]
}

/// Consumes the comment terminator `-->` and returns its token kind.
///
/// The lexer must be positioned at `-->`; this is checked in debug builds only.
fn consume_comment_end(&mut self) -> HtmlSyntaxKind {
    debug_assert!(self.at_end_comment());

    // Step over the whole terminator (3 bytes) at once.
    self.advance("-->".len());
    T![-->]
}

/// Lexes a `\u0000` escape sequence. Assumes that the lexer is positioned at the `u` token.
///
/// A unicode escape sequence must consist of 4 hex characters.
Expand Down Expand Up @@ -462,6 +478,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> {
HtmlLexContext::EmbeddedLanguage(lang) => {
self.consume_token_embedded_language(current, lang)
}
HtmlLexContext::Comment => self.consume_inside_comment(current),
},
None => EOF,
}
Expand Down
28 changes: 28 additions & 0 deletions crates/biome_html_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,31 @@ fn unquoted_attribute_value_invalid_chars() {
ERROR_TOKEN: 3,
}
}

// Lexing a bare `<!--` (no explicit lex context given to the macro) is expected to yield a
// single `COMMENT_START` entry with value 4 — presumably the token's byte length; confirm
// against the `assert_lex!` definition.
#[test]
fn comment_start() {
assert_lex! {
"<!--",
COMMENT_START: 4,
}
}

// With `HtmlLexContext::Comment` passed to the macro, `-->` is expected to yield a single
// `COMMENT_END` entry with value 3 (presumably the byte length — confirm against `assert_lex!`).
#[test]
fn comment_end() {
assert_lex! {
HtmlLexContext::Comment,
"-->",
COMMENT_END: 3,
}
}

// A full comment lexed with `HtmlLexContext::Comment`: the expected entries are the opener
// `<!--` (4), the body ` foo ` as an `HTML_LITERAL` (5), and the terminator `-->` (3),
// which together cover all 12 bytes of the input.
#[test]
fn comment_full() {
assert_lex! {
HtmlLexContext::Comment,
"<!-- foo -->",
COMMENT_START: 4,
HTML_LITERAL: 5,
COMMENT_END: 3,
}
}
12 changes: 12 additions & 0 deletions crates/biome_html_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ impl ParseNodeList for ElementList {

fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax {
match p.cur() {
T![<!--] => parse_comment(p),
T![<] => parse_element(p),
HTML_LITERAL => {
let m = p.start();
Expand Down Expand Up @@ -263,3 +264,14 @@ fn parse_attribute_initializer(p: &mut HtmlParser) -> ParsedSyntax {
parse_attribute_string_literal(p).or_add_diagnostic(p, expected_initializer);
Present(m.complete(p, HTML_ATTRIBUTE_INITIALIZER_CLAUSE))
}

/// Parses an HTML comment (`<!-- ... -->`) into an `HTML_COMMENT` node.
///
/// Returns `Absent` without consuming anything when the parser is not positioned at
/// `<!--`. Otherwise the opener, the optional text body and the terminator are consumed
/// and `Present` is returned.
///
/// The body is treated as optional so that malformed or empty comments do not crash the
/// parser: `<!---->` has no `HTML_LITERAL` between the delimiters, and an unterminated
/// `<!--` is followed directly by the end of input.
fn parse_comment(p: &mut HtmlParser) -> ParsedSyntax {
    if !p.at(T![<!--]) {
        return Absent;
    }
    let m = p.start();
    // Lex what follows the opener in the comment context.
    p.bump_with_context(T![<!--], HtmlLexContext::Comment);
    // `bump_with_context` asserts that the current token has the requested kind, so only
    // bump the body when it is actually there.
    if p.at(HTML_LITERAL) {
        p.bump_with_context(HTML_LITERAL, HtmlLexContext::Comment);
    }
    // A missing `-->` is handled by `expect` rather than by an assertion.
    p.expect(T![-->]);
    Present(m.complete(p, HTML_COMMENT))
}
2 changes: 2 additions & 0 deletions crates/biome_html_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ pub(crate) enum HtmlLexContext {
Doctype,
/// Treat everything as text until the closing tag is encountered.
EmbeddedLanguage(HtmlEmbededLanguage),
/// Comments are treated as text until the closing comment tag is encountered.
Comment,
}

#[derive(Copy, Clone, Debug)]
Expand Down
18 changes: 14 additions & 4 deletions crates/biome_html_parser/tests/html_specs/ok/comment.html.snap
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ expression: snapshot
HtmlRoot {
bom_token: missing (optional),
directive: missing (optional),
html: HtmlElementList [],
eof_token: EOF@0..21 "" [Comments("<!-- Hello World -->"), Newline("\n")] [],
html: HtmlElementList [
HtmlComment {
comment_start_token: COMMENT_START@0..4 "<!--" [] [],
content_token: HTML_LITERAL@4..17 " Hello World " [] [],
comment_end_token: COMMENT_END@17..20 "-->" [] [],
},
],
eof_token: EOF@20..21 "" [Newline("\n")] [],
}
```

Expand All @@ -27,7 +33,11 @@ HtmlRoot {
0: HTML_ROOT@0..21
0: (empty)
1: (empty)
2: HTML_ELEMENT_LIST@0..0
3: EOF@0..21 "" [Comments("<!-- Hello World -->"), Newline("\n")] []
2: HTML_ELEMENT_LIST@0..20
0: HTML_COMMENT@0..20
0: COMMENT_START@0..4 "<!--" [] []
1: HTML_LITERAL@4..17 " Hello World " [] []
2: COMMENT_END@17..20 "-->" [] []
3: EOF@20..21 "" [Newline("\n")] []
```
10 changes: 7 additions & 3 deletions crates/biome_html_syntax/src/generated/kind.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions crates/biome_html_syntax/src/generated/macros.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ecf1f16

Please sign in to comment.