Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect syntax kinds directly from DSL v2 and isolate parser generation logic #991

Merged
merged 20 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
0d1fda3
Bundle codegen-related traits for grammar in a dedicated module
Xanewok May 23, 2024
645b05f
refactor: Remove unused GrammarVisitor methods
Xanewok May 24, 2024
2cb87c4
refactor: Simplify existing collecting of referenced_versions
Xanewok May 24, 2024
df5e187
chore: Update the issue for the keyword ident scanning
Xanewok May 24, 2024
b619a0e
Update a FIXME issue reference
Xanewok May 24, 2024
fbe2870
refactor: Keep quoting/codegen entirely in the codegen module
Xanewok May 24, 2024
6a7697b
Merge remote-tracking branch 'upstream/main' into no-grammar-visitor
Xanewok May 28, 2024
e88e071
refactor: Clean up a little bit to_precedence_expression_parser_code
Xanewok May 28, 2024
85b1535
refactor: Migrate `referenced_versions` from the grammar visitor
Xanewok May 28, 2024
51b0d03
refactor: Migrate `terminal_kinds` from the grammar visitor
Xanewok May 28, 2024
5e15306
refactor: Migrate `nonterminal_kinds` from the grammar visitor
Xanewok May 28, 2024
cf85366
refactor: Migrate `trivia_scanner_names` from the grammar visitor
Xanewok May 28, 2024
c935ce6
refactor: Migrate `labels` from the grammar visitor
Xanewok May 28, 2024
0d144df
Revert "refactor: Remove unused GrammarVisitor methods"
Xanewok May 29, 2024
6ec9ec0
refactor: Separate a dedicated model for generated kinds.rs
Xanewok May 29, 2024
2d8b7dd
Revert "refactor: Migrate `referenced_versions` from the grammar visi…
Xanewok May 29, 2024
44b4ac5
Apply feedback
Xanewok Jun 3, 2024
8e497a9
Apply feedback
Xanewok Jun 3, 2024
fe9c6b7
Use a newly opened issue for the wasteful allocation in keyword promo…
Xanewok Jun 4, 2024
dd934b0
Revert the helper changes in ast.rs
Xanewok Jun 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions crates/codegen/language/definition/src/model/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,24 @@ impl Item {
Item::Fragment { item } => &item.name,
}
}

/// Whether the language item corresponds to a dedicated terminal kind.
pub fn is_terminal(&self) -> bool {
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
// NOTE: `Item::Fragment` is inlined.
matches!(
self,
Item::Trivia { .. } | Item::Keyword { .. } | Item::Token { .. }
)
}

/// Whether the language item is a nonterminal, i.e. a struct, enum, repeated,
/// separated or precedence item.
pub fn is_nonterminal(&self) -> bool {
    match self {
        Item::Struct { .. }
        | Item::Enum { .. }
        | Item::Repeated { .. }
        | Item::Separated { .. }
        | Item::Precedence { .. } => true,
        Item::Trivia { .. }
        | Item::Keyword { .. }
        | Item::Token { .. }
        | Item::Fragment { .. } => false,
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,16 @@ impl VersionSpecifier {
VersionSpecifier::Range { from, till } => from <= version && version < till,
}
}

/// Returns an iterator over the versions specified as the upper and lower bound.
pub fn versions(&self) -> impl Iterator<Item = &Version> {
match self {
VersionSpecifier::Never => [None, None],
VersionSpecifier::From { from } => [Some(from), None],
VersionSpecifier::Till { till } => [None, Some(till)],
VersionSpecifier::Range { from, till } => [Some(from), Some(till)],
}
.into_iter()
.flatten()
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
}
}
12 changes: 6 additions & 6 deletions crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub enum NonterminalKind {
Stub2,
Stub3,
{%- else -%}
{%- for variant in model.parser.nonterminal_kinds -%}
{%- for variant in model.kinds.nonterminal_kinds -%}
{# variant.documentation | indent(prefix = "/// ", first = true, blank = true) #}
{{ variant }},
{%- endfor -%}
Expand Down Expand Up @@ -65,7 +65,7 @@ pub enum EdgeLabel {
Stub2,
Stub3,
{%- else -%}
{% for variant in model.parser.labels -%}
{% for variant in model.kinds.labels -%}
{{ variant | pascal_case }},
{%- endfor -%}
{%- endif -%}
Expand Down Expand Up @@ -98,7 +98,7 @@ pub enum TerminalKind {
Stub2,
Stub3,
{%- else -%}
{%- for variant in model.parser.terminal_kinds -%}
{%- for variant in model.kinds.terminal_kinds -%}
{# variant.documentation | indent(prefix = "/// ", first = true, blank = true) #}
{{ variant }},
{%- endfor -%}
Expand All @@ -112,7 +112,7 @@ impl metaslang_cst::TerminalKind for TerminalKind {
{%- else -%}
matches!(
self,
{%- for variant in model.parser.trivia_scanner_names -%}
{%- for variant in model.kinds.trivia_scanner_names -%}
| Self::{{ variant }}
{%- endfor -%}
)
Expand All @@ -128,7 +128,7 @@ pub(crate) enum LexicalContext {
Stub2,
Stub3,
{%- else -%}
{%- for context_name, _ in model.parser.scanner_contexts %}
{%- for context_name in model.kinds.lexical_contexts %}
{{ context_name }},
{%- endfor %}
{%- endif -%}
Expand All @@ -143,7 +143,7 @@ pub(crate) trait IsLexicalContext {
#[allow(non_snake_case)]
pub(crate) mod LexicalContextType {
{%- if not rendering_in_stubs -%}
{%- for context_name, _ in model.parser.scanner_contexts %}
{%- for context_name in model.kinds.lexical_contexts %}
pub struct {{ context_name }};

impl super::IsLexicalContext for {{ context_name }} {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl Lexer for Language {
if kw_scan == KeywordScan::Absent {
input.set_position(save);

// TODO(#638): Don't allocate a string here
// TODO(#723): Don't allocate a string here
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
let ident_value = input.content(save.utf8..furthest_position.utf8);

for keyword_compound_scanner in [
Expand Down
38 changes: 9 additions & 29 deletions crates/codegen/runtime/generator/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,43 +58,23 @@ pub struct Separated {

impl AstModel {
pub fn create(language: &model::Language) -> Self {
let mut model = Self::default();

// First pass: collect all terminals:
model.collect_terminals(language);
let mut model = Self {
terminals: language
.items()
.filter(|item| item.is_terminal())
.map(|item| item.name())
.cloned()
.collect(),
..Self::default()
};

// Second pass: use them to build nonterminals:
model.collect_nonterminals(language);

model
}

/// Records the name of every terminal item of `language` in `self.terminals`.
///
/// Trivia, keywords and tokens are recorded; every other kind of item is skipped.
fn collect_terminals(&mut self, language: &model::Language) {
    let terminal_names = language.items().filter_map(|entry| match entry {
        model::Item::Trivia { item } => Some(item.name.clone()),
        model::Item::Keyword { item } => Some(item.name.clone()),
        model::Item::Token { item } => Some(item.name.clone()),
        // These items are nonterminals.
        model::Item::Struct { .. }
        | model::Item::Enum { .. }
        | model::Item::Repeated { .. }
        | model::Item::Separated { .. }
        | model::Item::Precedence { .. } => None,
        // These items are inlined.
        model::Item::Fragment { .. } => None,
    });

    self.terminals.extend(terminal_names);
}

fn collect_nonterminals(&mut self, language: &model::Language) {
for item in language.items() {
match item {
Expand Down
98 changes: 98 additions & 0 deletions crates/codegen/runtime/generator/src/kinds.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use std::collections::BTreeSet;

use codegen_language_definition::model::{self, Identifier, Item};
use serde::Serialize;

#[derive(Default, Serialize)]
pub struct KindsModel {
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
/// Defines the `NonterminalKind` enum variants.
nonterminal_kinds: BTreeSet<Identifier>,
/// Defines the `TerminalKind` enum variants.
terminal_kinds: BTreeSet<Identifier>,
/// Defines `TerminalKind::is_trivia` method.
trivia_scanner_names: BTreeSet<Identifier>,
/// Defines `EdgeLabel` enum variants.
labels: BTreeSet<Identifier>,
// Defines the `LexicalContext(Type)` enum and type-level variants.
lexical_contexts: BTreeSet<Identifier>,
}

impl KindsModel {
pub fn create(language: &model::Language) -> Self {
let terminal_kinds = language
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
.items()
.filter(|item| item.is_terminal() && !matches!(item, Item::Fragment { .. }))
.map(|item| item.name().clone())
.collect();

let mut nonterminal_kinds = BTreeSet::default();
for item in language.items() {
match item {
Item::Struct { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Enum { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Repeated { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Separated { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Precedence { item } => {
nonterminal_kinds.insert(item.name.clone());
for op in &item.precedence_expressions {
nonterminal_kinds.insert(op.name.clone());
}
}
// Terminals
_ => {}
}
}

let trivia_scanner_names = language
.items()
.filter_map(|item| match item {
Item::Trivia { item } => Some(item.name.clone()),
_ => None,
})
.collect();

let mut labels = BTreeSet::default();
for item in language.items() {
match item {
Item::Struct { item } => {
for field_name in item.fields.keys() {
labels.insert(field_name.clone());
}
}
Item::Precedence { item } => {
for item in &item.precedence_expressions {
for item in &item.operators {
for field_name in item.fields.keys() {
labels.insert(field_name.clone());
}
}
}
}
_ => {}
}
}

let lexical_contexts: BTreeSet<_> = language
.topics()
.filter_map(|t| t.lexical_context.as_ref())
.cloned()
.chain(std::iter::once(Identifier::from("Default")))
.collect();

KindsModel {
nonterminal_kinds,
terminal_kinds,
trivia_scanner_names,
labels,
lexical_contexts,
}
}
}
1 change: 1 addition & 0 deletions crates/codegen/runtime/generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use serde::Serialize;
use crate::model::RuntimeModel;

mod ast;
mod kinds;
mod model;
mod parser;

Expand Down
3 changes: 3 additions & 0 deletions crates/codegen/runtime/generator/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use semver::Version;
use serde::Serialize;

use crate::ast::AstModel;
use crate::kinds::KindsModel;
use crate::parser::ParserModel;

#[derive(Default, Serialize)]
Expand All @@ -14,6 +15,7 @@ pub struct RuntimeModel {
all_versions: BTreeSet<Version>,
parser: ParserModel,
ast: AstModel,
kinds: KindsModel,
}

impl RuntimeModel {
Expand All @@ -22,6 +24,7 @@ impl RuntimeModel {
all_versions: language.versions.iter().cloned().collect(),
ast: AstModel::create(language),
parser: ParserModel::from_language(language),
kinds: KindsModel::create(language),
}
}
}
Loading