diff --git a/pest/src/parser_state.rs b/pest/src/parser_state.rs index 0d3e8ffb..e1b3ad29 100644 --- a/pest/src/parser_state.rs +++ b/pest/src/parser_state.rs @@ -26,10 +26,11 @@ pub enum Lookahead { pub struct ParserState { queue: Vec>, lookahead: Lookahead, - is_atomic: bool, pos_attempts: Vec, neg_attempts: Vec, attempt_pos: usize, + /// Specifies whether the current state is atomic + pub is_atomic: bool, /// Stack of `Span`s pub stack: Vec> } diff --git a/pest_derive/Cargo.toml b/pest_derive/Cargo.toml index 523bfa26..98062a91 100644 --- a/pest_derive/Cargo.toml +++ b/pest_derive/Cargo.toml @@ -17,7 +17,7 @@ proc-macro = true [dependencies] quote = "^0.3" syn = "^0.10" -pest = { path = "../pest" } +pest = { git = "https://github.com/pest-parser/pest", branch = "procedural" } [badges] travis-ci = { repository = "pest-parser/pest" } diff --git a/pest_derive/src/generator.rs b/pest_derive/src/generator.rs index 945c7474..8bf74375 100644 --- a/pest_derive/src/generator.rs +++ b/pest_derive/src/generator.rs @@ -44,7 +44,8 @@ pub fn generate(name: Ident, rules: Vec, defaults: Vec) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - let string = state.stack.pop().expect("pop was called on empty stack").capture(); + let span = state.stack.pop().expect("pop was called on empty stack"); + let string = span.capture(); pos.match_string(string) } @@ -156,8 +157,8 @@ fn generate_rule(rule: Rule) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - state.atomic(true, move |state| { - state.rule(Rule::#name, pos, |state, pos| { + state.rule(Rule::#name, pos, |state, pos| { + state.atomic(true, move |state| { #expr }) }) @@ -169,8 +170,8 @@ fn generate_rule(rule: Rule) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - state.atomic(false, move |state| { - state.rule(Rule::#name, pos, |state, pos| { + state.rule(Rule::#name, pos, |state, pos| { + state.atomic(false, move |state| { #expr }) }) @@ -197,9 +198,13 @@ fn generate_skip(rules: &Vec) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - pos.repeat(|pos| { - whitespace(pos, state) - }) + if !state.is_atomic { + pos.repeat(|pos| { + whitespace(pos, state) + }) + } else { + Ok(pos) + } } }, (false, true) => quote! { @@ -207,9 +212,13 @@ fn generate_skip(rules: &Vec) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - pos.repeat(|pos| { - comment(pos, state) - }) + if !state.is_atomic { + pos.repeat(|pos| { + comment(pos, state) + }) + } else { + Ok(pos) + } } }, (true, true) => quote! { @@ -217,22 +226,24 @@ fn generate_skip(rules: &Vec) -> Tokens { pos: pest::inputs::Position, state: &mut pest::ParserState ) -> Result, pest::inputs::Position> { - state.sequence(move |state| { - pos.sequence(|pos| { - pos.repeat(|pos| { - whitespace(pos, state) - }).and_then(|pos| { + if !state.is_atomic { + state.sequence(move |state| { + pos.sequence(|pos| { pos.repeat(|pos| { - state.sequence(move |state| { - pos.sequence(|pos| { - pos.optional(|pos| { - comment(pos, state) - }).and_then(|pos| { - state.sequence(move |state| { - pos.sequence(|pos| { - whitespace(pos, state).and_then(|pos| { - pos.repeat(|pos| { - whitespace(pos, state) + whitespace(pos, state) + }).and_then(|pos| { + pos.repeat(|pos| { + state.sequence(move |state| { + pos.sequence(|pos| { + pos.optional(|pos| { + comment(pos, state) + }).and_then(|pos| { + state.sequence(move |state| { + pos.sequence(|pos| { + whitespace(pos, state).and_then(|pos| { + pos.repeat(|pos| { + whitespace(pos, state) + }) }) }) }) @@ -243,7 +254,9 @@ fn generate_skip(rules: &Vec) -> Tokens { }) }) }) - }) + } else { + Ok(pos) + } } } } @@ -477,7 +490,10 @@ fn generate_expr_atomic(expr: Expr) -> Tokens { let start = pos.clone(); match #expr { - Ok(end) => pos.match_string(start.span(end).capture()), + Ok(end) => { + state.stack.push(start.span(end.clone())); + Ok(end) + } Err(pos) => Err(pos) } } diff --git a/pest_derive/src/lib.rs b/pest_derive/src/lib.rs index e8aaa682..08838104 100644 --- a/pest_derive/src/lib.rs +++ b/pest_derive/src/lib.rs @@ -15,6 +15,7 @@ extern crate proc_macro; extern crate quote; extern crate syn; +use std::env; use std::path::Path; use std::rc::Rc; @@ -38,7 +39,7 @@ pub fn derive_parser(input: TokenStream) -> TokenStream { let (name, path) = parse_derive(source); - let root = std::env::var("CARGO_MANIFEST_DIR").unwrap_or(".".into()); + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or(".".into()); let path = Path::new(&root).join("src/").join(&path); let file_name = match path.file_name() { Some(file_name) => file_name, diff --git a/pest_derive/src/parser.rs b/pest_derive/src/parser.rs index c8cca8fb..1438975f 100644 --- a/pest_derive/src/parser.rs +++ b/pest_derive/src/parser.rs @@ -847,18 +847,17 @@ fn consume_expr( GrammarRule::insensitive_string => { let span = pair.into_span(); let string = span.capture(); - Expr::Insens(string[1..string.len() - 1].to_owned()) + Expr::Insens(string[2..string.len() - 1].to_owned()) } GrammarRule::range => { + let mut pairs = pair.into_inner(); let span = pairs.next().unwrap().into_span(); let start = span.capture(); + pairs.next(); let span = pairs.next().unwrap().into_span(); let end = span.capture(); - Expr::Range( - start[1..start.len() - 1].to_owned(), - end[1..end.len() - 1].to_owned() - ) + Expr::Range(start.to_owned(), end.to_owned()) } _ => unreachable!() }; diff --git a/pest_derive/src/validator.rs b/pest_derive/src/validator.rs index 5381d608..f10a38ba 100644 --- a/pest_derive/src/validator.rs +++ b/pest_derive/src/validator.rs @@ -79,6 +79,13 @@ pub fn validate_pairs(pairs: Pairs) -> Vec { pest_keywords.insert("push"); pest_keywords.insert("soi"); + let mut predefined = HashSet::new(); + predefined.insert("any"); + predefined.insert("eoi"); + predefined.insert("peek"); + predefined.insert("pop"); + predefined.insert("soi"); + let definitions: Vec<_> = pairs.clone() .filter(|pair| pair.as_rule() == GrammarRule::grammar_rule) .map(|pair| { @@ -90,13 +97,7 @@ pub fn validate_pairs(pairs: Pairs) -> Vec { let called_rules: Vec<_> = pairs.clone() .filter(|pair| pair.as_rule() == GrammarRule::grammar_rule) .flat_map(|pair| { - let expr = pair.into_inner() - .skip(4) - .next() - .unwrap() - .into_inner(); - - expr.flatten().filter(|pair| { + pair.into_inner().flatten().skip(1).filter(|pair| { pair.as_rule() == GrammarRule::identifier }).map(|pair| { pair.into_span() @@ -108,8 +109,7 @@ pub fn validate_pairs(pairs: Pairs) -> Vec { errors.extend(validate_rust_keywords(&definitions, &rust_keywords)); errors.extend(validate_pest_keywords(&definitions, &pest_keywords)); errors.extend(validate_already_defined(&definitions)); - // TODO: Add the actual set of predefined rules. - errors.extend(validate_undefined(&definitions, &called_rules, &HashSet::new())); + errors.extend(validate_undefined(&definitions, &called_rules, &predefined)); let errors = errors.into_iter().map(|error| { format!("grammar error\n\n{}", error) diff --git a/pest_derive/tests/grammar.pest b/pest_derive/tests/grammar.pest new file mode 100644 index 00000000..d592e50f --- /dev/null +++ b/pest_derive/tests/grammar.pest @@ -0,0 +1,17 @@ +string = { "abc" } +insensitive = { ^"abc" } +range = { '0'..'9' } +ident = { string } +pos_pred = { &string } +neg_pred = { !string } +sequence = !@{ string ~ string } +sequence_atomic = @{ string ~ string } +choice = { string | range } +optional = { string? } +repeat = { string* } +repeat_atomic = @{ string* } +repeat_once = { string+ } +repeat_once_atomic = @{ string+ } +peek_ = { push(range) ~ push(range) ~ peek ~ peek } +pop_ = { push(range) ~ push(range) ~ pop ~ pop } +whitespace = _{ " " } diff --git a/pest_derive/tests/grammar.rs b/pest_derive/tests/grammar.rs new file mode 100644 index 00000000..6f2d8325 --- /dev/null +++ b/pest_derive/tests/grammar.rs @@ -0,0 +1,329 @@ +#[macro_use] +extern crate pest; +#[macro_use] +extern crate pest_derive; + +use pest::Parser; + +#[derive(Parser)] +#[grammar = "../tests/grammar.pest"] +struct GrammarParser; + +#[test] +fn string() { + parses_to! { + parser: GrammarParser, + input: "abc", + rule: Rule::string, + tokens: [ + string(0, 3) + ] + }; +} + +#[test] +fn insensitive() { + parses_to! { + parser: GrammarParser, + input: "aBC", + rule: Rule::insensitive, + tokens: [ + insensitive(0, 3) + ] + }; +} + +#[test] +fn range() { + parses_to! { + parser: GrammarParser, + input: "6", + rule: Rule::range, + tokens: [ + range(0, 1) + ] + }; +} + +#[test] +fn ident() { + parses_to! { + parser: GrammarParser, + input: "abc", + rule: Rule::ident, + tokens: [ + ident(0, 3, [ + string(0, 3) + ]) + ] + }; +} + +#[test] +fn pos_pred() { + parses_to! { + parser: GrammarParser, + input: "abc", + rule: Rule::pos_pred, + tokens: [ + pos_pred(0, 0) + ] + }; +} + +#[test] +fn neg_pred() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::neg_pred, + tokens: [ + neg_pred(0, 0) + ] + }; +} + +#[test] +fn sequence() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::sequence, + tokens: [ + sequence(0, 9, [ + string(0, 3), + string(6, 9) + ]) + ] + }; +} + +#[test] +fn sequence_atomic() { + parses_to! { + parser: GrammarParser, + input: "abcabc", + rule: Rule::sequence_atomic, + tokens: [ + sequence_atomic(0, 6) + ] + }; +} + +#[test] +#[should_panic] +fn sequence_atomic_space() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::sequence_atomic, + tokens: [] + }; +} + +#[test] +fn choice_string() { + parses_to! { + parser: GrammarParser, + input: "abc", + rule: Rule::choice, + tokens: [ + choice(0, 3, [ + string(0, 3) + ]) + ] + }; +} + +#[test] +fn choice_range() { + parses_to! { + parser: GrammarParser, + input: "0", + rule: Rule::choice, + tokens: [ + choice(0, 1, [ + range(0, 1) + ]) + ] + }; +} + +#[test] +fn optional_string() { + parses_to! { + parser: GrammarParser, + input: "abc", + rule: Rule::optional, + tokens: [ + optional(0, 3, [ + string(0, 3) + ]) + ] + }; +} + +#[test] +fn optional_empty() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::optional, + tokens: [ + optional(0, 0) + ] + }; +} + +#[test] +fn repeat_empty() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::repeat, + tokens: [ + repeat(0, 0) + ] + }; +} + +#[test] +fn repeat_strings() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::repeat, + tokens: [ + repeat(0, 9, [ + string(0, 3), + string(6, 9) + ]) + ] + }; +} + +#[test] +fn repeat_atomic_empty() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::repeat_atomic, + tokens: [ + repeat_atomic(0, 0) + ] + }; +} + +#[test] +fn repeat_atomic_strings() { + parses_to! { + parser: GrammarParser, + input: "abcabc", + rule: Rule::repeat_atomic, + tokens: [ + repeat_atomic(0, 6) + ] + }; +} + +#[test] +#[should_panic] +fn repeat_atomic_space() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::repeat_atomic, + tokens: [] + }; +} + + +#[test] +#[should_panic] +fn repeat_once_empty() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::repeat_once, + tokens: [] + }; +} + +#[test] +fn repeat_once_strings() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::repeat_once, + tokens: [ + repeat_once(0, 9, [ + string(0, 3), + string(6, 9) + ]) + ] + }; +} + +#[test] +#[should_panic] +fn repeat_once_atomic_empty() { + parses_to! { + parser: GrammarParser, + input: "", + rule: Rule::repeat_once_atomic, + tokens: [] + }; +} + +#[test] +fn repeat_once_atomic_strings() { + parses_to! { + parser: GrammarParser, + input: "abcabc", + rule: Rule::repeat_once_atomic, + tokens: [ + repeat_once_atomic(0, 6) + ] + }; +} + +#[test] +#[should_panic] +fn repeat_once_atomic_space() { + parses_to! { + parser: GrammarParser, + input: "abc abc", + rule: Rule::repeat_once_atomic, + tokens: [] + }; +} + +#[test] +fn peek() { + parses_to! { + parser: GrammarParser, + input: "0111", + rule: Rule::peek_, + tokens: [ + peek_(0, 4, [ + range(0, 1), + range(1, 2) + ]) + ] + }; +} + +#[test] +fn pop() { + parses_to! { + parser: GrammarParser, + input: "0110", + rule: Rule::pop_, + tokens: [ + pop_(0, 4, [ + range(0, 1), + range(1, 2) + ]) + ] + }; +}