Simple custom lexical precedence in PostgreSQL dialect (#1379)

apache · Aug 14, 2024 · f223530 · f223530
1 parent 6a11a67
commit f223530
Show file tree

Hide file tree

Showing 4 changed files with 137 additions and 212 deletions.
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
@@ -354,13 +354,18 @@ pub trait Dialect: Debug + Any {
  if let Some(precedence) = self.get_next_precedence(parser) {
  return precedence;
  }
+ macro_rules! p {
+ ($precedence:ident) => {
+ self.prec_value(Precedence::$precedence)
+ };
+ }
 
  let token = parser.peek_token();
  debug!("get_next_precedence_full() {:?}", token);
  match token.token {
- Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
- Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
- Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
+ Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
+ Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
+ Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
 
  Token::Word(w) if w.keyword == Keyword::AT => {
  match (
@@ -370,9 +375,9 @@ pub trait Dialect: Debug + Any {
  (Token::Word(w), Token::Word(w2))
  if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
  {
- Ok(AT_TZ_PREC)
+ Ok(p!(AtTz))
  }
- _ => Ok(UNKNOWN_PREC),
+ _ => Ok(self.prec_unknown()),
  }
  }
 
@@ -382,25 +387,25 @@ pub trait Dialect: Debug + Any {
  // it takes on the precedence of those tokens. Otherwise, it
  // is not an infix operator, and therefore has zero
  // precedence.
- Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
- Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
- Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
- _ => Ok(UNKNOWN_PREC),
+ Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
+ Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
+ Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
+ _ => Ok(self.prec_unknown()),
  },
- Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
- Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
- Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
- Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
- Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
- Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
+ Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
+ Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
+ Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
+ Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
+ Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
+ Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
  Token::Eq
  | Token::Lt
  | Token::LtEq
@@ -416,20 +421,19 @@ pub trait Dialect: Debug + Any {
  | Token::DoubleTildeAsterisk
  | Token::ExclamationMarkDoubleTilde
  | Token::ExclamationMarkDoubleTildeAsterisk
- | Token::Spaceship => Ok(EQ_PREC),
- Token::Pipe => Ok(PIPE_PREC),
- Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
- Token::Ampersand => Ok(AMPERSAND_PREC),
- Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
+ | Token::Spaceship => Ok(p!(Eq)),
+ Token::Pipe => Ok(p!(Pipe)),
+ Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
+ Token::Ampersand => Ok(p!(Ampersand)),
+ Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
  Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
- Ok(MUL_DIV_MOD_OP_PREC)
+ Ok(p!(MulDivModOp))
  }
  Token::DoubleColon
  | Token::ExclamationMark
  | Token::LBracket
  | Token::Overlap
- | Token::CaretAt => Ok(DOUBLE_COLON_PREC),
- // Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
+ | Token::CaretAt => Ok(p!(DoubleColon)),
  Token::Arrow
  | Token::LongArrow
  | Token::HashArrow
@@ -442,8 +446,8 @@ pub trait Dialect: Debug + Any {
  | Token::Question
  | Token::QuestionAnd
  | Token::QuestionPipe
- | Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
- _ => Ok(UNKNOWN_PREC),
+ | Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
+ _ => Ok(self.prec_unknown()),
  }
  }
 
@@ -457,88 +461,57 @@ pub trait Dialect: Debug + Any {
  None
  }
 
- // The following precedence values are used directly by `Parse` or in dialects,
- // so have to be made public by the dialect.
-
- /// Return the precedence of the `::` operator.
+ /// Return the numeric precedence of the operator class `prec` (higher number = higher precedence).
  ///
- /// Default is 50.
- fn prec_double_colon(&self) -> u8 {
- DOUBLE_COLON_PREC
- }
-
- /// Return the precedence of `*`, `/`, and `%` operators.
- ///
- /// Default is 40.
- fn prec_mul_div_mod_op(&self) -> u8 {
- MUL_DIV_MOD_OP_PREC
- }
-
- /// Return the precedence of the `+` and `-` operators.
- ///
- /// Default is 30.
- fn prec_plus_minus(&self) -> u8 {
- PLUS_MINUS_PREC
- }
-
- /// Return the precedence of the `BETWEEN` operator.
- ///
- /// For example `BETWEEN <low> AND <high>`
- ///
- /// Default is 22.
- fn prec_between(&self) -> u8 {
- BETWEEN_PREC
- }
-
- /// Return the precedence of the `LIKE` operator.
- ///
- /// Default is 19.
- fn prec_like(&self) -> u8 {
- LIKE_PREC
- }
-
- /// Return the precedence of the unary `NOT` operator.
- ///
- /// For example `NOT (a OR b)`
- ///
- /// Default is 15.
- fn prec_unary_not(&self) -> u8 {
- UNARY_NOT_PREC
+ /// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
+ fn prec_value(&self, prec: Precedence) -> u8 {
+ match prec { // default numbers; a dialect can override this method to supply its own
+ Precedence::DoubleColon => 50,
+ Precedence::AtTz => 41, // above PlusMinus: AT TIME ZONE has higher precedence than addition
+ Precedence::MulDivModOp => 40,
+ Precedence::PlusMinus => 30,
+ Precedence::Xor => 24,
+ Precedence::Ampersand => 23,
+ Precedence::Caret => 22,
+ Precedence::Pipe => 21,
+ Precedence::Between => 20, // same level as Eq
+ Precedence::Eq => 20,
+ Precedence::Like => 19,
+ Precedence::Is => 17,
+ Precedence::PgOther => 16,
+ Precedence::UnaryNot => 15,
+ Precedence::And => 10,
+ Precedence::Or => 5,
+ }
  }
 
- /// Return the default (unknown) precedence.
- ///
- /// Default is 0.
  fn prec_unknown(&self) -> u8 {
- UNKNOWN_PREC
+ 0 // precedence reported for a token that is not an infix operator
  }
 }
 
-// Define the lexical Precedence of operators.
-//
-// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
-// higher number = higher precedence
-//
-// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
-// actually has higher precedence than addition.
-// See <https://postgrespro.com/list/thread-id/2673331>.
-const DOUBLE_COLON_PREC: u8 = 50;
-const AT_TZ_PREC: u8 = 41;
-const MUL_DIV_MOD_OP_PREC: u8 = 40;
-const PLUS_MINUS_PREC: u8 = 30;
-const XOR_PREC: u8 = 24;
-const AMPERSAND_PREC: u8 = 23;
-const CARET_PREC: u8 = 22;
-const PIPE_PREC: u8 = 21;
-const BETWEEN_PREC: u8 = 20;
-const EQ_PREC: u8 = 20;
-const LIKE_PREC: u8 = 19;
-const IS_PREC: u8 = 17;
-const PG_OTHER_PREC: u8 = 16;
-const UNARY_NOT_PREC: u8 = 15;
-const AND_PREC: u8 = 10;
-const OR_PREC: u8 = 5;
-const UNKNOWN_PREC: u8 = 0;
+/// The operator classes for which a dialect must define a precedence.
+///
+/// Each class is turned into a number by `Dialect::prec_value`; higher number -> higher precedence.
+#[derive(Debug, Clone, Copy)]
+pub enum Precedence { // trailing notes: tokens/keywords the default lookup maps to each class
+ DoubleColon, // `Token::DoubleColon`, `ExclamationMark`, `LBracket`, `Overlap`, `CaretAt`
+ AtTz, // keyword `AT` followed by `TIME ZONE`
+ MulDivModOp, // `Token::Mul`, `Div`, `DuckIntDiv`, `Mod`, `StringConcat`; keyword `DIV`
+ PlusMinus, // `Token::Plus`, `Minus`
+ Xor, // keyword `XOR`
+ Ampersand, // `Token::Ampersand`
+ Caret, // `Token::Caret`, `Sharp`, `ShiftRight`, `ShiftLeft`
+ Pipe, // `Token::Pipe`
+ Between, // keywords `IN`, `BETWEEN`, `OPERATOR`; also `NOT IN` / `NOT BETWEEN`
+ Eq, // comparison/match tokens: `Token::Eq`, `Lt`, `LtEq`, ... `Spaceship`
+ Like, // keywords `LIKE`, `ILIKE`, `RLIKE`, `REGEXP`, `SIMILAR` (also after `NOT`)
+ Is, // keyword `IS`
+ PgOther, // `Token::Arrow`, `LongArrow`, `HashArrow`, ... `CustomBinaryOperator(_)`
+ UnaryNot, // the unary `NOT` operator, e.g. `NOT (a OR b)`
+ And, // keyword `AND`
+ Or, // keyword `OR`
+}
 
 impl dyn Dialect {
  #[inline]

diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
@@ -12,7 +12,7 @@
 use log::debug;
 
 use crate::ast::{CommentObject, Statement};
-use crate::dialect::Dialect;
+use crate::dialect::{Dialect, Precedence};
 use crate::keywords::Keyword;
 use crate::parser::{Parser, ParserError};
 use crate::tokenizer::Token;
@@ -89,71 +89,11 @@ impl Dialect for PostgreSqlDialect {
  let token = parser.peek_token();
  debug!("get_next_precedence() {:?}", token);
 
- let precedence = match token.token {
- Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
- Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
- Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
- Token::Word(w) if w.keyword == Keyword::AT => {
- match (
- parser.peek_nth_token(1).token,
- parser.peek_nth_token(2).token,
- ) {
- (Token::Word(w), Token::Word(w2))
- if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
- {
- AT_TZ_PREC
- }
- _ => self.prec_unknown(),
- }
- }
-
- Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
- // The precedence of NOT varies depending on keyword that
- // follows it. If it is followed by IN, BETWEEN, or LIKE,
- // it takes on the precedence of those tokens. Otherwise, it
- // is not an infix operator, and therefore has zero
- // precedence.
- Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
- _ => self.prec_unknown(),
- },
- Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
- Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
- Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
- Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
- Token::Eq
- | Token::Lt
- | Token::LtEq
- | Token::Neq
- | Token::Gt
- | Token::GtEq
- | Token::DoubleEq
- | Token::Tilde
- | Token::TildeAsterisk
- | Token::ExclamationMarkTilde
- | Token::ExclamationMarkTildeAsterisk
- | Token::DoubleTilde
- | Token::DoubleTildeAsterisk
- | Token::ExclamationMarkDoubleTilde
- | Token::ExclamationMarkDoubleTildeAsterisk
- | Token::Spaceship => EQ_PREC,
- Token::Caret => CARET_PREC,
- Token::Plus | Token::Minus => PLUS_MINUS_PREC,
- Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
- Token::DoubleColon => DOUBLE_COLON_PREC,
- Token::LBracket => BRACKET_PREC,
+ // Only tokens whose handling (not merely the numeric value) differs from the default
+ // implementation are matched here; `None` defers every other token to the default lookup.
+ match token.token {
+ Token::Word(w) if w.keyword == Keyword::COLLATE => Some(Ok(COLLATE_PREC)),
+ Token::LBracket => Some(Ok(BRACKET_PREC)),
  Token::Arrow
  | Token::LongArrow
  | Token::HashArrow
@@ -173,12 +113,9 @@ impl Dialect for PostgreSqlDialect {
  | Token::Sharp
  | Token::ShiftRight
  | Token::ShiftLeft
- | Token::Pipe
- | Token::Ampersand
- | Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
- _ => self.prec_unknown(),
- };
- Some(Ok(precedence))
+ | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
+ _ => None,
+ }
  }
 
  fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
@@ -197,24 +134,25 @@ impl Dialect for PostgreSqlDialect {
  true
  }
 
- fn prec_mul_div_mod_op(&self) -> u8 {
- MUL_DIV_MOD_OP_PREC
- }
-
- fn prec_plus_minus(&self) -> u8 {
- PLUS_MINUS_PREC
- }
-
- fn prec_between(&self) -> u8 {
- BETWEEN_LIKE_PREC
- }
-
- fn prec_like(&self) -> u8 {
- BETWEEN_LIKE_PREC
- }
-
- fn prec_unary_not(&self) -> u8 {
- NOT_PREC
+ fn prec_value(&self, prec: Precedence) -> u8 {
+ match prec { // PostgreSQL-specific numbers, taken from `*_PREC` constants defined outside this hunk
+ Precedence::DoubleColon => DOUBLE_COLON_PREC,
+ Precedence::AtTz => AT_TZ_PREC,
+ Precedence::MulDivModOp => MUL_DIV_MOD_OP_PREC,
+ Precedence::PlusMinus => PLUS_MINUS_PREC,
+ Precedence::Xor => XOR_PREC,
+ Precedence::Ampersand => PG_OTHER_PREC, // no level of its own: the old lookup also gave `Token::Ampersand` PG_OTHER_PREC
+ Precedence::Caret => CARET_PREC,
+ Precedence::Pipe => PG_OTHER_PREC, // likewise for `Token::Pipe`
+ Precedence::Between => BETWEEN_LIKE_PREC, // shares one level with Like
+ Precedence::Eq => EQ_PREC,
+ Precedence::Like => BETWEEN_LIKE_PREC,
+ Precedence::Is => IS_PREC,
+ Precedence::PgOther => PG_OTHER_PREC,
+ Precedence::UnaryNot => NOT_PREC,
+ Precedence::And => AND_PREC,
+ Precedence::Or => OR_PREC,
+ }
  }
 }