Skip to content

Commit

Permalink
Simple custom lexical precedence in PostgreSQL dialect (#1379)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored Aug 14, 2024
1 parent 6a11a67 commit f223530
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 212 deletions.
189 changes: 81 additions & 108 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,18 @@ pub trait Dialect: Debug + Any {
if let Some(precedence) = self.get_next_precedence(parser) {
return precedence;
}
macro_rules! p {
($precedence:ident) => {
self.prec_value(Precedence::$precedence)
};
}

let token = parser.peek_token();
debug!("get_next_precedence_full() {:?}", token);
match token.token {
Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),

Token::Word(w) if w.keyword == Keyword::AT => {
match (
Expand All @@ -370,9 +375,9 @@ pub trait Dialect: Debug + Any {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
Ok(AT_TZ_PREC)
Ok(p!(AtTz))
}
_ => Ok(UNKNOWN_PREC),
_ => Ok(self.prec_unknown()),
}
}

Expand All @@ -382,25 +387,25 @@ pub trait Dialect: Debug + Any {
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
_ => Ok(UNKNOWN_PREC),
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
_ => Ok(self.prec_unknown()),
},
Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
Token::Eq
| Token::Lt
| Token::LtEq
Expand All @@ -416,20 +421,19 @@ pub trait Dialect: Debug + Any {
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => Ok(EQ_PREC),
Token::Pipe => Ok(PIPE_PREC),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
Token::Ampersand => Ok(AMPERSAND_PREC),
Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
| Token::Spaceship => Ok(p!(Eq)),
Token::Pipe => Ok(p!(Pipe)),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
Token::Ampersand => Ok(p!(Ampersand)),
Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
Ok(MUL_DIV_MOD_OP_PREC)
Ok(p!(MulDivModOp))
}
Token::DoubleColon
| Token::ExclamationMark
| Token::LBracket
| Token::Overlap
| Token::CaretAt => Ok(DOUBLE_COLON_PREC),
// Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
| Token::CaretAt => Ok(p!(DoubleColon)),
Token::Arrow
| Token::LongArrow
| Token::HashArrow
Expand All @@ -442,8 +446,8 @@ pub trait Dialect: Debug + Any {
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe
| Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
_ => Ok(UNKNOWN_PREC),
| Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
_ => Ok(self.prec_unknown()),
}
}

Expand All @@ -457,88 +461,57 @@ pub trait Dialect: Debug + Any {
None
}

// The following precedence values are used directly by `Parse` or in dialects,
// so have to be made public by the dialect.

/// Return the precedence of the `::` operator.
/// Decide the lexical Precedence of operators.
///
/// Default is 50.
fn prec_double_colon(&self) -> u8 {
DOUBLE_COLON_PREC
}

/// Return the precedence of `*`, `/`, and `%` operators.
///
/// Default is 40.
fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}

/// Return the precedence of the `+` and `-` operators.
///
/// Default is 30.
fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}

/// Return the precedence of the `BETWEEN` operator.
///
/// For example `BETWEEN <low> AND <high>`
///
/// Default is 22.
fn prec_between(&self) -> u8 {
BETWEEN_PREC
}

/// Return the precedence of the `LIKE` operator.
///
/// Default is 19.
fn prec_like(&self) -> u8 {
LIKE_PREC
}

/// Return the precedence of the unary `NOT` operator.
///
/// For example `NOT (a OR b)`
///
/// Default is 15.
fn prec_unary_not(&self) -> u8 {
UNARY_NOT_PREC
/// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
///
/// Maps each [`Precedence`] class to the numeric value used by this dialect.
/// A higher number means a higher precedence. Dialects override this method
/// to re-rank operators without re-implementing the token matching.
fn prec_value(&self, prec: Precedence) -> u8 {
    // Listed from the lowest to the highest precedence.
    match prec {
        Precedence::Or => 5,
        Precedence::And => 10,
        Precedence::UnaryNot => 15,
        Precedence::PgOther => 16,
        Precedence::Is => 17,
        Precedence::Like => 19,
        // Comparison operators and BETWEEN share the same level.
        Precedence::Eq | Precedence::Between => 20,
        Precedence::Pipe => 21,
        Precedence::Caret => 22,
        Precedence::Ampersand => 23,
        Precedence::Xor => 24,
        Precedence::PlusMinus => 30,
        Precedence::MulDivModOp => 40,
        // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE
        // operator actually has higher precedence than addition.
        // See <https://postgrespro.com/list/thread-id/2673331>.
        Precedence::AtTz => 41,
        Precedence::DoubleColon => 50,
    }
}

/// Return the default (unknown) precedence.
///
/// Default is 0.
fn prec_unknown(&self) -> u8 {
UNKNOWN_PREC
0
}
}

// Define the lexical Precedence of operators.
//
// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
// higher number = higher precedence
//
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
// actually has higher precedence than addition.
// See <https://postgrespro.com/list/thread-id/2673331>.
const DOUBLE_COLON_PREC: u8 = 50;
const AT_TZ_PREC: u8 = 41;
const MUL_DIV_MOD_OP_PREC: u8 = 40;
const PLUS_MINUS_PREC: u8 = 30;
const XOR_PREC: u8 = 24;
const AMPERSAND_PREC: u8 = 23;
const CARET_PREC: u8 = 22;
const PIPE_PREC: u8 = 21;
const BETWEEN_PREC: u8 = 20;
const EQ_PREC: u8 = 20;
const LIKE_PREC: u8 = 19;
const IS_PREC: u8 = 17;
const PG_OTHER_PREC: u8 = 16;
const UNARY_NOT_PREC: u8 = 15;
const AND_PREC: u8 = 10;
const OR_PREC: u8 = 5;
const UNKNOWN_PREC: u8 = 0;
/// This represents the operators for which precedence must be defined.
///
/// The variants themselves carry no number: the numeric value of each one is
/// chosen per dialect by [`Dialect::prec_value`], where a higher number means
/// a higher precedence. The token lists below describe how the default
/// `Dialect` implementation assigns tokens to each variant; a dialect may
/// classify tokens differently.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Precedence {
    /// `Token::DoubleColon`, `Token::ExclamationMark`, `Token::LBracket`,
    /// `Token::Overlap` and `Token::CaretAt`.
    DoubleColon,
    /// The `AT TIME ZONE` operator.
    AtTz,
    /// `Token::Mul`, `Token::Div`, `Token::DuckIntDiv`, `Token::Mod`,
    /// `Token::StringConcat` and the `DIV` keyword.
    MulDivModOp,
    /// `Token::Plus` and `Token::Minus`.
    PlusMinus,
    /// The `XOR` keyword.
    Xor,
    /// `Token::Ampersand`.
    Ampersand,
    /// `Token::Caret`, `Token::Sharp`, `Token::ShiftRight` and `Token::ShiftLeft`.
    Caret,
    /// `Token::Pipe`.
    Pipe,
    /// The `BETWEEN`, `IN` and `OPERATOR` keywords, including `NOT BETWEEN`
    /// and `NOT IN`.
    Between,
    /// Comparison operators such as `Token::Eq`, `Token::Lt`, `Token::LtEq`
    /// and `Token::Spaceship`, plus the tilde-based match operators.
    Eq,
    /// The `LIKE`, `ILIKE`, `RLIKE`, `REGEXP` and `SIMILAR` keywords,
    /// including their `NOT ...` forms.
    Like,
    /// The `IS` keyword.
    Is,
    /// Other PostgreSQL-style operators such as `Token::Arrow`,
    /// `Token::LongArrow`, `Token::HashArrow`, `Token::Question` and
    /// `Token::CustomBinaryOperator`.
    PgOther,
    /// The unary `NOT` operator, for example `NOT (a OR b)`.
    UnaryNot,
    /// The `AND` keyword.
    And,
    /// The `OR` keyword.
    Or,
}

impl dyn Dialect {
#[inline]
Expand Down
118 changes: 28 additions & 90 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use log::debug;

use crate::ast::{CommentObject, Statement};
use crate::dialect::Dialect;
use crate::dialect::{Dialect, Precedence};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Token;
Expand Down Expand Up @@ -89,71 +89,11 @@ impl Dialect for PostgreSqlDialect {
let token = parser.peek_token();
debug!("get_next_precedence() {:?}", token);

let precedence = match token.token {
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
Token::Word(w) if w.keyword == Keyword::AT => {
match (
parser.peek_nth_token(1).token,
parser.peek_nth_token(2).token,
) {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
AT_TZ_PREC
}
_ => self.prec_unknown(),
}
}

Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
// The precedence of NOT varies depending on keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
_ => self.prec_unknown(),
},
Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::DoubleTilde
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => EQ_PREC,
Token::Caret => CARET_PREC,
Token::Plus | Token::Minus => PLUS_MINUS_PREC,
Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
Token::DoubleColon => DOUBLE_COLON_PREC,
Token::LBracket => BRACKET_PREC,
// we only return some custom value here when the behaviour (not merely the numeric value) differs
// from the default implementation
match token.token {
Token::Word(w) if w.keyword == Keyword::COLLATE => Some(Ok(COLLATE_PREC)),
Token::LBracket => Some(Ok(BRACKET_PREC)),
Token::Arrow
| Token::LongArrow
| Token::HashArrow
Expand All @@ -173,12 +113,9 @@ impl Dialect for PostgreSqlDialect {
| Token::Sharp
| Token::ShiftRight
| Token::ShiftLeft
| Token::Pipe
| Token::Ampersand
| Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
_ => self.prec_unknown(),
};
Some(Ok(precedence))
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
_ => None,
}
}

fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
Expand All @@ -197,24 +134,25 @@ impl Dialect for PostgreSqlDialect {
true
}

fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}

fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}

fn prec_between(&self) -> u8 {
BETWEEN_LIKE_PREC
}

fn prec_like(&self) -> u8 {
BETWEEN_LIKE_PREC
}

fn prec_unary_not(&self) -> u8 {
NOT_PREC
/// Returns the numeric precedence this dialect assigns to each
/// [`Precedence`] class, taken from the `*_PREC` constants in scope.
fn prec_value(&self, prec: Precedence) -> u8 {
    match prec {
        Precedence::DoubleColon => DOUBLE_COLON_PREC,
        Precedence::AtTz => AT_TZ_PREC,
        Precedence::MulDivModOp => MUL_DIV_MOD_OP_PREC,
        Precedence::PlusMinus => PLUS_MINUS_PREC,
        Precedence::Xor => XOR_PREC,
        Precedence::Caret => CARET_PREC,
        // `Ampersand` and `Pipe` get no level of their own here: they share
        // the value used for the other PostgreSQL operators.
        Precedence::Ampersand | Precedence::Pipe | Precedence::PgOther => PG_OTHER_PREC,
        // The BETWEEN and LIKE classes share a single level.
        Precedence::Between | Precedence::Like => BETWEEN_LIKE_PREC,
        Precedence::Eq => EQ_PREC,
        Precedence::Is => IS_PREC,
        Precedence::UnaryNot => NOT_PREC,
        Precedence::And => AND_PREC,
        Precedence::Or => OR_PREC,
    }
}
}

Expand Down
Loading

0 comments on commit f223530

Please sign in to comment.