Skip to content

Commit

Permalink
Support DuckDB struct syntax and support list of struct syntax (#1372)
Browse files Browse the repository at this point in the history
Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Ifeanyi Ubah <[email protected]>
  • Loading branch information
3 people authored Aug 15, 2024
1 parent fab834d commit 8c4d30b
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 33 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ Cargo.lock
.vscode

*.swp

.DS_store
24 changes: 21 additions & 3 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ pub enum DataType {
///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
Struct(Vec<StructField>, StructBracketKind),
/// Union
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
Expand Down Expand Up @@ -517,9 +517,16 @@ impl fmt::Display for DataType {
}
write!(f, ")")
}
DataType::Struct(fields) => {
DataType::Struct(fields, bracket) => {
if !fields.is_empty() {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
match bracket {
StructBracketKind::Parentheses => {
write!(f, "STRUCT({})", display_comma_separated(fields))
}
StructBracketKind::AngleBrackets => {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
}
}
} else {
write!(f, "STRUCT")
}
Expand Down Expand Up @@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
Ok(())
}

/// Bracket style used when writing a `STRUCT` data type.
///
/// Stored next to the field list in `DataType::Struct` so that a non-empty
/// struct type is displayed with the delimiters matching the syntax it was
/// parsed from.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum StructBracketKind {
    /// Example: `STRUCT(a INT, b STRING)`
    Parentheses,
    /// Example: `STRUCT<a INT, b STRING>`
    AngleBrackets,
}

/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
Expand Down
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
Expand Down
27 changes: 26 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
))
}

/// Parses a DuckDB struct type definition of the form `STRUCT(name type [, ...])`.
///
/// See <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>.
///
/// Each field is parsed as an identifier followed by a data type, so every
/// returned [`StructField`] carries `Some(field_name)`.
///
/// # Errors
///
/// Returns a [`ParserError`] if the `STRUCT` keyword, the opening parenthesis,
/// any field, or the closing parenthesis cannot be parsed.
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
    self.expect_keyword(Keyword::STRUCT)?;
    self.expect_token(&Token::LParen)?;
    // Propagate a field-level error right away; otherwise it would be
    // replaced by whatever error the closing-parenthesis check produces.
    let fields = self.parse_comma_separated(|parser| {
        let field_name = parser.parse_identifier(false)?;
        let field_type = parser.parse_data_type()?;

        Ok(StructField {
            field_name: Some(field_name),
            field_type,
        })
    })?;
    self.expect_token(&Token::RParen)?;
    Ok(fields)
}

/// Parse a field definition in a [struct] or [tuple].
/// Syntax:
///
Expand Down Expand Up @@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
))))
}
}
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
self.prev_token();
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
Ok(DataType::Struct(
field_defs,
StructBracketKind::AngleBrackets,
))
}
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token();
Expand Down
74 changes: 46 additions & 28 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,28 +489,34 @@ fn parse_nested_data_types() {
vec![
ColumnDef {
name: Ident::new("x"),
data_type: DataType::Struct(vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
]),
data_type: DataType::Struct(
vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
],
StructBracketKind::AngleBrackets
),
collation: None,
options: vec![],
},
ColumnDef {
name: Ident::new("y"),
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Int64,
}]),
DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Int64,
}],
StructBracketKind::AngleBrackets
),
))),
collation: None,
options: vec![],
Expand Down Expand Up @@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down Expand Up @@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down
112 changes: 112 additions & 0 deletions tests/sqlparser_duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
}
}

/// Checks DuckDB's parenthesized `STRUCT(...)` column types: a plain struct,
/// an array of structs, a nested struct, and a set of malformed inputs that
/// must be rejected.
#[test]
fn test_struct() {
    // s STRUCT(v VARCHAR, i INTEGER)
    let struct_type1 = DataType::Struct(
        vec![
            StructField {
                field_name: Some(Ident::new("v")),
                field_type: DataType::Varchar(None),
            },
            StructField {
                field_name: Some(Ident::new("i")),
                field_type: DataType::Integer(None),
            },
        ],
        StructBracketKind::Parentheses,
    );

    // basic struct
    let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#);
    assert_eq!(
        column_defs(statement),
        vec![ColumnDef {
            name: "s".into(),
            data_type: struct_type1.clone(),
            collation: None,
            options: vec![],
        }]
    );

    // struct array
    let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#);
    assert_eq!(
        column_defs(statement),
        vec![ColumnDef {
            name: "s".into(),
            data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
                Box::new(struct_type1),
                None
            )),
            collation: None,
            options: vec![],
        }]
    );

    // s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
    let struct_type2 = DataType::Struct(
        vec![
            StructField {
                field_name: Some(Ident::new("v")),
                field_type: DataType::Varchar(None),
            },
            StructField {
                field_name: Some(Ident::new("s")),
                field_type: DataType::Struct(
                    vec![
                        StructField {
                            field_name: Some(Ident::new("a1")),
                            field_type: DataType::Integer(None),
                        },
                        StructField {
                            field_name: Some(Ident::new("a2")),
                            field_type: DataType::Varchar(None),
                        },
                    ],
                    StructBracketKind::Parentheses,
                ),
            },
        ],
        StructBracketKind::Parentheses,
    );

    // nested struct (as the element type of an array)
    let statement = duckdb().verified_stmt(
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
    );

    assert_eq!(
        column_defs(statement),
        vec![ColumnDef {
            name: "s".into(),
            data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
                Box::new(struct_type2),
                None
            )),
            collation: None,
            options: vec![],
        }]
    );

    // Inputs the DuckDB dialect must reject: an extra or mismatched closing
    // bracket, angle-bracket syntax, missing parentheses, and unnamed fields.
    let rejected_sql = [
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
    ];

    for sql in rejected_sql {
        // Name the offending statement so a regression is easy to locate.
        assert!(
            duckdb().parse_sql_statements(sql).is_err(),
            "expected a parse error for: {}",
            sql
        );
    }
}

/// Extracts the column definitions from a `CREATE TABLE` statement.
///
/// # Panics
///
/// Panics with "Expected CreateTable" when `statement` is any other kind of
/// statement.
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
    if let Statement::CreateTable(CreateTable { columns, .. }) = statement {
        columns
    } else {
        panic!("Expected CreateTable")
    }
}

#[test]
fn test_select_wildcard_with_exclude() {
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
Expand Down

0 comments on commit 8c4d30b

Please sign in to comment.