diff --git a/src/ast/helpers/stmt_create_database.rs b/src/ast/helpers/stmt_create_database.rs index c718dbce1..bafe7822f 100644 --- a/src/ast/helpers/stmt_create_database.rs +++ b/src/ast/helpers/stmt_create_database.rs @@ -85,6 +85,10 @@ pub struct CreateDatabaseBuilder { pub storage_serialization_policy: Option, /// Optional comment attached to the database. pub comment: Option, + /// Optional default character set, from MySQL's `[DEFAULT] CHARACTER SET` / `CHARSET` clause. + pub default_charset: Option, + /// Optional default collation, from MySQL's `[DEFAULT] COLLATE` clause. + pub default_collation: Option, /// Optional catalog sync configuration. pub catalog_sync: Option, /// Optional catalog sync namespace mode. @@ -120,6 +124,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: None, storage_serialization_policy: None, comment: None, + default_charset: None, + default_collation: None, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, @@ -218,6 +224,18 @@ impl CreateDatabaseBuilder { self } + /// Set the default character set for the database; `None` clears it. + pub fn default_charset(mut self, default_charset: Option) -> Self { + self.default_charset = default_charset; + self + } + + /// Set the default collation for the database; `None` clears it. + pub fn default_collation(mut self, default_collation: Option) -> Self { + self.default_collation = default_collation; + self + } + /// Set the catalog sync for the database. 
pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; @@ -272,6 +290,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: self.default_ddl_collation, storage_serialization_policy: self.storage_serialization_policy, comment: self.comment, + default_charset: self.default_charset, + default_collation: self.default_collation, catalog_sync: self.catalog_sync, catalog_sync_namespace_mode: self.catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter, @@ -302,6 +322,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -323,6 +345,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d77186bc7..6264f8f56 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4285,6 +4285,10 @@ pub enum Statement { storage_serialization_policy: Option, /// Optional comment. comment: Option, + /// Optional default character set, from MySQL's `[DEFAULT] CHARACTER SET` / `CHARSET` clause. + default_charset: Option, + /// Optional default collation, from MySQL's `[DEFAULT] COLLATE` clause. + default_collation: Option, /// Optional catalog sync identifier. catalog_sync: Option, /// Catalog sync namespace mode. 
@@ -5165,6 +5169,8 @@ impl fmt::Display for Statement { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -5224,6 +5230,14 @@ impl fmt::Display for Statement { write!(f, " COMMENT = '{comment}'")?; } + if let Some(charset) = default_charset { + write!(f, " DEFAULT CHARACTER SET {charset}")?; + } + + if let Some(collation) = default_collation { + write!(f, " DEFAULT COLLATE {collation}")?; + } + if let Some(sync) = catalog_sync { write!(f, " CATALOG_SYNC = '{sync}'")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cfc173d76..5e8ac222b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5294,6 +5294,8 @@ impl<'a> Parser<'a> { let db_name = self.parse_object_name(false)?; let mut location = None; let mut managed_location = None; + let mut default_charset = None; + let mut default_collation = None; loop { match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), @@ -5309,6 +5311,26 @@ impl<'a> Parser<'a> { None }; + // Parse MySQL `[DEFAULT] {CHARACTER SET | CHARSET} [=] name` / `[DEFAULT] COLLATE [=] name`, in any order + loop { + let has_default = self.parse_keyword(Keyword::DEFAULT); + if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) + || self.parse_keyword(Keyword::CHARSET) + { + let _ = self.consume_token(&Token::Eq); + default_charset = Some(self.parse_identifier()?.value); + } else if self.parse_keyword(Keyword::COLLATE) { + let _ = self.consume_token(&Token::Eq); + default_collation = Some(self.parse_identifier()?.value); + } else if has_default { + // Lone DEFAULT (no CHARACTER SET, CHARSET, or COLLATE after it): put it back and stop + self.prev_token(); + break; + } else { + break; + } + } + Ok(Statement::CreateDatabase { db_name, if_not_exists: ine, @@ -5325,6 +5347,8 @@ impl<'a> Parser<'a> { default_ddl_collation: None, 
storage_serialization_policy: None, comment: None, + default_charset, + default_collation, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e847d3edb..0bc7e8790 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -4354,3 +4354,96 @@ fn test_create_index_options() { "CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK = EXCLUSIVE ALGORITHM = DEFAULT", ); } + +#[test] +fn parse_create_database_with_charset() { + // Test DEFAULT CHARACTER SET without = sign (normalized form) + mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4"); + + // Test DEFAULT CHARACTER SET with = sign (normalized to the form without it) + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARACTER SET without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT COLLATE + mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci"); + + // Test COLLATE without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci", + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // Test both CHARACTER SET and COLLATE together + mysql_and_generic().verified_stmt( + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE 
utf8mb4_unicode_ci", + ); + + // Test IF NOT EXISTS with CHARACTER SET + mysql_and_generic() + .verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16"); + + // Test the exact syntax from the issue + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16", + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16", + ); +} + +#[test] +fn parse_create_database_with_charset_errors() { + // Missing charset name after CHARACTER SET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET") + .is_err()); + + // Missing charset name after CHARSET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARSET") + .is_err()); + + // Missing collation name after COLLATE + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE") + .is_err()); + + // Equals sign but no value + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =") + .is_err()); +} + +#[test] +fn parse_create_database_with_charset_option_ordering() { + // MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first + // (matches MySQL's own SHOW CREATE DATABASE output order) + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // COLLATE first without DEFAULT keywords + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); +}