From ec13296991263a383ebe63c1d8b2a7295eb0d978 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 21 May 2026 13:09:56 +0200 Subject: [PATCH 1/6] Parser: fix exponential parse time on compound chains (#2344) --- sqlparser_bench/benches/sqlparser_bench.rs | 33 +++++++++++++++++++++- src/parser/mod.rs | 14 +++++---- tests/sqlparser_common.rs | 27 ++++++++++++++++++ 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index b52683aa55..46c2015400 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -152,5 +152,36 @@ fn parse_many_identifiers(c: &mut Criterion) { group.finish(); } -criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers); +/// Benchmark parsing pathological compound chains that previously caused 2^N +/// work in `parse_compound_expr`. The input `IF a0.a1...aN.#` rejects at the +/// trailing `#`, which used to force quadratic-or-worse backtracking through +/// the chain. +fn parse_compound_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_compound_chain"); + let dialect = GenericDialect {}; + + for &n in &[10usize, 20, 30] { + let chain = (0..n) + .map(|i| format!("a{i}")) + .collect::>() + .join("."); + let sql = format!("IF {chain}.#"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + basic_queries, + word_to_ident, + parse_many_identifiers, + parse_compound_chain +); criterion_main!(benches); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 668c520e5e..9a47f5a665 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2027,14 +2027,16 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed placeholder } - // Fallback to parsing an arbitrary expression, but restrict to expression - // types that are valid after the dot operator. This ensures that e.g. - // `T.interval` is parsed as a compound identifier, not as an interval - // expression. + // Parse a single field component, restricted to expression types valid + // after `.` (so e.g. `T.interval` is a compound identifier, not an + // interval expression). Using `parse_prefix` here rather than + // `parse_subexpr` avoids 2^N work on inputs like `IF a.b.c...x.#`: + // the outer loop already consumes successive `.field` segments, so a + // recursive `parse_subexpr` would re-walk the rest of the chain at + // every dot. _ => { let expr = self.maybe_parse(|parser| { - let expr = parser - .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?; + let expr = parser.parse_prefix()?; match &expr { Expr::CompoundFieldAccess { .. } | Expr::CompoundIdentifier(_) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 221c88971a..45a02ad14b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18851,3 +18851,30 @@ fn parse_non_pg_dialects_keep_xml_names_as_regular_identifiers() { let dialects = all_dialects_except(|d| d.supports_xml_expressions()); dialects.verified_only_select("SELECT xml FROM t"); } + +/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on +/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the +/// main thread asserts that it reports back within a generous timeout. Post-fix +/// the parser returns `Err` in well under a millisecond, so the timeout is a +/// hang guard, not a perf threshold. +#[test] +fn parse_compound_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let chain: String = (0..30) + .map(|i| format!("a{i}")) + .collect::>() + .join("."); + let sql = format!("IF {chain}.#"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should reject this quickly, not loop exponentially"); +} From 80ec31bd65463ba7efcfeb4be54296600b51208e Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 22 May 2026 15:10:11 +0200 Subject: [PATCH 2/6] Parser: add regression test and bench for compound keyword-chain blowup --- sqlparser_bench/benches/sqlparser_bench.rs | 26 +++++++++++++++++++++- tests/sqlparser_common.rs | 25 +++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 46c2015400..7f6bc2ff93 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -177,11 +177,35 @@ fn parse_compound_chain(c: &mut Criterion) { group.finish(); } +/// Benchmark parsing pathological compound chains with a reserved keyword in +/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to +/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended +/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at +/// every segment. +fn parse_compound_keyword_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_compound_keyword_chain"); + let dialect = GenericDialect {}; + + for &n in &[5usize, 10, 15] { + let body = std::iter::repeat_n(".not-b", n).collect::(); + let sql = format!("SELECT x{body}"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + criterion_group!( benches, basic_queries, word_to_ident, parse_many_identifiers, - parse_compound_chain + parse_compound_chain, + parse_compound_keyword_chain ); criterion_main!(benches); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 45a02ad14b..f45329de12 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18878,3 +18878,28 @@ fn parse_compound_chain_no_exponential_blowup() { rx.recv_timeout(Duration::from_secs(5)) .expect("parser should reject this quickly, not loop exponentially"); } + +/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on +/// chains like `x.not-b.not-b...`. The `NOT` keyword in field position drives +/// `parse_prefix` -> `parse_not` -> `parse_subexpr`, which re-walks the +/// remaining chain at every segment and doubles the work. Post-fix the parser +/// handles 25 segments in well under a millisecond, so the timeout is a hang +/// guard, not a perf threshold. +#[test] +fn parse_compound_keyword_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let body: String = std::iter::repeat_n(".not-b", 25).collect(); + let sql = format!("SELECT x{body}"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should handle this quickly, not loop exponentially"); +} From 5bd78dc901513c37e7a03a1ac5bc57869e9c7625 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 22 May 2026 15:14:25 +0200 Subject: [PATCH 3/6] Parser: fix exponential parse time on compound keyword chains --- src/parser/mod.rs | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9a47f5a665..8597984dc4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2035,20 +2035,35 @@ impl<'a> Parser<'a> { // recursive `parse_subexpr` would re-walk the rest of the chain at // every dot. _ => { - let expr = self.maybe_parse(|parser| { - let expr = parser.parse_prefix()?; - match &expr { - Expr::CompoundFieldAccess { .. } - | Expr::CompoundIdentifier(_) - | Expr::Identifier(_) - | Expr::Value(_) - | Expr::Function(_) => Ok(expr), - _ => parser.expected_ref( - "an identifier or value", - parser.peek_token_ref(), - ), - } - })?; + // For a plain `Word` field (not followed by `(`), skip the + // speculative `parse_prefix`. The only result the validator + // below would accept is `Identifier`, which `parse_identifier` + // in the None branch produces directly. This avoids 2^N work + // on chains like `.not-b.not-b...` where `parse_prefix` would + // descend into `parse_not` and re-walk the remaining chain at + // every segment. + let word_field_no_lparen = + matches!(self.peek_token_ref().token, Token::Word(_)) + && self.peek_nth_token_ref(1).token != Token::LParen; + + let expr = if word_field_no_lparen { + None + } else { + self.maybe_parse(|parser| { + let expr = parser.parse_prefix()?; + match &expr { + Expr::CompoundFieldAccess { .. } + | Expr::CompoundIdentifier(_) + | Expr::Identifier(_) + | Expr::Value(_) + | Expr::Function(_) => Ok(expr), + _ => parser.expected_ref( + "an identifier or value", + parser.peek_token_ref(), + ), + } + })? + }; match expr { // If we get back a compound field access or identifier, From a3369e8ecb091642327788b660467511659f94cf Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Sun, 24 May 2026 00:15:50 +0200 Subject: [PATCH 4/6] Parser: fix exponential parse time on speculative prefix parsing --- sqlparser_bench/benches/sqlparser_bench.rs | 38 ++++++++++++++++- src/parser/mod.rs | 49 +++++++++++++++++++++- tests/sqlparser_common.rs | 36 ++++++++++++++++ 3 files changed, 121 insertions(+), 2 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 7f6bc2ff93..7df44ea7f9 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -16,7 +16,7 @@ // under the License. use criterion::{criterion_group, criterion_main, Criterion}; -use sqlparser::dialect::GenericDialect; +use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect}; use sqlparser::keywords::Keyword; use sqlparser::parser::Parser; use sqlparser::tokenizer::{Span, Word}; @@ -189,6 +189,40 @@ fn parse_compound_keyword_chain(c: &mut Criterion) { for &n in &[5usize, 10, 15] { let body = std::iter::repeat_n(".not-b", n).collect::(); let sql = format!("SELECT x{body}"); +/// Benchmark parsing pathological `IF(((...x` chains +/// that previously caused 2^N work in `parse_prefix`. Each nested +/// `current_time(` segment used to be explored twice at every level (once via +/// the speculative reserved-word arm, once via the unreserved-word fallback), +/// doubling work per level. Post-fix the cost is linear in chain length. +fn parse_prefix_keyword_call_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_prefix_keyword_call_chain"); + let dialect = PostgreSqlDialect {}; + + for &n in &[10usize, 20, 30] { + let sql = String::from("if(") + &"current_time(".repeat(n) + "x"; + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + +/// Benchmark parsing pathological `case-case-case-...c` chains that +/// previously caused 2^N work in `parse_prefix`. Each `case` token used to +/// trigger a speculative `parse_case_expr` that recursively descends the +/// chain, but the unreserved-word fallback returns `Identifier(case)` so the +/// overall `parse_prefix` succeeds and the failure cache never fires. +/// Post-fix the per-arm cache short-circuits the speculative descent. +fn parse_prefix_case_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_prefix_case_chain"); + let dialect = SQLiteDialect {}; + + for &n in &[10usize, 20, 30] { + let sql = "case\t-".repeat(n) + "c"; group.bench_function(format!("chain_{n}"), |b| { b.iter(|| { @@ -207,5 +241,7 @@ criterion_group!( parse_many_identifiers, parse_compound_chain, parse_compound_keyword_chain + parse_prefix_keyword_call_chain, + parse_prefix_case_chain ); criterion_main!(benches); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8597984dc4..a2a3415df6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15,6 +15,7 @@ #[cfg(not(feature = "std"))] use alloc::{ boxed::Box, + collections::BTreeMap, format, string::{String, ToString}, vec, @@ -24,6 +25,9 @@ use core::{ fmt::{self, Display}, str::FromStr, }; +#[cfg(feature = "std")] +use std::collections::BTreeMap; + use helpers::attached_token::AttachedToken; use log::debug; @@ -359,6 +363,12 @@ pub struct Parser<'a> { options: ParserOptions, /// Ensures the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, + /// Cached errors from `parse_prefix` calls that returned `Err`. See + /// [`Parser::parse_prefix`] for the 2^N patterns this guards. + failed_prefix_positions: BTreeMap, + /// Cached errors from the speculative reserved-word prefix arm. See + /// [`Parser::parse_prefix`] for the 2^N patterns this guards. + failed_reserved_word_prefix_positions: BTreeMap, } impl<'a> Parser<'a> { @@ -385,6 +395,8 @@ impl<'a> Parser<'a> { dialect, recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH), options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()), + failed_prefix_positions: BTreeMap::new(), + failed_reserved_word_prefix_positions: BTreeMap::new(), } } @@ -446,6 +458,8 @@ impl<'a> Parser<'a> { pub fn with_tokens_with_locations(mut self, tokens: Vec) -> Self { self.tokens = tokens; self.index = 0; + self.failed_prefix_positions.clear(); + self.failed_reserved_word_prefix_positions.clear(); self } @@ -1716,6 +1730,23 @@ impl<'a> Parser<'a> { return prefix; } + // Memoize parse_prefix failures to break 2^N speculation when both + // prefix arms fail at every level (e.g. `IF(current_time(...x`). + // The per-arm cache in `parse_prefix_inner` complements this for + // chains where the reserved arm fails but the unreserved fallback + // succeeds (e.g. `case-case-...c`). + let start_index = self.index; + if let Some(cached) = self.failed_prefix_positions.get(&start_index) { + return Err(cached.clone()); + } + let result = self.parse_prefix_inner(); + if let Err(ref e) = result { + self.failed_prefix_positions.insert(start_index, e.clone()); + } + result + } + + fn parse_prefix_inner(&mut self) -> Result { // PostgreSQL allows any string literal to be preceded by a type name, indicating that the // string literal represents a literal of that type. Some examples: // @@ -1800,7 +1831,21 @@ impl<'a> Parser<'a> { // We first try to parse the word and following tokens as a special expression, and if that fails, // we rollback and try to parse it as an identifier. let w = w.clone(); - match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) { + // Memoize failed speculative reserved-word parses. When + // the reserved arm (CASE, CURRENT_TIME, etc.) does + // exponential work but the unreserved fallback ultimately + // succeeds, the overall `parse_prefix` returns `Ok` and the + // outer cache never fires. Chains like `case-case-...c` + // need this per-arm cache to break the doubling. + let try_parse_result = if let Some(cached) = self + .failed_reserved_word_prefix_positions + .get(&next_token_index) + { + Err(cached.clone()) + } else { + self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) + }; + match try_parse_result { // This word indicated an expression prefix and parsing was successful Ok(Some(expr)) => Ok(expr), @@ -1814,6 +1859,8 @@ impl<'a> Parser<'a> { // we rollback and return the parsing error we got from trying to parse a // special expression (to maintain backwards compatibility of parsing errors). Err(e) => { + self.failed_reserved_word_prefix_positions + .insert(next_token_index, e.clone()); if !self.dialect.is_reserved_for_identifier(w.keyword) { if let Ok(Some(expr)) = self.maybe_parse(|parser| { parser.parse_expr_prefix_by_unreserved_word(&w, span) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f45329de12..1b498893b5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18887,6 +18887,13 @@ fn parse_compound_chain_no_exponential_blowup() { /// guard, not a perf threshold. #[test] fn parse_compound_keyword_chain_no_exponential_blowup() { +/// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs +/// like `IF(current_time(current_time(...x`. Each nested `current_time(` used +/// to be explored twice at every level (once via the speculative reserved-word +/// arm, once via the unreserved-word fallback), doubling work per level. +/// Post-fix the failing parse short-circuits via the position-keyed cache. +#[test] +fn parse_prefix_keyword_call_chain_no_exponential_blowup() { use std::sync::mpsc; use std::thread; use std::time::Duration; @@ -18897,9 +18904,38 @@ fn parse_compound_keyword_chain_no_exponential_blowup() { let (tx, rx) = mpsc::channel(); thread::spawn(move || { let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let sql = String::from("if(") + &"current_time(".repeat(30) + "x"; + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql); let _ = tx.send(()); }); rx.recv_timeout(Duration::from_secs(5)) .expect("parser should handle this quickly, not loop exponentially"); + .expect("parser should reject this quickly, not loop exponentially"); +} + +/// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs +/// like `case-case-case-...c`. Each `case` token triggers a speculative +/// `parse_case_expr` that fails, but the unreserved-word fallback returns +/// `Identifier(case)`, so the outer failure cache never fires. Post-fix the +/// per-arm cache short-circuits the speculative descent. +#[test] +fn parse_prefix_case_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let sql = "case\t-".repeat(30) + "c"; + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&SQLiteDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should reject this quickly, not loop exponentially"); } From 053c6e210baa1ccdff9d8b4786b22b52e46ea4d8 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 5 Jun 2026 16:48:37 +0200 Subject: [PATCH 5/6] Store a copy marker in the prefix failure caches instead of the full error --- src/parser/mod.rs | 45 ++++++++++++++++++++++++++++++--------- tests/sqlparser_common.rs | 18 +++++++++++++++- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a2a3415df6..00a382e25d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -363,12 +363,29 @@ pub struct Parser<'a> { options: ParserOptions, /// Ensures the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, - /// Cached errors from `parse_prefix` calls that returned `Err`. See + /// Cached failures from `parse_prefix` calls that returned `Err`. See /// [`Parser::parse_prefix`] for the 2^N patterns this guards. - failed_prefix_positions: BTreeMap, - /// Cached errors from the speculative reserved-word prefix arm. See + failed_prefix_positions: BTreeMap, + /// Cached failures from the speculative reserved-word prefix arm. See /// [`Parser::parse_prefix`] for the 2^N patterns this guards. - failed_reserved_word_prefix_positions: BTreeMap, + failed_reserved_word_prefix_positions: BTreeMap, +} + +/// Copy marker for a [`ParserError`] cached by the `parse_prefix` failure +/// memoization, so the caches hold no strings. +#[derive(Debug, Clone, Copy)] +enum ExprPrefixError { + RecursionLimitExceeded, + Err, +} + +impl From<&ParserError> for ExprPrefixError { + fn from(e: &ParserError) -> Self { + match e { + ParserError::RecursionLimitExceeded => Self::RecursionLimitExceeded, + _ => Self::Err, + } + } } impl<'a> Parser<'a> { @@ -1736,16 +1753,24 @@ impl<'a> Parser<'a> { // chains where the reserved arm fails but the unreserved fallback // succeeds (e.g. `case-case-...c`). let start_index = self.index; - if let Some(cached) = self.failed_prefix_positions.get(&start_index) { - return Err(cached.clone()); + if let Some(&cached) = self.failed_prefix_positions.get(&start_index) { + return Err(self.cached_prefix_error(cached, self.peek_token_ref())); } let result = self.parse_prefix_inner(); if let Err(ref e) = result { - self.failed_prefix_positions.insert(start_index, e.clone()); + self.failed_prefix_positions.insert(start_index, e.into()); } result } + /// Rebuild the error for a cached prefix failure at the `found` token. + fn cached_prefix_error(&self, cached: ExprPrefixError, found: &TokenWithSpan) -> ParserError { + match cached { + ExprPrefixError::RecursionLimitExceeded => ParserError::RecursionLimitExceeded, + ExprPrefixError::Err => self.expected_ref::<()>("an expression", found).unwrap_err(), + } + } + fn parse_prefix_inner(&mut self) -> Result { // PostgreSQL allows any string literal to be preceded by a type name, indicating that the // string literal represents a literal of that type. Some examples: @@ -1837,11 +1862,11 @@ impl<'a> Parser<'a> { // succeeds, the overall `parse_prefix` returns `Ok` and the // outer cache never fires. Chains like `case-case-...c` // need this per-arm cache to break the doubling. - let try_parse_result = if let Some(cached) = self + let try_parse_result = if let Some(&cached) = self .failed_reserved_word_prefix_positions .get(&next_token_index) { - Err(cached.clone()) + Err(self.cached_prefix_error(cached, self.get_current_token())) } else { self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) }; @@ -1860,7 +1885,7 @@ impl<'a> Parser<'a> { // special expression (to maintain backwards compatibility of parsing errors). Err(e) => { self.failed_reserved_word_prefix_positions - .insert(next_token_index, e.clone()); + .insert(next_token_index, (&e).into()); if !self.dialect.is_reserved_for_identifier(w.keyword) { if let Ok(Some(expr)) = self.maybe_parse(|parser| { parser.parse_expr_prefix_by_unreserved_word(&w, span) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1b498893b5..b7dd58784a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15438,7 +15438,10 @@ fn parse_create_table_select() { #[test] fn test_reserved_keywords_for_identifiers() { - let dialects = all_dialects_where(|d| d.is_reserved_for_identifier(Keyword::INTERVAL)); + let dialects = all_dialects_where(|d| { + d.is_reserved_for_identifier(Keyword::INTERVAL) + && !d.supports_named_fn_args_with_expr_name() + }); // Dialects that reserve the word INTERVAL will not allow it as an unquoted identifier let sql = "SELECT MAX(interval) FROM tbl"; assert_eq!( @@ -15448,6 +15451,19 @@ fn test_reserved_keywords_for_identifiers() { )) ); + // Dialects with expression-named function arguments parse the argument + // expression twice, so the second attempt reports the memoized failure + // at the start of the expression + let dialects = all_dialects_where(|d| { + d.is_reserved_for_identifier(Keyword::INTERVAL) && d.supports_named_fn_args_with_expr_name() + }); + assert_eq!( + dialects.parse_sql_statements(sql), + Err(ParserError::ParserError( + "Expected: an expression, found: interval".to_string() + )) + ); + // Dialects that do not reserve the word INTERVAL will allow it let dialects = all_dialects_where(|d| !d.is_reserved_for_identifier(Keyword::INTERVAL)); let sql = "SELECT MAX(interval) FROM tbl"; From c8d0a194900290959ecbf915a47585868deb19ec Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 5 Jun 2026 16:48:37 +0200 Subject: [PATCH 6/6] Resolve cherry-pick conflict between #2350 and #2352 test/bench files #2350 and #2352 both added regression tests and benches in the same file regions. When cherry-picking both onto v0.62.0, the test bodies and the bench bodies for parse_compound_keyword_chain (#2350) and parse_prefix_keyword_call_chain (#2352) interleaved. Fix by separating them into independent function bodies and adding the missing group.bench_function / group.finish / closing braces for the #2350 function body that was truncated by the conflict. --- sqlparser_bench/benches/sqlparser_bench.rs | 10 +++++++++ tests/sqlparser_common.rs | 24 +++++++++++++++------- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 7df44ea7f9..c343c2ceff 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -189,6 +189,16 @@ fn parse_compound_keyword_chain(c: &mut Criterion) { for &n in &[5usize, 10, 15] { let body = std::iter::repeat_n(".not-b", n).collect::(); let sql = format!("SELECT x{body}"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + group.finish(); +} + /// Benchmark parsing pathological `IF(((...x` chains /// that previously caused 2^N work in `parse_prefix`. Each nested /// `current_time(` segment used to be explored twice at every level (once via diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b7dd58784a..68de89d09b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18903,6 +18903,23 @@ fn parse_compound_chain_no_exponential_blowup() { /// guard, not a perf threshold. #[test] fn parse_compound_keyword_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let body: String = std::iter::repeat_n(".not-b", 25).collect(); + let sql = format!("SELECT x{body}"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should handle this quickly, not loop exponentially"); +} + /// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs /// like `IF(current_time(current_time(...x`. Each nested `current_time(` used /// to be explored twice at every level (once via the speculative reserved-word @@ -18914,12 +18931,6 @@ fn parse_prefix_keyword_call_chain_no_exponential_blowup() { use std::thread; use std::time::Duration; - let body: String = std::iter::repeat_n(".not-b", 25).collect(); - let sql = format!("SELECT x{body}"); - - let (tx, rx) = mpsc::channel(); - thread::spawn(move || { - let _ = Parser::parse_sql(&GenericDialect {}, &sql); let sql = String::from("if(") + &"current_time(".repeat(30) + "x"; let (tx, rx) = mpsc::channel(); @@ -18929,7 +18940,6 @@ fn parse_prefix_keyword_call_chain_no_exponential_blowup() { }); rx.recv_timeout(Duration::from_secs(5)) - .expect("parser should handle this quickly, not loop exponentially"); .expect("parser should reject this quickly, not loop exponentially"); }