apache · adriangb · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
@@ -22,7 +22,7 @@ use arrow_ipc::CompressionType;
 #[cfg(feature = "parquet_encryption")]
 use crate::encryption::{FileDecryptionProperties, FileEncryptionProperties};
 use crate::error::_config_err;
-use crate::format::{ExplainAnalyzeLevel, ExplainFormat};
+use crate::format::{ExplainAnalyzeCategories, ExplainFormat, MetricType};
 use crate::parquet_config::DFParquetWriterVersion;
 use crate::parsers::CompressionTypeVariant;
 use crate::utils::get_available_parallelism;
@@ -1211,7 +1211,13 @@ config_namespace! {
         /// Verbosity level for "EXPLAIN ANALYZE". Default is "dev"
         /// "summary" shows common metrics for high-level insights.
         /// "dev" provides deep operator-level introspection for developers.
-        pub analyze_level: ExplainAnalyzeLevel, default = ExplainAnalyzeLevel::Dev
+        pub analyze_level: MetricType, default = MetricType::Dev
+
+        /// Which metric categories to include in "EXPLAIN ANALYZE" output.
+        /// Comma-separated list of: "rows", "bytes", "timing", "uncategorized".
+        /// Use "none" to show plan structure only, or "all" (default) to show everything.
+        /// Metrics without a declared category are treated as "uncategorized".
+        pub analyze_categories: ExplainAnalyzeCategories, default = ExplainAnalyzeCategories::All
     }
 }
 

diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs
@@ -206,47 +206,227 @@ impl ConfigField for ExplainFormat {
     }
 }
 
-/// Verbosity levels controlling how `EXPLAIN ANALYZE` renders metrics
+/// Categorizes metrics so the display layer can choose the desired verbosity.
+///
+/// The `datafusion.explain.analyze_level` configuration controls which
+/// type is shown:
+/// - `"dev"` (the default): all metrics are shown.
+/// - `"summary"`: only metrics tagged as `Summary` are shown.
+///
+/// This is orthogonal to [`MetricCategory`], which filters by *what kind*
+/// of value a metric represents (rows / bytes / timing).
+///
+/// # Difference from `EXPLAIN ANALYZE VERBOSE`
+///
+/// The `VERBOSE` keyword controls whether per-partition metrics are shown
+/// (when specified) or aggregated metrics are displayed (when omitted).
+/// In contrast, `MetricType` determines which *levels* of metrics are
+/// displayed.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum ExplainAnalyzeLevel {
-    /// Show a compact view containing high-level metrics
+pub enum MetricType {
+    /// Common metrics for high-level insights (answering which operator is slow)
     Summary,
-    /// Show a developer-focused view with per-operator details
+    /// Detailed metrics for deep operator-level introspection by developers
     Dev,
-    // When adding new enum, update the error message in `from_str()` accordingly.
 }
 
-impl FromStr for ExplainAnalyzeLevel {
+impl MetricType {
+    /// Returns the set of metric types that should be shown for this level.
+    ///
+    /// `Dev` is a superset of `Summary`: when the user selects
+    /// `analyze_level = 'dev'`, both `Summary` and `Dev` metrics are shown.
+    pub fn included_types(self) -> Vec<MetricType> {
+        match self {
+            MetricType::Summary => vec![MetricType::Summary],
+            MetricType::Dev => vec![MetricType::Summary, MetricType::Dev],
+        }
+    }
+}
+
+impl FromStr for MetricType {
     type Err = DataFusionError;
 
-    fn from_str(level: &str) -> Result<Self, Self::Err> {
-        match level.to_lowercase().as_str() {
-            "summary" => Ok(ExplainAnalyzeLevel::Summary),
-            "dev" => Ok(ExplainAnalyzeLevel::Dev),
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.trim().to_lowercase().as_str() {
+            "summary" => Ok(Self::Summary),
+            "dev" => Ok(Self::Dev),
             other => Err(DataFusionError::Configuration(format!(
                 "Invalid explain analyze level. Expected 'summary' or 'dev'. Got '{other}'"
             ))),
         }
     }
 }
 
-impl Display for ExplainAnalyzeLevel {
+impl Display for MetricType {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let s = match self {
-            ExplainAnalyzeLevel::Summary => "summary",
-            ExplainAnalyzeLevel::Dev => "dev",
-        };
-        write!(f, "{s}")
+        match self {
+            Self::Summary => write!(f, "summary"),
+            Self::Dev => write!(f, "dev"),
+        }
+    }
+}
+
+impl ConfigField for MetricType {
+    fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
+        v.some(key, self, description)
+    }
+
+    fn set(&mut self, _: &str, value: &str) -> Result<()> {
+        *self = MetricType::from_str(value)?;
+        Ok(())
+    }
+}
+
+/// Classifies a metric by what it measures.
+///
+/// This is orthogonal to [`MetricType`] (Summary / Dev), which controls
+/// *verbosity*. `MetricCategory` controls *what kind of value* is shown,
+/// so that `EXPLAIN ANALYZE` output can be narrowed to only the categories
+/// that are useful in a given context.
+///
+/// In particular this is useful for testing since metrics differ in their stability across runs:
+/// - [`Rows`](Self::Rows) and [`Bytes`](Self::Bytes) depend only on the plan
+///   and the data, so they are mostly deterministic across runs (given the same
+///   input). Variations can exist, e.g. because of non-deterministic ordering
+///   of evaluation between threads.
+///   Running with a single target partition often makes these metrics stable enough to assert on in tests.
+/// - [`Timing`](Self::Timing) depends on hardware, system load, scheduling,
+///   etc., so it varies from run to run even on the same machine.
+///
+/// [`MetricCategory`] is especially useful in sqllogictest (`.slt`) files:
+/// setting `datafusion.explain.analyze_categories = 'rows'` lets a test
+/// assert on row-count metrics without sprinkling `<slt:ignore>` over every
+/// timing value.
+///
+/// Metrics that do not declare a category (the default for custom
+/// `Count` / `Gauge` metrics) are treated as
+/// [`Uncategorized`](Self::Uncategorized) for filtering purposes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum MetricCategory {
+    /// Row counts and related dimensionless counters: `output_rows`,
+    /// `spilled_rows`, `output_batches`, pruning metrics, ratios, etc.
+    ///
+    /// Mostly deterministic given the same plan and data.
+    Rows,
+    /// Byte measurements: `output_bytes`, `spilled_bytes`,
+    /// `current_memory_usage`, `bytes_scanned`, etc.
+    ///
+    /// Mostly deterministic given the same plan and data.
+    Bytes,
+    /// Wall-clock durations and timestamps: `elapsed_compute`,
+    /// operator-defined `Time` metrics, `start_timestamp` /
+    /// `end_timestamp`, etc.
+    ///
+    /// **Non-deterministic** — varies across runs even on the same hardware.
+    Timing,
+    /// Catch-all for metrics that do not fit into [`Rows`](Self::Rows),
+    /// [`Bytes`](Self::Bytes), or [`Timing`](Self::Timing).
+    ///
+    /// Custom `Count` / `Gauge` metrics that are not explicitly assigned
+    /// a category are treated as `Uncategorized` for filtering purposes.
+    ///
+    /// This variant lets users explicitly include or exclude these
+    /// metrics, e.g.:
+    /// ```sql
+    /// SET datafusion.explain.analyze_categories = 'rows, bytes, uncategorized';
+    /// ```
+    Uncategorized,
+}
+
+impl FromStr for MetricCategory {
+    type Err = DataFusionError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.trim().to_lowercase().as_str() {
+            "rows" => Ok(Self::Rows),
+            "bytes" => Ok(Self::Bytes),
+            "timing" => Ok(Self::Timing),
+            "uncategorized" => Ok(Self::Uncategorized),
+            other => Err(DataFusionError::Configuration(format!(
+                "Invalid metric category '{other}'. \
+                 Expected 'rows', 'bytes', 'timing', or 'uncategorized'."
+            ))),
+        }
+    }
+}
+
+impl Display for MetricCategory {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Rows => write!(f, "rows"),
+            Self::Bytes => write!(f, "bytes"),
+            Self::Timing => write!(f, "timing"),
+            Self::Uncategorized => write!(f, "uncategorized"),
+        }
+    }
+}
+
+/// Controls which [`MetricCategory`] values are shown in `EXPLAIN ANALYZE`.
+///
+/// Set via `SET datafusion.explain.analyze_categories = '...'`.
+///
+/// See [`MetricCategory`] for the determinism properties that motivate
+/// this filter.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
+pub enum ExplainAnalyzeCategories {
+    /// Show all metrics regardless of category (the default).
+    #[default]
+    All,
+    /// Show only metrics whose category is in the list.
+    /// Metrics with no declared category are treated as
+    /// [`Uncategorized`](MetricCategory::Uncategorized) for filtering.
+    ///
+    /// An **empty** vec means "plan only" — suppress all metrics.
+    Only(Vec<MetricCategory>),
+}
+
+impl FromStr for ExplainAnalyzeCategories {
+    type Err = DataFusionError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.trim().to_lowercase().as_str() {
+            "all" => Ok(Self::All),
+            "none" => Ok(Self::Only(vec![])),
+            other => {
+                let mut cats = Vec::new();
+                for part in other.split(',') {
+                    let cat = part.trim().parse::<MetricCategory>()?;
+                    // `Vec::dedup` only drops adjacent repeats, so filter on insert.
+                    cats.extend((!cats.contains(&cat)).then_some(cat));
+                }
+                Ok(Self::Only(cats))
+            }
+        }
+    }
+}
+
+impl Display for ExplainAnalyzeCategories {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::All => write!(f, "all"),
+            Self::Only(cats) if cats.is_empty() => write!(f, "none"),
+            Self::Only(cats) => {
+                let mut first = true;
+                for cat in cats {
+                    if !first {
+                        write!(f, ",")?;
+                    }
+                    first = false;
+                    write!(f, "{cat}")?;
+                }
+                Ok(())
+            }
+        }
     }
 }
 
-impl ConfigField for ExplainAnalyzeLevel {
+impl ConfigField for ExplainAnalyzeCategories {
     fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
         v.some(key, self, description)
     }
 
     fn set(&mut self, _: &str, value: &str) -> Result<()> {
-        *self = ExplainAnalyzeLevel::from_str(value)?;
+        *self = ExplainAnalyzeCategories::from_str(value)?;
         Ok(())
     }
 }
diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs
@@ -234,7 +234,8 @@ mod tests {
             let analyze_exec = Arc::new(AnalyzeExec::new(
                 false,
                 false,
-                vec![MetricType::SUMMARY, MetricType::DEV],
+                vec![MetricType::Summary, MetricType::Dev],
+                None,
                 // use a new ParquetSource to avoid sharing execution metrics
                 self.build_parquet_exec(
                     file_group.clone(),

diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
@@ -64,7 +64,7 @@ use arrow_schema::Field;
 use datafusion_catalog::ScanArgs;
 use datafusion_common::Column;
 use datafusion_common::display::ToStringifiedPlan;
-use datafusion_common::format::ExplainAnalyzeLevel;
+use datafusion_common::format::ExplainAnalyzeCategories;
 use datafusion_common::tree_node::{
     Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor,
 };
@@ -99,7 +99,6 @@ use datafusion_physical_optimizer::PhysicalOptimizerRule;
 use datafusion_physical_plan::empty::EmptyExec;
 use datafusion_physical_plan::execution_plan::InvariantLevel;
 use datafusion_physical_plan::joins::PiecewiseMergeJoinExec;
-use datafusion_physical_plan::metrics::MetricType;
 use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
 use datafusion_physical_plan::recursive_query::RecursiveQueryExec;
 use datafusion_physical_plan::unnest::ListUnnest;
@@ -2716,14 +2715,21 @@ impl DefaultPhysicalPlanner {
         let schema = Arc::clone(a.schema.inner());
         let show_statistics = session_state.config_options().explain.show_statistics;
         let analyze_level = session_state.config_options().explain.analyze_level;
-        let metric_types = match analyze_level {
-            ExplainAnalyzeLevel::Summary => vec![MetricType::SUMMARY],
-            ExplainAnalyzeLevel::Dev => vec![MetricType::SUMMARY, MetricType::DEV],
+        let metric_types = analyze_level.included_types();
+        let analyze_categories = session_state
+            .config_options()
+            .explain
+            .analyze_categories
+            .clone();
+        let metric_categories = match analyze_categories {
+            ExplainAnalyzeCategories::All => None,
+            ExplainAnalyzeCategories::Only(cats) => Some(cats),
         };
         Ok(Arc::new(AnalyzeExec::new(
             a.verbose,
             show_statistics,
             metric_types,
+            metric_categories,
             input,
             schema,
         )))