Skip to content

Commit 3e39110

Browse files
committed
python all the things
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 42f6c59 commit 3e39110

2 files changed

Lines changed: 25 additions & 29 deletions

File tree

scripts/compare-benchmark-jsons.py

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,9 +63,31 @@ def extract_dataset_key(df):
6363
improvement_threshold = 1.0 - (threshold_pct / 100.0) # e.g., 0.7 for 30%, 0.9 for 10%
6464
regression_threshold = 1.0 + (threshold_pct / 100.0) # e.g., 1.3 for 30%, 1.1 for 10%
6565

66-
# After merge with suffixes, z-score columns become abs_z_score_base and abs_z_score_pr
67-
has_z_base = "abs_z_score_base" in df3.columns
68-
has_z_pr = "abs_z_score_pr" in df3.columns
66+
def compute_abs_z_score(runtimes):
67+
"""Compute |median - mean| / stddev from a list of runtimes."""
68+
if not isinstance(runtimes, list) or len(runtimes) < 2:
69+
return float("nan")
70+
n = len(runtimes)
71+
mean = sum(runtimes) / n
72+
variance = sum((x - mean) ** 2 for x in runtimes) / (n - 1)
73+
stddev = math.sqrt(variance)
74+
if stddev == 0:
75+
return 0.0
76+
sorted_rt = sorted(runtimes)
77+
if n % 2 == 1:
78+
median = sorted_rt[n // 2]
79+
else:
80+
median = (sorted_rt[n // 2 - 1] + sorted_rt[n // 2]) / 2
81+
return abs((median - mean) / stddev)
82+
83+
84+
# Compute |z-score| from all_runtimes when available
85+
has_z_pr = "all_runtimes_pr" in df3.columns
86+
has_z_base = "all_runtimes_base" in df3.columns
87+
if has_z_pr:
88+
df3["abs_z_score_pr"] = df3["all_runtimes_pr"].apply(compute_abs_z_score)
89+
if has_z_base:
90+
df3["abs_z_score_base"] = df3["all_runtimes_base"].apply(compute_abs_z_score)
6991

7092
# Generate summary statistics
7193
df3["ratio"] = df3["value_pr"] / df3["value_base"]

vortex-bench/src/measurements.rs

Lines changed: 0 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -272,27 +272,6 @@ impl QueryMeasurement {
272272
)
273273
}
274274
}
275-
276-
/// Compute |z-score| = |median - mean| / stddev for the runs.
277-
/// Returns `None` if fewer than 2 runs (stddev is undefined).
278-
pub fn abs_z_score(&self) -> Option<f64> {
279-
let n = self.runs.len();
280-
if n < 2 {
281-
return None;
282-
}
283-
284-
let nanos: Vec<f64> = self.runs.iter().map(|d| d.as_nanos() as f64).collect();
285-
let mean = nanos.iter().sum::<f64>() / n as f64;
286-
let variance = nanos.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
287-
let stddev = variance.sqrt();
288-
289-
if stddev == 0.0 {
290-
return Some(0.0);
291-
}
292-
293-
let median = self.median_run().as_nanos() as f64;
294-
Some(((median - mean) / stddev).abs())
295-
}
296275
}
297276

298277
#[derive(Serialize, Deserialize)]
@@ -303,10 +282,6 @@ pub struct QueryMeasurementJson {
303282
pub unit: String,
304283
pub value: u128,
305284
pub all_runtimes: Vec<u128>,
306-
/// Absolute z-score of the median relative to the mean: |median - mean| / stddev.
307-
/// Indicates how representative the reported median is. `None` when fewer than 2 runs.
308-
#[serde(skip_serializing_if = "Option::is_none")]
309-
pub abs_z_score: Option<f64>,
310285
pub target: Target,
311286
pub commit_id: String,
312287
pub env_triple: TripleJson,
@@ -338,7 +313,6 @@ impl ToJson for QueryMeasurement {
338313
unit: "ns".to_string(),
339314
value: self.median_run().as_nanos(),
340315
all_runtimes: self.runs.iter().map(|r| r.as_nanos()).collect_vec(),
341-
abs_z_score: self.abs_z_score(),
342316
commit_id: GIT_COMMIT_ID.to_string(),
343317
target: self.target,
344318
env_triple: TripleJson {

0 commit comments

Comments
 (0)