Skip to content

Commit 27e362f

Browse files
committed
variance instead of z score
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 3d9f692 commit 27e362f

File tree

1 file changed

+8
-17
lines changed

1 file changed

+8
-17
lines changed

scripts/compare-benchmark-jsons.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,31 +64,22 @@ def extract_dataset_key(df):
6464
regression_threshold = 1.0 + (threshold_pct / 100.0) # e.g., 1.3 for 30%, 1.1 for 10%
6565

6666

67-
def compute_abs_z_score(runtimes):
68-
"""Compute |median - mean| / stddev from a list of runtimes."""
67+
def compute_variance(runtimes):
68+
"""Compute sample variance from a list of runtimes."""
6969
if not isinstance(runtimes, list) or len(runtimes) < 2:
7070
return float("nan")
7171
n = len(runtimes)
7272
mean = sum(runtimes) / n
73-
variance = sum((x - mean) ** 2 for x in runtimes) / (n - 1)
74-
stddev = math.sqrt(variance)
75-
if stddev == 0:
76-
return 0.0
77-
sorted_rt = sorted(runtimes)
78-
if n % 2 == 1:
79-
median = sorted_rt[n // 2]
80-
else:
81-
median = (sorted_rt[n // 2 - 1] + sorted_rt[n // 2]) / 2
82-
return abs((median - mean) / stddev)
73+
return sum((x - mean) ** 2 for x in runtimes) / (n - 1)
8374

8475

85-
# Compute |z-score| from all_runtimes when available
76+
# Compute the sample variance of each row's runtimes list when the all_runtimes_* columns are present
8677
has_z_pr = "all_runtimes_pr" in df3.columns
8778
has_z_base = "all_runtimes_base" in df3.columns
8879
if has_z_pr:
89-
df3["abs_z_score_pr"] = df3["all_runtimes_pr"].apply(compute_abs_z_score)
80+
df3["variance_pr"] = df3["all_runtimes_pr"].apply(compute_variance)
9081
if has_z_base:
91-
df3["abs_z_score_base"] = df3["all_runtimes_base"].apply(compute_abs_z_score)
82+
df3["variance_base"] = df3["all_runtimes_base"].apply(compute_variance)
9283

9384
# Generate summary statistics
9485
df3["ratio"] = df3["value_pr"] / df3["value_base"]
@@ -219,9 +210,9 @@ def format_performance(ratio, target_name):
219210
}
220211

221212
if has_z_pr:
222-
table_dict["abs(z-score) PR"] = df3["abs_z_score_pr"]
213+
table_dict["variance PR"] = df3["variance_pr"]
223214
if has_z_base:
224-
table_dict["abs(z-score) base"] = df3["abs_z_score_base"]
215+
table_dict["variance base"] = df3["variance_base"]
225216

226217
table_dict["remark"] = df3["remark"]
227218
table_df = pd.DataFrame(table_dict)

0 commit comments

Comments
 (0)