Skip to content
Merged
Show file tree
Hide file tree
Changes from 44 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
4ac80dd
Initial commit
sarakodeiri Nov 3, 2025
3a74ac9
Finalized run_ept_attack for feature extraction.
sarakodeiri Nov 4, 2025
468429e
Merge branch 'main' into sk/ept
sarakodeiri Nov 4, 2025
d1bdd9c
Add pre_process_and_train in feature extraction
sarakodeiri Nov 4, 2025
c9fbb79
First draft of attribute prediction train and test
sarakodeiri Nov 5, 2025
171643f
Add tests
sarakodeiri Nov 6, 2025
c8c0c39
Minor change
sarakodeiri Nov 7, 2025
c764598
mypy fix
sarakodeiri Nov 7, 2025
3c03fb3
Merge branch 'main' into sk/ept
sarakodeiri Nov 7, 2025
0f562be
Resolve applicable coderabbit comments
sarakodeiri Nov 7, 2025
678fadb
Merge branch 'main' into sk/ept
sarakodeiri Nov 7, 2025
84bf255
Merge branch 'main' into classifier
sarakodeiri Nov 7, 2025
f67a58c
First draft
sarakodeiri Nov 10, 2025
045181d
Applied first round of reviews
sarakodeiri Nov 10, 2025
d50b250
Fix test
sarakodeiri Nov 10, 2025
ee2ed11
Merge branch 'main' into sk/ept
sarakodeiri Nov 10, 2025
1430a95
Merge sk/ept to classifier
sarakodeiri Nov 10, 2025
21bf14d
Merged main
sarakodeiri Nov 18, 2025
41e1c46
Fix feature extraction tests
sarakodeiri Nov 18, 2025
3a8edd5
Initial label handling
sarakodeiri Nov 21, 2025
b7a0d47
Merge branch 'main' into sk/ept-classifier
sarakodeiri Dec 17, 2025
a3d433b
Initial classifying process implementation
sarakodeiri Dec 18, 2025
c85923e
Merge branch 'main' into sk/ept-classifier
sarakodeiri Jan 14, 2026
346a982
Full classifier training first draft
sarakodeiri Jan 14, 2026
85f03de
Finalized classification and added tests
sarakodeiri Jan 15, 2026
cbbd129
Remove catbooost_info
sarakodeiri Jan 15, 2026
b2c47a3
Resolve coderabbit comments
sarakodeiri Jan 15, 2026
1d3f018
Fix gitignore
sarakodeiri Jan 15, 2026
2c6442a
Upgrade uv.lock
sarakodeiri Jan 15, 2026
ce534ee
Fix test assertions
sarakodeiri Jan 16, 2026
4a0674b
Second set of test fix
sarakodeiri Jan 16, 2026
c77e490
Ruff fix
sarakodeiri Jan 16, 2026
de38092
Scipy downgrade
sarakodeiri Jan 16, 2026
7e8d310
Ruff fix
sarakodeiri Jan 16, 2026
1205e11
First edits: David's comments
sarakodeiri Jan 19, 2026
e329ab2
Modify EPT classification tests
sarakodeiri Jan 19, 2026
942fcd1
Ruff fix
sarakodeiri Jan 19, 2026
56dc5b5
Second round of David's comments
sarakodeiri Jan 20, 2026
1d50172
Add training final classifier
sarakodeiri Jan 23, 2026
05bfdf2
Minor refactoring
sarakodeiri Jan 23, 2026
72033b9
Start inference
sarakodeiri Jan 24, 2026
f579f23
Add inference and tests
sarakodeiri Jan 24, 2026
4157238
Final modification
sarakodeiri Jan 24, 2026
77d616e
Resolve conflict
sarakodeiri Jan 24, 2026
0e9cbe1
Merge branch 'main' into sk/ept-classifier
emersodb Jan 26, 2026
7675f9a
Resolve coderabbit comments and ruff fix
sarakodeiri Jan 26, 2026
36cd862
Merge branch 'main' into sk/ept-classifier
sarakodeiri Jan 26, 2026
9931f52
Minor change to tabddpm
sarakodeiri Jan 27, 2026
fb82ccc
Resolved David's comments
sarakodeiri Jan 31, 2026
7e08320
Merge branch 'main' into sk/ept-classifier
sarakodeiri Jan 31, 2026
c8bac82
Fix test
sarakodeiri Jan 31, 2026
9635a8b
Second test fix
sarakodeiri Feb 2, 2026
d64ea02
Merge branch 'main' into sk/ept-classifier
emersodb Feb 2, 2026
c42794c
Third test fix
sarakodeiri Feb 2, 2026
a55f5d2
Fourth test fix
sarakodeiri Feb 2, 2026
58fde33
Second round of David's comments
sarakodeiri Feb 2, 2026
af83f44
Clean up
sarakodeiri Feb 3, 2026
cdd0e6a
Pip upgrade
sarakodeiri Feb 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion examples/ept_attack/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ data_paths:
output_data_path: ${base_data_dir}/output # Directory to save processed data and results
data_types_file_path: ${base_data_dir}/data_configs/data_types.json # Path to the JSON file defining column types
attribute_features_path: ${data_paths.output_data_path}/attribute_prediction_features # Path to save attribute prediction features
inference_results_path: ${data_paths.output_data_path}/inference_results # Path to save inference results
Comment thread
emersodb marked this conversation as resolved.
Outdated
# Pipeline control
pipeline:
run_data_processing: false # Whether to run data processing
run_shadow_model_training: false # Whether to run shadow model training
run_feature_extraction: false # Whether to run attribute prediction model training
run_attack_classifier_training: true # Whether to run attack classifier training
run_attack_classifier_training: false # Whether to run attack classifier training
run_inference: true # Whether to run inference on the target model

classifier_settings:
results_output_path: ${data_paths.output_data_path}/evaluation_ML
Expand Down
287 changes: 287 additions & 0 deletions examples/ept_attack/run_ept_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

import itertools
import json
import pickle
from collections import defaultdict
from datetime import datetime
from collections import defaultdict
from datetime import datetime
Comment thread
sarakodeiri marked this conversation as resolved.
Outdated
from logging import INFO
Expand Down Expand Up @@ -269,6 +272,285 @@ def run_attack_classifier_training(config: DictConfig) -> None:
log(INFO, f"Best performing attack configuration:\n{best_result}")


def _summarize_and_save_training_results(
summary_results: dict, output_summary_path: Path, summary_file_name: str
) -> pd.DataFrame:
"""
Processes summary results, saves them to a CSV, and returns the summary DataFrame.

Args:
summary_results: A dictionary containing the summary results.
output_summary_path: The path where the summary CSV will be saved.
summary_file_name: The name of the summary CSV file.

Returns:
A pandas DataFrame containing the summarized results.
"""
processed_results = []
for (classifier, columns_lst), model_scores in summary_results.items():
row: dict[str, str | float] = {"classifier": classifier, "column_types": columns_lst}
for diffusion_model_name, scores in model_scores:
for score_name, score_value in scores.items():
col_name = (
score_name.lower().replace(" ", "_").replace("-", "_").replace("_at_", "_").replace(".0", "")
)
row[f"{diffusion_model_name}_{col_name}"] = score_value
processed_results.append(row)

summary_df = pd.DataFrame(processed_results)
tpr_10_cols = [col for col in summary_df.columns if col.endswith("_tpr_fpr_10")]
if tpr_10_cols:
summary_df["final_tpr_fpr_10"] = summary_df[tpr_10_cols].max(axis=1)

summary_df.to_csv(output_summary_path / summary_file_name, index=False)
log(INFO, f"Saved attack classifier summary to {output_summary_path / summary_file_name}")
return summary_df


def _train_and_save_best_attack_classifier(
    config: DictConfig, best_result: pd.DataFrame, diffusion_model_name: str, model_save_path: Path
) -> None:
    """
    Trains the winning attack-classifier configuration on all available
    training features for a diffusion model and pickles the fitted model.

    Args:
        config: Configuration object set in config.yaml.
        best_result: Single-row DataFrame containing the best attack
            configuration (classifier name and space-joined column types).
        diffusion_model_name: Name of the diffusion model (e.g., 'tabddpm',
            'tabsyn', 'clavaddpm').
        model_save_path: Directory where the trained model will be saved.
    """
    best_classifier_name = best_result["classifier"].iloc[0]
    # Column types were stored as a single space-joined string; recover the list.
    best_column_types_str = best_result["column_types"].iloc[0]
    best_column_types = best_column_types_str.split(" ")

    log(
        INFO,
        f"Training final attack model for {diffusion_model_name} with classifier: {best_classifier_name} and features: {best_column_types}",
    )

    train_features_data_path = (
        Path(config.data_paths.attribute_features_path) / f"{diffusion_model_name}_black_box" / "train"
    )

    # Concatenate all train features and labels for final training
    train_feature_files = train_features_data_path.glob("*.csv")
    df_train_features = pd.concat([pd.read_csv(f) for f in train_feature_files], ignore_index=True)
    train_labels = df_train_features["is_train"]
    df_train_features = df_train_features.drop(columns=["is_train"])

    # Train the final model on everything available; no held-out evaluation here.
    final_model_results = train_attack_classifier(
        classifier_type=ClassifierType(best_classifier_name),
        column_types=best_column_types,
        x_train=df_train_features,
        y_train=train_labels,
        x_test=None,  # No test set, training on all available data
        y_test=None,
    )

    final_model = final_model_results["trained_model"]

    # Distinct name for the file path so the directory parameter is not shadowed.
    model_file_path = Path(model_save_path) / f"{diffusion_model_name}_best_attack_classifier.pkl"

    with open(model_file_path, "wb") as file:
        pickle.dump(final_model, file)

    log(INFO, f"Saved the best attack model to {model_file_path}")

# Step 4: Attack classifier training
def run_attack_classifier_training(config: DictConfig) -> None:
    """
    Trains multiple attack classifiers to distinguish between training and
    non-training data, and selects the best performing configuration based
    on evaluation metrics.

    This function orchestrates the training of various attack classifiers (XGBoost,
    CatBoost, MLP) to perform a membership inference attack. It iterates through
    different diffusion models used to generate synthetic data and all combinations
    of feature types derived from the attribute prediction task.

    The process involves:
        1. Reading pre-computed feature files generated by the feature extraction step.
        2. Splitting the feature files into training and testing sets.
        3. For each diffusion model, iterating through all possible combinations of
           feature columns ('actual', 'error', 'error_ratio', 'accuracy', 'prediction').
        4. Training each classifier type on these feature combinations.
        5. Evaluating the classifier's performance and saving the scores (e.g., AUC,
           TPR at specific FPR) and prediction results for each configuration.
        6. Aggregating all results into a summary CSV file, which includes a final
           metric ('final_tpr_fpr_10') representing the best TPR at 10% FPR across
           all diffusion models for a given classifier and feature set.
        7. Logging the best-performing attack configuration based on this final metric.
        8. Training and saving the best attack classifier using all available training data.

    Args:
        config: Configuration object set in config.yaml.
    """
    log(INFO, "Running attack classifier training.")

    data_format, diffusion_models = (
        ("single_table", ["tabddpm", "tabsyn"])
        if config.attack_settings.single_table
        else ("multi_table", ["clavaddpm"])
    )

    # Read all the files from the attribute prediction features directory
    features_data_path = Path(config.data_paths.attribute_features_path)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # An example of summary_results structure:
    # {
    #     ('XGBoost', 'actual error'): [
    #         ('tabddpm', {'AUC': 0.85, 'TPR at FPR=10%': 0.75, ...}),
    #         ('tabsyn', {'AUC': 0.80, 'TPR at FPR=10%': 0.70, ...}),
    #     ],
    #     ('CatBoost', 'accuracy prediction'): [
    #         ('tabddpm', {'AUC': 0.82, 'TPR at FPR=10%': 0.72, ...}),
    #         ('tabsyn', {'AUC': 0.78, 'TPR at FPR=10%': 0.68, ...}),
    #     ],
    #     ...
    # }

    # TODO: Move this part of code to a separate function (hyper-parameter tuning)
    # TODO: Move some of the code to midst_toolkit.attacks.ept.classification module

    summary_results: dict[tuple[str, str], list[tuple[str, dict[str, float]]]] = defaultdict(list)

    # These are invariant across diffusion models, so set them up once
    # instead of rebuilding (and re-mkdir-ing) them every loop iteration.
    classifier_types = ["XGBoost", "CatBoost", "MLP"]
    column_types = ["actual", "error", "error_ratio", "accuracy", "prediction"]

    output_summary_path = Path(config.classifier_settings.results_output_path) / data_format / f"{timestamp}_train"
    output_summary_path.mkdir(parents=True, exist_ok=True)

    for diffusion_model_name in diffusion_models:
        train_features_path = features_data_path / f"{diffusion_model_name}_black_box" / "train"

        directory_checks(train_features_path, "Make sure to run feature extraction first.")

        sorted_feature_files = sorted(train_features_path.glob("*.csv"))
        # Hold out the last 1/6 of the feature files for evaluation.
        split_index = len(sorted_feature_files) * 5 // 6

        # Concatenate the first 5/6 of the feature files into the training dataframe
        train_feature_files = sorted_feature_files[:split_index]
        df_train_features = pd.concat([pd.read_csv(f) for f in train_feature_files], ignore_index=True)
        train_labels = df_train_features["is_train"]
        df_train_features = df_train_features.drop(columns=["is_train"])

        test_feature_files = sorted_feature_files[split_index:]
        df_test_features = pd.concat([pd.read_csv(f) for f in test_feature_files], ignore_index=True)
        test_labels = df_test_features["is_train"]
        df_test_features = df_test_features.drop(columns=["is_train"])

        for classifier in classifier_types:
            for r in range(1, len(column_types) + 1):
                for selected_column_types_tuple in itertools.combinations(column_types, r):
                    columns_str = " ".join(sorted(selected_column_types_tuple))
                    result_key = (classifier, columns_str)

                    classifier_type = ClassifierType(classifier)

                    results = train_attack_classifier(
                        classifier_type=classifier_type,
                        column_types=list(selected_column_types_tuple),
                        x_train=df_train_features,
                        y_train=train_labels,
                        x_test=df_test_features,
                        y_test=test_labels,
                    )

                    # Store raw scores for the current diffusion model
                    summary_results[result_key].append((diffusion_model_name, results["scores"]))

                    training_directory_name = f"{classifier}_{'_'.join(selected_column_types_tuple)}"
                    training_output_path = output_summary_path / training_directory_name
                    training_output_path.mkdir(parents=True, exist_ok=True)

                    # Save prediction results
                    prediction_results_df = results["prediction_results"]
                    prediction_results_file_name = f"{diffusion_model_name}_prediction_results.csv"
                    save_dataframe(
                        df=pd.DataFrame(prediction_results_df),
                        file_path=training_output_path,
                        file_name=prediction_results_file_name,
                    )

                    # Save scores
                    scores_file_name = f"{diffusion_model_name}_results.txt"
                    with open(training_output_path / scores_file_name, "w") as f:
                        for score_name, score_value in results["scores"].items():
                            f.write(f"{score_name}: {score_value}\n")

    summary_df = _summarize_and_save_training_results(
        summary_results, output_summary_path, "attack_classifier_summary.csv"
    )

    if "final_tpr_fpr_10" not in summary_df.columns:
        # The summarizer only adds this column when *_tpr_fpr_10 scores exist;
        # bail out with a clear message instead of raising KeyError in sort_values.
        log(INFO, "No '*_tpr_fpr_10' scores were produced; skipping best-model selection.")
        return

    summary_df.sort_values(by=["final_tpr_fpr_10"], ascending=False, inplace=True)
    best_result = summary_df.head(1)
    log(INFO, f"Best performing attack configuration:\n{best_result}")

    model_save_path = Path(config.classifier_settings.results_output_path) / data_format
    for diffusion_model_name in diffusion_models:
        _train_and_save_best_attack_classifier(config, best_result, diffusion_model_name, model_save_path)

def run_inference(config: DictConfig) -> None:
    """
    Runs inference using the trained attack classifier on the challenge data.

    For each diffusion model, loads the pickled best attack classifier, reads
    the 'final' feature files produced by the feature extraction step, predicts
    membership for every record, and writes the predictions to a CSV file.

    Args:
        config: Configuration object set in config.yaml.
    """
    log(INFO, "Running inference with the trained attack classifier.")

    data_format, diffusion_models = (
        ("single_table", ["tabddpm", "tabsyn"])
        if config.attack_settings.single_table
        else ("multi_table", ["clavaddpm"])
    )

    for diffusion_model_name in diffusion_models:
        # Load the trained attack classifier
        model_path = (
            Path(config.classifier_settings.results_output_path)
            / data_format
            / f"{diffusion_model_name}_best_attack_classifier.pkl"
        )

        # NOTE: pickle.load is only safe because this file is produced by our own
        # training step; never point this at an untrusted model file.
        with open(model_path, "rb") as file:
            trained_model = pickle.load(file)

        # Load new feature data for inference
        features_data_path = Path(config.data_paths.attribute_features_path)
        inference_features_path = features_data_path / f"{diffusion_model_name}_black_box" / "final"

        directory_checks(inference_features_path, "Make sure to run feature extraction on final data first.")

        challenge_feature_files = inference_features_path.glob("*.csv")

        df_inference_features = pd.concat([pd.read_csv(f) for f in challenge_feature_files], ignore_index=True)

        predictions = trained_model.predict(df_inference_features)

        # Save inference results
        inference_output_path = Path(config.data_paths.inference_results_path)
        inference_output_path.mkdir(parents=True, exist_ok=True)

        inference_results_file_name = f"{diffusion_model_name}_attack_inference_results.csv"

        save_dataframe(
            df=pd.DataFrame({"prediction": predictions}),
            file_path=inference_output_path,
            file_name=inference_results_file_name,
        )

        log(INFO, f"Saved inference results to {inference_output_path / inference_results_file_name}")

    # TODO: Implement evaluation of inference results using the challenge labels
    # _evaluate_inference_results(predictions, diffusion_model_name)

@hydra.main(config_path=".", config_name="config", version_base=None)
def main(config: DictConfig) -> None:
"""
Expand Down Expand Up @@ -298,6 +580,11 @@ def main(config: DictConfig) -> None:
if config.pipeline.run_attack_classifier_training:
run_attack_classifier_training(config)

if config.pipeline.run_inference:
run_inference(config)
if config.pipeline.run_attack_classifier_training:
run_attack_classifier_training(config)
Comment thread
sarakodeiri marked this conversation as resolved.
Outdated


if __name__ == "__main__":
main()
Loading
Loading