Fairlearn is an open-source Python toolkit, originally developed at Microsoft, for assessing and improving the fairness of machine learning models. ATHENA integrates with Fairlearn to correlate its fairness metrics with human trust patterns.
Prerequisites
```bash
pip install fairlearn
```
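The examples below report `fairlearn_version="0.10.0"` to ATHENA by default; adjust it to match your installed release.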
Supported Metrics
| Fairlearn Metric | ATHENA Metric Name | Value Range |
| --- | --- | --- |
| `demographic_parity_difference()` | `demographic_parity` | -1 to 1 |
| `demographic_parity_ratio()` | `disparate_impact` | 0 to infinity |
| `equalized_odds_difference()` | `equalized_odds` | -1 to 1 |
| `equalized_odds_ratio()` | `equalized_odds_ratio` | 0 to infinity |
| `false_positive_rate_difference()` | `fpr_difference` | -1 to 1 |
| `false_negative_rate_difference()` | `fnr_difference` | -1 to 1 |
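If you want this mapping available in code, a minimal sketch (the dict name is illustrative, not part of the ATHENA client):

```python
# Fairlearn metric function name -> ATHENA metric name (illustrative only)
FAIRLEARN_TO_ATHENA = {
    "demographic_parity_difference": "demographic_parity",
    "demographic_parity_ratio": "disparate_impact",
    "equalized_odds_difference": "equalized_odds",
    "equalized_odds_ratio": "equalized_odds_ratio",
    "false_positive_rate_difference": "fpr_difference",
    "false_negative_rate_difference": "fnr_difference",
}
```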
Integration Code
Complete Example
```python
from fairlearn.metrics import (
    MetricFrame,
    demographic_parity_difference,
    demographic_parity_ratio,
    equalized_odds_difference,
    false_positive_rate,
    false_negative_rate
)
import requests
from datetime import datetime, timezone
import numpy as np

ATHENA_API_KEY = "sk_live_xxxxx"
ATHENA_API_URL = "https://api.athenatrust.ai/v1"
def analyze_with_fairlearn_and_send_to_athena(
    y_true,
    y_pred,
    sensitive_features,
    model_id: str,
    protected_attribute: str,
    fairlearn_version: str = "0.10.0"
):
    """
    Analyze predictions with Fairlearn and send results to ATHENA.

    Args:
        y_true: Ground truth labels
        y_pred: Model predictions
        sensitive_features: Protected attribute values for each sample
        model_id: Your model identifier
        protected_attribute: Name of the protected attribute (for example, gender)
        fairlearn_version: Fairlearn release reported to ATHENA with each signal
    """
    # Calculate group-level metrics with MetricFrame
    metric_frame = MetricFrame(
        metrics={
            "accuracy": lambda y, p: np.mean(y == p),
            "fpr": false_positive_rate,
            "fnr": false_negative_rate,
        },
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=sensitive_features
    )

    # Calculate fairness metrics
    metrics = {
        "demographic_parity": {
            "value": demographic_parity_difference(y_true, y_pred, sensitive_features=sensitive_features),
            "threshold": 0.1,
            "ideal": 0
        },
        "disparate_impact": {
            "value": demographic_parity_ratio(y_true, y_pred, sensitive_features=sensitive_features),
            "threshold": 0.8,  # the common four-fifths (80%) rule
            "ideal": 1
        },
        "equalized_odds": {
            "value": equalized_odds_difference(y_true, y_pred, sensitive_features=sensitive_features),
            "threshold": 0.1,
            "ideal": 0
        }
    }

    # Get unique groups; treating the first as privileged and the last as
    # unprivileged is a simplification -- adjust to your domain if needed
    groups = np.unique(sensitive_features)
    privileged_group = str(groups[0]) if len(groups) > 0 else "unknown"
    unprivileged_group = str(groups[-1]) if len(groups) > 1 else "unknown"

    results = []
    for metric_name, metric_data in metrics.items():
        value = metric_data["value"]
        threshold = metric_data["threshold"]
        ideal = metric_data["ideal"]

        # Skip metrics Fairlearn could not compute
        if value is None or np.isnan(value):
            continue
        # Cast numpy scalars to native floats so the payload is JSON serializable
        value = float(value)

        # Determine whether the metric passes its threshold
        if metric_name == "disparate_impact":
            passes_threshold = value >= threshold
        else:
            passes_threshold = abs(value - ideal) <= threshold

        # Normalize to the 0-to-1 range ATHENA expects
        normalized = normalize_fairlearn_metric(metric_name, value)

        payload = {
            "externalToolId": "fairlearn",
            "externalToolVersion": fairlearn_version,
            "modelId": model_id,
            "metricName": metric_name,
            "metricValue": normalized,
            "threshold": threshold,
            "passesThreshold": passes_threshold,
            "protectedAttribute": protected_attribute,
            "privilegedGroup": privileged_group,
            "unprivilegedGroup": unprivileged_group,
            "sampleSize": len(y_true),
            "rawPayload": {
                "original_value": value,
                # Cast group labels and values to JSON-safe types
                "metric_frame_by_group": {
                    m: {str(g): float(v) for g, v in col.items()}
                    for m, col in metric_frame.by_group.items()
                },
                "all_metrics": {k: float(v["value"]) for k, v in metrics.items()}
            },
            "signalTimestamp": datetime.now(timezone.utc).isoformat()
        }

        response = requests.post(
            f"{ATHENA_API_URL}/model-fairness-signals",
            headers={
                "Authorization": f"Bearer {ATHENA_API_KEY}",
                "Content-Type": "application/json"
            },
            json=payload
        )

        results.append({
            "metric": metric_name,
            "original_value": value,
            "normalized_value": normalized,
            "passes_threshold": passes_threshold,
            "status": "success" if response.status_code == 201 else "failed",
            "signalId": response.json().get("signalId") if response.status_code == 201 else None
        })

    return results
def normalize_fairlearn_metric(metric_name: str, value: float) -> float:
    """Normalize Fairlearn metrics to the 0-to-1 range."""
    if metric_name == "disparate_impact":
        # Ratio metric: 1 is ideal; values above 2 are clamped
        return min(1.0, max(0.0, value / 2))
    elif metric_name in ["demographic_parity", "equalized_odds"]:
        # Difference metric in [-1, 1]: shift and scale so 0 maps to 0.5
        return (value + 1) / 2
    return min(1.0, max(0.0, abs(value)))
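
# A few sample values under this scheme:
#   normalize_fairlearn_metric("disparate_impact", 1.0)    -> 0.50 (parity)
#   normalize_fairlearn_metric("demographic_parity", 0.0)  -> 0.50 (parity)
#   normalize_fairlearn_metric("equalized_odds", 0.3)      -> 0.65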
# Usage example
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # Create a synthetic dataset with a random sensitive attribute
    X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
    sensitive = np.random.choice(["male", "female"], size=1000)

    # Train a simple model
    X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
        X, y, sensitive, test_size=0.3, random_state=42
    )
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Analyze and send to ATHENA
    results = analyze_with_fairlearn_and_send_to_athena(
        y_true=y_test,
        y_pred=y_pred,
        sensitive_features=s_test,
        model_id="hiring_classifier_v2",
        protected_attribute="gender"
    )
    for r in results:
        print(f"{r['metric']}: {r['original_value']:.4f} -> ATHENA signal {r['signalId']}")
```
Fairlearn Dashboard Integration

Fairlearn provides a dashboard widget (now shipped in the separate raiwidgets package). You can extend it to show ATHENA amplification alerts:

```python
# The dashboard moved out of fairlearn into raiwidgets (pip install raiwidgets);
# fairlearn.widget.FairlearnDashboard is the pre-0.5 import
from raiwidgets import FairnessDashboard

# After sending to ATHENA, fetch amplification alerts
def get_athena_alerts(model_id: str):
    response = requests.get(
        f"{ATHENA_API_URL}/bias/amplification",
        headers={"Authorization": f"Bearer {ATHENA_API_KEY}"},
        params={"modelId": model_id, "status": "open"}
    )
    return response.json().get("alerts", [])

# Display the Fairlearn dashboard
FairnessDashboard(
    sensitive_features=s_test,
    y_true=y_test,
    y_pred=y_pred
)

# Then show ATHENA alerts
alerts = get_athena_alerts("hiring_classifier_v2")
for alert in alerts:
    print(f"ATHENA Alert: {alert['severity']} - {alert['message']}")
```
Using with Mitigation Algorithms

Fairlearn's mitigation algorithms can be combined with ATHENA monitoring. The example below retrains with the ExponentiatedGradient reduction; a ThresholdOptimizer post-processing sketch follows it:

```python
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# Train a mitigated model
mitigator = ExponentiatedGradient(
    estimator=LogisticRegression(),
    constraints=DemographicParity()
)
mitigator.fit(X_train, y_train, sensitive_features=s_train)

# Get predictions from the mitigated model
y_pred_mitigated = mitigator.predict(X_test)

# Send the mitigated model's metrics to ATHENA
results_mitigated = analyze_with_fairlearn_and_send_to_athena(
    y_true=y_test,
    y_pred=y_pred_mitigated,
    sensitive_features=s_test,
    model_id="hiring_classifier_v2_mitigated",
    protected_attribute="gender"
)
# Compare before and after in the ATHENA dashboard
```
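ThresholdOptimizer works the same way as a post-processing step. A minimal sketch, assuming the model and splits from the complete example above:

```python
from fairlearn.postprocessing import ThresholdOptimizer

# Learn group-specific decision thresholds on top of the trained model
postprocessor = ThresholdOptimizer(
    estimator=model,
    constraints="demographic_parity",
    prefit=True,  # reuse the already-fitted LogisticRegression
    predict_method="predict_proba"
)
postprocessor.fit(X_train, y_train, sensitive_features=s_train)
y_pred_post = postprocessor.predict(X_test, sensitive_features=s_test, random_state=42)

# Send to ATHENA the same way as above, e.g. with a hypothetical
# model_id="hiring_classifier_v2_postprocessed"
```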
Azure ML Integration

If using Azure ML, you can automate Fairlearn and ATHENA in your pipeline:

```python
from azureml.core import Run

run = Run.get_context()

# After model training
results = analyze_with_fairlearn_and_send_to_athena(...)

# Log to Azure ML
for r in results:
    run.log(f"fairness_{r['metric']}", r['original_value'])
    run.log(f"athena_signal_{r['metric']}", r['signalId'] or "failed")
```