AWS SageMaker Clarify

Tool ID: aws_clarify | Latest Tested Version: SageMaker SDK 2.200+ | Documentation: SageMaker Clarify Docs

Overview

Amazon SageMaker Clarify helps detect bias in ML models and data by providing pre-training and post-training bias metrics. ATHENA integrates with Clarify to correlate statistical bias with human trust patterns.

Prerequisites

pip install sagemaker boto3 requests

Ensure you have AWS credentials configured with SageMaker access.

Supported Metrics

| Clarify Metric | ATHENA Metric Name | Stage |
| --- | --- | --- |
| Class Imbalance (CI) | class_imbalance | Pre-training |
| Difference in Proportions of Labels (DPL) | dpl | Pre-training |
| Demographic Parity (DPPL) | demographic_parity | Post-training |
| Disparate Impact (DI) | disparate_impact | Post-training |
| Difference in Conditional Acceptance (DCAcc) | conditional_acceptance | Post-training |
| Treatment Equality (TE) | treatment_equality | Post-training |
| Flip Test (FT) | flip_rate | Post-training |

Integration Code

Using Clarify Processing Job Output

import boto3
import json
import requests
from datetime import datetime, timezone

# Placeholder values for this example. Do not commit a real API key to source
# control; load it from a secret store or the environment instead.
ATHENA_API_KEY = "sk_live_xxxxx"
ATHENA_API_URL = "https://api.athenatrust.ai/v1"

# (section name inside analysis.json, stage label forwarded to ATHENA)
_CLARIFY_STAGES = (
    ("pre_training_bias_metrics", "pre_training"),
    ("post_training_bias_metrics", "post_training"),
)


def _facet_entries(section, protected_attribute):
    """Return the facet entries of *section* that belong to *protected_attribute*."""
    facets = section.get("facets") or []
    if isinstance(facets, dict):
        # Facets keyed by facet name, each mapping to a list of entries.
        return list(facets.get(protected_attribute) or [])
    # Flat list of entries that each carry their own "name_or_index".
    return [
        entry for entry in facets
        if entry.get("name_or_index") == protected_attribute
    ]


def send_clarify_results_to_athena(
    clarify_output_s3_uri: str,
    model_id: str,
    protected_attribute: str
):
    """
    Parse Clarify processing job output and send to ATHENA.

    Downloads ``analysis.json`` from the given S3 location, walks the
    pre-training and post-training bias sections, and forwards every metric
    of the requested facet through ``send_metric_to_athena``.

    Args:
        clarify_output_s3_uri: S3 URI of Clarify analysis output (the prefix
            that contains ``analysis.json``; a trailing slash is tolerated)
        model_id: Your model identifier
        protected_attribute: Name of the facet (for example, gender)

    Returns:
        A list with one result dict per metric that was sent, in the order
        pre-training metrics first, then post-training metrics.
    """

    # Download Clarify output from S3
    s3 = boto3.client("s3")
    bucket, key = parse_s3_uri(clarify_output_s3_uri)

    # Avoid "prefix//analysis.json" (trailing slash) and "/analysis.json"
    # (bucket-only URI), neither of which names the intended object.
    prefix = key.rstrip("/")
    analysis_key = f"{prefix}/analysis.json" if prefix else "analysis.json"

    response = s3.get_object(Bucket=bucket, Key=analysis_key)
    analysis = json.loads(response["Body"].read().decode("utf-8"))

    results = []

    for section_name, stage in _CLARIFY_STAGES:
        section = analysis.get(section_name)
        if not section:
            continue

        for facet in _facet_entries(section, protected_attribute):
            privileged_group = str(facet.get("value_or_threshold", "1"))

            for metric in facet.get("metrics", []):
                value = metric.get("value")
                if value is None:
                    # A metric without a numeric value cannot be normalized
                    # or compared to a threshold, so it is skipped.
                    continue

                results.append(
                    send_metric_to_athena(
                        metric_name=clarify_to_athena_metric(metric["name"]),
                        metric_value=normalize_clarify_metric(metric["name"], value),
                        original_value=value,
                        description=metric.get("description", ""),
                        model_id=model_id,
                        protected_attribute=protected_attribute,
                        privileged_group=privileged_group,
                        stage=stage
                    )
                )

    return results


def send_metric_to_athena(
    metric_name: str,
    metric_value: float,
    original_value: float,
    description: str,
    model_id: str,
    protected_attribute: str,
    privileged_group: str,
    stage: str
):
    """Send a single Clarify metric to ATHENA.

    Args:
        metric_name: ATHENA metric name (for example, "disparate_impact").
        metric_value: Normalized metric value reported as ``metricValue``.
        original_value: Raw value as produced by Clarify; used for the
            threshold check and echoed in ``rawPayload``.
        description: Human-readable metric description.
        model_id: Your model identifier.
        protected_attribute: Name of the facet the metric was computed for.
        privileged_group: Facet value or threshold of the reference group.
        stage: "pre_training" or "post_training".

    Returns:
        A dict with keys ``metric``, ``stage``, ``original_value``,
        ``status`` ("success" on HTTP 201, otherwise "failed") and
        ``signalId`` (``None`` unless the request succeeded and the response
        body carried one).

    Raises:
        requests.RequestException: On connection errors or when the request
            exceeds the timeout.
    """

    # Define thresholds based on Clarify recommendations
    thresholds = {
        "demographic_parity": 0.1,
        "disparate_impact": 0.8,
        "conditional_acceptance": 0.1,
        "treatment_equality": 0.1,
        "flip_rate": 0.1,
        "class_imbalance": 0.1,
        "dpl": 0.1
    }

    threshold = thresholds.get(metric_name, 0.1)

    # The thresholds are expressed on Clarify's raw scale, so they must be
    # checked against the raw value. On the normalized scale an unbiased
    # result sits at 0.5 and would fail every check.
    if metric_name == "disparate_impact":
        passes_threshold = original_value >= threshold
    else:
        passes_threshold = abs(original_value) <= threshold

    payload = {
        "externalToolId": "aws_clarify",
        "modelId": model_id,
        "metricName": metric_name,
        "metricValue": metric_value,
        "threshold": threshold,
        "passesThreshold": passes_threshold,
        "protectedAttribute": protected_attribute,
        "privilegedGroup": privileged_group,
        "rawPayload": {
            "original_value": original_value,
            "description": description,
            "stage": stage,
            "source": "sagemaker_clarify"
        },
        "signalTimestamp": datetime.now(timezone.utc).isoformat()
    }

    response = requests.post(
        f"{ATHENA_API_URL}/model-fairness-signals",
        headers={
            "Authorization": f"Bearer {ATHENA_API_KEY}",
            "Content-Type": "application/json"
        },
        json=payload,
        # Without a timeout a stalled connection would block indefinitely.
        timeout=30
    )

    created = response.status_code == 201

    signal_id = None
    if created:
        try:
            signal_id = response.json().get("signalId")
        except ValueError:
            # 201 with a body that is not valid JSON: the signal was
            # accepted, only its identifier is unavailable.
            signal_id = None

    return {
        "metric": metric_name,
        "stage": stage,
        "original_value": original_value,
        "status": "success" if created else "failed",
        "signalId": signal_id
    }


def clarify_to_athena_metric(clarify_name: str) -> str:
    """Translate a Clarify metric abbreviation into its ATHENA metric name.

    Abbreviations without a known counterpart are reported as "custom".
    The lookup is case-sensitive.
    """
    athena_names = dict(
        CI="class_imbalance",
        DPL="dpl",
        DPPL="demographic_parity",
        DI="disparate_impact",
        DCAcc="conditional_acceptance",
        TE="treatment_equality",
        FT="flip_rate",
        AD="accuracy_difference",
        CDDPL="conditional_demographic_disparity",
    )
    try:
        return athena_names[clarify_name]
    except KeyError:
        return "custom"


def normalize_clarify_metric(name: str, value: float) -> float:
    """Normalize Clarify metrics to 0 to 1 range.

    Args:
        name: Clarify metric abbreviation (for example, "DI" or "DPL").
        value: Raw metric value as reported by Clarify.

    Returns:
        The value rescaled and clamped to [0.0, 1.0]. "DI" is halved; every
        other metric is mapped linearly from [-1, 1] onto [0, 1].
    """
    if name == "DI":  # Disparate Impact
        scaled = value / 2
    else:
        # Most Clarify metrics are in negative 1 to 1 range
        scaled = (value + 1) / 2

    # Clamp in both branches: metrics that are not bounded to [-1, 1] would
    # otherwise escape the documented 0 to 1 range.
    return min(1.0, max(0.0, scaled))


def parse_s3_uri(uri: str):
    """Parse S3 URI into bucket and key.

    Args:
        uri: A URI such as ``s3://bucket/path/to/object``. A value without
            the ``s3://`` scheme is split the same way.

    Returns:
        A ``(bucket, key)`` tuple; ``key`` is an empty string when the URI
        names only a bucket.
    """
    scheme = "s3://"
    # Strip the scheme only where it is a prefix, so a key that happens to
    # contain "s3://" is left intact.
    remainder = uri[len(scheme):] if uri.startswith(scheme) else uri
    bucket, _, key = remainder.partition("/")
    return bucket, key

Running Clarify as Part of SageMaker Pipeline

from sagemaker.clarify import (
    SageMakerClarifyProcessor,
    BiasConfig,
    ModelConfig,
    DataConfig
)

def run_clarify_and_send_to_athena(
    role: str,
    model_name: str,
    input_data_s3_uri: str,
    output_s3_uri: str,
    model_id: str,
    protected_attribute: str = "gender"
):
    """Run Clarify analysis and automatically send results to ATHENA.

    Args:
        role: IAM role passed to the Clarify processor.
        model_name: Name of the SageMaker model to analyze.
        input_data_s3_uri: S3 URI of the input dataset (CSV).
        output_s3_uri: S3 URI where Clarify writes its analysis output.
        model_id: Your model identifier, forwarded to ATHENA.
        protected_attribute: Facet column to analyze. It must be one of the
            dataset headers configured below. Defaults to "gender".

    Returns:
        The list of per-metric results returned by
        ``send_clarify_results_to_athena``.
    """
    # The surrounding snippet imports names from sagemaker.clarify only,
    # which does not bind the top-level ``sagemaker`` name used below.
    import sagemaker

    clarify_processor = SageMakerClarifyProcessor(
        role=role,
        instance_count=1,
        instance_type="ml.m5.xlarge",
        sagemaker_session=sagemaker.Session()
    )

    bias_config = BiasConfig(
        label_values_or_threshold=[1],
        facet_name=protected_attribute,
        facet_values_or_threshold=[1]
    )

    model_config = ModelConfig(
        model_name=model_name,
        instance_type="ml.m5.xlarge",
        instance_count=1
    )

    # Example schema: adjust label and headers to match your dataset.
    data_config = DataConfig(
        s3_data_input_path=input_data_s3_uri,
        s3_output_path=output_s3_uri,
        label="target",
        headers=["feature1", "feature2", "gender", "target"],
        dataset_type="text/csv"
    )

    # Run Clarify
    clarify_processor.run_bias(
        data_config=data_config,
        bias_config=bias_config,
        model_config=model_config,
        pre_training_methods="all",
        post_training_methods="all"
    )

    # Send results to ATHENA, using the same facet the analysis was run for
    results = send_clarify_results_to_athena(
        clarify_output_s3_uri=output_s3_uri,
        model_id=model_id,
        protected_attribute=protected_attribute
    )

    return results

Lambda Trigger for Automated Integration

Deploy a Lambda function that triggers when Clarify outputs to S3:

# lambda_function.py
import json
import boto3
import requests

ATHENA_API_KEY_SECRET_NAME = "athena/api_key"

def lambda_handler(event, context):
    """
    Triggered when Clarify output is written to S3.
    Sends results to ATHENA automatically.

    Only the first record of the event is processed. Objects whose key does
    not end in "analysis.json" are acknowledged and ignored.

    Returns:
        A dict with "statusCode" and "body". 200 with a JSON summary when
        results were sent, 200 with a short message when the object is not
        a Clarify output file, and 400 when the key is too shallow to
        contain a model ID segment.
    """
    from urllib.parse import unquote_plus

    # Get S3 event details
    s3_record = event["Records"][0]["s3"]
    bucket = s3_record["bucket"]["name"]
    # Object keys in S3 event notifications are URL-encoded.
    key = unquote_plus(s3_record["object"]["key"])

    # Only process analysis.json files
    if not key.endswith("analysis.json"):
        return {"statusCode": 200, "body": "Not a Clarify output file"}

    # Extract model ID from path (customize based on your naming convention)
    # Example: clarify_outputs/model_v1/analysis.json
    segments = key.split("/")
    if len(segments) < 3:
        # With fewer segments, index 1 is missing or is the file name itself.
        return {
            "statusCode": 400,
            "body": f"Cannot derive model ID from key: {key}"
        }
    model_id = segments[1]

    # Get ATHENA API key from Secrets Manager (only for objects that will
    # actually be processed).
    secrets = boto3.client("secretsmanager")
    api_key = secrets.get_secret_value(SecretId=ATHENA_API_KEY_SECRET_NAME)["SecretString"]
    # NOTE(review): api_key is not passed to send_clarify_results_to_athena,
    # whose signature has no parameter for it. Confirm how the sender is
    # meant to receive the key and wire it in.

    # Send to ATHENA
    s3_uri = f"s3://{bucket}/{'/'.join(segments[:-1])}"
    results = send_clarify_results_to_athena(
        clarify_output_s3_uri=s3_uri,
        model_id=model_id,
        protected_attribute="gender"  # Customize as needed
    )

    return {
        "statusCode": 200,
        "body": json.dumps({"processed": len(results), "results": results})
    }

CloudWatch Integration

Set up alerts when ATHENA detects amplification:

import boto3

def create_athena_amplification_alarm(
    model_id: str,
    sns_topic_arn: str
):
    """Register a CloudWatch alarm for high severity ATHENA amplification alerts.

    The alarm watches the "AmplificationAlerts" metric in the
    "ATHENA/Fairness" namespace for the given model and notifies the given
    SNS topic when the hourly sum reaches 1 or more.

    Args:
        model_id: Model identifier used in the alarm name and dimensions.
        sns_topic_arn: ARN of the SNS topic set as the alarm action.
    """
    alarm_dimensions = [
        {"Name": "ModelId", "Value": model_id},
        {"Name": "Severity", "Value": "high"},
    ]

    alarm_settings = {
        "AlarmName": f"ATHENA_BiasAmplification_{model_id}",
        "MetricName": "AmplificationAlerts",
        "Namespace": "ATHENA/Fairness",
        "Dimensions": alarm_dimensions,
        "Statistic": "Sum",
        "Period": 3600,  # one evaluation window = 1 hour
        "EvaluationPeriods": 1,
        "Threshold": 1,
        "ComparisonOperator": "GreaterThanOrEqualToThreshold",
        "AlarmActions": [sns_topic_arn],
        "AlarmDescription": f"ATHENA detected high severity bias amplification for model {model_id}",
    }

    boto3.client("cloudwatch").put_metric_alarm(**alarm_settings)

Next Steps

Previous: Microsoft Fairlearn | Next: Google Vertex AI

Last updated 1 month ago

Good morning

# Overview

# Prerequisites

# Supported Metrics

# Integration Code

# Using Clarify Processing Job Output

# Running Clarify as Part of SageMaker Pipeline

# Lambda Trigger for Automated Integration

# CloudWatch Integration

# Next Steps

Overview

Prerequisites

Supported Metrics

Integration Code

Using Clarify Processing Job Output

Running Clarify as Part of SageMaker Pipeline

Lambda Trigger for Automated Integration

CloudWatch Integration

Next Steps