updated fairness and explainability metrics for classification
telegraphroad committed Jan 29, 2025
1 parent e9a48c8 commit ffef693
Showing 2 changed files with 106 additions and 14 deletions.
77 changes: 66 additions & 11 deletions tests/faivor/metrics/classification/test_explainability.py
@@ -1,19 +1,27 @@
import pytest
import numpy as np

from faivor.metrics.classification.explainability import prediction_entropy
from faivor.metrics.classification.explainability import prediction_entropy, confidence_score, margin_of_confidence

# Sample probability data (reused for all tests)
y_prob = np.array([
[0.1, 0.9], # confident prediction
[0.5, 0.5], # uncertain prediction
[0.9, 0.1], # confident prediction
[0.3, 0.7] # moderately confident prediction
])
y_prob_1d = np.array([0.1, 0.5, 0.9, 0.3]) # 1D probabilities for binary case


def test_prediction_entropy():
y_prob = np.array([
[0.1, 0.9],
[0.5, 0.5],
[0.9, 0.1],
[0.3, 0.7]
])
result = prediction_entropy(y_prob)
assert result is not None, "Prediction entropy returned None"
assert not np.isnan(result), "Prediction entropy should not return NaN for valid input"

result_1d = prediction_entropy(y_prob_1d)
assert result_1d is not None, "Prediction entropy with 1D prob array should not return None"
assert not np.isnan(result_1d), "Prediction entropy with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = prediction_entropy(y_prob_empty)
assert np.isnan(result_empty), "Prediction entropy with empty array should return NaN"
@@ -40,7 +48,54 @@ def test_prediction_entropy():
expected_uniform_entropy = - (0.5 * np.log(0.5) + 0.5 * np.log(0.5)) # entropy for [0.5, 0.5] using natural log
assert np.allclose(result_uniform, expected_uniform_entropy), "Prediction entropy with uniform probabilities should be max entropy"

y_prob_1d = np.array([0.1, 0.5, 0.9, 0.3]) # 1D probabilities, should be treated as prob of class 1
result_1d = prediction_entropy(y_prob_1d)
assert result_1d is not None, "Prediction entropy with 1D prob array should not return None"
assert not np.isnan(result_1d), "Prediction entropy with 1D prob array should not return NaN"

def test_confidence_score():
result = confidence_score(y_prob)
assert result is not None, "Confidence score returned None"
assert not np.isnan(result), "Confidence score should not return NaN for valid input"

result_1d = confidence_score(y_prob_1d)
assert result_1d is not None, "Confidence score with 1D prob array should not return None"
assert not np.isnan(result_1d), "Confidence score with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = confidence_score(y_prob_empty)
assert np.isnan(result_empty), "Confidence score with empty array should return NaN"

y_prob_invalid = np.array([
[0.1, 1.2], # invalid prob
[0.5, 0.5]
])
result_invalid = confidence_score(y_prob_invalid)
assert np.isnan(result_invalid), "Confidence score with invalid probabilities should return NaN"

expected_confidence = np.mean([0.9, 0.5, 0.9, 0.7]) # average of max probabilities
assert np.allclose(result, expected_confidence), "Confidence score calculation incorrect"


def test_margin_of_confidence():
result = margin_of_confidence(y_prob)
assert result is not None, "Margin of confidence returned None"
assert not np.isnan(result), "Margin of confidence should not return NaN for valid input for binary case"

result_1d = margin_of_confidence(y_prob_1d)
assert result_1d is not None, "Margin of confidence with 1D prob array should not return None"
assert not np.isnan(result_1d), "Margin of confidence with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = margin_of_confidence(y_prob_empty)
assert np.isnan(result_empty), "Margin of confidence with empty array should return NaN"

y_prob_invalid = np.array([
[0.1, 1.2], # invalid prob
[0.5, 0.5]
])
result_invalid = margin_of_confidence(y_prob_invalid)
assert np.isnan(result_invalid), "Margin of confidence with invalid probabilities should return NaN"

y_prob_multiclass = np.array([[0.1, 0.2, 0.7], [0.3, 0.3, 0.4]]) # multiclass
result_multiclass = margin_of_confidence(y_prob_multiclass)
assert np.isnan(result_multiclass), "Margin of confidence with multiclass should return NaN"

expected_margin = np.mean([np.abs(0.9 - 0.1), np.abs(0.5 - 0.5), np.abs(0.1 - 0.9), np.abs(0.7 - 0.3)]) # average of margins
assert np.allclose(result, expected_margin), "Margin of confidence calculation incorrect"
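
For reference, a minimal sketch of implementations consistent with the assertions above: natural-log entropy averaged over samples, mean top-class probability, and mean binary margin, with NaN for empty, invalid, or (for the margin) multiclass inputs. The function names mirror the imports in the test; the bodies are illustrative assumptions, not the repository's actual code.

import numpy as np


def prediction_entropy(y_prob):
    # mean Shannon entropy (natural log) of the predicted class distributions
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0:
        return np.nan
    if y_prob.ndim == 1:  # 1D input treated as P(class 1) in a binary problem
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    if np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    eps = 1e-12  # guard against log(0)
    return float(np.mean(-np.sum(y_prob * np.log(y_prob + eps), axis=1)))


def confidence_score(y_prob):
    # mean of the highest class probability per sample
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0 or np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    if y_prob.ndim == 1:
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    return float(np.mean(np.max(y_prob, axis=1)))


def margin_of_confidence(y_prob):
    # mean absolute difference between the two class probabilities (binary only)
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0 or np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    if y_prob.ndim == 1:
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    if y_prob.shape[1] != 2:
        return np.nan  # margin is only defined here for binary classification
    return float(np.mean(np.abs(y_prob[:, 1] - y_prob[:, 0])))
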
43 changes: 40 additions & 3 deletions tests/faivor/metrics/classification/test_fairness.py
@@ -1,13 +1,14 @@
import pytest
import numpy as np

from faivor.metrics.classification.fairness import disparate_impact
from faivor.metrics.classification.fairness import disparate_impact, statistical_parity_difference, equal_opportunity_difference

# sample classification data
# sample classification data (reused for all fairness tests)
y_true_clf = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1])
y_pred_clf = np.array([1, 1, 0, 1, 0, 1, 1, 0, 1, 0])
sensitive_attribute = np.array(['A', 'A', 'B', 'B', 'A', 'B', 'A', 'B', 'A', 'B'])


def test_disparate_impact():
result = disparate_impact(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Disparate impact returned None"
@@ -27,4 +28,40 @@ def test_disparate_impact():

y_pred_all_favorable = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
result_all_favorable = disparate_impact(y_true_clf, y_pred_all_favorable, sensitive_attribute)
assert np.allclose(result_all_favorable, 1.0), "Disparate impact should be 1.0 when all groups have 100% favorable outcome rate"


def test_statistical_parity_difference():
result = statistical_parity_difference(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Statistical parity difference returned None"
assert not np.isnan(result), "Statistical parity difference should not return NaN for valid input"

sensitive_attribute_single_group = np.array(['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
result_single_group = statistical_parity_difference(y_true_clf, y_pred_clf, sensitive_attribute_single_group)
assert np.isnan(result_single_group), "Statistical parity difference with single group should return NaN"

# construct predictions where the two groups' favorable-outcome rates are close
y_pred_same_rate = np.array([1, 0, 1, 0, 0, 1, 1, 0, 0, 1]) # group A rate: 0.4, group B rate: 0.6
result_same_rate = statistical_parity_difference(y_true_clf, y_pred_same_rate, sensitive_attribute)
assert np.allclose(result_same_rate, 0.0, atol=0.25), "Statistical parity difference should be close to 0 when rates are nearly equal"

def test_equal_opportunity_difference():
result = equal_opportunity_difference(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Equal opportunity difference returned None"
assert not np.isnan(result), "Equal opportunity difference should not return NaN for valid input"

sensitive_attribute_single_group = np.array(['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
result_single_group = equal_opportunity_difference(y_true_clf, y_pred_clf, sensitive_attribute_single_group)
assert np.isnan(result_single_group), "Equal opportunity difference with single group should return NaN"

y_pred_equal_opportunity = np.array([1, 0, 1, 1, 1, 1, 0, 0, 1, 1]) # TPR is 1.0 for both groups among actual positives
result_equal_opportunity = equal_opportunity_difference(y_true_clf, y_pred_equal_opportunity, sensitive_attribute)
assert np.allclose(result_equal_opportunity, 0.0), "Equal opportunity difference should be 0 when TPRs are equal"

y_true_no_positives = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
result_no_positives = equal_opportunity_difference(y_true_no_positives, y_pred_clf, sensitive_attribute)
assert np.isnan(result_no_positives), "Equal opportunity difference should return NaN if no true positives in any group"
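
Similarly, a minimal sketch of fairness metrics that would satisfy these tests: per-group favorable-outcome rates for disparate impact and statistical parity, per-group true positive rates for equal opportunity, and NaN when fewer than two groups (or no actual positives) are present. Names follow the test's imports; the bodies, including the max-minus-min sign convention, are assumptions rather than the repository's implementation.

import numpy as np


def _favorable_rates(y_pred, sensitive, favorable=1):
    # favorable-outcome rate per sensitive group
    return {g: float(np.mean(y_pred[sensitive == g] == favorable))
            for g in np.unique(sensitive)}


def disparate_impact(y_true, y_pred, sensitive):
    # ratio of the lowest to the highest favorable-outcome rate across groups
    y_pred, sensitive = np.asarray(y_pred), np.asarray(sensitive)
    rates = list(_favorable_rates(y_pred, sensitive).values())
    if len(rates) < 2 or max(rates) == 0:
        return np.nan
    return min(rates) / max(rates)


def statistical_parity_difference(y_true, y_pred, sensitive):
    # spread between the highest and lowest favorable-outcome rates
    y_pred, sensitive = np.asarray(y_pred), np.asarray(sensitive)
    rates = list(_favorable_rates(y_pred, sensitive).values())
    if len(rates) < 2:
        return np.nan
    return max(rates) - min(rates)


def equal_opportunity_difference(y_true, y_pred, sensitive):
    # spread in true positive rates across groups; NaN if any group has no actual positives
    y_true, y_pred, sensitive = map(np.asarray, (y_true, y_pred, sensitive))
    groups = np.unique(sensitive)
    if len(groups) < 2:
        return np.nan
    tprs = []
    for g in groups:
        positives = (sensitive == g) & (y_true == 1)
        if not positives.any():
            return np.nan
        tprs.append(np.mean(y_pred[positives] == 1))
    return float(max(tprs) - min(tprs))
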
