updated fairness and explainability metrics for classification
telegraphroad committed Jan 29, 2025
1 parent e9a48c8 commit ffef693
Showing 2 changed files with 106 additions and 14 deletions.
77 changes: 66 additions & 11 deletions tests/faivor/metrics/classification/test_explainability.py
@@ -1,19 +1,27 @@
import pytest
import numpy as np

from faivor.metrics.classification.explainability import prediction_entropy
from faivor.metrics.classification.explainability import prediction_entropy, confidence_score, margin_of_confidence

# Sample probability data (reused for all tests)
y_prob = np.array([
[0.1, 0.9], # confident prediction
[0.5, 0.5], # uncertain prediction
[0.9, 0.1], # confident prediction
[0.3, 0.7] # moderately confident prediction
])
y_prob_1d = np.array([0.1, 0.5, 0.9, 0.3]) # 1D probabilities for binary case


def test_prediction_entropy():
y_prob = np.array([
[0.1, 0.9],
[0.5, 0.5],
[0.9, 0.1],
[0.3, 0.7]
])
result = prediction_entropy(y_prob)
assert result is not None, "Prediction entropy returned None"
assert not np.isnan(result), "Prediction entropy should not return NaN for valid input"

result_1d = prediction_entropy(y_prob_1d)
assert result_1d is not None, "Prediction entropy with 1D prob array should not return None"
assert not np.isnan(result_1d), "Prediction entropy with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = prediction_entropy(y_prob_empty)
assert np.isnan(result_empty), "Prediction entropy with empty array should return NaN"
@@ -40,7 +48,54 @@ def test_prediction_entropy():
expected_uniform_entropy = - (0.5 * np.log(0.5) + 0.5 * np.log(0.5)) # entropy for [0.5, 0.5] using natural log
assert np.allclose(result_uniform, expected_uniform_entropy), "Prediction entropy with uniform probabilities should be max entropy"

y_prob_1d = np.array([0.1, 0.5, 0.9, 0.3]) # 1D probabilities, should be treated as prob of class 1
result_1d = prediction_entropy(y_prob_1d)
assert result_1d is not None, "Prediction entropy with 1D prob array should not return None"
assert not np.isnan(result_1d), "Prediction entropy with 1D prob array should not return NaN"

def test_confidence_score():
result = confidence_score(y_prob)
assert result is not None, "Confidence score returned None"
assert not np.isnan(result), "Confidence score should not return NaN for valid input"

result_1d = confidence_score(y_prob_1d)
assert result_1d is not None, "Confidence score with 1D prob array should not return None"
assert not np.isnan(result_1d), "Confidence score with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = confidence_score(y_prob_empty)
assert np.isnan(result_empty), "Confidence score with empty array should return NaN"

y_prob_invalid = np.array([
[0.1, 1.2], # invalid prob
[0.5, 0.5]
])
result_invalid = confidence_score(y_prob_invalid)
assert np.isnan(result_invalid), "Confidence score with invalid probabilities should return NaN"

expected_confidence = np.mean([0.9, 0.5, 0.9, 0.7]) # average of max probabilities
assert np.allclose(result, expected_confidence), "Confidence score calculation incorrect"


def test_margin_of_confidence():
result = margin_of_confidence(y_prob)
assert result is not None, "Margin of confidence returned None"
assert not np.isnan(result), "Margin of confidence should not return NaN for valid input for binary case"

result_1d = margin_of_confidence(y_prob_1d)
assert result_1d is not None, "Margin of confidence with 1D prob array should not return None"
assert not np.isnan(result_1d), "Margin of confidence with 1D prob array should not return NaN"

y_prob_empty = np.array([])
result_empty = margin_of_confidence(y_prob_empty)
assert np.isnan(result_empty), "Margin of confidence with empty array should return NaN"

y_prob_invalid = np.array([
[0.1, 1.2], # invalid prob
[0.5, 0.5]
])
result_invalid = margin_of_confidence(y_prob_invalid)
assert np.isnan(result_invalid), "Margin of confidence with invalid probabilities should return NaN"

y_prob_multiclass = np.array([[0.1, 0.2, 0.7], [0.3, 0.3, 0.4]]) # multiclass
result_multiclass = margin_of_confidence(y_prob_multiclass)
assert np.isnan(result_multiclass), "Margin of confidence with multiclass should return NaN"

expected_margin = np.mean([np.abs(0.9 - 0.1), np.abs(0.5 - 0.5), np.abs(0.1 - 0.9), np.abs(0.7 - 0.3)]) # average of margins
assert np.allclose(result, expected_margin), "Margin of confidence calculation incorrect"
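
For reference, a minimal sketch of implementations consistent with the assertions above: natural-log entropy averaged over samples, mean top-class probability, and mean binary margin, with NaN for empty, invalid, or (for the margin) multiclass inputs. The function names mirror the imports in the test; the bodies are illustrative assumptions, not the repository's actual code.

import numpy as np


def prediction_entropy(y_prob):
    # mean Shannon entropy (natural log) of the predicted class distributions
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0:
        return np.nan
    if y_prob.ndim == 1:  # 1D input treated as P(class 1) in a binary problem
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    if np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    eps = 1e-12  # guard against log(0)
    return float(np.mean(-np.sum(y_prob * np.log(y_prob + eps), axis=1)))


def confidence_score(y_prob):
    # mean of the highest class probability per sample
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0 or np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    if y_prob.ndim == 1:
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    return float(np.mean(np.max(y_prob, axis=1)))


def margin_of_confidence(y_prob):
    # mean absolute difference between the two class probabilities (binary only)
    y_prob = np.asarray(y_prob, dtype=float)
    if y_prob.size == 0 or np.any(y_prob < 0) or np.any(y_prob > 1):
        return np.nan
    if y_prob.ndim == 1:
        y_prob = np.column_stack([1.0 - y_prob, y_prob])
    if y_prob.shape[1] != 2:
        return np.nan  # margin is only defined here for binary classification
    return float(np.mean(np.abs(y_prob[:, 1] - y_prob[:, 0])))
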
43 changes: 40 additions & 3 deletions tests/faivor/metrics/classification/test_fairness.py
@@ -1,13 +1,14 @@
import pytest
import numpy as np

from faivor.metrics.classification.fairness import disparate_impact
from faivor.metrics.classification.fairness import disparate_impact, statistical_parity_difference, equal_opportunity_difference

# sample classification data
# sample classification data (reused for all fairness tests)
y_true_clf = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1])
y_pred_clf = np.array([1, 1, 0, 1, 0, 1, 1, 0, 1, 0])
sensitive_attribute = np.array(['A', 'A', 'B', 'B', 'A', 'B', 'A', 'B', 'A', 'B'])


def test_disparate_impact():
result = disparate_impact(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Disparate impact returned None"
@@ -27,4 +28,40 @@ def test_disparate_impact():

y_pred_all_favorable = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
result_all_favorable = disparate_impact(y_true_clf, y_pred_all_favorable, sensitive_attribute)
assert np.allclose(result_all_favorable, 1.0), "Disparate impact should be 1.0 when all groups have 100% favorable outcome rate"


def test_statistical_parity_difference():
result = statistical_parity_difference(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Statistical parity difference returned None"
assert not np.isnan(result), "Statistical parity difference should not return NaN for valid input"

sensitive_attribute_single_group = np.array(['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
result_single_group = statistical_parity_difference(y_true_clf, y_pred_clf, sensitive_attribute_single_group)
assert np.isnan(result_single_group), "Statistical parity difference with single group should return NaN"

# construct predictions where the two groups' favorable-outcome rates are close
y_pred_same_rate = np.array([1, 0, 1, 0, 0, 1, 1, 0, 0, 1]) # group A rate: 0.4, group B rate: 0.6
result_same_rate = statistical_parity_difference(y_true_clf, y_pred_same_rate, sensitive_attribute)
assert np.allclose(result_same_rate, 0.0, atol=0.25), "Statistical parity difference should be close to 0 when rates are nearly equal"

def test_equal_opportunity_difference():
result = equal_opportunity_difference(y_true_clf, y_pred_clf, sensitive_attribute)
assert result is not None, "Equal opportunity difference returned None"
assert not np.isnan(result), "Equal opportunity difference should not return NaN for valid input"

sensitive_attribute_single_group = np.array(['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
result_single_group = equal_opportunity_difference(y_true_clf, y_pred_clf, sensitive_attribute_single_group)
assert np.isnan(result_single_group), "Equal opportunity difference with single group should return NaN"

y_pred_equal_opportunity = np.array([1, 0, 1, 1, 1, 1, 0, 0, 1, 1]) # TPR is 1.0 for both groups among actual positives
result_equal_opportunity = equal_opportunity_difference(y_true_clf, y_pred_equal_opportunity, sensitive_attribute)
assert np.allclose(result_equal_opportunity, 0.0), "Equal opportunity difference should be 0 when TPRs are equal"

y_true_no_positives = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
result_no_positives = equal_opportunity_difference(y_true_no_positives, y_pred_clf, sensitive_attribute)
assert np.isnan(result_no_positives), "Equal opportunity difference should return NaN if no true positives in any group"
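
Similarly, a minimal sketch of fairness metrics that would satisfy these tests: per-group favorable-outcome rates for disparate impact and statistical parity, per-group true positive rates for equal opportunity, and NaN when fewer than two groups (or no actual positives) are present. Names follow the test's imports; the bodies, including the max-minus-min sign convention, are assumptions rather than the repository's implementation.

import numpy as np


def _favorable_rates(y_pred, sensitive, favorable=1):
    # favorable-outcome rate per sensitive group
    return {g: float(np.mean(y_pred[sensitive == g] == favorable))
            for g in np.unique(sensitive)}


def disparate_impact(y_true, y_pred, sensitive):
    # ratio of the lowest to the highest favorable-outcome rate across groups
    y_pred, sensitive = np.asarray(y_pred), np.asarray(sensitive)
    rates = list(_favorable_rates(y_pred, sensitive).values())
    if len(rates) < 2 or max(rates) == 0:
        return np.nan
    return min(rates) / max(rates)


def statistical_parity_difference(y_true, y_pred, sensitive):
    # spread between the highest and lowest favorable-outcome rates
    y_pred, sensitive = np.asarray(y_pred), np.asarray(sensitive)
    rates = list(_favorable_rates(y_pred, sensitive).values())
    if len(rates) < 2:
        return np.nan
    return max(rates) - min(rates)


def equal_opportunity_difference(y_true, y_pred, sensitive):
    # spread in true positive rates across groups; NaN if any group has no actual positives
    y_true, y_pred, sensitive = map(np.asarray, (y_true, y_pred, sensitive))
    groups = np.unique(sensitive)
    if len(groups) < 2:
        return np.nan
    tprs = []
    for g in groups:
        positives = (sensitive == g) & (y_true == 1)
        if not positives.any():
            return np.nan
        tprs.append(np.mean(y_pred[positives] == 1))
    return float(max(tprs) - min(tprs))
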
