CoCalc -- auc.py

Path: examples/bayesian-methods-for-hackers / Chapter7_BayesianMachineLearning / auc.py
Views: ³¹⁹³
1
#contributed by Ben Hammer, 2013 
2

3

4
def tied_rank(x):
    """
    Computes the tied rank of elements in x.

    Ranks are 1-based and ascending: the smallest element gets rank 1.
    Elements that compare equal all receive the mean of the ranks they
    would jointly occupy (e.g. two elements tied for ranks 2 and 3 both
    get 2.5).

    Parameters
    ----------
    x : list of numbers, numpy array
        One-dimensional sequence of mutually comparable values. May be
        empty.

    Returns
    -------
    score : list of numbers
            The tied rank of each element in x, as floats, in the same
            order as x. An empty input yields an empty list.

    """
    # Pair every value with its original position so that, after sorting,
    # each rank can be written back to where the value came from.
    sorted_x = sorted(zip(x, range(len(x))))
    n = len(sorted_x)
    r = [0.0] * n

    start = 0
    while start < n:
        # Extend [start, end) over the run of values equal to the run's
        # first value.
        end = start + 1
        while end < n and sorted_x[end][0] == sorted_x[start][0]:
            end += 1

        # Sorted positions start..end-1 correspond to 1-based ranks
        # start+1..end; their mean is (start + 1 + end) / 2.
        rank = (start + 1 + end) / 2.0
        for j in range(start, end):
            r[sorted_x[j][1]] = rank

        start = end
    return r
34

35
def auc(actual, posterior):
    """
    Computes the area under the receiver-operator characteristic (AUC)

    This function computes the AUC error metric for binary classification
    from the ranks of the scores: the rank sum of the positive examples,
    minus the smallest rank sum they could possibly have, divided by the
    number of (positive, negative) pairs.

    Parameters
    ----------
    actual : list of binary numbers, numpy array
             The ground truth value. Entries equal to 1 are counted as
             positive; every other entry is counted as negative.
    posterior : same type as actual
                Scores defining a ranking of the examples. Larger values
                mean "more likely to be positive". Must align
                element-by-element with actual.

    Returns
    -------
    score : double
            The AUC of the ranking given by posterior with respect to
            actual. Tied scores contribute through their averaged ranks.

    Raises
    ------
    ZeroDivisionError
        If actual does not contain at least one positive and at least one
        negative example; the AUC is undefined in that case.

    """
    r = tied_rank(posterior)
    num_positive = sum(1 for label in actual if label == 1)
    num_negative = len(actual) - num_positive

    # Without both classes there are no (positive, negative) pairs to
    # compare. Raise the same exception type the bare division would, but
    # with a message that names the cause.
    if num_positive == 0 or num_negative == 0:
        raise ZeroDivisionError(
            "AUC is undefined unless actual contains both positive "
            "and negative examples")

    sum_positive = sum(rank for i, rank in enumerate(r) if actual[i] == 1)

    # num_positive * (num_positive + 1) / 2 is the minimum attainable rank
    # sum for the positives (all of them ranked below every negative).
    return ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
            (num_negative * num_positive))
62