# CoCalc -- other
#
# Path: examples/bayesian-methods-for-hackers / Chapter6_Priorities / other_strats.py
# Views: 3195
1
#other strats.
2
# TODO: UCB strat, epsilon-greedy
3

4
import scipy.stats as stats
5
import numpy as np
6

7
# Alias for NumPy's uniform random sampler; Bandits.pull compares one draw
# against the chosen arm's probability.
rand = np.random.rand
8
# Alias for SciPy's Beta distribution; ucb_bayes calls its ppf (quantile) method.
beta = stats.beta
9

10

11
class GeneralBanditStrat(object):
    """
    Implements an online learning strategy to solve
    the Multi-Armed Bandit problem.

    parameters:
        bandits: a Bandit class with a .pull method; len(bandits) gives the
                 number of arms
        choice_function: accepts a self argument (which gives access to all
                         the variables), and returns an int between 0 and n-1

    methods:
        sample_bandits(n): sample and train on n pulls.

    attributes:
        N: the cumulative number of samples
        wins: the accumulated pull results per arm, as a (n_bandits,) array
        trials: the number of pulls per arm, as a (n_bandits,) array
        choices: the historical choices as a (N,) integer array
        score: the historical score as a (N,) array
    """

    def __init__(self, bandits, choice_function):
        self.bandits = bandits
        n_bandits = len(self.bandits)
        self.wins = np.zeros(n_bandits)
        self.trials = np.zeros(n_bandits)
        self.N = 0
        # Histories start as empty arrays so that the attributes have the same
        # type before and after the first call to sample_bandits. Choices are
        # arm indices, hence the integer dtype.
        self.choices = np.zeros(0, dtype=int)
        self.score = np.zeros(0)
        self.choice_function = choice_function

    def sample_bandits(self, n=1):
        """
        Pull n arms, one at a time, updating the statistics after each pull.

        Each arm is selected by calling choice_function(self), so the choice
        sees the wins/trials/N produced by all earlier pulls. The per-pull
        results and choices are appended to self.score and self.choices.
        Returns None.
        """
        score = np.zeros(n)
        choices = np.zeros(n, dtype=int)

        for k in range(n):
            # ask the strategy which arm to pull next
            choice = self.choice_function(self)

            # sample the chosen bandit
            result = self.bandits.pull(choice)

            # update the per-arm statistics and the history of this batch
            self.wins[choice] += result
            self.trials[choice] += 1
            score[k] = result
            self.N += 1
            choices[k] = choice

        self.score = np.r_[self.score, score]
        self.choices = np.r_[self.choices, choices]
        return
64
        
65
	
66
def bayesian_bandit_choice(self):
    """
    Pick the bandit whose random Beta draw is the largest.

    One value is drawn per bandit from Beta(1 + wins, 1 + trials - wins),
    and the index of the largest draw is returned.
    """
    return np.argmax(np.random.beta(1 + self.wins, 1 + self.trials - self.wins))
68
    
69
def max_mean(self):
    """
    Pick the bandit with the current best observed proportion of winning.

    The proportion is computed as wins / (trials + 1); the extra 1 in the
    denominator keeps the ratio defined for bandits that have not been
    pulled yet.
    """
    return np.argmax(self.wins / (self.trials + 1))
72

73
def lower_credible_choice(self):
    """
    Pick the bandit with the best LOWER BOUND. See chapter 5.

    For each bandit the bound is the mean of Beta(a, b) minus 1.65 times
    its standard deviation, with a = wins + 1 and b = trials - wins + 1.
    """
    def lb(a, b):
        # Beta(a, b) mean minus 1.65 standard deviations
        return a / (a + b) - 1.65 * np.sqrt((a * b) / ((a + b) ** 2 * (a + b + 1)))

    a = self.wins + 1
    b = self.trials - self.wins + 1
    return np.argmax(lb(a, b))
80
    
81
def upper_credible_choice(self):
    """
    Pick the bandit with the best UPPER BOUND.

    For each bandit the bound is the mean of Beta(a, b) plus 1.65 times
    its standard deviation, with a = wins + 1 and b = trials - wins + 1.
    """
    def ub(a, b):
        # Beta(a, b) mean plus 1.65 standard deviations
        return a / (a + b) + 1.65 * np.sqrt((a * b) / ((a + b) ** 2 * (a + b + 1)))

    a = self.wins + 1
    b = self.trials - self.wins + 1
    return np.argmax(ub(a, b))
88
    
89
def random_choice(self):
    """Pick a bandit index uniformly at random from 0 to len(self.wins) - 1."""
    return np.random.randint(0, len(self.wins))
91
    
92
    
93
def ucb_bayes(self):
    """
    Pick the bandit with the largest upper quantile of its Beta distribution.

    The quantile level is 1 - 1/(N + 1), so it moves towards 1 as the
    cumulative number of samples N grows. Each bandit's quantile is taken
    from Beta(1 + wins, 1 + trials - wins).
    """
    alpha = 1 - 1. / (self.N + 1)
    return np.argmax(beta.ppf(alpha,
                              1 + self.wins,
                              1 + self.trials - self.wins))
100
							   
101
	
102
	
103
	
104
class Bandits(object):
    """
    This class represents N bandit machines.

    parameters:
        p_array: a (n,) Numpy array of probabilities >0, <1.

    methods:
        pull( i ): return the result of pulling the ith bandit, as a
                   boolean: True (counts as 1) for a win, False (counts
                   as 0) otherwise.

    attributes:
        p: the probabilities given as p_array
        optimal: the index of the largest entry of p_array
    """

    def __init__(self, p_array):
        self.p = p_array
        self.optimal = np.argmax(p_array)

    def pull(self, i):
        """Pull arm i: True when a fresh random draw falls below p[i]."""
        # i is which arm to pull
        return rand() < self.p[i]

    def __len__(self):
        """Return the number of bandits."""
        return len(self.p)
125

126