CoCalc -- DarkWorldsMetric.py

Path: examples/bayesian-methods-for-hackers / Chapter5_LossFunctions / DarkWorldsMetric.py
Views: ³¹⁹⁴
1
""" DarkWorldsMetricMountianOsteric.py
2
Custom evaluation metric for the 'Observing Dark Worlds' competition.
3

4
[Description of metric, or reference to documentation.]
5

6
Update: Made for the training set only so users can check there results from the training c
7

8
@Author: David Harvey
9
Created: 22 August 2012
10
"""
11

12
import numpy as np
13
import math as mt
14
import itertools as it
15
import csv as c
16
import getopt as gt
17
import sys as sys
18
import argparse as ap
19
import string as st
20
import random as rd
21

22
def calc_delta_r(x_predicted,y_predicted,x_true,y_true): 
23
    """ Compute the scalar distance between predicted halo centers
24
    and the true halo centers. Predictions are matched to the closest
25
    halo center.
26
    Notes: It takes in the predicted and true positions, and then loops over each possible configuration and finds the most optimal one.
27
    Arguments:
28
        x_predicted, y_predicted: vector for predicted x- and y-positions (1 to 3 elements)
29
        x_true, y_true: vector for known x- and y-positions (1 to 3 elements)
30
    Returns:
31
        radial_distance: vector containing the scalar distances between the predicted halo centres and the true halo centres (1 to 3 elements)
32
        true_halo_indexes: vector containing indexes of the input true halos which matches the predicted halo indexes (1 to 3 elements)
33
        measured_halo_indexes: vector containing indexes of the predicted halo position with the  reference to the true halo position.
34
       e.g if true_halo_indexes=[0,1] and measured_halo_indexes=[1,0] then the first x,y coordinates of the true halo position matches the second input of the predicted x,y coordinates.
35
    """
36
    
37
    num_halos=len(x_true) #Only works for number of halos > 1
38
    num_configurations=mt.factorial(num_halos) #The number of possible different comb
39
    configurations=np.zeros([num_halos,num_configurations],int) #The array of combinations
40
                                                                #I will pass back
41
    distances = np.zeros([num_configurations],float) #The array of the distances
42
                                                     #for all possible combinations
43
    
44
    radial_distance=[]  #The vector of distances
45
                        #I will pass back
46
    
47
    #Pick a combination of true and predicted 
48
    a=['01','012'] #Input for the permutations, 01 number halos or 012
49
    count=0 #For the index of the distances array
50
    true_halo_indexes=[] #The tuples which will show the order of halos picked
51
    predicted_halo_indexes=[]
52
    distances_perm=np.zeros([num_configurations,num_halos],float) #The distance between each
53
                                                                  #true and predicted
54
                                                                  #halo for every comb
55
    true_halo_indexes_perm=[] #log of all the permutations of true halos used
56
    predicted_halo_indexes_perm=[] #log of all the predicted permutations
57
    
58
    for  perm in it.permutations(a[num_halos-2],num_halos):
59
        which_true_halos=[]
60
        which_predicted_halos=[]
61
        for j in range(num_halos): #loop through all the true halos with the
62

63
            distances_perm[count,j]=np.sqrt((x_true[j]-x_predicted[int(perm[j])])**2\
64
                                      +(y_true[j]-y_predicted[int(perm[j])])**2)
65
                                      #This array logs the distance between true and
66
                                      #predicted halo for ALL configurations
67
                                      
68
            which_true_halos.append(j) #log the order in which I try each true halo
69
            which_predicted_halos.append(int(perm[j])) #log the order in which I true
70
                                                       #each predicted halo
71
        true_halo_indexes_perm.append(which_true_halos) #this is a tuple of tuples of
72
                                                        #all of thifferent config
73
                                                        #true halo indexes
74
        predicted_halo_indexes_perm.append(which_predicted_halos)
75
        
76
        distances[count]=sum(distances_perm[count,0::]) #Find what the total distances
77
                                                        #are for each configuration
78
        count=count+1
79

80
    config = np.where(distances == min(distances))[0][0] #The configuration used is the one
81
                                                         #which has the smallest distance
82
    radial_distance.append(distances_perm[config,0::]) #Find the tuple of distances that
83
                                                       #correspond to this smallest distance
84
    true_halo_indexes=true_halo_indexes_perm[config] #Find the tuple of the index which refers
85
                                                     #to the smallest distance
86
    predicted_halo_indexes=predicted_halo_indexes_perm[config]
87
            
88
    return radial_distance,true_halo_indexes,predicted_halo_indexes
89

90

91
def calc_theta(x_predicted, y_predicted, x_true, y_true, x_ref, y_ref):
92
    """ Calculate the angle the predicted position and the true position, where the zero degree corresponds to the line joing the true halo position and the reference point given.
93
    Arguments:
94
        x_predicted, y_predicted: vector for predicted x- and y-positions (1 to 3 elements)
95
        x_true, y_true: vector for known x- and y-positions (1 to 3 elements)
96
        Note that the input of these are matched up so that the first elements of each
97
        vector are associated with one another
98
        x_ref, y_ref: scalars of the x,y coordinate of reference point
99
    Returns:
100
        Theta: A vector containing the angles of the predicted halo w.r.t the true halo
101
        with the vector joining the reference point and the halo as the zero line. 
102
    """
103

104
    num_halos=len(x_predicted)
105
    theta=np.zeros([num_halos+1],float) #Set up the array which will pass back the values
106
    phi = np.zeros([num_halos],float)
107
    
108
    psi = np.arctan( (y_true-y_ref)/(x_true-x_ref) )
109

110
    
111
                     # Angle at which the halo is at
112
                                                     #with respect to the reference point
113
    phi[x_true != x_ref] = np.arctan((y_predicted[x_true != x_predicted]-\
114
                                      y_true[x_true != x_predicted])\
115
                    /(x_predicted[x_true != x_predicted]-\
116
                      x_true[x_true != x_predicted])) # Angle of the estimate
117
                                                               #wrt true halo centre
118

119
    #Before finding the angle with the zero line as the line joining the halo and the reference
120
    #point I need to convert the angle produced by Python to an angle between 0 and 2pi
121
    phi =convert_to_360(phi, x_predicted-x_true,\
122
         y_predicted-y_true)
123
    psi = convert_to_360(psi, x_true-x_ref,\
124
                             y_true-y_ref)
125
    theta = phi-psi #The angle with the baseline as the line joing the ref and the halo
126

127
    
128
    theta[theta< 0.0]=theta[theta< 0.0]+2.0*mt.pi #If the angle of the true pos wrt the ref is
129
                                                  #greater than the angle of predicted pos
130
                                                  #and the true pos then add 2pi
131
    return theta
132

133

134
def convert_to_360(angle, x_in, y_in):
135
    """ Convert the given angle to the true angle in the range 0:2pi 
136
    Arguments:
137
        angle:
138
        x_in, y_in: the x and y coordinates used to determine the quartile
139
        the coordinate lies in so to add of pi or 2pi
140
    Returns:
141
        theta: the angle in the range 0:2pi
142
    """
143
    n = len(x_in)
144
    for i in range(n):
145
        if x_in[i] < 0 and y_in[i] > 0:
146
            angle[i] = angle[i]+mt.pi
147
        elif x_in[i] < 0 and y_in[i] < 0:
148
            angle[i] = angle[i]+mt.pi
149
        elif x_in[i] > 0 and y_in[i] < 0:
150
            angle[i] = angle[i]+2.0*mt.pi
151
        elif x_in[i] == 0 and y_in[i] == 0:
152
            angle[i] = 0
153
        elif x_in[i] == 0 and y_in[i] > 0:
154
            angle[i] = mt.pi/2.
155
        elif x_in[i] < 0 and y_in[i] == 0:
156
            angle[i] = mt.pi
157
        elif x_in[i] == 0 and y_in[i] < 0:
158
            angle[i] = 3.*mt.pi/2.
159

160

161

162
    return angle
163

164
def get_ref(x_halo,y_halo,weight):
165
    """ Gets the reference point of the system of halos by weighted averaging the x and y
166
    coordinates.
167
    Arguments:
168
         x_halo, y_halo: Vector num_halos referring to the coordinates of the halos
169
         weight: the weight which will be assigned to the position of the halo
170
         num_halos: number of halos in the system
171
    Returns:
172
         x_ref, y_ref: The coordinates of the reference point for the metric
173
    """
174
 
175

176
        #Find the weighted average of the x and y coordinates
177
    x_ref = np.sum([x_halo*weight])/np.sum([weight])
178
    y_ref = np.sum([y_halo*weight])/np.sum([weight])
179

180

181
    return x_ref,y_ref
182

183
    
184
def main_score( nhalo_all, x_true_all, y_true_all, x_ref_all, y_ref_all, sky_prediction):
185
    """abstracts the score from the old command-line interface. 
186
       sky_prediction is a dx2 array of predicted x,y positions
187
    
188
    -camdp"""
189
    
190
    r=np.array([],dtype=float) # The array which I will log all the calculated radial distances
191
    angle=np.array([],dtype=float) #The array which I will log all the calculated angles
192
    #Load in the sky_ids from the true
193
    num_halos_total=0 #Keep track of how many halos are input into the metric
194

195
        
196

197
    for selectskyinsolutions, sky in enumerate(sky_prediction): #Loop through each line in result.csv and analyse each one
198

199

200
        nhalo=int(nhalo_all[selectskyinsolutions])#How many halos in the
201
                                                       #selected sky?
202
        x_true=x_true_all[selectskyinsolutions][0:nhalo]
203
        y_true=y_true_all[selectskyinsolutions][0:nhalo]
204
                    
205
        x_predicted=np.array([],dtype=float)
206
        y_predicted=np.array([],dtype=float)
207
        for i in range(nhalo):
208
            x_predicted=np.append(x_predicted,float(sky[0])) #get the predicted values
209
            y_predicted=np.append(y_predicted,float(sky[1]))
210
            #The solution file for the test data provides masses 
211
            #to calculate the centre of mass where as the Training_halo.csv
212
            #direct provides x_ref y_ref. So in the case of test data
213
            #we need to calculate the ref point from the masses using
214
            #Get_ref()
215
  
216
        x_ref=x_ref_all[selectskyinsolutions]
217
        y_ref=y_ref_all[selectskyinsolutions]
218

219
        num_halos_total=num_halos_total+nhalo
220

221

222
        #Single halo case, this needs to be separately calculated since
223
        #x_ref = x_true
224
        if nhalo == 1:
225
            #What is the radial distance between the true and predicted position
226
            r=np.append(r,np.sqrt( (x_predicted-x_true)**2 \
227
                                          + (y_predicted-y_true)**2)) 
228
            #What is the angle between the predicted position and true halo position
229
            if (x_predicted-x_true) != 0:
230
                psi = np.arctan((y_predicted-y_true)/(x_predicted-x_true))
231
            else: psi=0.
232
            theta = convert_to_360([psi], [x_predicted-x_true], [y_predicted-y_true])
233
            angle=np.append(angle,theta)
234

235
        
236
        else:        
237
            #r_index_index, contains the radial distances of the predicted to
238
            #true positions. These are found by matching up the true halos to
239
            #the predicted halos such that the average of all the radial distances
240
            #is optimal. it also contains indexes of the halos used which are used to
241
            #show which halo has been matched to which.
242
            
243
            r_index_index = calc_delta_r(x_predicted, y_predicted, x_true, \
244
                                         y_true)
245
  
246
            r=np.append(r,r_index_index[0][0])
247
            halo_index= r_index_index[1] #The true halos indexes matched with the 
248
            predicted_index=r_index_index[2] #predicted halo index
249

250
            angle=np.append(angle,calc_theta\
251
                                  (x_predicted[predicted_index],\
252
                                   y_predicted[predicted_index],\
253
                                   x_true[halo_index],\
254
                                   y_true[halo_index],x_ref,\
255
                                   y_ref)) # Find the angles of the predicted
256
                                               #position wrt to the halo and
257
                                               # add to the vector angle
258

259
    
260
    # Find what the average distance the estimate is from the halo position
261
    av_r=sum(r)/len(r)
262
    
263
    #In order to quantify the orientation invariance we will express each angle 
264
    # as a vector and find the average vector
265
    #R_bar^2=(1/N Sum^Ncos(theta))^2+(1/N Sum^Nsin(theta))**2
266
    
267
    N = float(num_halos_total)
268
    angle_vec = np.sqrt(( 1.0/N * sum(np.cos(angle)) )**2 + \
269
        ( 1.0/N * sum(np.sin(angle)) )**2)
270
    
271
    W1=1./1000. #Weight the av_r such that < 1 is a good score > 1 is not so good.
272
    W2=1.
273
    metric = W1*av_r + W2*angle_vec #Weighted metric, weights TBD
274
    print('Your average distance in pixels you are away from the true halo is', av_r)
275
    print('Your average angular vector is', angle_vec)
276
    print('Your score for the training data is', metric)
277
    return metric
278
    
279
    
280
def main(user_fname, fname):
281
    """ Script to compute the evaluation metric for the Observing Dark Worlds competition. You can run it on your training data to understand how well you have done with the training data.
282
    """
283

284
    r=np.array([],dtype=float) # The array which I will log all the calculated radial distances
285
    angle=np.array([],dtype=float) #The array which I will log all the calculated angles
286
    #Load in the sky_ids from the true
287
    
288
    true_sky_id=[]
289
    sky_loader = c.reader(open(fname, 'rb')) #Load in the sky_ids from the solution file
290
    for row in sky_loader:
291
        true_sky_id.append(row[0])
292

293
    #Load in the true values from the solution file
294

295
    nhalo_all=np.loadtxt(fname,usecols=(1,),delimiter=',',skiprows=1)
296
    x_true_all=np.loadtxt(fname,usecols=(4,6,8),delimiter=',',skiprows=1)
297
    y_true_all=np.loadtxt(fname,usecols=(5,7,9),delimiter=',',skiprows=1)
298
    x_ref_all=np.loadtxt(fname,usecols=(2,),delimiter=',',skiprows=1)
299
    y_ref_all=np.loadtxt(fname,usecols=(3,),delimiter=',',skiprows=1)
300

301
    
302
    for row in sky_loader:
303
        true_sky_id.append(row[1])
304
        
305

306
    
307
    num_halos_total=0 #Keep track of how many halos are input into the metric
308

309

310
    sky_prediction = c.reader(open(user_fname, 'rb')) #Open the result.csv   
311
   
312
    try: #See if the input file from user has a header on it
313
         #with open('JoyceTest/trivialUnitTest_Pred.txt', 'r') as f:
314
        with open(user_fname, 'r') as f:   
315
            header = float((f.readline()).split(',')[1]) #try and make where the
316
                                                         #first input would be
317
                                                         #a float, if succeed it
318
                                                         #is not a header
319
        print('THE INPUT FILE DOES NOT APPEAR TO HAVE A HEADER')
320
    except :
321
        print('THE INPUT FILE APPEARS TO HAVE A HEADER, SKIPPING THE FIRST LINE')
322
        skip_header = sky_prediction.next()
323
        
324

325
    for sky in sky_prediction: #Loop through each line in result.csv and analyse each one
326
        sky_id = str(sky[0]) #Get the sky_id of the input
327
        does_it_exist=true_sky_id.count(sky_id) #Is the input sky_id
328
                                                #from user a real one?
329
        
330
        if does_it_exist > 0: #If it does then find the matching solutions to the sky_id
331
                            selectskyinsolutions=true_sky_id.index(sky_id)-1
332
        else: #Otherwise exit
333
            print('Sky_id does not exist, formatting problem: ',sky_id)
334
            sys.exit(2)
335

336

337
        nhalo=int(nhalo_all[selectskyinsolutions])#How many halos in the
338
                                                       #selected sky?
339
        x_true=x_true_all[selectskyinsolutions][0:nhalo]
340
        y_true=y_true_all[selectskyinsolutions][0:nhalo]
341
                    
342
        x_predicted=np.array([],dtype=float)
343
        y_predicted=np.array([],dtype=float)
344
        for i in range(nhalo):
345
            x_predicted=np.append(x_predicted,float(sky[2*i+1])) #get the predicted values
346
            y_predicted=np.append(y_predicted,float(sky[2*i+2]))
347
            #The solution file for the test data provides masses 
348
            #to calculate the centre of mass where as the Training_halo.csv
349
            #direct provides x_ref y_ref. So in the case of test data
350
            #we need to calculae the ref point from the masses using
351
            #Get_ref()
352
  
353
        x_ref=x_ref_all[selectskyinsolutions]
354
        y_ref=y_ref_all[selectskyinsolutions]
355

356
        num_halos_total=num_halos_total+nhalo
357

358

359
        #Single halo case, this needs to be separately calculated since
360
        #x_ref = x_true
361
        if nhalo == 1:
362
            #What is the radial distance between the true and predicted position
363
            r=np.append(r,np.sqrt( (x_predicted-x_true)**2 \
364
                                          + (y_predicted-y_true)**2)) 
365
            #What is the angle between the predicted position and true halo position
366
            if (x_predicted-x_true) != 0:
367
                psi = np.arctan((y_predicted-y_true)/(x_predicted-x_true))
368
            else: psi=0.
369
            theta = convert_to_360([psi], [x_predicted-x_true], [y_predicted-y_true])
370
            angle=np.append(angle,theta)
371

372
        
373
        else:        
374
            #r_index_index, contains the radial distances of the predicted to
375
            #true positions. These are found by matching up the true halos to
376
            #the predicted halos such that the average of all the radial distances
377
            #is optimal. it also contains indexes of the halos used which are used to
378
            #show which halo has been matched to which.
379
            
380
            r_index_index = calc_delta_r(x_predicted, y_predicted, x_true, \
381
                                         y_true)
382
  
383
            r=np.append(r,r_index_index[0][0])
384
            halo_index= r_index_index[1] #The true halos indexes matched with the 
385
            predicted_index=r_index_index[2] #predicted halo index
386

387
            angle=np.append(angle,calc_theta\
388
                                  (x_predicted[predicted_index],\
389
                                   y_predicted[predicted_index],\
390
                                   x_true[halo_index],\
391
                                   y_true[halo_index],x_ref,\
392
                                   y_ref)) # Find the angles of the predicted
393
                                               #position wrt to the halo and
394
                                               # add to the vector angle
395

396
    
397
    # Find what the average distance the estimate is from the halo position
398
    av_r=sum(r)/len(r)
399
    
400
    #In order to quantify the orientation invariance we will express each angle 
401
    # as a vector and find the average vector
402
    #R_bar^2=(1/N Sum^Ncos(theta))^2+(1/N Sum^Nsin(theta))**2
403
    
404
    N = float(num_halos_total)
405
    angle_vec = np.sqrt(( 1.0/N * sum(np.cos(angle)) )**2 + \
406
        ( 1.0/N * sum(np.sin(angle)) )**2)
407
    
408
    W1=1./1000. #Weight the av_r such that < 1 is a good score > 1 is not so good.
409
    W2=1.
410
    metric = W1*av_r + W2*angle_vec #Weighted metric, weights TBD
411
    print('Your average distance in pixels you are away from the true halo is', av_r)
412
    print('Your average angular vector is', angle_vec)
413
    print('Your score for the training data is', metric)
414

415

416
if __name__ == "__main__":
417
    #For help just typed 'python DarkWorldsMetric.py -h'
418

419
    parser = ap.ArgumentParser(description='Work out the Metric for your input file')
420
    parser.add_argument('inputfile',type=str,nargs=1,help='Input file of halo positions. Needs to be in the format SkyId,halo_x1,haloy1,halox_2,halo_y2,halox3,halo_y3 ')
421
    parser.add_argument('reffile',type=str,nargs=1,help='This should point to Training_halos.csv')
422
    args = parser.parse_args()
423

424
    user_fname=args.inputfile[0]
425
    filename = (args.reffile[0]).count('Training_halos.csv')
426
    if filename == 0:
427
        fname=args.reffile[0]+str('Training_halos.csv')
428
    else:
429
        fname=args.reffile[0]
430

431
    main(user_fname, fname)
432
    
433

434