Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Project: RNApolys
Views: 491
1
from rna_poly import RNAPolytope
2
from glob import glob
3
from os.path import basename,splitext,isfile
4
from datetime import datetime
5
import pickle
6
import __main__
7
from basic_sec_struct import sec_struct
8
__main__.RNAPolytope = RNAPolytope
9
load('rna_poly.py')
10
load('polytope_plots.sage')
11
load('main.sage')
12
13
get_bname = lambda fname:splitext(basename(fname))[0]
14
def timestamp(message=''):
15
return "[{0}] {1}".format(datetime.now(),message)
16
def load_obj(name):
17
with open(name, 'rb') as f:
18
return pickle.load(f)
19
def my_hot(x):
20
return colormaps['hot_r'](x/2+0.25)
21
def my_Greys(x):
22
return colormaps['Greys'](x/4+0.5)
23
pkl_names = glob.glob('5s/st_tuples/*.pkl')
24
names=[ get_bname(n)[:-3] for n in pkl_names]
25
sobj_names = ['5s/sobjs/{0}.sobj'.format(n) for n in names]
26
from itertools import combinations,chain
27
from numpy import average as avg, median as med, std
28
from collections import defaultdict
29
30
def get_sobj_name(name):
31
return '5s/sobjs/{0}.sobj'.format(name)
32
33
def get_suboptimal_ranking(name,P,b):
34
D = load_obj("5s/st_tuples/"+name+"_ss.pkl")
35
K=D.keys()
36
region_density = {}
37
slices = sliced_cones_b(P, b)
38
relevant_signatures = [tuple(sl.original_vertex) for sl in slices]
39
max_relevant_subopt = 0
40
total_relevant_structs=0
41
for k in K:
42
#k is a tuple: (a,b,c,d,default_ss)
43
#D[k] is a list of (ss,(x,y,z,w,t1,t2,t3,t4,acc))'s
44
for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]:
45
break
46
x,y,z,w = (QQ(e) for e in (x,y,z,w))
47
if (x,y,z,w) in relevant_signatures:
48
n=len(D[k])
49
total_relevant_structs +=n
50
region_density[(x,y,z,w)] = n
51
if n > max_relevant_subopt:
52
max_relevant_subopt = n
53
54
for k in region_density:
55
region_density[k] /= float(max_relevant_subopt)
56
57
total_structures = sum([len(D[k]) for k in K])
58
max_structures = max([len(D[k]) for k in K])
59
return region_density, (total_relevant_structs,total_structures), (max_relevant_subopt,max_structures)
60
61
def get_average_accuracy_ranking(name):
62
D = load_obj("5s/st_tuples/"+name+"_ss.pkl")
63
K=D.keys()
64
avg_accuracy = {}
65
max_accuracy = {}
66
b_avg = {}
67
b_max = {}
68
for k in K:
69
#k is a tuple: (a,b,c,d,default_ss)
70
#D[k] is a list of (ss,(x,y,z,w,t1,t2,t3,t4,acc))'s
71
avg_acc = 0
72
n = len(D[k])
73
max_acc = 0
74
#print D[k][0]
75
for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]:
76
avg_acc += acc
77
if acc>max_acc:
78
max_acc = acc
79
avg_acc /= n
80
x,y,z,w = (QQ(e) for e in (x,y,z,w))
81
avg_accuracy[(x,y,z,w)] = avg_acc
82
b_avg[avg_acc] = QQ(k[1])
83
max_accuracy[(x,y,z,w)] = max_acc
84
b_max[max_acc] = QQ(k[1])
85
return avg_accuracy,b_avg
86
87
remove_dangles = lambda s: s.replace('<','.').replace('<','.')
88
89
def compute_subopt_similarities(name,omit_singletons=True):
90
D = load_obj("5s/st_tuples/"+name+"_ss.pkl")
91
K=D.keys()
92
min_accs = {}
93
signs = []
94
n_subopts = []
95
structs={}
96
for k in K:
97
subopts = list(set([remove_dangles(s[0]) for s in D[k]]))
98
n_subopts.append(len(subopts))
99
_,(x,y,z,w,_,_,_,_,_) = D[k][0]
100
if n_subopts[-1]==1:
101
if omit_singletons:
102
continue
103
min_acc = 1.0
104
else:
105
min_acc = min([min([f_measure(s,t) for s,t in combinations(subopts,2)]),min([f_measure(t,s) for s,t in combinations(subopts,2)])])
106
min_accs[(x,y,z,w)]=min_acc
107
signs.append((x,y,z,w))
108
structs[(x,y,z,w)]=subopts
109
print timestamp("{0}: max_subopts={1}, avg_subopts={2}; std_subopts={3}".format(name,max(n_subopts),avg(n_subopts),std(n_subopts)))
110
return min_accs,structs,signs
111
def get_subopt_structures(name,default_ss,sign=False):
112
D = load_obj("st_tuples/"+name+"_ss.pkl")
113
K=D.keys()
114
for k in K:
115
ss = k[4]
116
if ss==default_ss:
117
if sign:
118
S,(x,y,z,w,t1,t2,t3,t4,acc) = D[k][0]
119
return [s[0] for s in D[k]],(x,y,z)
120
else:
121
return [x[0] for x in D[k]]
122
#print "Returning no structures?"
123
if sign:
124
return [], (0,0,0)
125
return []
126
def get_accuracies_from_struct(name,default_ss):
127
D = load_obj("5s/st_tuples/"+name+"_ss.pkl")
128
K=D.keys()
129
for k in K:
130
ss = k[4]
131
if ss==default_ss:
132
return [x[1][-1] for x in D[k]]
133
134
def get_accuracies(name,default_ss):
135
D = load_obj("5s/st_tuples/"+name+"_ss.pkl")
136
K=D.keys()
137
for k in K:
138
ss = k[4]
139
if ss==default_ss:
140
n = len(D[k])
141
return [acc for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]]
142
return None
143
144
classic_pt = (34/10,0,4/10)
145
common_pt = (489/40, 51/320, -231/80)
146
147
def get_classic_slice(P,name):
148
classic_slices=[]
149
for sl in P.d1_slices():
150
if (34/10,0,4/10) in sl:
151
classic_slices.append(sl)
152
153
return classic_slices
154
155
def classic_slices_and_accuracy(names):
156
classic_slices = {}
157
accuracies = {}
158
for i in range(len(names)):
159
name = names[i]
160
sobj_name = get_sobj_name(name)
161
162
P =RNAPolytope.construct_from_file(sobj_name)
163
classic_slices[name] = get_classic_slice(P,name)
164
accs = []
165
for sl in classic_slices[name]:
166
ss = sl.original_structure
167
accs.extend(get_accuracies(name,ss))
168
accuracies[name] = accs
169
170
return classic_slices, accuracies
171
172
def get_slice_containing_pt(P,pt):
173
slices = []
174
for sl in P.d1_slices():
175
if pt in sl:
176
slices.append(sl)
177
return slices
178
179
def pt_slices_and_accuracy(names,pt):
180
slices={}
181
accuracies={}
182
for i in range(len(names)):
183
name = names[i]
184
sobj_name = get_sobj_name(name)
185
186
P =RNAPolytope.construct_from_file(sobj_name)
187
slices[name] = get_slice_containing_pt(P,pt)
188
accs = []
189
for sl in slices[name]:
190
ss = sl.original_structure
191
accs.extend(get_accuracies(name,ss))
192
accuracies[name] = accs
193
194
return slices, accuracies
195
196
197
198
def get_max_average_slice(P,name):
199
avg_accuracy, b_avg = get_average_accuracy_ranking(name)
200
max_avg_acc = 0
201
max_sigs = []
202
for sig in avg_accuracy:
203
if avg_accuracy[sig]>max_avg_acc:
204
max_avg_acc = avg_accuracy[sig]
205
max_sigs = [sig]
206
elif avg_accuracy[sig]==max_avg_acc:
207
max_sigs.append(sig)
208
out_slices = []
209
for sl in P.d1_slices():
210
x,y,z,w = sl.original_vertex
211
if (x,y,z,w) in max_sigs:
212
out_slices.append(sl)
213
return out_slices
214
215
def get_neighboring_slices(P,sl):
216
neighbors = []
217
bounded_nbrs = []
218
for s in P.d1_slices():
219
if s==sl:
220
print "Avoiding self"
221
continue
222
inter = sl.intersection(s)
223
if not inter.is_empty():
224
neighbors.append(s)
225
if len(inter.rays())==0:
226
bounded_nbrs.append(s)
227
return neighbors, bounded_nbrs
228
229
def f_measure(s,t):
230
S = sec_struct(s)
231
T = sec_struct(t)
232
acc = S.relative_accuracy(T)
233
return acc
234
235
def get_similarity_matrices(P,name,sl):
236
nbrs,bdd_nbrs = get_neighboring_slices(P,sl)
237
subopts = {}
238
239
for nbr in nbrs:
240
subopts[nbr] = get_subopt_structures(name,nbr.original_structure)
241
242
subopts[sl] = get_subopt_structures(name,sl.original_structure)
243
244
matrices={}
245
bdd_matrices={}
246
for nbr in nbrs:
247
matrix = []
248
for s in subopts[nbr]:
249
row = []
250
for t in subopts[sl]:
251
row.append(f_measure(s,t))
252
matrix.append(row)
253
matrices[nbr] = matrix
254
if nbr in bdd_nbrs:
255
bdd_matrices[nbr]=minteract
256
return matrices,bdd_matrices
257
258
def get_neighbors_accuracies(P,name,sl):
259
nbrs,bdd_nbrs = get_neighboring_slices(P,sl)
260
accuracies={}
261
bdd_accuracies={}
262
for nbr in nbrs:
263
accs = get_accuracies(name, nbr.original_structure)
264
matrices[nbr] = accs
265
if nbr in bdd_nbrs:
266
bdd_accuracies[nbr]=accs
267
return accuracies,bdd_accuracies
268
269
def neighbors_accuracies(names,pt):
270
accuracies = {}
271
bdd_accuracies = {}
272
for i in range(len(names)):
273
print timestamp("{0}".format(i+1)),
274
name = names[i]
275
sobj_name = get_sobj_name(name)
276
277
accuracies[name]={}
278
bdd_accuracies[name]={}
279
280
P =RNAPolytope.construct_from_file(sobj_name)
281
slice_list = get_slice_containing_pt(P,pt)
282
for sl in slice_list:
283
if len(sl.rays())==0:
284
accuracies[name][sl],_ = get_neighbors_accuracies(P,name,sl)
285
else:
286
accuracies[name][sl],bdd_accuracies[name][sl] = get_neighbor_accuracies(P,name,sl)
287
return accuracies,bdd_accuracies
288
289
def similarity_matrices(names,pt):
290
matrices = {}
291
bdd_matrices = {}
292
for i in range(len(names)):
293
print timestamp("{0}".format(i)),
294
name = names[i]
295
sobj_name = get_sobj_name(name)
296
297
matrices[name]={}
298
bdd_matrices[name]={}
299
300
P =RNAPolytope.construct_from_file(sobj_name)
301
slice_list = get_slice_containing_pt(P,pt)
302
for sl in slice_list:
303
if len(sl.rays())==0:
304
matrices[name][sl],_ = get_similarity_matrices(P,name,sl)
305
else:
306
matrices[name][sl],bdd_matrices[name][sl] = get_similarity_matrices(P,name,sl)
307
return matrices,bdd_matrices
308
309
def get_max_average_slices(names=names):
310
slices = {}
311
sl2na = {}
312
for i in range(len(names)):
313
name = names[i]
314
sobj_name = get_sobj_name(name)
315
316
P =RNAPolytope.construct_from_file(sobj_name)
317
P_slices = get_max_average_slice(P,name)
318
slices[name]=P_slices
319
for sl in P_slices:
320
sl2na[sl] = name
321
if len(P_slices)>1:
322
print "*"*40
323
print "Found more than one max avg slice in {0}".format(name)
324
return slices, sl2na
325
326
def max_avg_intersection_graph(slices):
327
slice_to_label = {sl:i for i,sl in enumerate(slices)}
328
label_to_slice = {i:sl for i,sl in enumerate(slices)}
329
n = len(slice_to_label.keys())
330
G=Graph(n)
331
edges = []
332
for sl1,sl2 in combinations(slices,2):
333
inter = sl1.intersection(sl2)
334
# Bug! Change this to inter.is_empty()
335
if len(inter.rays())>0 or inter.volume()>0:
336
edges.append([slice_to_label[sl1],slice_to_label[sl2]])
337
G.add_edges(edges)
338
return G,label_to_slice
339
340
def greedy_clique_partition(G):
341
H=G.copy()
342
cliques = []
343
while H.num_verts()>0:
344
c = H.clique_maximum()
345
H.delete_vertices(c)
346
cliques.append(c)
347
return cliques
348
349
def plot_max_avg_acc_slice(name,P,colormap):
350
avg_accuracy, b_avg = get_average_accuracy_ranking(name)
351
max_avg = max(avg_accuracy.values())
352
return accuracy_cone_plots_b(P,avg_accuracy,b=b_avg[max_avg],abounds=(-200,200),cbounds=(-200,200),plot_title=name,colormap=colormap)#.show(figsize=12)
353
354
def plot_suboptimal_density_slice(name,P,b=0,colormap=my_hot):
355
region_density, totals, maxes = get_suboptimal_ranking(name,P,b)
356
title = '{0}; total structs: {1}; max per region: {3}'.format(name, totals[0],totals[1], maxes[0],maxes[1])
357
return accuracy_cone_plots_b(P,region_density,b=b,abounds=(-200,200),cbounds=(-200,200),plot_title=title,colormap=colormap)#.show(figsize=12)
358
359
def plot_max_avg_slices(names):
360
for i in range(len(names)):
361
name = names[i]
362
sobj_name = get_sobj_name(name)
363
364
P = P=RNAPolytope.construct_from_file(sobj_name)
365
plot_max_avg_acc_slice(name,P)
366
367
def slice_center(sl,delta=1/2):
368
vertex_sum = vector(sl.base_ring(), [0]*sl.ambient_dim())
369
for v in sl.vertex_generator():
370
vertex_sum += v.vector()
371
vertex_sum=vertex_sum / (sl.n_vertices())
372
for v in sl.ray_generator():
373
vertex_sum += delta*v.vector()
374
vertex_sum.set_immutable()
375
return vertex_sum
376
377
classic = (34/10,0,4/10)
378
def get_classic_slices(P):
379
slices = []
380
for i,sl in enumerate(P.d1_slices()):
381
if classic in sl:
382
slices.append(sl)
383
return slices
384
385
plot_slice = lambda sl: sum(sl.plot().all[1:])
386
387
def get_slice_width_height_depth_intervals(sl):
388
from sage.numerical.mip import MIPSolverException
389
I = []
390
for i in range(3):
391
lp,V=sl.to_linear_program(return_variable=True)
392
var = V[i]
393
lp.set_objective(var)
394
try:
395
o1 = lp.solve()
396
except MIPSolverException as e:
397
if str(e).find("unbounded")>-1:
398
o1 = infinity
399
else:
400
raise e
401
lp.set_objective(-var)
402
try:
403
o2 = -lp.solve()
404
except MIPSolverException as e:
405
if str(e).find("unbounded")>-1:
406
o2 = -infinity
407
else:
408
print e
409
I.append((o2,o1))
410
return I
411
412
def is_bounded(sl):
413
return len(sl.rays())==0
414
def is_unbounded(sl):
415
return len(sl.rays())>0
416
def is_wedge(sl):
417
return len(sl.rays())==2
418
def is_strip(sl):
419
return len(sl.rays())==1
420
421
def get_slice_width_height_depth(sl):
422
from sage.numerical.mip import MIPSolverException
423
I = []
424
for i in range(3):
425
lp,V=sl.to_linear_program(return_variable=True)
426
var = V[i]
427
lp.set_objective(var)
428
try:
429
o1 = lp.solve()
430
except MIPSolverException as e:
431
if str(e).find("unbounded")>-1:
432
o1 = infinity
433
else:
434
raise e
435
lp.set_objective(-var)
436
try:
437
o2 = -lp.solve()
438
except MIPSolverException as e:
439
if str(e).find("unbounded")>-1:
440
o2 = -infinity
441
else:
442
print e
443
I.append(o1-o2)
444
return I
445
446
def get_slice_width_interval(sl):
447
return get_slice_width_height_depth(sl)[0]
448
def get_slice_height_interval(sl):
449
return get_slice_width_height_depth(sl)[1]
450
def get_slice_depth_interval(sl):
451
return get_slice_width_height_depth(sl)[2]
452
453
def get_slice_width(sl):
454
a,b=get_slice_width_interval(sl)
455
return b-a
456
def get_slice_height(sl):
457
a,b=get_slice_height_interval(sl)
458
return b-a
459
def get_slice_depth(sl):
460
a,b=get_slice_depth_interval(sl)
461
return b-a
462
463
464
def get_var_intervals(sl,pt=None):
465
if pt is None:
466
pt = slice_center(sl)
467
elif pt=='classic':
468
pt=classic
469
from sage.numerical.mip import MIPSolverException
470
I = []
471
for i in range(3):
472
lp,V=sl.to_linear_program(return_variable=True)
473
var = V[i]
474
lp.set_objective(var)
475
for j in range(3):
476
if j!=i:
477
lp.add_constraint(V[j]==pt[j])
478
try:
479
o1 = lp.solve()
480
except MIPSolverException as e:
481
if str(e).find("unbounded")>-1:
482
o1 = infinity
483
else:
484
print e
485
lp.set_objective(-var)
486
try:
487
# Check if this should have a negative sign!
488
o2 = lp.solve()
489
except MIPSolverException as e:
490
if str(e).find("unbounded")>-1:
491
o2 = -infinity
492
else:
493
print e
494
w = min([abs(o1-pt[i]),abs(o2-pt[i])])
495
I.append(w)
496
return I
497
498
def get_bounded_partitions(sname,pct=False):
499
P=RNAPolytope.construct_from_file(sname)
500
bounded = defaultdict(lambda:0)
501
dirs = 'abc'
502
for sl in P.d1_slices():
503
if sl.dimension()!=3:
504
continue
505
rays = sl.rays()
506
unb = ''
507
for i in range(3):
508
unb= unb + (dirs[i] if any([v[i] for v in rays]) else '')
509
510
bounded[unb] += 1
511
if unb=='a':
512
print timestamp("Unbounded only in a: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
513
elif unb=='c':
514
print timestamp("Unbounded only in c: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
515
elif unb=='ab':
516
print timestamp("Unbounded only in ab: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
517
elif unb == 'bc':
518
print timestamp(sname)
519
520
n = len(P.d1_slices())
521
522
total_bounded = bounded['']
523
total_unbounded = n-total_bounded
524
525
if pct:
526
for k in bounded:
527
bounded[k] *= 100.0/n
528
529
bounded['bounded'] = (100.0*total_bounded)/n
530
bounded['unbounded'] = (100.0*total_unbounded)/n
531
532
return bounded
533