from itertools import combinations,chain
from numpy import average as avg, median as med, std
from collections import defaultdict
def get_sobj_name(name):
return '5s/sobjs/{0}.sobj'.format(name)
def get_suboptimal_ranking(name,P,b):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
region_density = {}
slices = sliced_cones_b(P, b)
relevant_signatures = [tuple(sl.original_vertex) for sl in slices]
max_relevant_subopt = 0
total_relevant_structs=0
for k in K:
for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]:
break
x,y,z,w = (QQ(e) for e in (x,y,z,w))
if (x,y,z,w) in relevant_signatures:
n=len(D[k])
total_relevant_structs +=n
region_density[(x,y,z,w)] = n
if n > max_relevant_subopt:
max_relevant_subopt = n
for k in region_density:
region_density[k] /= float(max_relevant_subopt)
total_structures = sum([len(D[k]) for k in K])
max_structures = max([len(D[k]) for k in K])
return region_density, (total_relevant_structs,total_structures), (max_relevant_subopt,max_structures)
def get_average_accuracy_ranking(name):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
avg_accuracy = {}
max_accuracy = {}
b_avg = {}
b_max = {}
for k in K:
avg_acc = 0
n = len(D[k])
max_acc = 0
for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]:
avg_acc += acc
if acc>max_acc:
max_acc = acc
avg_acc /= n
x,y,z,w = (QQ(e) for e in (x,y,z,w))
avg_accuracy[(x,y,z,w)] = avg_acc
b_avg[avg_acc] = QQ(k[1])
max_accuracy[(x,y,z,w)] = max_acc
b_max[max_acc] = QQ(k[1])
return avg_accuracy,b_avg
remove_dangles = lambda s: s.replace('<','.').replace('<','.')
def compute_subopt_similarities(name,omit_singletons=True):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
min_accs = {}
signs = []
n_subopts = []
structs={}
for k in K:
subopts = list(set([remove_dangles(s[0]) for s in D[k]]))
n_subopts.append(len(subopts))
_,(x,y,z,w,_,_,_,_,_) = D[k][0]
if n_subopts[-1]==1:
if omit_singletons:
continue
min_acc = 1.0
else:
min_acc = min([min([f_measure(s,t) for s,t in combinations(subopts,2)]),min([f_measure(t,s) for s,t in combinations(subopts,2)])])
min_accs[(x,y,z,w)]=min_acc
signs.append((x,y,z,w))
structs[(x,y,z,w)]=subopts
print timestamp("{0}: max_subopts={1}, avg_subopts={2}; std_subopts={3}".format(name,max(n_subopts),avg(n_subopts),std(n_subopts)))
return min_accs,structs,signs
def get_subopt_structures(name,default_ss,sign=False):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
for k in K:
ss = k[4]
if ss==default_ss:
if sign:
S,(x,y,z,w,t1,t2,t3,t4,acc) = D[k][0]
return [s[0] for s in D[k]],(x,y,z)
else:
return [x[0] for x in D[k]]
if sign:
return [], (0,0,0)
return []
def get_accuracies_from_struct(name,default_ss):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
for k in K:
ss = k[4]
if ss==default_ss:
return [x[1][-1] for x in D[k]]
def get_accuracies(name,default_ss):
D = load_obj("{0}/{1}_ss.pkl".format(_5S_st_tuples_dir,name))
K=D.keys()
for k in K:
ss = k[4]
if ss==default_ss:
n = len(D[k])
return [acc for S,(x,y,z,w,t1,t2,t3,t4,acc) in D[k]]
return None
classic_pt = (34/10,0,4/10)
common_pt = (489/40, 51/320, -231/80)
def get_classic_slice(P,name):
classic_slices=[]
for sl in P.d1_slices():
if (34/10,0,4/10) in sl:
classic_slices.append(sl)
return classic_slices
def classic_slices_and_accuracy(names):
classic_slices = {}
accuracies = {}
for i in range(len(names)):
name = names[i]
sobj_name = get_sobj_name(name)
P =RNAPolytope.construct_from_file(sobj_name)
classic_slices[name] = get_classic_slice(P,name)
accs = []
for sl in classic_slices[name]:
ss = sl.original_structure
accs.extend(get_accuracies(name,ss))
accuracies[name] = accs
return classic_slices, accuracies
def get_slice_containing_pt(P,pt):
slices = []
for sl in P.d1_slices():
if pt in sl:
slices.append(sl)
return slices
def pt_slices_and_accuracy(names,pt):
slices={}
accuracies={}
for i in range(len(names)):
name = names[i]
sobj_name = get_sobj_name(name)
P =RNAPolytope.construct_from_file(sobj_name)
slices[name] = get_slice_containing_pt(P,pt)
accs = []
for sl in slices[name]:
ss = sl.original_structure
accs.extend(get_accuracies(name,ss))
accuracies[name] = accs
return slices, accuracies
def get_max_average_slice(P,name):
avg_accuracy, b_avg = get_average_accuracy_ranking(name)
max_avg_acc = 0
max_sigs = []
for sig in avg_accuracy:
if avg_accuracy[sig]>max_avg_acc:
max_avg_acc = avg_accuracy[sig]
max_sigs = [sig]
elif avg_accuracy[sig]==max_avg_acc:
max_sigs.append(sig)
out_slices = []
for sl in P.d1_slices():
x,y,z,w = sl.original_vertex
if (x,y,z,w) in max_sigs:
out_slices.append(sl)
return out_slices
def get_neighboring_slices(P,sl):
neighbors = []
bounded_nbrs = []
for s in P.d1_slices():
if s==sl:
print "Avoiding self"
continue
inter = sl.intersection(s)
if not inter.is_empty():
neighbors.append(s)
if len(inter.rays())==0:
bounded_nbrs.append(s)
return neighbors, bounded_nbrs
def f_measure(s,t):
S = sec_struct(s)
T = sec_struct(t)
acc = S.relative_accuracy(T)
return acc
def get_similarity_matrices(P,name,sl):
nbrs,bdd_nbrs = get_neighboring_slices(P,sl)
subopts = {}
for nbr in nbrs:
subopts[nbr] = get_subopt_structures(name,nbr.original_structure)
subopts[sl] = get_subopt_structures(name,sl.original_structure)
matrices={}
bdd_matrices={}
for nbr in nbrs:
matrix = []
for s in subopts[nbr]:
row = []
for t in subopts[sl]:
row.append(f_measure(s,t))
matrix.append(row)
matrices[nbr] = matrix
if nbr in bdd_nbrs:
bdd_matrices[nbr]=minteract
return matrices,bdd_matrices
def get_neighbors_accuracies(P,name,sl):
nbrs,bdd_nbrs = get_neighboring_slices(P,sl)
accuracies={}
bdd_accuracies={}
for nbr in nbrs:
accs = get_accuracies(name, nbr.original_structure)
matrices[nbr] = accs
if nbr in bdd_nbrs:
bdd_accuracies[nbr]=accs
return accuracies,bdd_accuracies
def neighbors_accuracies(names,pt):
accuracies = {}
bdd_accuracies = {}
for i in range(len(names)):
print timestamp("{0}".format(i+1)),
name = names[i]
sobj_name = get_sobj_name(name)
accuracies[name]={}
bdd_accuracies[name]={}
P =RNAPolytope.construct_from_file(sobj_name)
slice_list = get_slice_containing_pt(P,pt)
for sl in slice_list:
if len(sl.rays())==0:
accuracies[name][sl],_ = get_neighbors_accuracies(P,name,sl)
else:
accuracies[name][sl],bdd_accuracies[name][sl] = get_neighbor_accuracies(P,name,sl)
return accuracies,bdd_accuracies
def similarity_matrices(names,pt):
matrices = {}
bdd_matrices = {}
for i in range(len(names)):
print timestamp("{0}".format(i)),
name = names[i]
sobj_name = get_sobj_name(name)
matrices[name]={}
bdd_matrices[name]={}
P =RNAPolytope.construct_from_file(sobj_name)
slice_list = get_slice_containing_pt(P,pt)
for sl in slice_list:
if len(sl.rays())==0:
matrices[name][sl],_ = get_similarity_matrices(P,name,sl)
else:
matrices[name][sl],bdd_matrices[name][sl] = get_similarity_matrices(P,name,sl)
return matrices,bdd_matrices
def get_max_average_slices(names=names):
slices = {}
sl2na = {}
for i in range(len(names)):
name = names[i]
sobj_name = get_sobj_name(name)
P =RNAPolytope.construct_from_file(sobj_name)
P_slices = get_max_average_slice(P,name)
slices[name]=P_slices
for sl in P_slices:
sl2na[sl] = name
if len(P_slices)>1:
print "*"*40
print "Found more than one max avg slice in {0}".format(name)
return slices, sl2na
def max_avg_intersection_graph(slices):
slice_to_label = {sl:i for i,sl in enumerate(slices)}
label_to_slice = {i:sl for i,sl in enumerate(slices)}
n = len(slice_to_label.keys())
G=Graph(n)
edges = []
for sl1,sl2 in combinations(slices,2):
inter = sl1.intersection(sl2)
if len(inter.rays())>0 or inter.volume()>0:
edges.append([slice_to_label[sl1],slice_to_label[sl2]])
G.add_edges(edges)
return G,label_to_slice
def greedy_clique_partition(G):
H=G.copy()
cliques = []
while H.num_verts()>0:
c = H.clique_maximum()
H.delete_vertices(c)
cliques.append(c)
return cliques
def plot_max_avg_acc_slice(name,P,colormap):
avg_accuracy, b_avg = get_average_accuracy_ranking(name)
max_avg = max(avg_accuracy.values())
return accuracy_cone_plots_b(P,avg_accuracy,b=b_avg[max_avg],abounds=(-200,200),cbounds=(-200,200),plot_title=name,colormap=colormap)
def plot_suboptimal_density_slice(name,P,b=0,colormap=my_hot):
region_density, totals, maxes = get_suboptimal_ranking(name,P,b)
title = '{0}; total structs: {1}; max per region: {3}'.format(name, totals[0],totals[1], maxes[0],maxes[1])
return accuracy_cone_plots_b(P,region_density,b=b,abounds=(-200,200),cbounds=(-200,200),plot_title=title,colormap=colormap)
def plot_max_avg_slices(names):
for i in range(len(names)):
name = names[i]
sobj_name = get_sobj_name(name)
P = P=RNAPolytope.construct_from_file(sobj_name)
plot_max_avg_acc_slice(name,P)
def slice_center(sl,delta=1/2):
vertex_sum = vector(sl.base_ring(), [0]*sl.ambient_dim())
for v in sl.vertex_generator():
vertex_sum += v.vector()
vertex_sum=vertex_sum / (sl.n_vertices())
for v in sl.ray_generator():
vertex_sum += delta*v.vector()
vertex_sum.set_immutable()
return vertex_sum
classic = (34/10,0,4/10)
def get_classic_slices(P):
slices = []
for i,sl in enumerate(P.d1_slices()):
if classic in sl:
slices.append(sl)
return slices
plot_slice = lambda sl: sum(sl.plot().all[1:])
def get_slice_width_height_depth_intervals(sl):
from sage.numerical.mip import MIPSolverException
I = []
for i in range(3):
lp,V=sl.to_linear_program(return_variable=True)
var = V[i]
lp.set_objective(var)
try:
o1 = lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o1 = infinity
else:
raise e
lp.set_objective(-var)
try:
o2 = -lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o2 = -infinity
else:
print e
I.append((o2,o1))
return I
def is_bounded(sl):
return len(sl.rays())==0
def is_unbounded(sl):
return len(sl.rays())>0
def is_wedge(sl):
return len(sl.rays())==2
def is_strip(sl):
return len(sl.rays())==1
def get_slice_width_height_depth(sl):
from sage.numerical.mip import MIPSolverException
I = []
for i in range(3):
lp,V=sl.to_linear_program(return_variable=True)
var = V[i]
lp.set_objective(var)
try:
o1 = lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o1 = infinity
else:
raise e
lp.set_objective(-var)
try:
o2 = -lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o2 = -infinity
else:
print e
I.append(o1-o2)
return I
def get_slice_width_interval(sl):
return get_slice_width_height_depth(sl)[0]
def get_slice_height_interval(sl):
return get_slice_width_height_depth(sl)[1]
def get_slice_depth_interval(sl):
return get_slice_width_height_depth(sl)[2]
def get_slice_width(sl):
a,b=get_slice_width_interval(sl)
return b-a
def get_slice_height(sl):
a,b=get_slice_height_interval(sl)
return b-a
def get_slice_depth(sl):
a,b=get_slice_depth_interval(sl)
return b-a
def get_var_intervals(sl,pt=None):
if pt is None:
pt = slice_center(sl)
elif pt=='classic':
pt=classic
from sage.numerical.mip import MIPSolverException
I = []
for i in range(3):
lp,V=sl.to_linear_program(return_variable=True)
var = V[i]
lp.set_objective(var)
for j in range(3):
if j!=i:
lp.add_constraint(V[j]==pt[j])
try:
o1 = lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o1 = infinity
else:
print e
lp.set_objective(-var)
try:
o2 = lp.solve()
except MIPSolverException as e:
if str(e).find("unbounded")>-1:
o2 = -infinity
else:
print e
w = min([abs(o1-pt[i]),abs(o2-pt[i])])
I.append(w)
return I
def get_bounded_partitions(sname,pct=False):
P=RNAPolytope.construct_from_file(sname)
bounded = defaultdict(lambda:0)
dirs = 'abc'
for sl in P.d1_slices():
if sl.dimension()!=3:
continue
rays = sl.rays()
unb = ''
for i in range(3):
unb= unb + (dirs[i] if any([v[i] for v in rays]) else '')
bounded[unb] += 1
if unb=='a':
print timestamp("Unbounded only in a: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
elif unb=='c':
print timestamp("Unbounded only in c: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
elif unb=='ab':
print timestamp("Unbounded only in ab: {0}; {1}; {2}".format(sname, sl.original_vertex, slice_center(sl)))
elif unb == 'bc':
print timestamp(sname)
n = len(P.d1_slices())
total_bounded = bounded['']
total_unbounded = n-total_bounded
if pct:
for k in bounded:
bounded[k] *= 100.0/n
bounded['bounded'] = (100.0*total_bounded)/n
bounded['unbounded'] = (100.0*total_unbounded)/n
return bounded