import pickle
import glob
from os.path import basename, splitext
from csv import writer
from datetime import datetime
import numpy as np
from sage.geometry.polyhedron.plot import cyclic_sort_vertices_2d
load('rna_poly.py')
cone2poly = lambda c: Polyhedron(vertices = [[0,0,0,0]],rays=c.rays().matrix())
def get_pairs(s):
stack=[]
pair = {}
for i,c in enumerate(s):
if c=='(':
stack.append(i)
elif c==')':
j=stack.pop()
pair[i] = j
pair[j] = i
else:
pair[i] = -1
return pair
def get_arc_descriptions(pairs):
arcs = {}
for i in pairs:
j = pairs[i]
if j == -1:
continue
pair = tuple(sorted([i,j]))
if pair in arcs:
continue
else:
radius = pair[1]-pair[0]
x_center = pair[1]+pair[0]
arcs[pair] = (radius,(x_center,0))
return arcs
def plot_arcs(arcs,annotate=False,name='name',signature=(1,1,2,3),box=True):
P = Graphics()
if annotate:
max_rad = max([r for r,c in arcs.values()])
last_pt = max([r+c[0] for r,c in arcs.values()])
for i in range(-1,last_pt/20):
P+=line([((i+1)*20,0),((i+1)*20,max_rad)],color='black',alpha=0.15)
P+=text("{0}".format((i+1)*10),((i+1)*20,max_rad),horizontal_alignment='center',vertical_alignment='bottom',rgbcolor=(0,0,0),alpha=0.5)
P+=text("{0}: {1}".format(name, signature),(last_pt/2,-5),horizontal_alignment='center',vertical_alignment='top',rgbcolor=(0,0,0))
for pair in arcs:
radius, center = arcs[pair]
P += arc( center, radius,radius, sector=(0,pi) )
x,y,X,Y = [P.xmin(),P.ymin(),P.xmax(),P.ymax()]
width = X-x
height = Y-y
x -= 0.05*width
X += 0.05*width
y -= 0.07*height
Y += 0.07*height
P += line([(x,y),(x,Y),(X,Y),(X,y),(x,y)],color='black')
return P
def plot_2s(ss,name,signature):
p=get_pairs(ss)
a=get_arc_descriptions(p)
D=plot_arcs(a,True,name,signature)
return D
def plot_array(structs, name):
plot_list = []
for signature, reg in structs:
structure = reg.original_structure
P = plot_2s( structure, name, signature )
plot_list.append(P)
ncols = 1
nrows = len(plot_list)
G = graphics_array(plot_list, nrows, ncols )
return G
def get_bounded_cones_stripes_polygons( P, xbds=(-200,200), ybds=(-200,200) ):
x,X=xbds
y,Y=ybds
slices = sliced_cones_b(P, 0)
bounded_regs = bounded_regions(slices, (), (x,X),(y,Y))
cones = []
stripes = []
polygons = []
all_regions = []
for br in bounded_regs:
s = br.original_region
discr = len(s.rays())
signature = tuple(br.original_vertex)
if discr == 2:
cones.append((signature,br))
elif discr==1:
stripes.append((signature,br))
elif discr==0:
polygons.append((signature,br))
all_regions.append((signature,br))
cones.sort()
stripes.sort()
polygons.sort()
all_regions.sort()
return (all_regions,cones,stripes,polygons)
def get_ray_directions(P,b=0):
slices = sliced_cones_b(P, b)
cone_dir = {}
stripe_dir = {}
all_dir = {}
for s in slices:
discr = len(s.rays())
if discr == 2:
cone_dir[s.rays()[0]]=True
cone_dir[s.rays()[1]]=True
all_dir[s.rays()[0]] = True
all_dir[s.rays()[1]] = True
elif discr==1:
stripe_dir[s.rays()[0]]=True
all_dir[s.rays()[0]] = True
return (all_dir.keys(),cone_dir.keys(),stripe_dir.keys())
def get_all_region_data(P,b=0):
slices = sliced_cones_b(P, b)
region_data = []
for s in slices:
discr = len(s.rays())
x,y,z,w = tuple(s.original_vertex)
reg_type = np.nan
direction = np.nan
ray2 = np.nan
ray1 = np.nan
if discr == 0:
bounded = True
reg_type = 'p'
else:
bounded = False
if discr == 1:
reg_type = 's'
ray1 = tuple(s.rays()[0])
x_coord = ray1[0]
if x_coord < 0:
direction = 'l'
elif x_coord > 0:
direction = 'r'
else:
direction = 'a'
if discr == 2:
reg_type = 'w'
ray1 = tuple(s.rays()[0])
ray2 = tuple(s.rays()[1])
x_coord1 = ray1[0]
x_coord2 = ray2[0]
if x_coord1 <0 and x_coord2 < 0:
direction = 'l'
elif x_coord1 > 0 and x_coord2>0:
direction = 'r'
elif x_coord1*x_coord2 <0:
direction = 'b'
else:
direction = 'a'
region_data.append((b,x,y,z,w,reg_type,bounded,direction,ray1,ray2))
region_data.sort()
return region_data
def get_cones_stripes_polygons( P, b=0, signatures_only=False):
slices = sliced_cones_b(P, b)
cones = []
stripes = []
polygons = []
all_regions = []
for s in slices:
discr = len(s.rays())
signature = tuple(s.original_vertex)
if signatures_only:
if discr == 2:
cones.append(signature)
elif discr==1:
stripes.append(signature)
elif discr==0:
polygons.append(signature)
all_regions.append(signature)
else:
if discr == 2:
cones.append((signature,s))
elif discr==1:
stripes.append((signature,s))
elif discr==0:
polygons.append((signature,s))
all_regions.append((signature,s))
cones.sort()
stripes.sort()
polygons.sort()
all_regions.sort()
return (all_regions,cones,stripes,polygons)
def get_polygons(P, min_b=0, max_b=None):
if max_b is None:
max_b=min_b
max_b += 1
for b in range(min_b,max_b):
slices = sliced_cones_b(P, b)
polygons = []
for s in slices:
discr = len(s.rays())
if discr==0:
polygons.append(s)
polygons.sort()
return polygons
def slope(p,q):
x,y = p
a,b = q
if x == a:
return -infinity
return (y-b)/(x-a)
def find_pos_slope(P, min_b=0,max_b=None):
if max_b is None:
max_b=min_b
max_b += 1
output = []
for b in range(min_b,max_b):
polys = get_polygons(P,b)
for p in polys:
hull=cyclic_sort_vertices_2d(p.vertices())
for i,v in enumerate(hull):
u = hull[i-1]
if slope(u,v)>0:
output.append((b,tuple(p.original_vertex)))
return output
def rectangle_pl(a,b,color='blue'):
return polygon2d(((a,b),(a+1,b),(a+1,b+1),(a,b+1)),color=color)
def plot_positive_intervals(P,b_min,b_max):
output = find_pos_slope(P,b_min,b_max)
xy_pairs = sorted(list(set([(o[1][0],o[1][2]) for o in output])))
xy_dict = {xy:[] for xy in xy_pairs}
for o in output:
b=o[0]
sig=o[1]
x,y = sig[0],sig[2]
xy_dict[(x,y)].append(b)
for k in xy_pairs:
xy_dict[k].sort()
G = Graphics()
n = len(xy_pairs)
for i,k in enumerate(xy_pairs):
G += text('{0}'.format(k),(b_min-0.5,i+0.5),horizontal_alignment='right')
G += polygon2d(((b_min,i),(b_max+1,i),(b_max+1,i+1),(b_min,i+1)),fill=False,color='black',thickness=1)
for b in xy_dict[k]:
G += rectangle_pl(b,i,'red' if i%2 else 'blue')
return G
def iterate_sorted(dirname,pattern='*',order='N',reverse=False):
"""
Returns a list of filenames under `dirname` satisfying `pattern`. The output
is increasingly sorted by name (`N`) or size (`S`). You may choose
to reverse the order by using the `reverse` flag.
"""
fnames = glob.glob('{0}/{1}'.format(dirname,pattern))
if order=='S':
fname_size = sorted([(os.path.getsize(f),f) for f in fnames],reverse=reverse)
return [e[1] for e in fname_size]
elif order=='N':
return sorted(fnames,reverse=reverse)
return fnames
def max_z(regions,x0=-100):
"""
Sorts the regions by d values.
"""
maxi = -infinity
mini = infinity
unbounded_above = []
unbounded_below = []
maxi_bounded_region = None
mini_bounded_region = None
for br in regions:
c = br.original_region.original_cone
p = cone2poly(c)
lp,x=p.to_linear_program(return_variable=True,base_ring=QQ)
lp.add_constraint(x[0]==x0)
lp.set_objective(x[2])
try:
max_z = lp.solve()
except sage.numerical.mip.MIPSolverException as e:
if e.args[0].find('unbounded'):
max_z=infinity
pass
else:
raise e
if max_z == infinity:
unbounded_above.append(br)
elif maxi < max_z:
maxi = max_z
maxi_bounded_region = br
lp.set_objective(-x[2])
try:
min_z = lp.solve()
except sage.numerical.mip.MIPSolverException as e:
if e.args[0].find('unbounded'):
min_z = -infinity
pass
else:
raise e
if min_z == -infinity:
unbounded_below.append(br)
elif min_z < mini:
mini = min_z
mini_bounded_region = br
return ((maxi,maxi_bounded_region),(mini,mini_bounded_region),
unbounded_above, unbounded_below)
get_code = lambda x: x[x.find('/')+1:x.find('.')]
get_filename_with_code = lambda fnames, c: [x for x in fnames if x.find(c)>-1][0]
def gen_data(filename,xbds=(-200,200), ybds=(-200,200)):
P=RNAPolytope.construct_from_file(filename)
base_name = splitext(basename(filename))[0]
ar, c, s, p = get_cones_stripes_polygons( P )
signatures = [sign for sign,reg in ar]
xvals = list(set([s[0] for s in signatures]))
xvals.sort()
sign_dict={}
for x in xvals:
sign_dict[x]=[s for s in signatures if s[0]==x]
outfile_name = 'my_sample/out_data/{0}--z.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for x in xvals:
zs = [s[2] for s in sign_dict[x]]
minz = min(zs)
maxz = max(zs)
outcsv.writerow((x,minz,maxz))
print "[{0}] Finished processsing {1}.".format(datetime.now(),base_name)
print "*"*30
def generate_data():
with open('acc_fname.pkl', 'rb') as pk_file:
acc_dens_fnames_pairs=pickle.load(pk_file)
filenames = iterate_sorted('sobjs/','*.sobj')
for prefix, fname in acc_dens_fnames_pairs:
code = get_code(fname)
try:
filename = get_filename_with_code(filenames,code)
except IndexError:
print "[{0}] No sobj file found for {1}.".format(datetime.now(),code)
print "*"*30
continue
base_name = splitext(basename(filename))[0]
print "[{0}] Sarting process for {1}.".format(datetime.now(),base_name )
P = RNAPolytope.construct_from_file(filename)
try:
rnapoly2ggb(P,'{0}--{1}'.format(prefix,base_name))
except:
e = sys.exc_info()[0]
print "ERROR: {0} ... moving on.".format(e)
print "[{0}] Done {1}.".format(datetime.now(),base_name )
continue
ar, c, s, p = get_cones_stripes_polygons( P )
print "[{0}] Finished generating regions for {1}.".format(datetime.now(),base_name)
outfile_name = 'my_sample/161012-out_data/{0}--{1}.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in ar:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--{1}--cones.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in c:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--{1}--stripes.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in s:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--{1}--poly.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in p:
outcsv.writerow(sign)
print "[{0}] Finished generating data for {1}.".format(datetime.now(),base_name)
signatures = [sign for sign,reg in ar]
xvals = list(set([s[0] for s in signatures]))
xvals.sort()
sign_dict={}
for x in xvals:
sign_dict[x]=[s for s in signatures if s[0]==x]
outfile_name = 'my_sample/161012-out_data/{0}--{1}--z.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for x in xvals:
zs = [s[2] for s in sign_dict[x]]
minz = min(zs)
maxz = max(zs)
missing_zs=[z for z in range(minz,maxz) if z not in zs]
outcsv.writerow((x,minz,maxz,missing_zs))
if minz != 3*x:
print "[{0}] COUNTEREXAMPLE! : See {1} for the x_max != 3 min_z.".format(datetime.now(),base_name)
print "[{0}] Finished processsing {1}.".format(datetime.now(),base_name)
print "*"*30
def gen_max_norms():
filenames = iterate_sorted('my_sample/sobjs/','*.sobj')
with open('my_sample/max_norms.csv','w') as outfile:
Maxi = 0
for filename in filenames:
base_name = splitext(basename(filename))[0]
print "[{0}] Sarting process for {1}.".format(datetime.now(),base_name )
P = RNAPolytope.construct_from_file(filename)
ar, c, s, p = get_cones_stripes_polygons(P)
maxi = 0
for v in p.vertices_list():
V = vector(v)
if maxi <= V.norm():
maxi = V.norm()
if maxi >= Maxi:
Maxi = maxi
outfile.write('{0}\n'.format(n(maxi)))
print "Max norm: {0} ({1})".format(Maxi,n(Maxi))
def generate_cdata(xbds=(-200,200), ybds=(-200,200)):
filenames = iterate_sorted('my_sample/sobjs/','*.sobj')
for filename in filenames:
base_name = splitext(basename(filename))[0]
print "[{0}] Sarting process for {1}.".format(datetime.now(),base_name )
P = RNAPolytope.construct_from_file(filename)
ar, c, s, p = get_bounded_cones_stripes_polygons(P,xbds,ybds)
print "[{0}] Finished generating regions for {1}.".format(datetime.now(),base_name)
outfile_name = 'my_sample/161012-out_data/{0}.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in ar:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--cones.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in c:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--stripes.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in s:
outcsv.writerow(sign)
outfile_name = 'my_sample/161012-out_data/{0}--poly.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for sign, reg in p:
outcsv.writerow(sign)
print "[{0}] Finished generating data for {1}.".format(datetime.now(),base_name)
P = sliced_cone_plots_b(P, abounds=xbds, cbounds=ybds)
for sign,br in c:
P += text('{0}'.format(sign), br.center() )
for sign,br in s:
P += text('{0}'.format(sign), br.center() )
P.save('my_sample/161012-out_data/pngs/{0}.png'.format(base_name),figsize=12)
signatures = [sign for sign,reg in ar]
xvals = list(set([s[0] for s in signatures]))
xvals.sort()
sign_dict={}
for x in xvals:
sign_dict[x]=[s for s in signatures if s[0]==x]
outfile_name = 'my_sample/161012-out_data/{0}--z.csv'.format(base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for x in xvals:
zs = [s[2] for s in sign_dict[x]]
minz = min(zs)
maxz = max(zs)
missing_zs=[z for z in range(minz,maxz) if z not in zs]
outcsv.writerow((x,minz,maxz,missing_zs))
if minz != 3*x:
print "[{0}] COUNTEREXAMPLE! : See {1} for the x_max != 3 min_z.".format(datetime.now(),base_name)
print "[{0}] Finished processsing {1}.".format(datetime.now(),base_name)
print "*"*30
def filter_bottom_left(L):
in_Q4 = lambda v: v[0]>=0 and v[1]<=0
SE_wedges = []
for s,R in L:
insert=False
for r in R.rays():
v = r.vector()
if in_Q4(v):
insert = True
break
if insert:
SE_wedges.append((s,R))
return SE_wedges
def get_xmax(L):
xmax = max([x[0][0] for x in L])
return [x for x in L if x[0][0]==xmax]
def get_relevant_regions(L):
bl = filter_bottom_left(L)
xm = get_xmax(L)
for x in xm:
if x not in bl:
bl.append(x)
bl.sort()
return bl
def generate_arc_diagrams():
with open('my_sample/acc_fname.pkl', 'rb') as pk_file:
acc_dens_fnames_pairs=pickle.load(pk_file)
filenames = iterate_sorted('my_sample/sobjs/','*.sobj')
for prefix, fname in acc_dens_fnames_pairs:
code = get_code(fname)
try:
filename = get_filename_with_code(filenames,code)
except IndexError:
print "[{0}] No sobj file found for {1}.".format(datetime.now(),code)
print "*"*30
continue
base_name = splitext(basename(filename))[0]
print "[{0}] Sarting process for {1}.".format(datetime.now(),base_name )
P = RNAPolytope.construct_from_file(filename)
ar, c, s, p = get_cones_stripes_polygons( P )
print "[{0}] Finished generating regions for {1}.".format(datetime.now(),base_name)
rr = get_relevant_regions( ar )
G=plot_array(rr,base_name)
outfile_name = 'my_sample/161020-out_data/{0}--{1}.svg'.format(prefix,base_name)
G.save(outfile_name,axes=False,figsize=24)
print "[{0}] Finished processsing {1}.".format(datetime.now(),base_name)
print "*"*30
def generate_d1_data():
with open('my_sample/acc_fname.pkl', 'rb') as pk_file:
acc_dens_fnames_pairs=pickle.load(pk_file)
filenames = iterate_sorted('my_sample/sobjs/','*.sobj')
for prefix, fname in acc_dens_fnames_pairs:
code = get_code(fname)
try:
filename = get_filename_with_code(filenames,code)
except IndexError:
print "[{0}] No sobj file found for {1}.".format(datetime.now(),code)
print "*"*30
continue
base_name = splitext(basename(filename))[0]
print "[{0}] Sarting process for {1}.".format(datetime.now(),base_name )
P = RNAPolytope.construct_from_file(filename)
ar = [(tuple(s.original_vertex),s) for s in P.d1_slices()]
signatures = [sign for sign,reg in ar]
xvals = list(set([s[0] for s in signatures]))
xvals.sort()
sign_dict={}
for x in xvals:
sign_dict[x]=[s for s in signatures if s[0]==x]
outfile_name = 'my_sample/161021-out_data/{0}--{1}--d1--z.csv'.format(prefix,base_name)
with open(outfile_name,'wb') as outfile:
outcsv = writer( outfile )
for x in xvals:
zs = [s[2] for s in sign_dict[x]]
minz = min(zs)
maxz = max(zs)
missing_zs=[z for z in range(minz,maxz) if z not in zs]
outcsv.writerow((x,minz,maxz,missing_zs))
if minz != 3*x:
print "[{0}] COUNTEREXAMPLE! : See {1} for the x_max != 3 min_z.".format(datetime.now(),base_name)
print "[{0}] Finished processsing {1}.".format(datetime.now(),base_name)
print "*"*30