Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download

Lab 2

Views: 43
Kernel: SageMath (stable)
letters = ["A","G","C","U"] codons = { "Ala": ("GCU","GCC","GCA","GCG"), "Arg": ("CGU","CGC","CGA","CGG","AGA","AGG"), "Asn": ("AAU", "AAC"), "Asp": ("GAU", "GAC"), "Cys": ("UGU", "UGC"), "Gln": ("CAA","CAG"), "Glu": ("GAA", "GAG"), "Gly": ("GGU","GGC","GGA","GGG"), "His": ("CAU","CAC"), "IIE": ("AUU","AUC","AUA"), "Leu": ("UAA","UUG","CUU","CUC","CUA","CUG"), "Lys": ("AAA","AAG"), "Met": ("AUG"), "Phe": ("UUU","UUC"), "Pro": ("CCU","CCC","CCA","CCG"), "Ser": ("UCU","UCC","UCA","UCG","AGU","AGC"), "Thr": ("ACU","ACC","ACA","ACG"), "Trp": ("UGG"), "Tyr": ("UAU","UAC"), "Val": ("GUU","GUC","GUA","GUG") }
# There are only 61 Amino Acids despite 64 total combinations of ACGU bases = [] for k,v in codons.items(): if(isinstance(v,str)): bases.append(v) else: for i in v: bases.append(i) print("There are "+str(len(bases))+" bases, listed below:") print(bases)
There are 61 bases, listed below: ['UGU', 'UGC', 'GAU', 'GAC', 'UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC', 'CAA', 'CAG', 'AAA', 'AAG', 'UGG', 'CCU', 'CCC', 'CCA', 'CCG', 'ACU', 'ACC', 'ACA', 'ACG', 'UUU', 'UUC', 'GCU', 'GCC', 'GCA', 'GCG', 'AUU', 'AUC', 'AUA', 'GGU', 'GGC', 'GGA', 'GGG', 'CAU', 'CAC', 'UAA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG', 'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG', 'AUG', 'GAA', 'GAG', 'AAU', 'AAC', 'UAU', 'UAC', 'GUU', 'GUC', 'GUA', 'GUG']
count = 0 for i in bases: if(i=="AUG"): count = count + 1 print("The probability of a start codon is " + str(float(count/len(bases))*100) + "%") print("This agrees with an expected percent of "+str(float(1/len(bases))*100)+"%")
The probability of a start codon is 1.6393442623% This agrees with an expected percent of 1.6393442623%
from collections import Counter #imports package that can find frequency inside a list import re
# Creates ordered pairs of Amino Acids with Codons combos = [] for k,v in codons.items(): if(isinstance(v,str)): combos.append((k,v)) else: for i in v: combos.append((k,i))
print(combos)
[('Cys', 'UGU'), ('Cys', 'UGC'), ('Asp', 'GAU'), ('Asp', 'GAC'), ('Ser', 'UCU'), ('Ser', 'UCC'), ('Ser', 'UCA'), ('Ser', 'UCG'), ('Ser', 'AGU'), ('Ser', 'AGC'), ('Gln', 'CAA'), ('Gln', 'CAG'), ('Lys', 'AAA'), ('Lys', 'AAG'), ('Trp', 'UGG'), ('Pro', 'CCU'), ('Pro', 'CCC'), ('Pro', 'CCA'), ('Pro', 'CCG'), ('Thr', 'ACU'), ('Thr', 'ACC'), ('Thr', 'ACA'), ('Thr', 'ACG'), ('Phe', 'UUU'), ('Phe', 'UUC'), ('Ala', 'GCU'), ('Ala', 'GCC'), ('Ala', 'GCA'), ('Ala', 'GCG'), ('IIE', 'AUU'), ('IIE', 'AUC'), ('IIE', 'AUA'), ('Gly', 'GGU'), ('Gly', 'GGC'), ('Gly', 'GGA'), ('Gly', 'GGG'), ('His', 'CAU'), ('His', 'CAC'), ('Leu', 'UAA'), ('Leu', 'UUG'), ('Leu', 'CUU'), ('Leu', 'CUC'), ('Leu', 'CUA'), ('Leu', 'CUG'), ('Arg', 'CGU'), ('Arg', 'CGC'), ('Arg', 'CGA'), ('Arg', 'CGG'), ('Arg', 'AGA'), ('Arg', 'AGG'), ('Met', 'AUG'), ('Glu', 'GAA'), ('Glu', 'GAG'), ('Asn', 'AAU'), ('Asn', 'AAC'), ('Tyr', 'UAU'), ('Tyr', 'UAC'), ('Val', 'GUU'), ('Val', 'GUC'), ('Val', 'GUA'), ('Val', 'GUG')]
full_save = [] for l in letters: for w in letters: let = l+w #Creates a combo of letters (ex AA,GG) count_temp = 0 save = [] for i in combos: if(i[1][0:2]==let): save.append(i[0])# Saves the Amino Acid full_save.append(save)
freqs = []#gets list of frequencies for each amino acid for i in full_save: print(i) print(Counter(i).values())# Counts the frequency of elements freqs.append(Counter(i).values())
['Lys', 'Lys', 'Asn', 'Asn'] [2, 2] ['Ser', 'Ser', 'Arg', 'Arg'] [2, 2] ['Thr', 'Thr', 'Thr', 'Thr'] [4] ['IIE', 'IIE', 'IIE', 'Met'] [3, 1] ['Asp', 'Asp', 'Glu', 'Glu'] [2, 2] ['Gly', 'Gly', 'Gly', 'Gly'] [4] ['Ala', 'Ala', 'Ala', 'Ala'] [4] ['Val', 'Val', 'Val', 'Val'] [4] ['Gln', 'Gln', 'His', 'His'] [2, 2] ['Arg', 'Arg', 'Arg', 'Arg'] [4] ['Pro', 'Pro', 'Pro', 'Pro'] [4] ['Leu', 'Leu', 'Leu', 'Leu'] [4] ['Leu', 'Tyr', 'Tyr'] [1, 2] ['Cys', 'Cys', 'Trp'] [2, 1] ['Ser', 'Ser', 'Ser', 'Ser'] [4] ['Phe', 'Phe', 'Leu'] [1, 2]
count_weighted = 0 total = 0 for i in freqs: amt = float(sum(i)) total =+ amt high = float(max(i)) percent = float(high/amt) #print(percent) weight = float(percent)*float(amt) #print(weight) count_weighted = count_weighted + weight #print(count_weighted) predicted = count_weighted/len(bases) print("The probability of a Codons having a same first two bases encodes for a different amino acid is "+ str(100 - predicted*100)+"%") print("The probability of a Codons having the same first two bases encodes for the same amino acid is "+ str(predicted*100)+"%")
The probability of a Codons having a different first two bases encodes for the same amino acid is 19.6721311475% The probability of a Codons having the same first two bases encodes for the same amino acid is 80.3278688525%
#reg_ex = ['AA.', 'A.A', '.AA', 'GA.', 'G.A', '.GA', 'CA.', 'C.A', '.CA', 'UA.', 'U.A', '.UA', , 'GG.', 'G.G', '.GG', 'CG.', 'C.G', '.CG', 'UG.', 'U.G', '.UG', 'AC.', 'A.C', '.AC', 'GC.', 'G.C', '.GC', 'CC.', 'C.C', '.CC', 'UC.', 'U.C', '.UC', 'AU.', 'A.U', '.AU', 'GU.', 'G.U', '.GU', 'CU.', 'C.U', '.CU', 'UU.', 'U.U', '.UU'] reg_ex = [] reg_ex.append(['AA.', 'A.A', '.AA']) reg_ex.append(['GA.', 'G.A', '.GA','AG.', 'A.G', '.AG']) reg_ex.append(['CA.', 'C.A', '.CA', 'AC.', 'A.C', '.AC']) reg_ex.append(['UA.', 'U.A', '.UA','AU.', 'A.U', '.AU']) reg_ex.append(['GG.', 'G.G', '.GG']) reg_ex.append(['CG.', 'C.G', '.CG','GC.', 'G.C', '.GC']) reg_ex.append(['CC.', 'C.C', '.CC']) reg_ex.append(['UC.', 'U.C', '.UC','CU.', 'C.U', '.CU']) reg_ex.append(['UU.', 'U.U', '.UU']) reg_ex.append(['GU.', 'G.U', '.GU','UG.', 'U.G', '.UG'])
reg_ex_count = [] full_save_reg = [] for i in reg_ex: count = 0 save_reg = [] for t in i: for c in combos: if(re.match(t,c[1])): count += 1 save_reg.append(c[0])# Saves the Amino Acid reg_ex_count.append(count) full_save_reg.append(save_reg) print(reg_ex_count) print(full_save_reg)
[12, 22, 24, 20, 12, 24, 12, 24, 11, 22] [['Lys', 'Lys', 'Asn', 'Asn', 'Lys', 'Thr', 'IIE', 'Arg', 'Gln', 'Lys', 'Leu', 'Glu'], ['Asp', 'Asp', 'Glu', 'Glu', 'Ala', 'Gly', 'Glu', 'Val', 'Gly', 'Arg', 'Arg', 'Ser', 'Ser', 'Arg', 'Arg', 'Lys', 'Thr', 'Arg', 'Met', 'Gln', 'Lys', 'Glu'], ['Gln', 'Gln', 'His', 'His', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Thr', 'Thr', 'Thr', 'Thr', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'], ['Leu', 'Tyr', 'Tyr', 'Ser', 'Leu', 'IIE', 'Leu', 'Val', 'IIE', 'IIE', 'IIE', 'Met', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'], ['Gly', 'Gly', 'Gly', 'Gly', 'Ala', 'Gly', 'Glu', 'Val', 'Trp', 'Gly', 'Arg', 'Arg'], ['Arg', 'Arg', 'Arg', 'Arg', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Ala', 'Ala', 'Ala', 'Ala', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg'], ['Pro', 'Pro', 'Pro', 'Pro', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'], ['Ser', 'Ser', 'Ser', 'Ser', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val', 'Leu', 'Leu', 'Leu', 'Leu', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'], ['Phe', 'Phe', 'Leu', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val'], ['Val', 'Val', 'Val', 'Val', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg', 'Cys', 'Cys', 'Trp', 'Ser', 'Trp', 'Leu', 'Leu', 'Leu', 'Met', 'Val']]
freqs2 = []#gets list of frequencies for each amino acid for i in full_save_reg: print(i) print(Counter(i).values())# Counts the frequency of elements freqs2.append(Counter(i).values())
['Lys', 'Lys', 'Asn', 'Asn', 'Lys', 'Thr', 'IIE', 'Arg', 'Gln', 'Lys', 'Leu', 'Glu'] [1, 4, 1, 1, 1, 1, 1, 2] ['Asp', 'Asp', 'Glu', 'Glu', 'Ala', 'Gly', 'Glu', 'Val', 'Gly', 'Arg', 'Arg', 'Ser', 'Ser', 'Arg', 'Arg', 'Lys', 'Thr', 'Arg', 'Met', 'Gln', 'Lys', 'Glu'] [2, 2, 1, 2, 1, 1, 1, 2, 5, 1, 4] ['Gln', 'Gln', 'His', 'His', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Thr', 'Thr', 'Thr', 'Thr', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'] [3, 2, 3, 2, 6, 1, 1, 1, 1, 1, 2, 1] ['Leu', 'Tyr', 'Tyr', 'Ser', 'Leu', 'IIE', 'Leu', 'Val', 'IIE', 'IIE', 'IIE', 'Met', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'] [1, 2, 1, 1, 5, 1, 1, 3, 2, 3] ['Gly', 'Gly', 'Gly', 'Gly', 'Ala', 'Gly', 'Glu', 'Val', 'Trp', 'Gly', 'Arg', 'Arg'] [1, 1, 6, 2, 1, 1] ['Arg', 'Arg', 'Arg', 'Arg', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Ala', 'Ala', 'Ala', 'Ala', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg'] [1, 1, 2, 1, 2, 1, 6, 2, 1, 6, 1] ['Pro', 'Pro', 'Pro', 'Pro', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'] [1, 1, 6, 1, 1, 1, 1] ['Ser', 'Ser', 'Ser', 'Ser', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val', 'Leu', 'Leu', 'Leu', 'Leu', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'] [1, 1, 6, 1, 2, 1, 2, 1, 1, 6, 1, 1] ['Phe', 'Phe', 'Leu', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val'] [1, 1, 1, 4, 1, 2, 1] ['Val', 'Val', 'Val', 'Val', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg', 'Cys', 'Cys', 'Trp', 'Ser', 'Trp', 'Leu', 'Leu', 'Leu', 'Met', 'Val'] [3, 1, 2, 6, 1, 1, 2, 3, 1, 2]
count_weighted2 = 0 total = 0 for i in freqs2: amt = float(sum(i)) total += amt high = float(max(i)) percent = float(high/amt) print(percent) weight = float(percent)*float(amt) #print(weight) count_weighted2 = count_weighted2 + weight #print(count_weighted) predicted = count_weighted2/total #print(predicted) print("The probability of a Codon having the same two bases implies the same amino acid is "+ str(predicted*100)+"%") print("The probability of a Codon having different two bases implies the same amino acid is "+ str(100-predicted*100)+"%")
0.333333333333 0.227272727273 0.25 0.25 0.5 0.25 0.5 0.25 0.363636363636 0.272727272727 The probability of a Codon having the same two bases implies the same amino acid is 29.5081967213% The probability of a Codon having different two bases implies the same amino acid is 70.4918032787%