CoCalc -- Lab02_Lynch_Feldman

Lab 2

Project: [email protected] - PHYS-360-Optimization-Algorithms

Path: DCS303/Lab02_Lynch_Feldman_Somkuta.ipynb

Views: ⁴³

Kernel: SageMath (stable)

In [46]:

letters = ["A","G","C","U"]
codons = {
"Ala": ("GCU","GCC","GCA","GCG"),
"Arg": ("CGU","CGC","CGA","CGG","AGA","AGG"),
"Asn": ("AAU", "AAC"),
"Asp": ("GAU", "GAC"),
"Cys": ("UGU", "UGC"),
"Gln": ("CAA","CAG"),
"Glu": ("GAA", "GAG"),
"Gly": ("GGU","GGC","GGA","GGG"),
"His": ("CAU","CAC"),
"IIE": ("AUU","AUC","AUA"),
"Leu": ("UAA","UUG","CUU","CUC","CUA","CUG"),
"Lys": ("AAA","AAG"),
"Met": ("AUG"),
"Phe": ("UUU","UUC"),
"Pro": ("CCU","CCC","CCA","CCG"),
"Ser": ("UCU","UCC","UCA","UCG","AGU","AGC"),
"Thr": ("ACU","ACC","ACA","ACG"),
"Trp": ("UGG"),
"Tyr": ("UAU","UAC"),
"Val": ("GUU","GUC","GUA","GUG")
    }

In [47]:

# There are only 61 Amino Acids despite 64 total combinations of ACGU
bases = []
for k,v in codons.items():
    if(isinstance(v,str)):
        bases.append(v)
    else:
        for i in v:
            bases.append(i)
print("There are "+str(len(bases))+" bases, listed below:")
print(bases)

There are 61 bases, listed below:
['UGU', 'UGC', 'GAU', 'GAC', 'UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC', 'CAA', 'CAG', 'AAA', 'AAG', 'UGG', 'CCU', 'CCC', 'CCA', 'CCG', 'ACU', 'ACC', 'ACA', 'ACG', 'UUU', 'UUC', 'GCU', 'GCC', 'GCA', 'GCG', 'AUU', 'AUC', 'AUA', 'GGU', 'GGC', 'GGA', 'GGG', 'CAU', 'CAC', 'UAA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG', 'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG', 'AUG', 'GAA', 'GAG', 'AAU', 'AAC', 'UAU', 'UAC', 'GUU', 'GUC', 'GUA', 'GUG']

In [48]:

count = 0
for i in bases:
    if(i=="AUG"):
        count = count + 1
print("The probability of a start codon is " + str(float(count/len(bases))*100) + "%")
print("This agrees with an expected percent of "+str(float(1/len(bases))*100)+"%")

The probability of a start codon is 1.6393442623%
This agrees with an expected percent of 1.6393442623%

In [49]:

from collections import Counter #imports package that can find frequency inside a list
import re

In [50]:

# Creates ordered pairs of Amino Acids with Codons
combos = []
for k,v in codons.items():
    if(isinstance(v,str)):
        combos.append((k,v))
    else:
        for i in v:
            combos.append((k,i))

In [51]:

print(combos)

[('Cys', 'UGU'), ('Cys', 'UGC'), ('Asp', 'GAU'), ('Asp', 'GAC'), ('Ser', 'UCU'), ('Ser', 'UCC'), ('Ser', 'UCA'), ('Ser', 'UCG'), ('Ser', 'AGU'), ('Ser', 'AGC'), ('Gln', 'CAA'), ('Gln', 'CAG'), ('Lys', 'AAA'), ('Lys', 'AAG'), ('Trp', 'UGG'), ('Pro', 'CCU'), ('Pro', 'CCC'), ('Pro', 'CCA'), ('Pro', 'CCG'), ('Thr', 'ACU'), ('Thr', 'ACC'), ('Thr', 'ACA'), ('Thr', 'ACG'), ('Phe', 'UUU'), ('Phe', 'UUC'), ('Ala', 'GCU'), ('Ala', 'GCC'), ('Ala', 'GCA'), ('Ala', 'GCG'), ('IIE', 'AUU'), ('IIE', 'AUC'), ('IIE', 'AUA'), ('Gly', 'GGU'), ('Gly', 'GGC'), ('Gly', 'GGA'), ('Gly', 'GGG'), ('His', 'CAU'), ('His', 'CAC'), ('Leu', 'UAA'), ('Leu', 'UUG'), ('Leu', 'CUU'), ('Leu', 'CUC'), ('Leu', 'CUA'), ('Leu', 'CUG'), ('Arg', 'CGU'), ('Arg', 'CGC'), ('Arg', 'CGA'), ('Arg', 'CGG'), ('Arg', 'AGA'), ('Arg', 'AGG'), ('Met', 'AUG'), ('Glu', 'GAA'), ('Glu', 'GAG'), ('Asn', 'AAU'), ('Asn', 'AAC'), ('Tyr', 'UAU'), ('Tyr', 'UAC'), ('Val', 'GUU'), ('Val', 'GUC'), ('Val', 'GUA'), ('Val', 'GUG')]

In [52]:

full_save = []
for l in letters:
    for w in letters:
        let = l+w #Creates a combo of letters (ex AA,GG)
        count_temp = 0
        save = []
        for i in combos:
            if(i[1][0:2]==let):
                save.append(i[0])# Saves the Amino Acid
        full_save.append(save)

In [53]:

freqs = []#gets list of frequencies for each amino acid
for i in full_save:
    print(i)
    print(Counter(i).values())# Counts the frequency of elements
    freqs.append(Counter(i).values())

['Lys', 'Lys', 'Asn', 'Asn']
[2, 2]
['Ser', 'Ser', 'Arg', 'Arg']
[2, 2]
['Thr', 'Thr', 'Thr', 'Thr']
[4]
['IIE', 'IIE', 'IIE', 'Met']
[3, 1]
['Asp', 'Asp', 'Glu', 'Glu']
[2, 2]
['Gly', 'Gly', 'Gly', 'Gly']
[4]
['Ala', 'Ala', 'Ala', 'Ala']
[4]
['Val', 'Val', 'Val', 'Val']
[4]
['Gln', 'Gln', 'His', 'His']
[2, 2]
['Arg', 'Arg', 'Arg', 'Arg']
[4]
['Pro', 'Pro', 'Pro', 'Pro']
[4]
['Leu', 'Leu', 'Leu', 'Leu']
[4]
['Leu', 'Tyr', 'Tyr']
[1, 2]
['Cys', 'Cys', 'Trp']
[2, 1]
['Ser', 'Ser', 'Ser', 'Ser']
[4]
['Phe', 'Phe', 'Leu']
[1, 2]

In [55]:

count_weighted = 0
total = 0
for i in freqs:
    amt = float(sum(i))
    total =+ amt
    high = float(max(i))
    percent = float(high/amt)
    #print(percent)
    weight = float(percent)*float(amt)
    #print(weight)
    count_weighted = count_weighted + weight
#print(count_weighted)
predicted = count_weighted/len(bases)
print("The probability of a Codons having a same first two bases encodes for a different amino acid is "+ str(100 - predicted*100)+"%")
print("The probability of a Codons having the same first two bases encodes for the same amino acid is "+ str(predicted*100)+"%")

The probability of a Codons having a different first two bases encodes for the same amino acid is 19.6721311475%
The probability of a Codons having the same first two bases encodes for the same amino acid is 80.3278688525%

In [39]:

#reg_ex = ['AA.', 'A.A', '.AA', 'GA.', 'G.A', '.GA', 'CA.', 'C.A', '.CA', 'UA.', 'U.A', '.UA', , 'GG.', 'G.G', '.GG', 'CG.', 'C.G', '.CG', 'UG.', 'U.G', '.UG', 'AC.', 'A.C', '.AC', 'GC.', 'G.C', '.GC', 'CC.', 'C.C', '.CC', 'UC.', 'U.C', '.UC', 'AU.', 'A.U', '.AU', 'GU.', 'G.U', '.GU', 'CU.', 'C.U', '.CU', 'UU.', 'U.U', '.UU']
reg_ex = []
reg_ex.append(['AA.', 'A.A', '.AA'])
reg_ex.append(['GA.', 'G.A', '.GA','AG.', 'A.G', '.AG'])
reg_ex.append(['CA.', 'C.A', '.CA', 'AC.', 'A.C', '.AC'])
reg_ex.append(['UA.', 'U.A', '.UA','AU.', 'A.U', '.AU'])
reg_ex.append(['GG.', 'G.G', '.GG'])
reg_ex.append(['CG.', 'C.G', '.CG','GC.', 'G.C', '.GC'])
reg_ex.append(['CC.', 'C.C', '.CC'])
reg_ex.append(['UC.', 'U.C', '.UC','CU.', 'C.U', '.CU'])
reg_ex.append(['UU.', 'U.U', '.UU'])
reg_ex.append(['GU.', 'G.U', '.GU','UG.', 'U.G', '.UG'])

In [40]:

reg_ex_count = []
full_save_reg = []
for i in reg_ex:
    count = 0
    save_reg = []
    for t in i:
        for c in combos:
            if(re.match(t,c[1])):
                count += 1
                save_reg.append(c[0])# Saves the Amino Acid
    reg_ex_count.append(count)
    full_save_reg.append(save_reg)
print(reg_ex_count)
print(full_save_reg)

[12, 22, 24, 20, 12, 24, 12, 24, 11, 22]
[['Lys', 'Lys', 'Asn', 'Asn', 'Lys', 'Thr', 'IIE', 'Arg', 'Gln', 'Lys', 'Leu', 'Glu'], ['Asp', 'Asp', 'Glu', 'Glu', 'Ala', 'Gly', 'Glu', 'Val', 'Gly', 'Arg', 'Arg', 'Ser', 'Ser', 'Arg', 'Arg', 'Lys', 'Thr', 'Arg', 'Met', 'Gln', 'Lys', 'Glu'], ['Gln', 'Gln', 'His', 'His', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Thr', 'Thr', 'Thr', 'Thr', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'], ['Leu', 'Tyr', 'Tyr', 'Ser', 'Leu', 'IIE', 'Leu', 'Val', 'IIE', 'IIE', 'IIE', 'Met', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr'], ['Gly', 'Gly', 'Gly', 'Gly', 'Ala', 'Gly', 'Glu', 'Val', 'Trp', 'Gly', 'Arg', 'Arg'], ['Arg', 'Arg', 'Arg', 'Arg', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Ala', 'Ala', 'Ala', 'Ala', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg'], ['Pro', 'Pro', 'Pro', 'Pro', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'], ['Ser', 'Ser', 'Ser', 'Ser', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val', 'Leu', 'Leu', 'Leu', 'Leu', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala'], ['Phe', 'Phe', 'Leu', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val'], ['Val', 'Val', 'Val', 'Val', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg', 'Cys', 'Cys', 'Trp', 'Ser', 'Trp', 'Leu', 'Leu', 'Leu', 'Met', 'Val']]

In [41]:

freqs2 = []#gets list of frequencies for each amino acid
for i in full_save_reg:
    print(i)
    print(Counter(i).values())# Counts the frequency of elements
    freqs2.append(Counter(i).values())

['Lys', 'Lys', 'Asn', 'Asn', 'Lys', 'Thr', 'IIE', 'Arg', 'Gln', 'Lys', 'Leu', 'Glu']
[1, 4, 1, 1, 1, 1, 1, 2]
['Asp', 'Asp', 'Glu', 'Glu', 'Ala', 'Gly', 'Glu', 'Val', 'Gly', 'Arg', 'Arg', 'Ser', 'Ser', 'Arg', 'Arg', 'Lys', 'Thr', 'Arg', 'Met', 'Gln', 'Lys', 'Glu']
[2, 2, 1, 2, 1, 1, 1, 2, 5, 1, 4]
['Gln', 'Gln', 'His', 'His', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Thr', 'Thr', 'Thr', 'Thr', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr']
[3, 2, 3, 2, 6, 1, 1, 1, 1, 1, 2, 1]
['Leu', 'Tyr', 'Tyr', 'Ser', 'Leu', 'IIE', 'Leu', 'Val', 'IIE', 'IIE', 'IIE', 'Met', 'Ser', 'Thr', 'IIE', 'Asn', 'Asp', 'His', 'Asn', 'Tyr']
[1, 2, 1, 1, 5, 1, 1, 3, 2, 3]
['Gly', 'Gly', 'Gly', 'Gly', 'Ala', 'Gly', 'Glu', 'Val', 'Trp', 'Gly', 'Arg', 'Arg']
[1, 1, 6, 2, 1, 1]
['Arg', 'Arg', 'Arg', 'Arg', 'Gln', 'Pro', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala', 'Ala', 'Ala', 'Ala', 'Ala', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg']
[1, 1, 2, 1, 2, 1, 6, 2, 1, 6, 1]
['Pro', 'Pro', 'Pro', 'Pro', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala']
[1, 1, 6, 1, 1, 1, 1]
['Ser', 'Ser', 'Ser', 'Ser', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val', 'Leu', 'Leu', 'Leu', 'Leu', 'Pro', 'His', 'Leu', 'Arg', 'Ser', 'Pro', 'Thr', 'Ala']
[1, 1, 6, 1, 2, 1, 2, 1, 1, 6, 1, 1]
['Phe', 'Phe', 'Leu', 'Cys', 'Ser', 'Phe', 'Tyr', 'Phe', 'IIE', 'Leu', 'Val']
[1, 1, 1, 4, 1, 2, 1]
['Val', 'Val', 'Val', 'Val', 'Asp', 'Ala', 'Gly', 'Val', 'Cys', 'Ser', 'Gly', 'Arg', 'Cys', 'Cys', 'Trp', 'Ser', 'Trp', 'Leu', 'Leu', 'Leu', 'Met', 'Val']
[3, 1, 2, 6, 1, 1, 2, 3, 1, 2]

In [45]:

count_weighted2 = 0
total = 0
for i in freqs2:
    amt = float(sum(i))
    total += amt
    high = float(max(i))
    percent = float(high/amt)
    print(percent)
    weight = float(percent)*float(amt)
    #print(weight)
    count_weighted2 = count_weighted2 + weight
#print(count_weighted)
predicted = count_weighted2/total
#print(predicted)
print("The probability of a Codon having the same two bases implies the same amino acid is "+ str(predicted*100)+"%")
print("The probability of a Codon having different two bases implies the same amino acid is "+ str(100-predicted*100)+"%")

333333333333
227272727273
25
25
5
25
5
25
363636363636
272727272727
The probability of a Codon having the same two bases implies the same amino acid is 29.5081967213%
The probability of a Codon having different two bases implies the same amino acid is 70.4918032787%