CoCalc -- 2018-11-29-104034.ipynb

Project: alchemist

Path: 2018-11-29-104034.ipynb

Views: ¹³⁴

Kernel: Python 3 (Anaconda 2019)

In [0]:

In [0]:

#### import sys

class Alchemist:
    def __init__(self, filename, resnum, resname, ffbonded, rtp, charge_file=None, types_file=None):
        """
        alch = Alchemist('file.itp', 10)
        """
        self.filename = filename
        self.deprot = {"ASH":"ASP", "GLH":"GLU", "GLU":"GLU", "ASP":"GLU", "HSP":"HSD", "GLUP":"GLU", "ASPP":"ASP"}
        self.resnum = resnum
        self.resname = resname
        self.rtp = rtp
        self.ffbonded = [l for l in open(ffbonded)]
        self.content = [line for line in open(self.filename)]
        self.sections, self.sections_names = self.get_sections()
        self.names_to_types_A, self.nums_to_names, self.names_to_nums = self.get_types()
        if charge_file and types_file:
            self.charges = {line.split()[0]:line.split()[1] for line in open(charge_file)}
            self.names_to_types_B = self.parse_types(types_file)
        else:
            self.charges, self.names_to_types_B = self.parse_rtp()
        self.different_types = self.find_different_types()
        self.prefix = 'alch-'
    
    def __call__(self):
        """
        alch()
        """
        self.add_state_B()
        self.all_sections_add_B()
        self.write()
    
    def parse_rtp(self):
        # read and return charges & names_to_types
        with open(self.rtp, 'r') as f:
            lista=[]
            x = 0
            for line in f:
                if len(line.split()) > 1 and line.split()[0] == '[' and line.split()[1] == self.deprot[self.resname]: 
                    x=1
                if x==1 and line.split()[0] != '[':
                    lista.append(line)
                if len(line.split()) > 1 and line.split()[0] == '[' and line.split()[1] == 'bonds' and x==1:
                    x=0
                    break
        charges_dict={}
        name_types_dict={}
        for line in lista:
            name_types_dict[line.split()[0]]= line.split()[1]
            charges_dict[line.split()[0]]=line.split()[2]
        return charges_dict, name_types_dict
    
    def get_types(self):
        # return a dict of names:types in state A
        # self.names_to_types_A, self.nums_to_names, self.names_to_nums = self.get_types()
        section = [l for l in self.get_section_content('atoms') if len(l.split()) > 5 and l.split()[2] == str(self.resnum) and l.split()[3] == self.resname and not l.lstrip().startswith('[') and not l.lstrip().startswith(';')]
        ntta = {l.split()[4]:l.split()[1] for l in section}
        ntna = {l.split()[0]:l.split()[4] for l in section}
        natn = {l.split()[4]:l.split()[0] for l in section}
        return ntta, ntna, natn
    
    def find_different_types(self):
        diffs = []
        for num in self.nums_to_names.keys():
            print(num)
            name = self.nums_to_names[num]
            try:
              if self.names_to_types_A[name] != self.names_to_types_B[name]:
                diffs.append(num)
            except KeyError:
                diffs.append(num)
        return diffs
    
    def parse_types(self, types_file):
        #nazwa atomu pierwsza kolumna, typ druga kolumna
        types = [line for line in open(types_file)]
        return {line.split()[0]:line.split()[1] for line in types if len(line.split())>1}
    
    def get_sections(self):
        # petla po content
        # dwie listy
        # jesli zaczyna sie od '[', to nazwa do listy z nazwami, a pusta lista do listy z sekcjami
        # dodajemy kolejne linijki do ostatniej podlisty
        names,section_insides =[],[]
        section_insides.append([])
        names.append('header')
        for line in self.content:
            if line.strip().startswith('['):
                names.append(line.split()[1])
                section_insides.append([])
            section_insides[-1].append(line)
        return section_insides, names
    
    def get_section_content(self, section_name, order=0):
        # znalezc wszystkie instancje nazwy "section_name" w self.sections_names
        # ustalic, o ktora chodzi (order) -> numer sekcji w self.sections
        # zwrocic odpowiedni element self.sections
        n = len(self.sections_names)
        indegz = [j for j in range(n) if self.sections_names[j]==section_name][order]
        return self.sections[indegz]
     
    def set_section_content(self, section_name, order, line_num, new_line):
        n = len(self.sections_names)
        indegz = [j for j in range(n) if self.sections_names[j]==section_name][order]
        self.sections[indegz][line_num] = new_line
    
    def add_state_B(self):
        section_content = self.get_section_content('atoms')
        # w petli po liniach:
        # (1) sprawdzic, czy linia jest legitna
        # (2) znalezc typ, ladunek i mase
        # (3) dodac w odp kolejnosci do linii
        # (4) zapisac zmodyfikowana linie w odpowiednim elemencie self.sections
        num = 0
        for linenum, line in enumerate(section_content):
            if len(line.split()) > 7 and not line.lstrip().startswith(';') and str(line.split()[2])==str(self.resnum):
                nazwa, typ, masa = line.split()[4], line.split()[1], line.split()[7]
                try:
                    ladunek = self.charges[nazwa] 
                except KeyError:
                    ladunek = "0.00"
                if nazwa in self.names_to_types_B.keys():
                    typ = self.names_to_types_B[nazwa]
                else:
                    typ = 'DH'
                    self.names_to_types_B[nazwa] = 'DH'
                line = line.rstrip()
                if ';' in line:
                    line = line[:line.index(';')]
                suffix = "{:10s} {:10s} {:10s}".format(typ, ladunek, masa)
                new_line = line + suffix + '\n'
                self.set_section_content('atoms', 0, linenum, new_line)
                num += 1   
    
    def all_sections_add_B(self):
        self.add_B_to_section('bonds', 2)
        self.add_B_to_section('angles', 3)
        self.add_B_to_section('dihedrals', 4)
    
    def add_B_to_section(self, section, n_entries):
        section_content = self.get_section_content(section)
        for linenum, line in enumerate(section_content):
            atomnum_list = line.split()[0:n_entries]
            if any([True for i in atomnum_list if i in self.different_types]):
                parA=[self.names_to_types_A[self.nums_to_names[l]] for l in atomnum_list]
                if parA is None:
                    raise RuntimeError('{} not found in ffbonded'.format(' '.join(parA)))
                parB=[self.names_to_types_B[self.nums_to_names[l]] for l in atomnum_list]
                form="{:10} "*len(self.find_param(parA))*2
                try:
                    suffix = form.format(*self.find_param(parA), *self.find_param(parB))
                except KeyError:
                    suffix = form.format(*self.find_param(parA), *self.find_param(parA))
                new_line = line.rstrip() + suffix + '\n'
                self.set_section_content(section,0,linenum,new_line)
        
    def find_param(self, atomtypes_list):
        nparams = len(atomtypes_list)
        interaction_types = ['1', '2', '4', '5', '9']
        param = None
        for line in self.ffbonded[::-1]:
            lspl = line.split()
            if len(lspl) > nparams and lspl[nparams] in interaction_types:
                if self.test_line(lspl[:nparams], atomtypes_list):
                    if ';' not in lspl[nparams+1:]:
                        param = [float(x) for x in lspl[nparams+1:]]
                    else:
                        endpos = lspl.index(';')
                        param = [float(x) for x in lspl[nparams+1:endpos]]
        if param:
            return param
        else:
            raise KeyError()
    
    def test_line(self, line_list, ref_list):
        assert len(line_list) == len(ref_list)
        if 'X' in line_list:
            non_x_nums = [x for x in range(len(line_list)) if line_list[x] != "X"]
            line_list = [line_list[a] for a in non_x_nums]
            ref_list = [ref_list[a] for a in non_x_nums]
        n = len(line_list)
        if all([line_list[i] == ref_list[i] for i in range(n)]) or all([line_list[i] == ref_list[n-i-1] for i in range(n)]):
            return True
        else:
            return False
    
    def write(self):
        with open(self.prefix + self.filename, 'w') as outfile:
            for section in self.sections:
                for line in section:
                    outfile.write(line)
                    






q = Alchemist('topol.itp', 23, 'GLU', '/home/miloszw/gmx5_plu21/share/gromacs/top/amber99bsc1.ff/ffbonded.itp', '/home/miloszw/gmx5_plu21/share/gromacs/top/amber99bsc1.ff/aminoacids.rtp')

q()