#task4-group7 #the ion statistics calculator #Liyue Chang #import necessary libraries import argparse #use argparse to define command lines parser=argparse.ArgumentParser() parser.add_argument("inputfile", help="directory to the input file") parser.add_argument("-i", "--minimum_mass_to_charge_ratio_range", help="the minimum range of mass-to-charge of the peptide fragment", type=float,default=1000) parser.add_argument("-a", "--maximum_mass_to_charge_ratio_range", help="the maximum range of mass-to-charge of the peptide fragment", type=float,default=1500) parser.add_argument("-w", "--window_size", help="size of the sliding window", type=float, default=0.5) parser.add_argument("-s", "--step_size", help="size of steps the sliding window slides(must be smaller than window size)", type=float, default=0.2) parser.add_argument("-m", "--match_pattern", help="how the input amino acid matches the sequence", choices=['start with','contain'], default='contain') parser.add_argument("-p", "--amino_acid_matches_sequence", help="input amino acid matching the sequence", choices=['K','N','R','T','S','I','M','Q','H','P','L','E','D','A','G','V','Y','S','C','W','F','no'],default='no') parser.add_argument("create_file", help="name of the output file") args = parser.parse_args() filename=args.inputfile min_mz=round(args.minimum_mass_to_charge_ratio_range,4) max_mz=round(args.maximum_mass_to_charge_ratio_range,4) window_size=args.window_size step_size=args.step_size match_pattern=args.match_pattern pep_find=args.amino_acid_matches_sequence create_file=args.create_file #lists of useful elements protein_name=[] mass_to_charge=[] sequence=[] output=open(create_file,'w') #define a function to read file and store data def readfile(filename): global output readfile=open(filename,'r') next(readfile) lines=readfile.readlines() list1=[] cleavage_site=[] for line in lines: #use split to handle the data data=line.split() protein_name.append(data[0]) mass_to_charge.append(data[3]) cleavage_site.append(data[4]) sequence.append(data[5]) cleavage_site=set(cleavage_site) for i in cleavage_site: if i==0: print("There is no missed cleavage site") list1.append("no missed cleavage site") else: print("Missed cleavage site: ",i) list1.append("cleavage site: ") list1.append(i) output.writelines([list1[0],str(list1[1]),'\n']) #define a function to count number of peptides def count_num(min_mz,max_mz): count_pep=0 #use lists to store output data list1=[] list2=[] list3=[] global output #use defined list mass_to_charge, loops and if statement to count for m in mass_to_charge: mz=float(m) if ((min_mz1000.0: cons_pr[key]=[] for key in cons_pr.keys(): for index,nums in enumerate(mass_to_charge): if nums==key: cons_pr[key].append(protein_name[index]) #compare protein with same m/z for i in rep_pr: check_pr=[] check_p=[] for i in rep_pr[i]: check_pr.append(i) check_p=set(check_pr) if check_p!=1: rep_pr1.append(i) #use dictionary to compare duplicated protein with all proteins and proteins within m/z range of 1000 and 1500 for rp in range(len(rep_pr1)): if rep_pr1[rp] in num_rep: num_rep[rep_pr1[rp]]+=1 else: num_rep[rep_pr1[rp]]=1 for i in cons_pr: check_pr1=[] check_p1=[] for i in cons_pr[i]: check_pr1.append(i) check_p1=set(check_pr) if check_p!=1: cons_pr1.append(i) for cp in range(len(cons_pr1)): if cons_pr1[cp] in num_rep1: num_rep1[cons_pr1[cp]]+=1 else: num_rep1[cons_pr1[cp]]=1 for pr in range(len(protein_name)): if protein_name[pr] in num_all: num_all[protein_name[pr]]+=1 else: num_all[protein_name[pr]]=1 for m in num_rep.keys(): if m in num_all.keys(): if num_rep[m]max_mz: window=max_mz count_num(min_mz,window) min_mz+=step_size output.close()