User Tools

Site Tools


trudiff.py
# Measures distance between center of mass (COM), nearest residue, and a custom selection to the average phopshate layer of a specified binding leaflet
# Measurements have sign accuracy, so the distance is negative if the peptide is below the average phosphate layer of the membrane
# Measurements are accurate even if the peptide crosses the PBC boundary
# Output is a single data file with a column for each measurement type time series
# ASSUMPTION: membrane centered at origin (z-coord), PBC cell is centered at origin, membrane thickness is constant in z-direction over timeseries (take average)
# Written by Ky Wildermuth, 11/16. Feel free to edit. Feel free to ask questions: kywildermuth@gmail.com
 
import pandas as pd
import numpy as np
 
top_phos_path = "top_phos_coord.dat"
bot_phos_path = "bot_phos_coord.dat"
prot_path = "prot_coord.dat"
prot_min_path = "prot_minmax_coord.dat"
prot_custom_path = "custom_coord.dat"
PBC_cell_path = "PBC_coord.dat"
 
### USER DEFINED PARAMETERS ###
 
# define primary interaction leaflet (value can be top or bottom)
leaflet = 'top'
# can specify end limit if desired, though default is to keep this the same length as num_frame (see below)
prebind_limit = 4000 
# define the number of dcd 
num_dcd = 1
#define the number of frames per dcd
num_frame = 4000
 
##########################
 
 
# compute and define the theoretical total number of data points per coor file (will be used to check if files have same number of data entries)
theo_length = num_dcd*num_frame
 
# confirm valid and correct leaflet choice
leaflet_choices = set(['top','bottom'])
if leaflet in leaflet_choices:
    print("primary leaflet is %s" % leaflet)
else:
    print('not a valid leaflet choice. Choose top or bottom')
    exit()
 
# load the coordinate data files to be used (phosphate and protein coordinates)  - only load up to defined prebind_limit
tp = pd.read_csv(top_phos_path, delim_whitespace = True, nrows=prebind_limit, header = None, names = ["dyn_num", "tp_x", "tp_y", "tp_z"])
bp = pd.read_csv(bot_phos_path, delim_whitespace = True, nrows=prebind_limit, header = None, names = ["dyn_num", "bp_x", "bp_y", "bp_z"])
prot = pd.read_csv(prot_path, delim_whitespace = True, nrows=prebind_limit, header = None, names = ["dyn_num", "prot_x", "prot_y", "prot_z"])
prot_min = pd.read_csv(prot_min_path, delim_whitespace = True, nrows=prebind_limit, header = None, names = ["dyn_num", "prot_x", "prot_y", "prot_z", "prot_mx", "prot_my", "prot_mz"])
prot_custom = pd.read_csv(prot_custom_path, delim_whitespace = True, nrows=prebind_limit, header = None, names = ["dyn_num", "prot_x", "prot_y", "prot_z"])
PBC = pd.read_csv(PBC_cell_path, delim_whitespace = True, nrows = prebind_limit, header = None, names = ["dyn_num", "cell_x", "cell_y", "cell_z", "ang_x", "ang_y", "ang_z"])
 
#### PROT COM ###
 
# extract the coordinates from each file to be used (z coord)
c1 = tp['tp_z']
c2 = bp['bp_z']
c3 = prot['prot_z']
c4 = PBC['cell_z']
 
# concatenate the coordinates to be used to one data frame and label the columns
defect_cond_dat = list(zip(c1,c2,c3,c4))
df1 = pd.DataFrame(data = defect_cond_dat, columns = ['tp_z', 'bp_z', 'prot_z', 'cell_z'])
 
# compute average z-dimensions for tp, bp, cell
tp_avg = df1["tp_z"].mean()
bp_avg = df1["bp_z"].mean()
p_avg = (abs(tp_avg) + abs(bp_avg))/2
cell_avg = df1["cell_z"].mean()
 
# define PBC_z as the average distance from leaflet surface to PBC boundary
PBC_z = ((cell_avg)/2) - p_avg
 
# visual check if assumptions are valid:
print(df1.describe())
 
yes = set(['yes','y', 'ye', ''])
no = set(['no','n'])
print 'Confirm that cell z-coordinate is constant and is symmetrical about membrane origin [y/n]'
choice = raw_input().lower()
if choice in yes:
   print "confirmed"
elif choice in no:
   print('Ending analysis')
   exit(1)
 
 
print('PBC_z is %d' % PBC_z)
 
 
# compute the z-distance between phosphate group of each bilayer and the protein COM
df1['top_diff'] = df1['tp_z'] - df1['prot_z']
df1['bot_diff'] = df1['bp_z'] - df1['prot_z']
# take the absolute value of each computation above; the protein is closer to the bilayer with the smaller z-dist
df1['top_diff_abs'] = abs(df1['top_diff'])
df1['bot_diff_abs'] = abs(df1['bot_diff'])
 
# define a new column, tru_neg, value is -1 if prot_z is less than tp_z and greater than bp_z; otherwise 0
df1['tru_neg'] = 1
df1.loc[(df1['prot_z'] < df1['tp_z']) & (df1['prot_z'] > df1['bp_z']), 'tru_neg'] = -1
 
# define a new column, tru_diff, value is difference between primary leaflet and peptide, across PBC if applicable
df1['tru_diff_com'] = 0
if leaflet == 'top':
	df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_com'] = df1['top_diff_abs']*df1['tru_neg']
	df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_com'] = 2*PBC_z - df1['bot_diff_abs']*df1['tru_neg']
elif leaflet == 'bottom':
	df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_com'] = df1['bot_diff_abs']*df1['tru_neg']
	df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_com'] = 2*PBC_z - df1['top_diff_abs']*df1['tru_neg']
 
 
df2 = pd.concat([df1['tru_diff_com']], axis=1, keys=['tru_diff_com'])
#df2.to_csv('noPBC_dist_prot.dat', sep = ' ', index = False, header = False)
 
 
### custom COM ###
 
# extract the coordinates from each file to be used (z coord)
c1 = tp['tp_z']
c2 = bp['bp_z']
c3 = prot_custom['prot_z']
c4 = PBC['cell_z']
 
# concatenate the coordinates to be used to one data frame and label the columns
defect_cond_dat = list(zip(c1,c2,c3,c4))
df1 = pd.DataFrame(data = defect_cond_dat, columns = ['tp_z', 'bp_z', 'prot_z', 'cell_z'])
 
# remove the brackets from the dataframe (if they exist)
df1 = df1.replace( '[\},)]','', regex=True )
 
# convert to string 
df1.prot_z = df1.prot_z.astype(float)
 
#print df1[:10]
 
# compute the z-distance between phosphate group of each bilayer and the protein COM
df1['top_diff'] = df1['tp_z'] - df1['prot_z']
df1['bot_diff'] = df1['bp_z'] - df1['prot_z']
# take the absolute value of each computation above; the protein is closer to the bilayer with the smaller z-dist
df1['top_diff_abs'] = abs(df1['top_diff'])
df1['bot_diff_abs'] = abs(df1['bot_diff'])
 
# define a new column, tru_neg, value is -1 if prot_z is less than tp_z and greater than bp_z; otherwise 0
df1['tru_neg'] = 1
df1.loc[(df1['prot_z'] < df1['tp_z']) & (df1['prot_z'] > df1['bp_z']), 'tru_neg'] = -1
 
# define a new column, tru_diff_custom, value is difference between primary leaflet and peptide, across PBC if applicable
df1['tru_diff_custom'] = 0
if leaflet == 'top':
  df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_custom'] = df1['top_diff_abs']*df1['tru_neg']
  df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_custom'] = 2*PBC_z - df1['bot_diff_abs']*df1['tru_neg']
elif leaflet == 'bottom':
  df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_custom'] = df1['bot_diff_abs']*df1['tru_neg']
  df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_custom'] = 2*PBC_z - df1['top_diff_abs']*df1['tru_neg']
 
 
df3 = pd.concat([df1['tru_diff_custom']], axis=1, keys=['tru_diff_custom'])
#df3.to_csv('noPBC_dist_custom.dat', sep = ' ', index = False, header = False)
 
 
### MIN COM ###
 
# extract the coordinates from each file to be used (z coord)
c1 = tp['tp_z']
c2 = bp['bp_z']
c3 = prot_min['prot_z']
c4 = prot_min['prot_mz']
c5 = PBC['cell_z']
 
# concatenate the coordinates to be used to one data frame and label the columns
defect_cond_dat = list(zip(c1,c2,c3,c4,c5))
df1 = pd.DataFrame(data = defect_cond_dat, columns = ['tp_z', 'bp_z', 'prot_z_min', 'prot_z_max', 'cell_z'])
 
# remove the brackets from the dataframe (if they exist)
df1 = df1.replace( '[\},)]','', regex=True )
 
# convert to string 
df1.prot_z_min= df1.prot_z_min.astype(float)
# convert to string 
df1.prot_z_max= df1.prot_z_max.astype(float)
 
# compute the z-distance between phosphate group of each bilayer and the protein min
df1['top_diff_min'] = df1['tp_z'] - df1['prot_z_min']
df1['bot_diff_min'] = df1['bp_z'] - df1['prot_z_min']
# take the absolute value of each computation above; the protein is closer to the bilayer with the smaller z-dist
df1['top_diff_abs_min'] = abs(df1['top_diff_min'])
df1['bot_diff_abs_min'] = abs(df1['bot_diff_min'])
 
# compute the z-distance between phosphate group of each bilayer and the protein max
df1['top_diff_max'] = df1['tp_z'] - df1['prot_z_max']
df1['bot_diff_max'] = df1['bp_z'] - df1['prot_z_max']
# take the absolute value of each computation above; the protein is closer to the bilayer with the smaller z-dist
df1['top_diff_abs_max'] = abs(df1['top_diff_max'])
df1['bot_diff_abs_max'] = abs(df1['bot_diff_max'])
 
# define a new column, tru_neg, value is -1 if prot_z is less than tp_z and greater than bp_z; otherwise 0
df1['tru_neg_min'] = 1
df1['tru_neg_max'] = 1
df1.loc[(df1['prot_z_min'] < df1['tp_z']) & (df1['prot_z_min'] > df1['bp_z']), 'tru_neg_min'] = -1
df1.loc[(df1['prot_z_max'] < df1['tp_z']) & (df1['prot_z_max'] > df1['bp_z']), 'tru_neg_max'] = -1
 
df1['tru_diff_min'] = 0
if leaflet == 'top' : 
  df1['top_diff_abs'] = df1['top_diff_abs_min']
  df1['bot_diff_abs'] = df1['bot_diff_abs_min']
  df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_min'] = df1['top_diff_abs']*df1['tru_neg_min']
  df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_min'] = 2*PBC_z - df1['bot_diff_abs']*df1['tru_neg_min']
elif leaflet == 'bottom' :
  df1['top_diff_abs'] = df1['top_diff_abs_max']
  df1['bot_diff_abs'] = df1['bot_diff_abs_max']
  df1.loc[df1['bot_diff_abs'] < df1['top_diff_abs'], 'tru_diff_min'] = df1['bot_diff_abs']*df1['tru_neg_max']
  df1.loc[df1['top_diff_abs'] < df1['bot_diff_abs'], 'tru_diff_min'] = 2*PBC_z - df1['top_diff_abs']*df1['tru_neg_max']
 
 
df4 = pd.concat([df1['tru_diff_min']], axis=1, keys=['tru_diff_min'])
#df4.to_csv('noPBC_dist_min.dat', sep = ' ', index = False, header = False)
 
 
l1 = df2['tru_diff_com']
l2 = df3['tru_diff_custom']
l3 = df4['tru_diff_min']
 
truDiff_all = list(zip(l1,l2,l3))
df1 = pd.DataFrame(data = truDiff_all, columns = ['com', 'custom', 'min'])
df1.to_csv('binding_distances.dat', sep = ' ', index = False, header = True)
trudiff.py.txt · Last modified: 2017/05/21 23:06 by edit