Klauda Lab Simulation Wiki

# MEMBRANE PACKING DEFECT ANALYSIS 
# WRITTEN FOR: PYTHON (v 2.7.8) and openCV (v 3.1.0) 
# USES MODULES: numpy, os, cv2
# CREATED BY: KY WILDERMUTH (05/2016)
#
# This script will conduct various packing defect analysis on a single leaflet of a lipid membrane.
# It will produce three output files that provide the following data for each frame in the image set: 
# (1) 	"local_defect.dat" provides the total normal area of defect exposure directly below the protein, 
# 		the normal area of the protein, and a comparison value that describes the magnitude of difference
#		between the two defect shapes.
# (2)	"global_defect.dat" provides the total normal area of the entire leaflet followed by the total normal
#		area of defect exposure
# (3)	for each frame in the image set, "defect_freq_area.dat" provides a list of the normal area of all individual 
#		defects as well as a comparison value that describes the magnitude of difference between the selected 
#		defect and the protein shape.
#
# To run this script, an appropriate image set must be prepared. 
# See the associated tcl script for outputting packing defect images from VMD
#
# All inputs that must be defined by the user are found in Step 1 below. I recommend visualizing various 
# steps using the cv2.imshow() command (see openCV documentation) when using a new image set to check for expected results
#
# Area measurements in this script are converted to Angstroms from pixels via an in-program conversion factor.
# The conversion factor is based on a VMD measurement of the PBC box dimensions in the first frame 
# of the image set and is assumed to be in Angstroms.
# The conversion factor is reported in the "pbc_scale_dimensions.dat" file
#
# This script can be used on a membrane simulation that does not have a protein. If this is the case, ignore
# the output file "local_defect.dat"
#
 
import numpy as np 
import cv2
import os, os.path
 
###############################
### STEP 1: SET USER INPUTS ###
###############################
 
# set a limit on defect selection based on area of contour (removes very small defects)
def is_contour_bad(c):
	# calculate the contour area in A^2 
	area = cv2.contourArea(c)*conv
	# the contour is "bad" if its area is smaller than 1 A^2
	return not area > 1
 
# Define the color range used for the masks of each component in the system
# NOTE: cv2 uses BGR color space 
# 1 is lipid core, 2 is protein, 3 is pbc box (default color ranges are for yellow, red, and green respectively)
boundary_1 = [([0, 200, 200], [25, 255, 255])]
boundary_2 = [([0, 0, 200], [25, 25, 255])]
boundary_3 = [([0, 150, 0], [60, 255, 60])]
 
parent_dir = "/lustre/kwilder/nvt-o/rep1/dyn666"
 
conv_path = os.path.join(parent_dir, "data_files/pbc_scale_dimensions.dat")
local_path = os.path.join(parent_dir, "data_files/local_defect_top.dat")
global_path = os.path.join(parent_dir, "data_files/global_defect_top.dat")
freq_path = os.path.join(parent_dir, "data_files/defect_freq_area_top.dat")
 
# select image input directories:
lipid_path = os.path.join(parent_dir, "top/top_lipid")
prot_path = os.path.join(parent_dir, "top/top_prot")
 
# enter x and y dimensions of the pbc box (in A^2) from the first frame used in the image set (obtained from VMD)
box_x, box_y = 77.432, 77.432
 
 
################################
### STEP 2: PROTEIN ANALYSIS ###
################################
 
# start the frame number counter
count = 0
#iterate over all *.bmp files in the selected directories
lst = os.listdir(lipid_path)
srt = sorted(lst)
for i in srt:
	count = count + 1
	print('%d' % count)
	os.chdir(prot_path)
	if i.endswith(".bmp"):
		# import protein image
		prot_image = cv2.imread(i)
		# loop over the boundary color range for the protein
		for (lower, upper) in boundary_2:
			# create NumPy arrays from the boundaries
			lower = np.array(lower, dtype = "uint8")
			upper = np.array(upper, dtype = "uint8")
		# apply the protein mask based on the color range
		prot_select = cv2.inRange(prot_image, lower, upper)
		# define a contour for the protein
		(_,prot_c, _) = cv2.findContours(prot_select, cv2.RETR_EXTERNAL,
		cv2.CHAIN_APPROX_SIMPLE)
		# draw the contour on the original mask and fill it in
		prot_mask = cv2.drawContours(prot_select, prot_c,-1,255,-1)
 
################################
### STEP 3: DEFINE UNIT CELL ###
################################
 
	# import lipid image
	os.chdir(lipid_path)
	if i.endswith(".bmp"):
		image_in = cv2.imread(i)
		# add filter with edge conservation to smooth masks (run only one of the following filters - try MSF if BF is not sufficient)
		#image = image_in 	# enable this if you you don't need to use a filter 
		#image = cv2.pyrMeanShiftFiltering(image_in, 21, 51)
		image = cv2.bilateralFilter(image_in,10,75,75)
 
		# for filter optimization, check the original and filtered images side by side:
		#cv2.imshow("", np.hstack([image, image_in]))
		#cv2.waitKey(0)
 
		# loop over the boundary color range of the pbc box
		for (lower, upper) in boundary_3:
			# create NumPy arrays from the boundaries
			lower = np.array(lower, dtype = "uint8")
			upper = np.array(upper, dtype = "uint8")	 
		# apply the pbc box mask based on the color range
		pbc_select = cv2.inRange(image, lower, upper)
		# select the contour corresponding to the unit cell box
		(_,box_c, _) = cv2.findContours(pbc_select, cv2.RETR_EXTERNAL,
		cv2.CHAIN_APPROX_SIMPLE)
		# draw the contour on the original mask and fill it in
		box_mask = cv2.drawContours(pbc_select, box_c,-1,255,-1)
		box = cv2.bitwise_and(image, image, mask = box_mask)
 
		# use the area of the pbc box in the first image as a reference for conversion
		if count < 2:
			# define area of first frame pbc box in A^2 from VMD data
			box_ang = box_x*box_y
			#define area of first frame pbc box in picels from mask size
			box_pix = np.count_nonzero(box_mask)
			# define the conversion ratio from pixels to A^2
			conv = box_ang / box_pix
			# remove existing output files if they exist
			try:
				os.remove(local_path)
				os.remove(global_path)
				os.remove(freq_path)
			except OSError:
				pass
			# add data to output file
			f0 = open(conv_path, 'a')
			f0.write('conversion factor is %f A^2/pixel. All values are reported in A^2 \n' % conv)
			f1 = open(local_path, 'a')
			f1.write('frame_# area_prot_defect area_protein comparison_value \n')
			f2 = open(freq_path, 'a')
			f2.write('frame_# area_defect comparison_value \n')
			f3 = open(global_path, 'a')
			f3.write('frame_# area_box area_total_defect \n')
 
#################################
### STEP 4: MEMBRANE ANALYSIS ###
#################################
 
		box_pix = np.count_nonzero(box_mask)
		box_ang = conv*box_pix
		print('area of box %d is %d square angstrom' % (count, box_ang))
		f3 = open(global_path, 'a')
		f3.write('%d %f ' % (count, box_ang))
 
		# loop over the boundary color range for defects
		for (lower, upper) in boundary_1:
			# create NumPy arrays from the boundaries
			lower = np.array(lower, dtype = "uint8")
			upper = np.array(upper, dtype = "uint8")
		# apply the mask based on color range
		mask = cv2.inRange(box, lower, upper)
		output_defect = cv2.bitwise_and(box, box, mask = mask)
		#cv2.imshow("defects", output_defect)
		#cv2.waitKey(0)
 
		# compute total area of defect mask and convert to square angstroms
		defectt_area_pix = np.count_nonzero(mask)
		defectt_area_ang = defectt_area_pix*conv
		print('area of defects: %d square angstrom' % defectt_area_ang)
		# add data to defect area file
		f3.write('%f \n' % defectt_area_ang)
 
###################################
### STEP 5: REMOVE BAD CONTOURS ###
###################################
 
		# Prepare image for contour analysis
		# convert defect mask to grayscale
		gray = cv2.cvtColor(output_defect, cv2.COLOR_BGR2GRAY)
		#add threshold to image
		(T, thresh) = cv2.threshold(gray, 5, 255, cv2.THRESH_BINARY)
		# find all contours
		(_,cnts_all, _) = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
			cv2.CHAIN_APPROX_SIMPLE)
		# find all bad contours
		mask = np.ones(image.shape[:2], dtype="uint8")*255
		# loop over all contours
		for c in cnts_all:
			# if the contour is bad, draw it on the mask
			if is_contour_bad(c):
				cv2.drawContours(mask, [c],-1,0,-1)
		# remove bad contours from the image
		good_defects = cv2.bitwise_and(thresh, thresh, mask=mask)
		# find the remaining good contours
		(_,cnts_good, _) = cv2.findContours(good_defects, cv2.RETR_EXTERNAL,
			cv2.CHAIN_APPROX_SIMPLE)
		print ("There are a total of %d selected contours" % (len(cnts_good)))
 
###################################
### STEP 6: DEFECT ANALYSIS #######
###################################
 
		prot_defect = np.ones(image.shape[:2], dtype="uint8")*0
		# loop over the countours and compute the following
		for c in cnts_good:
			#define the single contour object mask
			mask = np.ones(image.shape[:2], dtype="uint8")*0
			single_contour = cv2.drawContours(mask, [c],-1,255,-1)
			#cv2.imshow("single contour", single_contour)
			#cv2.waitKey(0)
			prot_overlap = cv2.bitwise_and(prot_mask, prot_mask, mask = single_contour)
			prot_defect = prot_overlap + prot_defect
			# compare protein contour to defect contour
			compare = cv2.matchShapes(single_contour, prot_mask, 1, 0)
			# calculate the area of the contour
			defectf_area_pix = cv2.contourArea(c)
			defectf_area_ang = defectf_area_pix*conv
			# add area and distance data to defect frequency file
			f2.write("%d %f %f \n" % (count, defectf_area_ang, compare))
 
		# compare the protein shape to the shape of the defects directly below it
		prot_defect_area = np.count_nonzero(prot_defect)*conv
		prot_area = np.count_nonzero(prot_mask)*conv
		compare = cv2.matchShapes(prot_mask, prot_defect, 1, 0)
		f1.write("%d %f %f %f \n" % (count, prot_defect_area, prot_area, compare))