EDV = ['a', 'b', 's', 'l', 'v']
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dirname = "SingleDomain"
PIDs = [ "2f4k", "1bdd", "2abd_0", "1gb1_0", "1mi0_swissmin", "1mhx_swissmin", "2ptl_0",
"1shg", "1srm_1", "2ci2", "1aps_0", "2kjv_0", "2kjw_0", "1qys" ]
fn = os.path.join(dirname, "NS_stats.out")
df = pd.read_table(fn, sep='\s+')
df = df[["pid", "numNodes", "numLinks", "numSC", "numSCLE"]]
x = df.pid.values
fig, axes = plt.subplots(1, 2, figsize=(10, 4), constrained_layout=True)
ax1 = axes[0]; ax2 = axes[1];
ax1.set_title("Number of shortcuts per structure by EDS variant.")
ax1.plot(x, df.numSC.values, '.--', label='a' + ' ' + str(np.round(df.numSC.mean(), 3)))
ax2.set_title("Number of long-range shortcuts per structure by EDS variant.")
ax2.plot(x, df.numSCLE.values, '.--', label='a' + ' ' + str(np.round(df.numSCLE.mean(), 3)))
for v in EDV[1:]:
fn = os.path.join(dirname, "NS_stats_" + v + ".out")
df = pd.read_table(fn, sep='\s+')
ax1.plot(x, df.numSC.values, '.--', label = v + ' ' + str(np.round(df.numSC.mean(), 3)))
ax2.plot(x, df.numSCLE.values, '.--', label= v + ' ' + str(np.round(df.numSCLE.mean(), 3)))
# matplotlib 3.3.0
# https://stackoverflow.com/questions/63723514/userwarning-fixedformatter-should-only-be-used-together-with-fixedlocator
ax1.set_xticks(x); ax1.set_xticklabels(x, rotation=90); ax1.grid()
ax1.legend(title="EDV mean", loc='upper left', bbox_to_anchor=(1.0, 1.0))
ax2.set_xticks(x); ax2.set_xticklabels(x, rotation=90); ax2.grid()
_ = ax2.legend(title="EDV mean", loc='upper left', bbox_to_anchor=(1.0, 1.0))
from scipy.stats import ttest_rel
def onesided_paired_ttest(a, b):
stats, pval = ttest_rel(a, b) # two-sided paired ttest
onesided_ttest_alt = ' < ' if stats < 0 else ' > '
onesided_ttest_pval = pval/2
return (onesided_ttest_alt, onesided_ttest_pval)
# EDS may use distance between any residue pair, not just edges!
# compare CA-CA with CB-CB distances of edges (a sample)
CA_eds, CB_eds = [], []
for pid in PIDs:
fn = os.path.join(dirname, pid + "_edges.out")
da = pd.read_table(fn, sep='\s+')
CA_eds.append(da.ed.mean())
fn = os.path.join(dirname, pid + "_edges_b.out")
db = pd.read_table(fn, sep='\s+')
CB_eds.append(db.ed.mean())
print(pid, np.round([da.ed.mean(), da.ed.std(), db.ed.mean(), db.ed.std()], 3))
# On average, CA-CA distances are significantly shorter than CB-CB distances.
print(onesided_paired_ttest(CB_eds, CA_eds))
plt.plot(CA_eds, label='CA')
plt.plot(CB_eds, label='CB')
_ = plt.legend();
# Plot the native-state contact map on the given axis,
# Black and red cells denote PRN0 edges and shortcuts, respectively.
import matplotlib.colors as matcol
def plot_NS_contactmap(cmwsc, ax):
n, m = cmwsc.shape
ax.imshow(cmwsc, origin="lower", cmap=matcol.ListedColormap(['white', 'black', 'red']))
ax.set_xticks(np.arange(0, n, 10))
ax.set_yticks(np.arange(0, m, 10))
def plot_EDV_NScontactmaps(pid):
fig1, axes1 = plt.subplots(2, 3, figsize=(9, 6), constrained_layout=True)
fig1.suptitle("Native contact maps by EDS variant " + pid, fontsize=14)
axes1[1][2].remove()
for i, v in enumerate(EDV):
if (v == 'a'):
fn = os.path.join(dirname, pid + "_cmwsc.out")
else:
fn = os.path.join(dirname, pid + "_cmwsc_" + v + ".out")
df = pd.read_table(fn, sep=' ', header=None)
r = i//3; c = i%3; ax = axes1[r][c]
plot_NS_contactmap(df.values, ax)
ax.set_title("EDS variant " + v)
plt.savefig("EDV_NAcontactmaps_" + pid + ".png")
return
plot_EDV_NScontactmaps("2ptl_0")
# for interactivity
%matplotlib inline
from ipywidgets import interact
_ = interact(plot_EDV_NScontactmaps, pid = PIDs)
def initial_foldstep(pid, edv):
"""Returns the initial pair of SSEs as a comma-separated string."""
if edv == 'a':
fn = os.path.join(dirname, pid + "_scn0_pathway.out")
else:
fn = os.path.join(dirname, pid + "_scn0_pathway_" + edv + ".out")
df = pd.read_table(fn, sep='\t')
configs = df.config.values
combi = configs[1].split()
first = [ e for e in combi if len(e) > 1 ][0] # get the string within the list
return first
# initial fold step by EDS variant
firsts_dict = {}
for pid in PIDs:
firsts = [ initial_foldstep(pid, v) for v in EDV ]
# print(pid, firsts)
firsts_dict[pid] = firsts
df = pd.DataFrame(firsts_dict).transpose()
df.columns = EDV
df
# agreement with initial foldstep of EDS variant 'a'
agreefirst_df = pd.DataFrame(df.a)
for v in EDV[1:]:
b = (df.a == df[v]) # series
b.name = v
agreefirst_df = agreefirst_df.join(b)
print(v, b.sum())
agreefirst_df
Structures with initial fold steps unaffected by EDS variant: 1bdd, 1mi0, 1shg, 2ci2, 2kjv, 2kjw and 1qys.
No EDS variant produces the same initial fold steps as 'a' for the four structures: 1gb1, 2ptl, 1mi0 and 1mhx.
SKLC September 2020