Of interest is the $Q$ at which $P_{init} \ge 0.5$ first occurs. Do these $Q$s lie within the ND-TS region $Q < 0.5$?
The table below reports the number of pids with $P_{init} \ge 50%$ within the ND-TS region. | |'a'|'c'|'x'|'r'|'y'|Sum| |:---:|:---:|:---:|:---:|:---:|:---:|:---:| |'a'|9| 7| 8| 8| 3| 35| |'b'|8| 8| 9| 8| 1| 34| |'s'|8| 8| 6| 8| 4| 34| |'l'|8| 7| 7| 9| 4| 35| |'v'|8| 7| 5| 8| 4| 32| |Sum|41|37|35|41|16|
The 'a' ND variant with 'a' EDS variant produced the best overall result. This combination boasts the largest column-wise sum and the largest row-wise sum at $Q < 0.4$ and at $Q < 0.5$.
import pandas as pd
import numpy as np
import os
import pickle # to save Python dict structure with tuple key, not supported by JSON directly.
import matplotlib.pyplot as plt
import pathways as pwy # my lib
# Uses ProcessPoolExecutor to speed up pathway checking.
Qs = pwy.Qs
PIDs = pwy.PIDs
print(PIDs)
# EDV=a; peak Qs computed previously
a_peakQs = np.array([0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3, 0.2, 0.2, 0.3, 0.3, 0.2, 0.2, 0.2])
c_peakQs = np.array([0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3, 0.2, 0.2, 0.3, 0.3, 0.3, 0.2, 0.2])
x_peakQs = np.array([0.1, 0.4, 0.3, 0.2, 0.3, 0.3, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.3, 0.2])
r_peakQs = np.array([0.1, 0.2, 0.2, 0.2, 0.1, 0.1, 0.2, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1])
y_peakQs = np.array([0.1, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1])
print('a', np.mean(a_peakQs))
print('c', np.mean(c_peakQs))
print('x', np.mean(x_peakQs))
print('r', np.mean(r_peakQs))
print('y', np.mean(y_peakQs))
# This takes about 2 minutes with my computer setup.
# Comment out if loading results from pickle file.
# edv = 'a'
# pwy.main(edv)
# pickle results
#with open("pinit_" + edv + ".pickle", 'wb') as fout:
# pickle.dump(pwy.props_dict, fout)
# import json
# with open("pinit_" + edv + ".txt", 'w') as fout:
# json.dump(pwy.props_dict, fout) # doesn't like tuple keys
# https://stackoverflow.com/questions/12337583/saving-dictionary-whose-keys-are-tuples-with-json-python/34204513
# numpy interpolate and from scipy.interpolate import interp1d
# my interpolate
def find_q(pinit):
for i, p in enumerate(pinit):
if p == 0.5:
return Qs[i]
elif p > 0.5:
if i == 0:
return Qs[i]
else:
return Qs[i-1]
# load a pickled result
edv = 'a'; print(edv)
with open("pinit_" + edv + ".pickle", 'rb') as fin:
props_dict = pickle.load(fin)
props_df = pd.DataFrame(props_dict).sort_index(axis=1)
props_df["Q"] = Qs
props_df = props_df.set_index("Q")
props_df.head()
pid = "1gb1_0"
props_df[pid]
# my interpolate
c_pinit_Qs, x_pinit_Qs, r_pinit_Qs, a_pinit_Qs, y_pinit_Qs = [], [], [], [], []
for pid in PIDs:
df = props_df[pid]
c_pinit_q = find_q(df['c'].values)
x_pinit_q = find_q(df['x'].values)
r_pinit_q = find_q(df['r'].values)
a_pinit_q = find_q(df['a'].values)
y_pinit_q = find_q(df['y'].values)
print(pid, np.round([a_pinit_q, c_pinit_q, x_pinit_q, r_pinit_q, y_pinit_q], 3))
c_pinit_Qs.append(c_pinit_q)
x_pinit_Qs.append(x_pinit_q)
r_pinit_Qs.append(r_pinit_q)
a_pinit_Qs.append(a_pinit_q)
y_pinit_Qs.append(y_pinit_q)
a_pinit_Qs = np.array(a_pinit_Qs)
c_pinit_Qs = np.array(c_pinit_Qs)
x_pinit_Qs = np.array(x_pinit_Qs)
r_pinit_Qs = np.array(r_pinit_Qs)
y_pinit_Qs = np.array(y_pinit_Qs)
# analyze interpolated results
print("EDV", edv)
print("Mean p_init Q")
print('a', np.mean(a_pinit_Qs), np.std(a_pinit_Qs), np.round(a_pinit_Qs, 3))
print('c', np.mean(c_pinit_Qs), np.std(c_pinit_Qs), np.round(c_pinit_Qs, 3))
print('x', np.mean(x_pinit_Qs), np.std(x_pinit_Qs), np.round(x_pinit_Qs, 3))
print('r', np.mean(r_pinit_Qs), np.std(r_pinit_Qs), np.round(r_pinit_Qs, 3))
print('y', np.mean(y_pinit_Qs), np.std(y_pinit_Qs), np.round(y_pinit_Qs, 3))
print("\nMAD p_init Q from peak Q")
print('a', np.mean(np.abs(a_pinit_Qs - a_peakQs)))
print('c', np.mean(np.abs(c_pinit_Qs - c_peakQs)))
print('x', np.mean(np.abs(x_pinit_Qs - x_peakQs)))
print('r', np.mean(np.abs(r_pinit_Qs - r_peakQs)))
print('y', np.mean(np.abs(y_pinit_Qs - y_peakQs)))
print()
print('a', np.sum(a_pinit_Qs < 0.4), np.sum(a_pinit_Qs < 0.5), np.sum(a_pinit_Qs < 0.6))
print('c', np.sum(c_pinit_Qs < 0.4), np.sum(c_pinit_Qs < 0.5), np.sum(c_pinit_Qs < 0.6))
print('x', np.sum(x_pinit_Qs < 0.4), np.sum(x_pinit_Qs < 0.5), np.sum(x_pinit_Qs < 0.6))
print('r', np.sum(r_pinit_Qs < 0.4), np.sum(r_pinit_Qs < 0.5), np.sum(r_pinit_Qs < 0.6))
print('y', np.sum(y_pinit_Qs < 0.4), np.sum(y_pinit_Qs < 0.5), np.sum(y_pinit_Qs < 0.6))
pinit_df = pd.DataFrame({'a' : a_pinit_Qs, 'c': c_pinit_Qs, 'x' : x_pinit_Qs,
'r': r_pinit_Qs, 'y': y_pinit_Qs})
pinit_df.boxplot(showmeans=True)
plt.title("Q where P_init first approaches 0.5.\nEDS variant '" + edv + "'.")
plt.xlabel('ND variant', fontsize=14)
# plt.ylabel("Q where P_init >= 0.5", fontsize=14)
plt.ylabel("P_init Q", fontsize=14)
plt.xticks(range(1, 6, 1), ['a', 'c', 'x', 'r', 'y'], fontsize=14)
_ = plt.yticks(np.arange(0, 1.1, 0.1))
plt.savefig("ND_ndv_pinitQ_" + edv + ".png")
fig, axes = plt.subplots(5, 3, figsize=(12, 20), constrained_layout=True)
plt.suptitle("Proportion of ND runs per $Q$ with native-like initial fold step $P_{init}$. " +
"(EDS variant '" + edv + "') ", fontsize=14)
plt.delaxes(axes[4][2])
for i, pid in enumerate(PIDs):
r = i//3; c = i%3; ax = axes[r][c]
props_df[pid]['c'].plot(ax=ax, style='--', label='c ' + str(c_pinit_Qs[i]))
props_df[pid]['x'].plot(ax=ax, style='--', label='x ' + str(x_pinit_Qs[i]))
props_df[pid]['r'].plot(ax=ax, style='--', label='r ' + str(r_pinit_Qs[i]))
props_df[pid]['a'].plot(ax=ax, style='--', label='a ' + str(a_pinit_Qs[i]))
props_df[pid]['y'].plot(ax=ax, style='--', label='y ' + str(y_pinit_Qs[i]))
ax.plot([0.1, 0.55], [0.5, 0.5], color='black') # upper left quadrant
ax.plot([0.55, 0.55], [0.5, 1.0], color='black') # upper left quadrant
ax.set_title(pid[:4])
ax.set_xlabel('Q'); ax.set_xticks(Qs)
# ax.set_ylabel('P_init');
ax.set_yticks(np.arange(0, 1.1, 0.1))
if pid == "2ptl_0":
ax.legend(loc="upper left"); ax.grid()
else:
ax.legend(loc="lower right"); ax.grid()
plt.savefig("ND_firststep_" + edv + ".png")
# Q < 0.4 a c x r y
a_counts = [8, 6, 7, 4, 0]
b_counts = [6, 5, 5, 5, 0]
s_counts = [6, 7, 4, 7, 1]
l_counts = [6, 5, 6, 7, 0]
v_counts = [5, 5, 4, 5, 1]
counts = np.vstack([a_counts, b_counts, s_counts, l_counts, v_counts])
print("row sums ", counts.sum(axis=1))
print("col sums ", counts.sum(axis=0))
# Q < 0.5 a c x r y
a_counts = [9, 7, 8, 8, 3]
b_counts = [8, 8, 9, 8, 1]
s_counts = [8, 8, 6, 8, 4]
l_counts = [8, 7, 7, 9, 4]
v_counts = [8, 7, 5, 8, 4]
counts = np.vstack([a_counts, b_counts, s_counts, l_counts, v_counts])
print("row sums ", counts.sum(axis=1))
print("col sums ", counts.sum(axis=0))
# Q < 0.6 a c x r y
a_counts = [11, 8, 11, 10, 6]
b_counts = [10, 9, 10, 11, 5]
s_counts = [9, 10, 9, 13, 5]
l_counts = [10, 10, 11, 10, 7]
v_counts = [9, 9, 9, 10, 8]
counts = np.vstack([a_counts, b_counts, s_counts, l_counts, v_counts])
print("row sums ", counts.sum(axis=1))
print("col sums ", counts.sum(axis=0))
NDVs = ['a', 'c', 'x', 'r', 'y']
a_MADS = [0.193, 0.221, 0.178, 0.321, 0.457]
b_MADS = [0.221, 0.243, 0.214, 0.286, 0.471]
s_MADS = [0.236, 0.228, 0.271, 0.214, 0.450]
l_MADS = [0.221, 0.221, 0.193, 0.278, 0.428]
v_MADS = [0.271, 0.278, 0.286, 0.314, 0.421]
plt.plot(NDVs, a_MADS, 'o--', label='a')
plt.plot(NDVs, b_MADS, 'o--', label='b')
plt.plot(NDVs, s_MADS, 'o--', label='s')
plt.plot(NDVs, l_MADS, 'o--', label='l')
plt.plot(NDVs, v_MADS, 'o--', label='v')
plt.title("MAD between ND peak $Q$ and $P_{init} Q$", fontsize=14)
plt.xticks(NDVs, fontsize=14)
plt.xlabel("ND variant", fontsize=14)
plt.legend(title="EDV"); plt.grid()
plt.savefig("Pinit_MAD.png")
SKLC October 2020