import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.stats import spearmanr, ttest_rel
from scipy.stats import ttest_ind_from_stats as tts
Qs =[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
dirname = "Paci_ND3"
PIDs = ["1imq", "1lmb4", "1bf4", "2ptl", "2ci2", "1aps", "1fmk", "1bk2", "1shf", "1ten"]
EDVs = ['a'] # EDVs = ['a', 'b', 's', 'l', 'v']
NDVs = ['c', 'x', 'r', 'a', 'y']
ndv_colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']
# computed previously
c_peak_Qs = np.array([0.2, 0.3, 0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.3])
x_peak_Qs = np.array([0.2, 0.3, 0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2])
r_peak_Qs = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2])
a_peak_Qs = np.array([0.2, 0.3, 0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2])
y_peak_Qs = np.array([0.2, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1])
# cxray
peak_Qs = np.column_stack([c_peak_Qs, x_peak_Qs, r_peak_Qs, a_peak_Qs, y_peak_Qs])
peak_Qs
def mad_from_phi(ndv, edv):
dirn = os.path.join(dirname, "edv_a") # ND data
ofn = os.path.join(dirname, ndv + "_phi_mad.out")
fout = open(ofn, "w"); fout.close() # truncates existing file
fout = open(ofn, "a") # appends to file
fout.write("pid Q mad_deg_avg mad_deg_avg2 mad_cent_avg mad_cent_avg2\n")
for pid in PIDs:
fn0 = os.path.join(dirname, pid + "_nodestats.out") # NS
df0 = pd.read_table(fn0, sep="\s+") # nid exp_phi deg0 cent0
N = len(df0)
fn1 = os.path.join(dirn, pid + "_" + ndv + "_nodestats.out") # ND
df1 = pd.read_table(fn1, sep="\s+") # Q nid deg_avg cent_avg
for q in Qs:
dq = df1.query("Q == @q")
m = dq.deg_avg.max() # normalize then select phi residues
dq = dq.assign(deg_avg_norm = dq.deg_avg.values/m)
# prop. of native degree
dq = dq.assign(deg_avg_norm2 = dq.deg_avg.values/df0.deg0.values)
m = dq.cent_avg.max()
dq = dq.assign(cent_avg_norm = dq.cent_avg.values/m)
# fraction of all paths
dq = dq.assign(cent_avg_norm2 = dq.cent_avg.values/(N*N-N))
dq = dq.assign(exp_phi = df0.exp_phi.values)
de = dq.query("exp_phi > -1")
a = np.round(np.mean(np.abs(de.deg_avg_norm.values - de.exp_phi.values)), 3)
b = np.round(np.mean(np.abs(de.cent_avg_norm.values - de.exp_phi.values)), 3)
a2 = np.round(np.mean(np.abs(de.deg_avg_norm2.values - de.exp_phi.values)), 3)
b2 = np.round(np.mean(np.abs(de.cent_avg_norm2.values - de.exp_phi.values)), 3)
fout.write("{0} {1} {2} {3} {4} {5}\n".format(pid, q, a, a2, b, b2))
fout.close()
df = pd.read_table(ofn, sep=' ')
# in general a2 > a
# count = np.sum(df.mad_deg_avg.values > df.mad_deg_avg2.values) # 21, 22, 19, 21, 14
# in general b2 > b
# count = np.sum(df.mad_cent_avg.values > df.mad_cent_avg2.values) # 16, 16, 56, 20, 64
# in general a2 > b2, desire to minimize mad, but cannot conclude to use b2 at this point!
count = np.sum(df.mad_deg_avg2.values > df.mad_cent_avg2.values) # 80, 78, 88, 86, 89
# print(ndv, edv, count, len(df))
return count
# this cell takes some time to run, only need to do this once to create ndv_phi_mad.out files
counts = [] # deg > cent mad across all Qs for each pid
for edv in EDVs:
row = []
for ndv in NDVs:
count = mad_from_phi(ndv, edv)
row.append(count)
counts.append(row)
counts
# formats counts and do summation
df = pd.DataFrame(counts).assign(EDV=EDVs).set_index("EDV")
df.columns = NDVs
s = df.sum(axis=0) # col sum
s.name = "sum"
df = df.append(s)
df = df.assign(sum = df.sum(axis=1)) # row sum
df
# load the ndv_phi_mad.out files
edv = 'a'
cxray_dfs = []
for ndv in NDVs:
fn = os.path.join(dirname, ndv + "_phi_mad.out")
df = pd.read_table(fn, sep=' ')
cxray_dfs.append(df)
import random
random.seed()
# mean_95CI with resampling/bootstrap
def bootstrap(vals):
means = np.zeros(10000); stdevs = np.zeros(10000)
for i in np.arange(10000):
resample = random.choices(vals, k=10) # with replacement
means[i], stdevs[i] = np.mean(resample), np.std(resample, ddof=1)
a, z = np.percentile(means, 2.5), np.percentile(means, 97.5)
moe = (z-a)/2.0
return moe, a, z
# calc the 95%CI of the mean, with sample stdev, n >= 30
def mean_95CI(sample_mean, sample_stdev, sample_size):
moe = (1.96 * sample_stdev)/np.sqrt(sample_size) # margin of error
return moe, sample_mean - moe, sample_mean + moe
def summary_stats(vals):
mu = np.mean(vals); snu = np.std(vals, ddof=1)
gmoe, ga, gz = mean_95CI(mu, snu, 10) # Gaussian estimated
bmoe, ba, bz = bootstrap(vals) # bootstrap
return np.round([mu, snu, np.ptp(vals), gmoe, ga, gz, bmoe, ba, bz], 4)
def plot_CIs(MADstats_df, minQstats_df):
methods = MADstats_df.columns.values; n = len(methods)
plt_colors = plt.get_cmap("tab20")
gcolors = plt_colors(list(range(1, n*2, 2))) # odd
bcolors = plt_colors(list(range(0, n*2, 2))) # even
fig, axes = plt.subplots(2, 1, figsize=(6, 8), constrained_layout=True)
ax = axes[0]
mu = MADstats_df.loc['sample_avg'].values
ga = MADstats_df.loc['95meanCI_gleft'].values
gz = MADstats_df.loc['95meanCI_gright'].values
ba = MADstats_df.loc['95meanCI_bleft'].values
bz = MADstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.legend(title="97.5 percentile", loc="upper left", bbox_to_anchor=(1.0, 1.0))
ax.set_title("mean MAD from exp_phi", fontsize=14)
ax.set_ylabel("95% confidence interval", fontsize=14)
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.grid()
ax = axes[1]
mu = minQstats_df.loc['sample_avg'].values
ga = minQstats_df.loc['95meanCI_gleft'].values
gz = minQstats_df.loc['95meanCI_gright'].values
ba = minQstats_df.loc['95meanCI_bleft'].values
bz = minQstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.legend(title="97.5 percentile", loc="upper left", bbox_to_anchor=(1.0, 1.0))
ax.set_title("mean minQ", fontsize=14)
ax.set_ylabel("95% confidence interval", fontsize=14)
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.grid()
return plt
# compare mads of ND variants
fig, axes = plt.subplots(4, 3, figsize=(12, 16), constrained_layout=True, sharey=True)
plt.suptitle("Mean absolute difference between phi-values from experiments and \n \
phi-values calculated with NS normalized node centrality from ND nets, EDS variant " + edv)
plt.delaxes(axes[3][1])
plt.delaxes(axes[3][2])
min_mads, min_Qs = [], []
for i, pid in enumerate(PIDs):
r = i//3; c = i%3; ax = axes[r][c]
res = []
for i, ndv in enumerate(NDVs):
dp = cxray_dfs[i].query("pid == @pid")
# mad = dp.mad_cent_avg.values
mad = dp.mad_cent_avg2.values
ax.plot(Qs, mad, 'o--', label=ndv)
res.append(mad)
ax.set_title(pid)
ax.set_xlabel('Q'); ax.set_xticks(np.arange(0, 1.1, 0.1))
ax.legend(); ax.grid()
min_mad = np.round(np.min(res, axis=1), 3) # row-wise per ND variant, over Qs for a pid
min_q = (np.round(np.argmin(res, axis=1), 3) + 1)/10
min_mads.append(min_mad)
min_Qs.append(min_q)
print(pid, min_mad, min_q)
# ofn = os.path.join(dirname, "ND_centphi_" + edv + ".png")
# print(ofn)
# plt.savefig(ofn)
# can skip if file already created
cent_minmad_df = pd.DataFrame(min_mads)
cent_minmad_df.columns = NDVs
cent_minmad_df = cent_minmad_df.assign(pid=PIDs).set_index('pid')
ofn = os.path.join(dirname, "ND_centphi_minmad_" + edv + ".out")
cent_minmad_df.to_csv(ofn, sep=' ')
cent_MADstats_df = pd.DataFrame({ "MAD_stats": ["sample_avg", "sample_stdev", "range", \
"gMOE", "95meanCI_gleft", "95meanCI_gright",
"bMOE", "95meanCI_bleft", "95meanCI_bright",],
"c": summary_stats(cent_minmad_df['c'].values),
"x": summary_stats(cent_minmad_df['x'].values),
"r": summary_stats(cent_minmad_df['r'].values),
"a": summary_stats(cent_minmad_df['a'].values),
"y": summary_stats(cent_minmad_df['y'].values) })
cent_MADstats_df = cent_MADstats_df.set_index("MAD_stats")
# ofn = os.path.join(dirname, "ND_cent_MADstats_" + edv + ".out")
# cent_MADstats_df.to_csv(ofn, sep=' ')
# can skip if file already created
cent_minQ_df = pd.DataFrame(min_Qs)
cent_minQ_df.columns = NDVs
cent_minQ_df = cent_minQ_df.assign(pid=PIDs).set_index('pid')
ofn = os.path.join(dirname, "ND_centphi_minQ_" + edv + ".out")
cent_minQ_df.to_csv(ofn, sep=' ')
cent_minQstats_df = pd.DataFrame({ "minQ_stats": ["sample_avg", "sample_stdev", "range", \
"gMOE", "95meanCI_gleft", "95meanCI_gright",
"bMOE", "95meanCI_bleft", "95meanCI_bright"],
"c": summary_stats(cent_minQ_df['c'].values),
"x": summary_stats(cent_minQ_df['x'].values),
"r": summary_stats(cent_minQ_df['r'].values),
"a": summary_stats(cent_minQ_df['a'].values),
"y": summary_stats(cent_minQ_df['y'].values)})
cent_minQstats_df = cent_minQstats_df.set_index("minQ_stats")
# ofn = os.path.join(dirname, "ND_cent_minQstats_" + edv + ".out")
# cent_minQstats_df.to_csv(ofn, sep=' ')
# edv = 'a'
# fn = os.path.join(dirname, "ND_centphi_minmad_" + edv + ".out")
# cent_minmad_df = pd.read_table(fn, sep=' ').set_index('pid')
print(cent_minmad_df.describe()) # sample std
cent_minmad_df
# fn = os.path.join(dirname, "ND_centphi_minQ_" + edv + ".out")
# cent_minQ_df = pd.read_table(fn, sep=' ').set_index('pid')
print(cent_minQ_df.describe())
cent_minQ_df
fig, axes = plt.subplots(1, 2, figsize=(8, 4), constrained_layout=True)
plt.suptitle("Calculating $\phi$ values with NS normalized node centrality from ND nets.",
fontsize=14)
ax0 = axes[0]
cent_minmad_df.boxplot(ax = ax0, showmeans=True)
ax0.set_title('Minimum MAD from exp_phi')
ax0.set_xticklabels(NDVs, fontsize=14); ax0.set_xlabel('ND variant')
# ax0.set_yticks(np.arange(0.2, 0.55, 0.05))
ax1 = axes[1]
cent_minQ_df.boxplot(ax = ax1, showmeans=True)
ax1.set_title('$Q$ with min MAD');
ax1.set_xticklabels(NDVs, fontsize=14); ax1.set_xlabel('ND variant')
_ = ax1.set_yticks(np.arange(0, 1.1, 0.1))
# MAD between min_Qs (Q where MAD from exp_phi is minimum) and peak Qs for cxray NDVs
np.mean(np.abs(cent_minQ_df.values - peak_Qs), axis=0)
print("MAD between experimental and calculated phi-values")
cent_MADstats_df
print("Q with minimum MAD between experimental and calculated phi-values")
cent_minQstats_df
plt = plot_CIs(cent_MADstats_df, cent_minQstats_df)
_ = plt.suptitle("Calculating $\phi$ values with NS normalized ND node centrality.",
fontsize=14)
ndc_cent = cent_minmad_df['c'].values
ndx_cent = cent_minmad_df['x'].values
nda_cent = cent_minmad_df['a'].values
ndr_cent = cent_minmad_df['r'].values
ndy_cent = cent_minmad_df['y'].values
print("c a")
print(ttest_rel(ndc_cent, nda_cent))
print(tts(ndc_cent.mean(), ndc_cent.std(ddof=1), 10,
nda_cent.mean(), nda_cent.std(ddof=1), 10, equal_var=False))
print("\nx a")
print(ttest_rel(ndx_cent, nda_cent))
print(tts(ndx_cent.mean(), ndx_cent.std(ddof=1), 10,
nda_cent.mean(), nda_cent.std(ddof=1), 10, equal_var=False))
print("\nr a")
print(ttest_rel(ndr_cent, nda_cent))
print(tts(ndr_cent.mean(), ndr_cent.std(ddof=1), 10,
nda_cent.mean(), nda_cent.std(ddof=1), 10, equal_var=False))
print("\ny a")
print(ttest_rel(ndy_cent, nda_cent))
print(tts(ndy_cent.mean(), ndy_cent.std(ddof=1), 10,
nda_cent.mean(), nda_cent.std(ddof=1), 10, equal_var=False))
minMad_c = cent_minmad_df['c'].values
minMad_a = cent_minmad_df['a'].values
print('minMad c', minMad_c)
print('minMad a', minMad_a)
minQ_c = cent_minQ_df['c'].values
minQ_a = cent_minQ_df['a'].values
print('minQ c', minQ_c)
print('minQ a', minQ_a)
fig, axes = plt.subplots(4, 3, figsize=(12, 18), constrained_layout=True)
plt.suptitle("Calculated $\phi$ values from NS normalized node centrality, \
ND variants 'c' and 'a' with EDS variant 'a'.", fontsize=14)
plt.delaxes(axes[3, 1])
plt.delaxes(axes[3, 2])
dirn = os.path.join(dirname, "edv_a") # ND data
for i, pid in enumerate(PIDs):
r = i//3; c = i%3; ax = axes[r][c]
fn0 = os.path.join(dirname, pid + "_nodestats.out")
df0 = pd.read_table(fn0, sep="\s+") # nid exp_phi deg0 cent0
N = len(df0); m = N*N-N
fn1 = os.path.join(dirn, pid + "_c_nodestats.out")
df1 = pd.read_table(fn1, sep="\s+")
q_c = minQ_c[i]
dq = df1.query("Q == @q_c") # 1lmb4 > 1.0?
dq = dq.assign(cent_avg_norm2 = dq.cent_avg.values/m) # node centality is pre-averaged!
dq = dq.assign(exp_phi = df0.exp_phi.values)
de = dq.query("exp_phi > -1")
mad_c = np.round(np.mean(np.abs(de.cent_avg_norm2.values - de.exp_phi.values)), 2)
ax.plot(dq.nid.values+1, dq.cent_avg_norm2.values, color="tab:blue", alpha=0.5,
label="'c' cent " + str(mad_c))
fn1 = os.path.join(dirn, pid + "_a_nodestats.out")
df1 = pd.read_table(fn1, sep="\s+")
q_a = minQ_a[i]
dq = df1.query("Q == @q_a")
dq = dq.assign(cent_avg_norm2 = dq.cent_avg.values/m)
dq = dq.assign(exp_phi = df0.exp_phi.values)
de = dq.query("exp_phi > -1")
mad_a = np.round(np.mean(np.abs(de.cent_avg_norm2.values - de.exp_phi.values)), 2)
ax.plot(dq.nid.values+1, dq.cent_avg_norm2.values, color="tab:red", alpha=0.5,
label="'a' cent " + str(mad_a))
ax.plot(de.nid.values+1, de.exp_phi.values, "o--g", alpha=0.5, label='exp_phi')
ax.set_title(pid + " " + str(q_c) + " " + str(q_a), loc='left')
ax.set_xlabel('Residue'); ax.set_yticks(np.arange(0, 1.3, 0.1))
ax.legend(loc="lower left", bbox_to_anchor=(0.5, 1.0))
# ax.grid()
# ofn = os.path.join(dirname, "ND_centphi_ca_" + edv + ".png")
# plt.savefig(ofn)
# compare mads of ND variants
fig, axes = plt.subplots(4, 3, figsize=(12, 16), constrained_layout=True, sharey=True)
plt.suptitle("Mean absolute difference between phi-values from experiments and \n \
phi-values calculated with NS normalized node degree from ND nets, EDS variant " + edv)
plt.delaxes(axes[3][1])
plt.delaxes(axes[3][2])
min_mads, min_Qs = [], []
for i, pid in enumerate(PIDs):
r = i//3; c = i%3; ax = axes[r][c]
res = []
for i, ndv in enumerate(NDVs):
dp = cxray_dfs[i].query("pid == @pid")
mad = dp.mad_deg_avg2.values
ax.plot(Qs, mad, 'o--', label=ndv)
res.append(mad)
ax.set_title(pid)
ax.set_xlabel('Q'); ax.set_xticks(np.arange(0, 1.1, 0.1))
ax.legend(); ax.grid()
min_mad = np.round(np.min(res, axis=1), 3) # row-wise per ND variant, over Qs for a pid
min_q = (np.round(np.argmin(res, axis=1), 3) + 1)/10
min_mads.append(min_mad)
min_Qs.append(min_q)
print(pid, min_mad, min_q)
# ofn = os.path.join(dirname, "ND_degphi_" + edv + ".png")
# plt.savefig(ofn)
# can skip if file already exists
deg_minmad_df = pd.DataFrame(min_mads)
deg_minmad_df.columns = NDVs
deg_minmad_df = deg_minmad_df.assign(pid=PIDs).set_index('pid')
ofn = os.path.join(dirname, "ND_degphi_minmad_" + edv + ".out")
deg_minmad_df.to_csv(ofn, sep=' ')
deg_MADstats_df = pd.DataFrame({ "MAD_stats": ["sample_avg", "sample_stdev", "range", \
"gMOE", "95meanCI_gleft", "95meanCI_gright",
"bMOE", "95meanCI_bleft", "95meanCI_bright",],
"c": summary_stats(deg_minmad_df['c'].values),
"x": summary_stats(deg_minmad_df['x'].values),
"r": summary_stats(deg_minmad_df['r'].values),
"a": summary_stats(deg_minmad_df['a'].values),
"y": summary_stats(deg_minmad_df['y'].values) })
deg_MADstats_df = deg_MADstats_df.set_index("MAD_stats")
# ofn = os.path.join(dirname, "ND_deg_MADstats_" + edv + ".out")
# deg_MADstats_df.to_csv(ofn, sep=' ')
# can skip if file already exists
deg_minQ_df = pd.DataFrame(min_Qs)
deg_minQ_df.columns = NDVs
deg_minQ_df = deg_minQ_df.assign(pid=PIDs).set_index('pid')
ofn = os.path.join(dirname, "ND_degphi_minQ_" + edv + ".out")
deg_minQ_df.to_csv(ofn, sep=' ')
deg_minQstats_df = pd.DataFrame({ "minQ_stats": ["sample_avg", "sample_stdev", "range", \
"gMOE", "95meanCI_gleft", "95meanCI_gright",
"bMOE", "95meanCI_bleft", "95meanCI_bright"],
"c": summary_stats(deg_minQ_df['c'].values),
"x": summary_stats(deg_minQ_df['x'].values),
"r": summary_stats(deg_minQ_df['r'].values),
"a": summary_stats(deg_minQ_df['a'].values),
"y": summary_stats(deg_minQ_df['y'].values)})
deg_minQstats_df = deg_minQstats_df.set_index("minQ_stats")
# ofn = os.path.join(dirname, "ND_deg_minQstats_" + edv + ".out")
# deg_minQstats_df.to_csv(ofn, sep=' ')
# fn = os.path.join(dirname, "ND_degphi_minmad_" + edv + ".out")
# deg_minmad_df = pd.read_table(fn, sep=' ').set_index('pid')
print(deg_minmad_df.describe())
deg_minmad_df
# fn = os.path.join(dirname, "ND_degphi_minQ_" + edv + ".out")
# deg_minQ_df = pd.read_table(fn, sep=' ').set_index('pid')
print(deg_minQ_df.describe())
deg_minQ_df
fig, axes = plt.subplots(1, 2, figsize=(8, 4), constrained_layout=True)
plt.suptitle("Calculating $\phi$ values with NS normalized node degree from ND nets.",
fontsize=14)
ax0 = axes[0]
deg_minmad_df.boxplot(ax = ax0, showmeans=True)
ax0.set_title('Minimum MAD from exp_phi')
ax0.set_xticklabels(NDVs, fontsize=14); ax0.set_xlabel('ND variant')
# ax0.set_yticks(np.arange(0.2, 0.55, 0.05))
ax1 = axes[1]
deg_minQ_df.boxplot(ax = ax1, showmeans=True)
ax1.set_title('$Q$ with min MAD');
ax1.set_xticklabels(NDVs, fontsize=14); ax1.set_xlabel('ND variant')
_ = ax1.set_yticks(np.arange(0, 1.1, 0.1))
# MAD between min_Qs (Q where MAD from exp_phi is minimum) and peak Qs for cxray NDVs
np.mean(np.abs(deg_minQ_df.values - peak_Qs), axis=0)
print("MAD between experimental and calculated phi-values")
deg_MADstats_df
print("Q with minimum MAD between experimental and calculated phi-values")
deg_minQstats_df
plt = plot_CIs(deg_MADstats_df, deg_minQstats_df)
_ = plt.suptitle("Calculating $\phi$ values with NS normalized ND node degree.",
fontsize=14)
minMad_a = deg_minmad_df['a'].values
print('minMad a', minMad_a)
minQ_a = deg_minQ_df['a'].values
print('minQ a', minQ_a)
fig, axes = plt.subplots(4, 3, figsize=(12, 18), constrained_layout=True)
plt.suptitle("Calculated $\phi$ values from NS normalized node degree, \
ND variant 'a' with EDS variant 'a'.", fontsize=14)
plt.delaxes(axes[3, 1])
plt.delaxes(axes[3, 2])
dirn = os.path.join(dirname, "edv_a") # ND data
for i, pid in enumerate(PIDs):
r = i//3; c = i%3; ax = axes[r][c]
fn0 = os.path.join(dirname, pid + "_nodestats.out")
df0 = pd.read_table(fn0, sep="\s+") # nid exp_phi deg0 cent0
fn1 = os.path.join(dirn, pid + "_a_nodestats.out")
df1 = pd.read_table(fn1, sep="\s+")
q_a = minQ_a[i]
dq = df1.query("Q == @q_a")
dq = dq.assign(deg_avg_norm2 = dq.deg_avg.values/df0.deg0.values)
dq = dq.assign(exp_phi = df0.exp_phi.values)
de = dq.query("exp_phi > -1")
mad_a = np.round(np.mean(np.abs(de.deg_avg_norm2.values - de.exp_phi.values)), 2)
ax.plot(dq.nid.values+1, dq.deg_avg_norm2.values, color="tab:red", alpha=0.5,
label="'a' deg " + str(mad_a))
ax.plot(de.nid.values+1, de.exp_phi.values, "o--g", alpha=0.5, label='exp_phi')
ax.set_title(pid + " " + str(q_a), loc='left')
ax.set_xlabel('Residue'); ax.set_yticks(np.arange(0, 1.1, 0.1))
ax.legend(loc="lower left", bbox_to_anchor=(0.5, 1.0))
# ofn = os.path.join(dirname, "ND_degphi_a_" + edv + ".png")
# plt.savefig(ofn)
fig, axes = plt.subplots(1, 2, figsize=(10, 5), constrained_layout=True, sharey=True)
plt.suptitle("Calculating $\phi$ values with ND PRNs, EDS variant " + edv, fontsize=14)
ax = axes[0]
ax.set_title("Node centrality", fontsize=14)
for ndv in NDVs:
vals = cent_minmad_df[ndv].values
ax.plot(PIDs, vals, 'o-', label=ndv + ' ' + str(np.round(np.mean(vals), 3)))
ax.set_xticks(PIDs); ax.set_xticklabels(PIDs, rotation=90, fontsize=14)
ax.set_ylabel("Minimum MAD from exp_phi", fontsize=14)
ax.legend(); ax.grid()
ax = axes[1]
ax.set_title("Node degree", fontsize=14)
for ndv in NDVs:
vals = deg_minmad_df[ndv].values
ax.plot(PIDs, vals, 'o--', label=ndv + ' ' + str(np.round(np.mean(vals), 3)))
ax.set_xticks(PIDs); ax.set_xticklabels(PIDs, rotation=90, fontsize=14)
ax.legend(); ax.grid()
# ax.set_ylabel("Minimum MAD ", fontsize=14)
# ofn = os.path.join(dirname, "ND_minMadphi_" + edv + ".png")
# plt.savefig(ofn)
fig, axes = plt.subplots(2, 2, figsize=(8, 8), constrained_layout=True)
plt.suptitle("Calculating $\phi$ values with ND PRNs, EDS variant " + edv, fontsize=14)
ax0 = axes[0][0]
cent_minmad_df.boxplot(ax = ax0, showmeans=True)
ax0.set_title('Node centrality', fontsize=14)
ax0.set_xticklabels(NDVs, fontsize=14)
ax0.set_ylabel('Minimum MAD from exp_phi', fontsize=12)
ax0.set_yticks(np.arange(0.15, 0.6, 0.05))
ax1 = axes[0][1]
deg_minmad_df.boxplot(ax = ax1, showmeans=True)
ax1.set_title('Node degree', fontsize=14)
ax1.set_xticklabels(NDVs, fontsize=14)
ax1.set_yticks(np.arange(0.15, 0.6, 0.05))
ax2 = axes[1][0]
cent_minQ_df.boxplot(ax = ax2, showmeans=True)
ax2.set_xticklabels(NDVs, fontsize=14)
ax2.set_ylabel('$Q$ with minimum MAD', fontsize=12); ax2.set_yticks(np.arange(0, 1.1, 0.1))
ax2.set_xlabel('ND variant', fontsize=14)
ax3 = axes[1][1]
deg_minQ_df.boxplot(ax = ax3, showmeans=True)
ax3.set_yticks(np.arange(0, 1.1, 0.1))
ax3.set_xticklabels(NDVs, fontsize=14)
_ = ax3.set_xlabel('ND variant', fontsize=14)
# ofn = os.path.join(dirname, "ND_phi_" + edv + ".png")
# plt.savefig(ofn)
methods = cent_MADstats_df.columns.values; n = len(methods)
plt_colors = plt.get_cmap("tab20")
gcolors = plt_colors(list(range(1, n*2, 2))) # odd
bcolors = plt_colors(list(range(0, n*2, 2))) # even
fig, axes = plt.subplots(2, 2, figsize=(8, 8), constrained_layout=True)
plt.suptitle("Calculating $\phi$ values from ND nets.", fontsize=14)
ax = axes[0][0]
mu = cent_MADstats_df.loc['sample_avg'].values
ga = cent_MADstats_df.loc['95meanCI_gleft'].values
gz = cent_MADstats_df.loc['95meanCI_gright'].values
ba = cent_MADstats_df.loc['95meanCI_bleft'].values
bz = cent_MADstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.set_title("Node centrality", fontsize=14)
ax.set_yticks(np.arange(0.12, 0.32, 0.02))
ax.set_ylabel("95% CI mean MAD from exp_phi", fontsize=14)
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.legend(); ax.grid()
ax = axes[1][0]
mu = cent_minQstats_df.loc['sample_avg'].values
ga = cent_minQstats_df.loc['95meanCI_gleft'].values
gz = cent_minQstats_df.loc['95meanCI_gright'].values
ba = cent_minQstats_df.loc['95meanCI_bleft'].values
bz = cent_minQstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.set_yticks(np.arange(0.0, 0.5, 0.1))
ax.set_ylabel("95% CI mean minQ", fontsize=14)
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.set_xlabel("ND variant", fontsize=14)
ax.legend(); ax.grid()
ax = axes[0][1]
mu = deg_MADstats_df.loc['sample_avg'].values
ga = deg_MADstats_df.loc['95meanCI_gleft'].values
gz = deg_MADstats_df.loc['95meanCI_gright'].values
ba = deg_MADstats_df.loc['95meanCI_bleft'].values
bz = deg_MADstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.set_title("Node degree", fontsize=14)
ax.set_yticks(np.arange(0.12, 0.32, 0.02))
# ax.set_ylabel("95% confidence interval mean MAD from exp_phi", fontsize=14)
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.legend(); ax.grid()
ax = axes[1][1]
mu = deg_minQstats_df.loc['sample_avg'].values
ga = deg_minQstats_df.loc['95meanCI_gleft'].values
gz = deg_minQstats_df.loc['95meanCI_gright'].values
ba = deg_minQstats_df.loc['95meanCI_bleft'].values
bz = deg_minQstats_df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
ax.plot([i, i], [p[0], p[1]], marker='o', color=gcolors[i])
ax.plot([i+0.2, i+0.2], [p[2], p[3]], marker='o', color=bcolors[i], label=str(p[3]))
ax.scatter(i+0.2, mu[i], marker='^')
ax.set_yticks(np.arange(0.0, 0.5, 0.1))
ax.set_xticks(list(range(0, n, 1))); ax.set_xticklabels(methods, fontsize=14)
ax.set_xlabel("ND variant", fontsize=14)
ax.legend(); ax.grid()
# ofn = os.path.join(dirname, "ND_95CIstats_" + edv + ".png")
# print(ofn)
# plt.savefig(ofn)
# MAD values computed previously
ns_deg = np.array([0.48, 0.34, 0.49, 0.35, 0.34, 0.37, 0.45, 0.35, 0.51, 0.41])
ns_cent = np.array([0.44, 0.3, 0.41, 0.29, 0.32, 0.3, 0.43, 0.35, 0.44, 0.36])
ns_locent = np.array([0.33, 0.5, 0.2, 0.19, 0.24, 0.22, 0.3, 0.15, 0.24, 0.16])
ts_deg = np.array([0.46, 0.24, 0.35, 0.29, 0.28, 0.26, 0.3, 0.28, 0.3, 0.3])
ts_deg2 = np.array([0.25, 0.21, 0.4, 0.29, 0.36, 0.35, 0.28, 0.31, 0.22, 0.32])
ts_cent = np.array([0.41, 0.34, 0.37, 0.34, 0.29, 0.26, 0.42, 0.26, 0.43, 0.4])
ndc_cent = np.array([0.394, 0.286, 0.36, 0.327, 0.26, 0.26, 0.304, 0.23, 0.391, 0.252])
nda_cent = np.array([0.274, 0.372, 0.301, 0.287, 0.251, 0.242, 0.302, 0.204, 0.399, 0.313])
nda_deg = np.array([0.33, 0.315, 0.402, 0.318, 0.271, 0.295, 0.355, 0.268, 0.444, 0.299])
ndc_cent2 = np.array([0.186, 0.29, 0.221, 0.221, 0.155, 0.193, 0.26, 0.177, 0.283, 0.174])
nda_cent2 = np.array([0.187, 0.377, 0.232, 0.236, 0.175, 0.211, 0.276, 0.203, 0.304, 0.151])
nda_deg2 = np.array([0.158, 0.247, 0.191, 0.212, 0.141, 0.209, 0.266, 0.206, 0.282, 0.155])
# sample size is too small for CLT which 95%meanCI assumes (should actual do resampling?)
print("MAD between experimental and calculated phi-values")
df = pd.DataFrame({ "MAD_stats": ["sample_avg", "sample_stdev", "range", \
"gMOE", "95meanCI_gleft", "95meanCI_gright",
"bMOE", "95meanCI_bleft", "95meanCI_bright"],
"NS_deg": summary_stats(ns_deg), "NS_cent": summary_stats(ns_cent),
"NS_locent": summary_stats(ns_locent), "TSE_deg": summary_stats(ts_deg),
"TSE_deg2": summary_stats(ts_deg2), "TSE_cent": summary_stats(ts_cent),
"NDc_cent": summary_stats(ndc_cent), "NDa_cent": summary_stats(nda_cent),
"NDa_deg": summary_stats(nda_deg),
"NDc_cent2": summary_stats(ndc_cent2), "NDa_cent2": summary_stats(nda_cent2),
"NDa_deg2": summary_stats(nda_deg2)})
df = df.set_index("MAD_stats")
df
plt.figure(figsize=(8, 6))
methods = df.columns.values; n = len(methods)
plt_colors = plt.get_cmap("tab20")
gcolors = plt_colors(list(range(1, n*2, 2))) # odd
bcolors = plt_colors(list(range(0, n*2, 2))) # even
mu = df.loc['sample_avg'].values
ga = df.loc['95meanCI_gleft'].values; gz = df.loc['95meanCI_gright'].values
ba = df.loc['95meanCI_bleft'].values; bz = df.loc['95meanCI_bright'].values
for i, p in enumerate(zip(ga, gz, ba, bz)):
plt.plot([i, i], [p[0], p[1]], color=gcolors[i], marker='o')
plt.scatter(i+0.2, mu[i], marker='^')
plt.plot([i+0.2, i+0.2], [p[2], p[3]], color=bcolors[i], marker='o', label=str(p[3]))
plt.title("Calculating $\phi$ values", fontsize=14)
plt.xticks(list(range(0, n, 1)), methods, rotation=45, fontsize=12)
plt.xlabel("Method: PRN type and node statistic combination", fontsize=14)
plt.ylabel("95% confidence interval around mean MAD", fontsize=14)
plt.legend(title="97.5 percentile", loc="upper left", bbox_to_anchor=(1.0, 1.0)); plt.grid()
# ofn = os.path.join(dirname, "NSTSEND_95CIstats_" + edv + ".png")
# print(ofn)
# plt.savefig(ofn)
SKLC December 2020