# Adapted from https://www3.cmbi.umcn.nl/xssp/api/examples
# Python 3
"""
The client takes a PDB file (must end with .pdb), sends it to the REST service, which
creates DSSP data. The DSSP data is then output to a file.
Example:
python pdb_to_dssp.py 1crn.pdb https://www3.cmbi.umcn.nl/xssp/
"""
# https://docs.python.org/3/library/argparse.html?highlight=requests#module-argparse
# https://requests.readthedocs.io/en/master/
import argparse
import json
import requests
import time
def pdb_to_dssp(pdb_file_path, rest_url):
# Read the pdb file data into a variable
files = {'file_': open(pdb_file_path, 'rb')}
# Send a request to the server to create hssp data from the pdb file data.
# If an error occurs, an exception is raised and the program exits. If the
# request is successful, the id of the job running on the server is
# returned.
url_create = '{}api/create/pdb_file/dssp/'.format(rest_url)
r = requests.post(url_create, files=files)
r.raise_for_status()
job_id = json.loads(r.text)['id']
print("Job submitted successfully. Id is: '{}'".format(job_id))
# Loop until the job running on the server has finished, either successfully
# or due to an error.
ready = False
while not ready:
# Check the status of the running job. If an error occurs an exception
# is raised and the program exits. If the request is successful, the
# status is returned.
url_status = '{}api/status/pdb_file/dssp/{}/'.format(rest_url, job_id)
r = requests.get(url_status)
r.raise_for_status()
status = json.loads(r.text)['status']
print("Job status is: '{}'".format(status))
# If the status equals SUCCESS, exit out of the loop by changing the
# condition ready. This causes the code to drop into the `else` block
# below.
#
# If the status equals either FAILURE or REVOKED, an exception is raised
# containing the error message. The program exits.
#
# Otherwise, wait for five seconds and start at the beginning of the
# loop again.
if status == 'SUCCESS':
ready = True
elif status in ['FAILURE', 'REVOKED']:
raise Exception(json.loads(r.text)['message'])
else:
time.sleep(5)
else:
# Requests the result of the job. If an error occurs an exception is
# raised and the program exits. If the request is successful, the result
# is returned.
url_result = '{}api/result/pdb_file/dssp/{}/'.format(rest_url, job_id)
r = requests.get(url_result)
r.raise_for_status()
result = json.loads(r.text)['result']
# Return the result to the caller, which prints it to the screen.
return result
# dirname = "Paci"
# PIDs = ["1imq", "1lmb4", "1bf4", "2ptl", "2ci2", "1aps", "1fmk", "1bk2", "1shf", "1ten"]
# dirname = "Uzunoglo"
# PIDs = ["2pdd", "1enh", "1idy", "1lmb", "1ayi", "1imq", "2abd", "1hrc", "1div", "2ptl",
# "1bf4", "2ci2", "1ubq", "1poh", "1urn", "1ris", "1aps", "2acy", "1spr", "1fkb",
# "2vik", "1e0l", "1rlq", "1shg", "1shf", "1csp", "1mjc", "1ten", "1afi", "1c9o",
# "1e0m", "1fex", "1g6p", "1jo8", "1k9q", "1m9s", "1n88", "1o6x", "1psf", "1rfa",
# "1ryk", "1ss1", "1w4e", "1w4j", "1yza", "2bth", "2qjl", "3ait", "1pgb", "1tit"]
# 1div_n, 1fkb, 1ten, 1c9o, 1jo8, 1m9s, 2qjl - need to re-download pdb to generate dssp
# 1div split to 1div_n 1div_c
# 1m9s extract subset of residues, 1c9o extract A chain
# check for broken chain: grep '!' *_dssp.out
# 2pdd dssp output excludes residue 43
# dirname = "Kamagata"
# PIDs = ["1pgb", "2cro", "1cei", "2abd", "1hng", "1gxt", "1bta", "1tit", "1ttg", "1bni",
# "1adw", "3chy", "1ael", "1joo", "1i1b", "1dwr", "2rn2", "2a5e", "1ra9", "1l63",
# "1php"]
# 1gxt 3chy 1i1b - need hetatoms in pdb to generate dssp
# exclude 1hrh missing residues 538...543
# check for broken chain: grep '!' *_dssp.out
# Canonical set 14 pids
dirname = "SingleDomain"
PIDs = [ "2f4k", "1bdd", "2abd_0", "1gb1_0", "1mi0", "1mhx", "2ptl_0",
"1shg", "1srm_1", "2ci2", "1aps_0", "2kjv_0", "2kjw_0", "1qys" ]
# 2f4k, 1mi0, 1qys - need to re-download pdb to generate dssp
import os
from shutil import copy2
def get_dssp(dirname, pid, copy=False):
pdb_fn = os.path.join(dirname, pid + ".pdb")
if copy: # only the first time, check if file already exists
copy2(os.path.join(dirname, pid + "_pdb.txt"), pdb_fn)
res = pdb_to_dssp(pdb_fn, "https://www3.cmbi.umcn.nl/xssp/")
dssp_fn = os.path.join(dirname, pid + "_dssp.out")
print(dssp_fn)
with open(dssp_fn, 'w') as fout:
fout.write("nid rid Chain AA DSSP\n")
for line in res[res.find('#'):].splitlines()[1:]: # ignore first line, header
s = line.lstrip()[:15]
fout.write(s+'\n')
return
# Demo with one.
get_dssp("SingleDomain", "1bdd", copy=True)
# Run all
for pid in PIDs:
print(pid)
get_dssp(dirname, pid, copy=False)
//ND/FP1
//Converts a pid_dssp.out input text file to a pid_sse.txt output text file.
//input header: nid rid Chain AA DSSP
//output header: nid rid SS SSE AA
void FU::prep_sse(string pid) {
string fn = pid + "_dssp.out";
ifstream fin(fn.c_str());
if (fin.fail()) {
cout << fn << " input error." << endl;
return;
}
string ofn = pid + "_sse1.txt";
ofstream fout(ofn.c_str());
fout << "nid rid SS SSE AA" << endl;
char prev_d = '-', d; int nid = 0, sse = 0;
string s, ds; vector<string> ss;
getline(fin, s); //header: nid rid Chain AA DSSP
getline(fin, s);
while (!fin.eof()) {
::tokenize_string(s, ss); //external function to tokenize space separated string
ds = ss.at(4);
if (ds == "H" || ds == "I" || ds == "G") //helix
d = 'H';
else if (ds == "B" || ds == "E") //strand
d = 'S';
else
d = 'T'; //otherwise
if ('-' == prev_d) { //first residue
prev_d = d;
fout << nid++ << " " << ss.at(1) << " " << d << " "
<< sse << " " << ss.at(3) << endl;
}
else {
if (d == prev_d) { //remain in same sse
fout << nid++ << " " << ss.at(1) << " " << d << " "
<< sse << " " << ss.at(3) << endl;
}
else { //switch to new sse
fout << nid++ << " " << ss.at(1) << " " << d << " "
<< ++sse << " " << ss.at(3) << endl;
prev_d = d;
}
}
getline(fin, s);
}
fin.close();
fout.close();
}
import pandas as pd
import numpy as np
import os
import re
pid = "1aps_0"
fn = os.path.join(dirname, pid + "_sse.txt")
df = pd.read_table(fn, sep='\s+')
df.sample(5)
# calculate number of H, S, T residues
n = len(df)
h = len(df.query("SS == 'H'"))
s = len(df.query("SS == 'S'"))
t = len(df.query("SS == 'T'"))
print(pid, n)
print("H", h, round(h/n, 2))
print("S", s, round(s/n, 2))
print("T", t, round(t/n, 2))
ds = ''.join(df.SS.values)
ds
s = df.SSE.values
s
# number of SSEs, non-Turn SSEs (fold stpes)
gdf = df.groupby('SSE').min()
print("Number of SSEs", len(gdf))
print("Number of non-Turn SSEs", len(gdf.query("SS != 'T'")))
gdf.query("SS != 'T'").index
gdf.query("SS != 'T'").SS
# get the non-Turn SSE strings
p = re.compile(r'[S|H]+')
m = p.findall(ds)
m
# longest Turn SSE
p = re.compile(r'T+')
m = p.findall(ds)
print(m)
longest = max([ len(s) for s in m ])
longest
SKLC August 2020