Support for new PDB IDs

This commit is contained in:
Maarten L. Hekkelman
2026-04-13 12:30:54 +02:00
parent 5e9fbd73c9
commit 519c1d416e
4 changed files with 46 additions and 44 deletions

View File

@@ -164,52 +164,52 @@ void writeDSSP(const dssp &dssp, std::ostream &os)
if (version.length() < 10)
version.insert(version.end(), 10 - version.length(), ' ');
os << "==== Secondary Structure Definition by the program DSSP, NKI version " << version << " ==== DATE=" << std::put_time(tm, "%F") << " ." << std::endl
<< "REFERENCE M.L. HEKKELMAN ET AL, PROTEIN SCIENCE 34.8 (2025) e70208; W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637    ." << std::endl
<< dssp.get_pdb_header_line(dssp::pdb_record_type::HEADER) << '.' << std::endl
<< dssp.get_pdb_header_line(dssp::pdb_record_type::COMPND) << '.' << std::endl
<< dssp.get_pdb_header_line(dssp::pdb_record_type::SOURCE) << '.' << std::endl
<< dssp.get_pdb_header_line(dssp::pdb_record_type::AUTHOR) << '.' << std::endl;
os << "==== Secondary Structure Definition by the program DSSP, NKI version " << version << " ==== DATE=" << std::put_time(tm, "%F") << " .\n"
<< "REFERENCE M.L. HEKKELMAN ET AL, PROTEIN SCIENCE 34.8 (2025) e70208; W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637    .\n"
<< dssp.get_pdb_header_line(dssp::pdb_record_type::HEADER) << ".\n"
<< dssp.get_pdb_header_line(dssp::pdb_record_type::COMPND) << ".\n"
<< dssp.get_pdb_header_line(dssp::pdb_record_type::SOURCE) << ".\n"
<< dssp.get_pdb_header_line(dssp::pdb_record_type::AUTHOR) << ".\n";
os << std::format("{:5d}{:3d}{:3d}{:3d}{:3d} TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) .",
stats.count.residues, stats.count.chains, stats.count.SS_bridges, stats.count.intra_chain_SS_bridges, (stats.count.SS_bridges - stats.count.intra_chain_SS_bridges))
<< std::endl;
<< '\n';
os << std::format("{:8.1f} ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .", stats.accessible_surface) << std::endl;
os << std::format("{:8.1f} ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .", stats.accessible_surface) << '\n';
// hydrogenbond summary
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds, (stats.count.H_bonds * 100.0 / stats.count.residues)) << std::endl;
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds, (stats.count.H_bonds * 100.0 / stats.count.residues)) << '\n';
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_parallel_bridges, (stats.count.H_bonds_in_parallel_bridges * 100.0 / stats.count.residues)) << std::endl;
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_parallel_bridges, (stats.count.H_bonds_in_parallel_bridges * 100.0 / stats.count.residues)) << '\n';
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_antiparallel_bridges, (stats.count.H_bonds_in_antiparallel_bridges * 100.0 / stats.count.residues)) << std::endl;
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_antiparallel_bridges, (stats.count.H_bonds_in_antiparallel_bridges * 100.0 / stats.count.residues)) << '\n';
for (int k = 0; k < 11; ++k)
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I{:1c}{:1d}), SAME NUMBER PER 100 RESIDUES .", stats.count.H_Bonds_per_distance[k], (stats.count.H_Bonds_per_distance[k] * 100.0 / stats.count.residues), (k - 5 < 0 ? '-' : '+'), abs(k - 5)) << std::endl;
os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I{:1c}{:1d}), SAME NUMBER PER 100 RESIDUES .", stats.count.H_Bonds_per_distance[k], (stats.count.H_Bonds_per_distance[k] * 100.0 / stats.count.residues), (k - 5 < 0 ? '-' : '+'), abs(k - 5)) << '\n';
// histograms...
os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** ." << std::endl;
os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** .\n";
for (auto hi : stats.histogram.residues_per_alpha_helix)
os << std::format("{:3d}", hi);
os << " RESIDUES PER ALPHA HELIX ." << std::endl;
os << " RESIDUES PER ALPHA HELIX .\n";
for (auto hi : stats.histogram.parallel_bridges_per_ladder)
os << std::format("{:3d}", hi);
os << " PARALLEL BRIDGES PER LADDER ." << std::endl;
os << " PARALLEL BRIDGES PER LADDER .\n";
for (auto hi : stats.histogram.antiparallel_bridges_per_ladder)
os << std::format("{:3d}", hi);
os << " ANTIPARALLEL BRIDGES PER LADDER ." << std::endl;
os << " ANTIPARALLEL BRIDGES PER LADDER .\n";
for (auto hi : stats.histogram.ladders_per_sheet)
os << std::format("{:3d}", hi);
os << " LADDERS PER SHEET ." << std::endl;
os << " LADDERS PER SHEET .\n";
// per residue information
os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA" << std::endl;
os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA\n";
int last = 0;
for (auto ri : dssp)
@@ -220,9 +220,9 @@ void writeDSSP(const dssp &dssp, std::ostream &os)
if (ri.nr() != last + 1)
os << std::format("{:5d} !{:1c} 0 0 0 0, 0.0 0, 0.0 0, 0.0 0, 0.0 0.000 360.0 360.0 360.0 360.0 0.0 0.0 0.0",
(last + 1), (ri.chain_break() == dssp::chain_break_type::NewChain ? '*' : ' '))
<< std::endl;
<< '\n';
os << ResidueToDSSPLine(ri) << std::endl;
os << ResidueToDSSPLine(ri) << '\n';
last = ri.nr();
}
}
@@ -771,7 +771,7 @@ void annotateDSSP(cif::datablock &db, const dssp &dssp, bool writeOther, bool wr
if (dssp.empty())
{
if (cif::VERBOSE > 0)
std::cout << "No secondary structure information found" << std::endl;
std::cout << "No secondary structure information found\n";
}
else
{

View File

@@ -872,7 +872,7 @@ bool Linked(const bridge &a, const bridge &b)
void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats, std::vector<std::tuple<uint32_t, uint32_t>> &q)
{
// if (cif::VERBOSE)
// std::cerr << "calculating beta sheets" << std::endl;
// std::cerr << "calculating beta sheets\n";
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE >= 0)
@@ -1133,7 +1133,7 @@ void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats, st
void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, bool inPreferPiHelices = true)
{
if (cif::VERBOSE)
std::cerr << "calculating alpha helices" << std::endl;
std::cerr << "calculating alpha helices\n";
// Helix and Turn
for (helix_type helixType : { helix_type::_3_10, helix_type::alpha, helix_type::pi })
@@ -1252,7 +1252,7 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats,
void CalculatePPHelices(std::vector<residue> &inResidues, statistics &stats, int stretch_length)
{
if (cif::VERBOSE)
std::cerr << "calculating pp helices" << std::endl;
std::cerr << "calculating pp helices\n";
size_t N = inResidues.size();
@@ -1420,7 +1420,7 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
using namespace cif::literals;
if (cif::VERBOSE)
std::cerr << "loading residues" << std::endl;
std::cerr << "loading residues\n";
int resNumber = 0;
@@ -1557,7 +1557,7 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
void DSSP_impl::calculateSecondaryStructure()
{
if (cif::VERBOSE)
std::cerr << "calculating secondary structure" << std::endl;
std::cerr << "calculating secondary structure\n";
using namespace cif::literals;
@@ -1568,7 +1568,7 @@ void DSSP_impl::calculateSecondaryStructure()
if (r1 == mResidues.end())
{
if (cif::VERBOSE > 0)
std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym1 << '/' << seq1 << std::endl;
std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym1 << '/' << seq1 << '\n';
continue;
// throw std::runtime_error("Invalid file, missing residue for SS bond");
}
@@ -1577,7 +1577,7 @@ void DSSP_impl::calculateSecondaryStructure()
if (r2 == mResidues.end())
{
if (cif::VERBOSE > 0)
std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym2 << '/' << seq2 << std::endl;
std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym2 << '/' << seq2 << '\n';
continue;
// throw std::runtime_error("Invalid file, missing residue for SS bond");
}
@@ -1618,7 +1618,7 @@ void DSSP_impl::calculateSecondaryStructure()
}
if (cif::VERBOSE > 0)
std::cerr << "Considering " << near.size() << " pairs of residues" << std::endl;
std::cerr << "Considering " << near.size() << " pairs of residues\n";
progress.reset(nullptr);
@@ -1649,7 +1649,7 @@ void DSSP_impl::calculateSecondaryStructure()
std::cerr << id << std::string(12 - id.length(), ' ')
<< static_cast<char>(r.mSecondaryStructure) << ' '
<< helix
<< std::endl;
<< '\n';
}
}
@@ -1663,7 +1663,7 @@ void DSSP_impl::calculateSecondaryStructure()
if (a == b)
{
if (cif::VERBOSE > 0)
std::cerr << "In the SS bonds list, the residue " << a->mAsymID << ':' << a->mSeqID << " is bonded to itself" << std::endl;
std::cerr << "In the SS bonds list, the residue " << a->mAsymID << ':' << a->mSeqID << " is bonded to itself\n";
continue;
}
@@ -1783,16 +1783,18 @@ std::string DSSP_impl::GetPDBHEADERLine()
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
char header[] =
"HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII";
"HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII ";
std::ranges::copy(keywords, header + 10);
std::ranges::copy(date, header + 50);
std::string id = mDB.name();
if (id.starts_with("pdb_"))
id.erase(0, 4);
if (id.length() < 4)
id.insert(id.end(), 4 - id.length(), ' ');
else if (id.length() > 4)
id.erase(id.begin() + 4, id.end());
else if (id.length() > 8)
id.erase(id.begin() + 8, id.end());
std::ranges::copy(id, header + 62);

View File

@@ -46,7 +46,7 @@ namespace fs = std::filesystem;
// recursively print exception whats:
void print_what(const std::exception &e)
{
std::cerr << e.what() << std::endl;
std::cerr << e.what() << '\n';
try
{
std::rethrow_if_nested(e);
@@ -95,13 +95,13 @@ int d_main(int argc, const char *argv[])
if (config.has("help") or config.operands().empty())
{
std::cerr << config << std::endl;
std::cerr << config << '\n';
exit(config.has("help") ? 0 : 1);
}
if (config.has("output-format") and config.get<std::string>("output-format") != "dssp" and config.get<std::string>("output-format") != "mmcif")
{
std::cerr << "Output format should be one of 'dssp' or 'mmcif'" << std::endl;
std::cerr << "Output format should be one of 'dssp' or 'mmcif'\n";
exit(1);
}
@@ -132,7 +132,7 @@ int d_main(int argc, const char *argv[])
cif::gzio::ifstream in(config.operands().front());
if (not in.is_open())
{
std::cerr << "Could not open file" << std::endl;
std::cerr << "Could not open file\n";
exit(1);
}
@@ -195,7 +195,7 @@ int d_main(int argc, const char *argv[])
{
if (chain_id.length() > 1 or seq_nr > 99999)
{
std::cerr << "The data in this file won't fit in the old DSSP format, please use the mmCIF format instead." << std::endl;
std::cerr << "The data in this file won't fit in the old DSSP format, please use the mmCIF format instead.\n";
exit(2);
}
}
@@ -209,7 +209,7 @@ int d_main(int argc, const char *argv[])
if (not out.is_open())
{
std::cerr << "Could not open output file" << std::endl;
std::cerr << "Could not open output file\n";
exit(1);
}

View File

@@ -129,9 +129,9 @@ TEST_CASE("ut_dssp")
if (cif::starts_with(line_t, "REFERENCE ") and cif::starts_with(line_r, "REFERENCE "))
continue;
std::cerr << line_nr << std::endl
<< line_t << std::endl
<< line_r << std::endl;
std::cerr << line_nr << '\n'
<< line_t << '\n'
<< line_r << '\n';
}
if (line_t != line_r)
@@ -193,7 +193,7 @@ TEST_CASE("dssp_1")
std::string line;
getline(t, line);
// std::cout << line << std::endl;
// std::cout << line << '\n';
auto fld = cif::split(line, "\t");