From 519c1d416e01208cbd8524ec7963ea70a76c716b Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Mon, 13 Apr 2026 12:30:54 +0200 Subject: [PATCH] Support for new PDB ID's --- libdssp/src/dssp-io.cpp | 42 ++++++++++++++++++++--------------------- libdssp/src/dssp.cpp | 28 ++++++++++++++------------- src/mkdssp.cpp | 12 ++++++------ test/unit-test-dssp.cpp | 8 ++++---- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/libdssp/src/dssp-io.cpp b/libdssp/src/dssp-io.cpp index 5ba20e7..41bf60e 100644 --- a/libdssp/src/dssp-io.cpp +++ b/libdssp/src/dssp-io.cpp @@ -164,52 +164,52 @@ void writeDSSP(const dssp &dssp, std::ostream &os) if (version.length() < 10) version.insert(version.end(), 10 - version.length(), ' '); - os << "==== Secondary Structure Definition by the program DSSP, NKI version " << version << " ==== DATE=" << std::put_time(tm, "%F") << " ." << std::endl - << "REFERENCE M.L. HEKKELMAN ET AL, PROTEIN SCIENCE 34.8 (2025) e70208; W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637    ." << std::endl - << dssp.get_pdb_header_line(dssp::pdb_record_type::HEADER) << '.' << std::endl - << dssp.get_pdb_header_line(dssp::pdb_record_type::COMPND) << '.' << std::endl - << dssp.get_pdb_header_line(dssp::pdb_record_type::SOURCE) << '.' << std::endl - << dssp.get_pdb_header_line(dssp::pdb_record_type::AUTHOR) << '.' << std::endl; + os << "==== Secondary Structure Definition by the program DSSP, NKI version " << version << " ==== DATE=" << std::put_time(tm, "%F") << " .\n" + << "REFERENCE M.L. HEKKELMAN ET AL, PROTEIN SCIENCE 34.8 (2025) e70208; W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637    .\n" + << dssp.get_pdb_header_line(dssp::pdb_record_type::HEADER) << ".\n" + << dssp.get_pdb_header_line(dssp::pdb_record_type::COMPND) << ".\n" + << dssp.get_pdb_header_line(dssp::pdb_record_type::SOURCE) << ".\n" + << dssp.get_pdb_header_line(dssp::pdb_record_type::AUTHOR) << ".\n"; os << std::format("{:5d}{:3d}{:3d}{:3d}{:3d} TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) .", stats.count.residues, stats.count.chains, stats.count.SS_bridges, stats.count.intra_chain_SS_bridges, (stats.count.SS_bridges - stats.count.intra_chain_SS_bridges)) - << std::endl; + << '\n'; - os << std::format("{:8.1f} ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .", stats.accessible_surface) << std::endl; + os << std::format("{:8.1f} ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .", stats.accessible_surface) << '\n'; // hydrogenbond summary - os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds, (stats.count.H_bonds * 100.0 / stats.count.residues)) << std::endl; + os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds, (stats.count.H_bonds * 100.0 / stats.count.residues)) << '\n'; - os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_parallel_bridges, (stats.count.H_bonds_in_parallel_bridges * 100.0 / stats.count.residues)) << std::endl; + os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_parallel_bridges, (stats.count.H_bonds_in_parallel_bridges * 100.0 / stats.count.residues)) << '\n'; - os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_antiparallel_bridges, (stats.count.H_bonds_in_antiparallel_bridges * 100.0 / stats.count.residues)) << std::endl; + os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .", stats.count.H_bonds_in_antiparallel_bridges, (stats.count.H_bonds_in_antiparallel_bridges * 100.0 / stats.count.residues)) << '\n'; for (int k = 0; k < 11; ++k) - os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I{:1c}{:1d}), SAME NUMBER PER 100 RESIDUES .", stats.count.H_Bonds_per_distance[k], (stats.count.H_Bonds_per_distance[k] * 100.0 / stats.count.residues), (k - 5 < 0 ? '-' : '+'), abs(k - 5)) << std::endl; + os << std::format("{:5d}{:5.1f} TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I{:1c}{:1d}), SAME NUMBER PER 100 RESIDUES .", stats.count.H_Bonds_per_distance[k], (stats.count.H_Bonds_per_distance[k] * 100.0 / stats.count.residues), (k - 5 < 0 ? '-' : '+'), abs(k - 5)) << '\n'; // histograms... - os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** ." << std::endl; + os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** .\n"; for (auto hi : stats.histogram.residues_per_alpha_helix) os << std::format("{:3d}", hi); - os << " RESIDUES PER ALPHA HELIX ." << std::endl; + os << " RESIDUES PER ALPHA HELIX .\n"; for (auto hi : stats.histogram.parallel_bridges_per_ladder) os << std::format("{:3d}", hi); - os << " PARALLEL BRIDGES PER LADDER ." << std::endl; + os << " PARALLEL BRIDGES PER LADDER .\n"; for (auto hi : stats.histogram.antiparallel_bridges_per_ladder) os << std::format("{:3d}", hi); - os << " ANTIPARALLEL BRIDGES PER LADDER ." << std::endl; + os << " ANTIPARALLEL BRIDGES PER LADDER .\n"; for (auto hi : stats.histogram.ladders_per_sheet) os << std::format("{:3d}", hi); - os << " LADDERS PER SHEET ." << std::endl; + os << " LADDERS PER SHEET .\n"; // per residue information - os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA" << std::endl; + os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA\n"; int last = 0; for (auto ri : dssp) @@ -220,9 +220,9 @@ void writeDSSP(const dssp &dssp, std::ostream &os) if (ri.nr() != last + 1) os << std::format("{:5d} !{:1c} 0 0 0 0, 0.0 0, 0.0 0, 0.0 0, 0.0 0.000 360.0 360.0 360.0 360.0 0.0 0.0 0.0", (last + 1), (ri.chain_break() == dssp::chain_break_type::NewChain ? '*' : ' ')) - << std::endl; + << '\n'; - os << ResidueToDSSPLine(ri) << std::endl; + os << ResidueToDSSPLine(ri) << '\n'; last = ri.nr(); } } @@ -771,7 +771,7 @@ void annotateDSSP(cif::datablock &db, const dssp &dssp, bool writeOther, bool wr if (dssp.empty()) { if (cif::VERBOSE > 0) - std::cout << "No secondary structure information found" << std::endl; + std::cout << "No secondary structure information found\n"; } else { diff --git a/libdssp/src/dssp.cpp b/libdssp/src/dssp.cpp index 454d74f..313adc5 100644 --- a/libdssp/src/dssp.cpp +++ b/libdssp/src/dssp.cpp @@ -872,7 +872,7 @@ bool Linked(const bridge &a, const bridge &b) void CalculateBetaSheets(std::vector &inResidues, statistics &stats, std::vector> &q) { // if (cif::VERBOSE) - // std::cerr << "calculating beta sheets" << std::endl; + // std::cerr << "calculating beta sheets\n"; std::unique_ptr progress; if (cif::VERBOSE >= 0) @@ -1133,7 +1133,7 @@ void CalculateBetaSheets(std::vector &inResidues, statistics &stats, st void CalculateAlphaHelices(std::vector &inResidues, statistics &stats, bool inPreferPiHelices = true) { if (cif::VERBOSE) - std::cerr << "calculating alpha helices" << std::endl; + std::cerr << "calculating alpha helices\n"; // Helix and Turn for (helix_type helixType : { helix_type::_3_10, helix_type::alpha, helix_type::pi }) @@ -1252,7 +1252,7 @@ void CalculateAlphaHelices(std::vector &inResidues, statistics &stats, void CalculatePPHelices(std::vector &inResidues, statistics &stats, int stretch_length) { if (cif::VERBOSE) - std::cerr << "calculating pp helices" << std::endl; + std::cerr << "calculating pp helices\n"; size_t N = inResidues.size(); @@ -1420,7 +1420,7 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin using namespace cif::literals; if (cif::VERBOSE) - std::cerr << "loading residues" << std::endl; + std::cerr << "loading residues\n"; int resNumber = 0; @@ -1557,7 +1557,7 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin void DSSP_impl::calculateSecondaryStructure() { if (cif::VERBOSE) - std::cerr << "calculating secondary structure" << std::endl; + std::cerr << "calculating secondary structure\n"; using namespace cif::literals; @@ -1568,7 +1568,7 @@ void DSSP_impl::calculateSecondaryStructure() if (r1 == mResidues.end()) { if (cif::VERBOSE > 0) - std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym1 << '/' << seq1 << std::endl; + std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym1 << '/' << seq1 << '\n'; continue; // throw std::runtime_error("Invalid file, missing residue for SS bond"); } @@ -1577,7 +1577,7 @@ void DSSP_impl::calculateSecondaryStructure() if (r2 == mResidues.end()) { if (cif::VERBOSE > 0) - std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym2 << '/' << seq2 << std::endl; + std::cerr << "Missing (incomplete?) residue for SS bond when trying to find " << asym2 << '/' << seq2 << '\n'; continue; // throw std::runtime_error("Invalid file, missing residue for SS bond"); } @@ -1618,7 +1618,7 @@ void DSSP_impl::calculateSecondaryStructure() } if (cif::VERBOSE > 0) - std::cerr << "Considering " << near.size() << " pairs of residues" << std::endl; + std::cerr << "Considering " << near.size() << " pairs of residues\n"; progress.reset(nullptr); @@ -1649,7 +1649,7 @@ void DSSP_impl::calculateSecondaryStructure() std::cerr << id << std::string(12 - id.length(), ' ') << static_cast(r.mSecondaryStructure) << ' ' << helix - << std::endl; + << '\n'; } } @@ -1663,7 +1663,7 @@ void DSSP_impl::calculateSecondaryStructure() if (a == b) { if (cif::VERBOSE > 0) - std::cerr << "In the SS bonds list, the residue " << a->mAsymID << ':' << a->mSeqID << " is bonded to itself" << std::endl; + std::cerr << "In the SS bonds list, the residue " << a->mAsymID << ':' << a->mSeqID << " is bonded to itself\n"; continue; } @@ -1783,16 +1783,18 @@ std::string DSSP_impl::GetPDBHEADERLine() // 0 1 2 3 4 5 6 7 8 // HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII char header[] = - "HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII"; + "HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII "; std::ranges::copy(keywords, header + 10); std::ranges::copy(date, header + 50); std::string id = mDB.name(); + if (id.starts_with("pdb_")) + id.erase(0, 4); if (id.length() < 4) id.insert(id.end(), 4 - id.length(), ' '); - else if (id.length() > 4) - id.erase(id.begin() + 4, id.end()); + else if (id.length() > 8) + id.erase(id.begin() + 8, id.end()); std::ranges::copy(id, header + 62); diff --git a/src/mkdssp.cpp b/src/mkdssp.cpp index 42dd98a..75eb1ce 100644 --- a/src/mkdssp.cpp +++ b/src/mkdssp.cpp @@ -46,7 +46,7 @@ namespace fs = std::filesystem; // recursively print exception whats: void print_what(const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << e.what() << '\n'; try { std::rethrow_if_nested(e); @@ -95,13 +95,13 @@ int d_main(int argc, const char *argv[]) if (config.has("help") or config.operands().empty()) { - std::cerr << config << std::endl; + std::cerr << config << '\n'; exit(config.has("help") ? 0 : 1); } if (config.has("output-format") and config.get("output-format") != "dssp" and config.get("output-format") != "mmcif") { - std::cerr << "Output format should be one of 'dssp' or 'mmcif'" << std::endl; + std::cerr << "Output format should be one of 'dssp' or 'mmcif'\n"; exit(1); } @@ -132,7 +132,7 @@ int d_main(int argc, const char *argv[]) cif::gzio::ifstream in(config.operands().front()); if (not in.is_open()) { - std::cerr << "Could not open file" << std::endl; + std::cerr << "Could not open file\n"; exit(1); } @@ -195,7 +195,7 @@ int d_main(int argc, const char *argv[]) { if (chain_id.length() > 1 or seq_nr > 99999) { - std::cerr << "The data in this file won't fit in the old DSSP format, please use the mmCIF format instead." << std::endl; + std::cerr << "The data in this file won't fit in the old DSSP format, please use the mmCIF format instead.\n"; exit(2); } } @@ -209,7 +209,7 @@ int d_main(int argc, const char *argv[]) if (not out.is_open()) { - std::cerr << "Could not open output file" << std::endl; + std::cerr << "Could not open output file\n"; exit(1); } diff --git a/test/unit-test-dssp.cpp b/test/unit-test-dssp.cpp index 4329a03..bfd0faf 100644 --- a/test/unit-test-dssp.cpp +++ b/test/unit-test-dssp.cpp @@ -129,9 +129,9 @@ TEST_CASE("ut_dssp") if (cif::starts_with(line_t, "REFERENCE ") and cif::starts_with(line_r, "REFERENCE ")) continue; - std::cerr << line_nr << std::endl - << line_t << std::endl - << line_r << std::endl; + std::cerr << line_nr << '\n' + << line_t << '\n' + << line_r << '\n'; } if (line_t != line_r) @@ -193,7 +193,7 @@ TEST_CASE("dssp_1") std::string line; getline(t, line); - // std::cout << line << std::endl; + // std::cout << line << '\n'; auto fld = cif::split(line, "\t");