diff --git a/.gitignore b/.gitignore index f8a913d..615d7e8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ build/ **/*.dssp src/revision.hpp libdssp/src/revision.hpp +python-module/mkdssp.so diff --git a/CMakeLists.txt b/CMakeLists.txt index 07ad84c..7f12e62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ endif() # Optionally build a version to be installed inside CCP4 option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF) option(BUILD_DOCUMENTATION "Generate the documentation files using pandoc" OFF) +option(BUILD_PYTHON_MODULE "Build an experimental Python module" OFF) option(INSTALL_LIBRARY "Install the libdssp library" OFF) if(BUILD_FOR_CCP4) @@ -167,6 +168,11 @@ if(BUILD_TESTING) add_subdirectory(test) endif() +# Python +if(BUILD_PYTHON_MODULE) + add_subdirectory(python-module) +endif() + include(InstallRequiredSystemLibraries) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") diff --git a/libdssp/include/dssp.hpp b/libdssp/include/dssp.hpp index 2677ba6..ffe429c 100644 --- a/libdssp/include/dssp.hpp +++ b/libdssp/include/dssp.hpp @@ -68,6 +68,12 @@ class dssp Middle }; + enum class ladder_direction_type + { + parallel, + antiparallel + }; + static constexpr size_t kHistogramSize = 30; struct statistics @@ -146,9 +152,18 @@ class dssp bool is_cis() const { return std::abs(omega().value_or(360)) < 30.0f; } float chiral_volume() const; + std::size_t nr_of_chis() const; float chi(std::size_t index) const; + std::vector chi() const + { + std::vector result; + for (size_t i = 0; i < nr_of_chis(); ++i) + result.push_back(chi(i)); + return result; + } + std::tuple ca_location() const; chain_break_type chain_break() const; @@ -169,7 +184,7 @@ class dssp double accessibility() const; /// \brief returns resinfo, ladder and parallel - std::tuple bridge_partner(int i) const; + std::tuple bridge_partner(int i) const; int sheet() const; int strand() const; @@ -187,6 +202,8 @@ class dssp /// \brief Returns \result true if there is a bond between two residues friend bool test_bond(residue_info const &a, residue_info const &b); + residue_info next() const; + private: residue_info(residue *res) : m_impl(res) @@ -250,7 +267,7 @@ class dssp // -------------------------------------------------------------------- // Writing out the data, either in legacy format... - void write_legacy_output(std::ostream& os) const; + void write_legacy_output(std::ostream &os) const; // ... or as annotation in the cif::datablock void annotate(cif::datablock &db, bool writeOther, bool writeDSSPCategories) const; @@ -270,4 +287,3 @@ class dssp private: struct DSSP_impl *m_impl; }; - diff --git a/libdssp/src/dssp-io.cpp b/libdssp/src/dssp-io.cpp index 49cbe19..f9422cc 100644 --- a/libdssp/src/dssp-io.cpp +++ b/libdssp/src/dssp-io.cpp @@ -98,12 +98,12 @@ std::string ResidueToDSSPLine(const dssp::residue_info &info) char bridgelabel[2] = { ' ', ' ' }; for (uint32_t i : { 0, 1 }) { - const auto &[p, ladder, parallel] = info.bridge_partner(i); + const auto &[p, ladder, direction] = info.bridge_partner(i); if (not p) continue; bp[i] = p.nr() % 10000; // won't fit otherwise... - bridgelabel[i] = (parallel ? 'a' : 'A') + ladder % 26; + bridgelabel[i] = (direction == dssp::ladder_direction_type::parallel ? 'a' : 'A') + ladder % 26; } char sheet = ' '; @@ -402,10 +402,10 @@ void writeLadders(cif::datablock &db, const dssp &dssp) // Write out the DSSP ladders struct ladder_info { - ladder_info(int label, int sheet, bool parallel, const dssp::residue_info &a, const dssp::residue_info &b) + ladder_info(int label, int sheet, dssp::ladder_direction_type direction, const dssp::residue_info &a, const dssp::residue_info &b) : ladder(label) , sheet(sheet) - , parallel(parallel) + , direction(direction) , pairs({ { a, b } }) { } @@ -417,7 +417,7 @@ void writeLadders(cif::datablock &db, const dssp &dssp) int ladder; int sheet; - bool parallel; + dssp::ladder_direction_type direction; std::vector> pairs; }; @@ -427,7 +427,7 @@ void writeLadders(cif::datablock &db, const dssp &dssp) { for (int i : { 0, 1 }) { - const auto &[p, ladder, parallel] = res.bridge_partner(i); + const auto &[p, ladder, direction] = res.bridge_partner(i); if (not p) continue; @@ -438,7 +438,7 @@ void writeLadders(cif::datablock &db, const dssp &dssp) if (l.ladder != ladder) continue; - assert(l.parallel == parallel); + assert(l.direction == direction); if (find_if(l.pairs.begin(), l.pairs.end(), [na = p.nr(), nb = res.nr()](const auto &p) { return p.first.nr() == na and p.second.nr() == nb; }) != l.pairs.end()) @@ -455,7 +455,7 @@ void writeLadders(cif::datablock &db, const dssp &dssp) if (not is_new) continue; - ladders.emplace_back(ladder, res.sheet() - 1, parallel, res, p); + ladders.emplace_back(ladder, res.sheet() - 1, direction, res, p); } } @@ -472,7 +472,7 @@ void writeLadders(cif::datablock &db, const dssp &dssp) { "sheet_id", cif::cif_id_for_number(l.sheet) }, { "range_id_1", cif::cif_id_for_number(beg1.strand() - 1) }, { "range_id_2", cif::cif_id_for_number(beg2.strand() - 1) }, - { "type", l.parallel ? "parallel" : "anti-parallel" }, + { "type", l.direction == dssp::ladder_direction_type::parallel ? "parallel" : "anti-parallel" }, { "beg_1_label_comp_id", beg1.compound_id() }, { "beg_1_label_asym_id", beg1.asym_id() }, diff --git a/libdssp/src/dssp.cpp b/libdssp/src/dssp.cpp index 3bc78ef..58e8c8a 100644 --- a/libdssp/src/dssp.cpp +++ b/libdssp/src/dssp.cpp @@ -2063,21 +2063,13 @@ const std::map> kChiAtomsMap = { { MapResidue("VAL"), { "CG1" } } }; -std::size_t dssp::residue_info::nr_of_chis() const +std::vector dssp::residue_info::chi() const { - auto i = kChiAtomsMap.find(m_impl->mType); - - return i != kChiAtomsMap.end() ? i->second.size() : 0; -} - -float dssp::residue_info::chi(std::size_t index) const -{ - float result = 0; + std::vector result; auto type = m_impl->mType; - auto i = kChiAtomsMap.find(type); - if (i != kChiAtomsMap.end() and index < i->second.size()) + if (auto i = kChiAtomsMap.find(type); i != kChiAtomsMap.end()) { std::vector atoms{ "N", "CA", "CB" }; @@ -2092,11 +2084,14 @@ float dssp::residue_info::chi(std::size_t index) const atoms.back() = "CG2"; } - result = static_cast(dihedral_angle( - m_impl->get_atom(atoms[index + 0]), - m_impl->get_atom(atoms[index + 1]), - m_impl->get_atom(atoms[index + 2]), - m_impl->get_atom(atoms[index + 3]))); + for (size_t ix = 0; ix < i->second.size(); ++ix) + { + result.push_back(static_cast(dihedral_angle( + m_impl->get_atom(atoms[ix + 0]), + m_impl->get_atom(atoms[ix + 1]), + m_impl->get_atom(atoms[ix + 2]), + m_impl->get_atom(atoms[ix + 3])))); + } } return result; @@ -2152,13 +2147,13 @@ double dssp::residue_info::accessibility() const return m_impl->mAccessibility; } -std::tuple dssp::residue_info::bridge_partner(int i) const +std::tuple dssp::residue_info::bridge_partner(int i) const { auto bp = m_impl->GetBetaPartner(i); residue_info ri(bp.m_residue); - return std::make_tuple(std::move(ri), bp.ladder, bp.parallel); + return std::make_tuple(std::move(ri), bp.ladder, bp.parallel ? ladder_direction_type::parallel : ladder_direction_type::antiparallel); } int dssp::residue_info::sheet() const @@ -2183,6 +2178,11 @@ std::tuple dssp::residue_info::donor(int i) const return { residue_info(d.res), d.energy }; } +dssp::residue_info dssp::residue_info::next() const +{ + return residue_info(m_impl ? m_impl->mNext : nullptr); +} + // -------------------------------------------------------------------- dssp::iterator::iterator(residue *res) diff --git a/python-module/CMakeLists.txt b/python-module/CMakeLists.txt new file mode 100644 index 0000000..b691b2b --- /dev/null +++ b/python-module/CMakeLists.txt @@ -0,0 +1,9 @@ +find_package(Python REQUIRED COMPONENTS Interpreter Development) +find_package(Boost REQUIRED COMPONENTS python) + +add_library(DSSP SHARED dssp-python-plugin.cpp) + +target_compile_features(DSSP PUBLIC cxx_std_20) + +target_include_directories(DSSP PRIVATE ${Python_INCLUDE_DIRS}) +target_link_libraries(DSSP dssp::dssp Boost::python ${Python_LIBRARIES}) \ No newline at end of file diff --git a/python-module/dssp-python-plugin.cpp b/python-module/dssp-python-plugin.cpp new file mode 100644 index 0000000..40a251b --- /dev/null +++ b/python-module/dssp-python-plugin.cpp @@ -0,0 +1,324 @@ +#include +#include + +#include + +struct statistics_wrapper +{ + int get_residues() { return m_stats.count.residues; } + int get_chains() { return m_stats.count.chains; } + int get_SS_bridges() { return m_stats.count.SS_bridges; } + int get_intra_chain_SS_bridges() { return m_stats.count.intra_chain_SS_bridges; } + int get_H_bonds() { return m_stats.count.H_bonds; } + int get_H_bonds_in_antiparallel_bridges() { return m_stats.count.H_bonds_in_antiparallel_bridges; } + int get_H_bonds_in_parallel_bridges() { return m_stats.count.H_bonds_in_parallel_bridges; } + + dssp::statistics m_stats; +}; + +struct residue_info_wrapper +{ +}; + +class dssp_wrapper +{ + public: + dssp_wrapper(std::string data, int model_nr = 1, int min_poly_proline_stretch_length = 3, bool calculateSurfaceAccessibility = false) + { + struct membuf : public std::streambuf + { + membuf(char *text, size_t length) + { + this->setg(text, text, text + length); + } + } buffer(const_cast(data.data()), data.length()); + + std::istream is(&buffer); + + auto f = cif::pdb::read(is); + m_dssp.reset(new dssp(f.front(), model_nr, min_poly_proline_stretch_length, calculateSurfaceAccessibility)); + } + + dssp_wrapper(const dssp_wrapper &) = default; + dssp_wrapper &operator=(const dssp_wrapper &) = default; + + statistics_wrapper get_statistics() + { + statistics_wrapper result{ .m_stats = m_dssp->get_statistics() }; + return result; + } + + struct residue_info_handle + { + residue_info_handle(dssp::residue_info info) + : m_info(info) + { + } + + operator dssp::residue_info &() { return m_info; } + + dssp::residue_info m_info; + }; + + class iterator + { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = dssp::residue_info; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = residue_info_handle; + + iterator(dssp::residue_info info) + : m_current(info) + { + } + + iterator(const iterator &i) = default; + iterator &operator=(const iterator &i) = default; + + reference operator*() { return residue_info_handle(m_current); } + pointer operator->() { return &m_current; } + + iterator &operator++() + { + m_current = m_current.next(); + return *this; + } + + iterator operator++(int) + { + auto tmp(*this); + this->operator++(); + return tmp; + } + + bool operator==(const iterator &rhs) const + { + return m_current == rhs.m_current; + } + + private: + dssp::residue_info m_current; + }; + + auto begin() { return iterator(*m_dssp->begin()); } + auto end() { return iterator({}); } + + auto get(std::string asym_id, int nr) + { + return m_dssp->operator[]({ asym_id, nr }); + } + + private: + std::shared_ptr m_dssp; +}; + +// shamelessly copied from a stack overflow comment +// https://stackoverflow.com/questions/36485840/wrap-boostoptional-using-boostpython + +// Custom exceptions +struct AttributeError : std::exception +{ + const char *what() const throw() { return "AttributeError exception"; } +}; + +struct TypeError : std::exception +{ + const char *what() const throw() { return "TypeError exception"; } +}; + +// Set python exceptions +void translate(const std::exception &e) +{ + if (dynamic_cast(&e)) + PyErr_SetString(PyExc_AttributeError, e.what()); + if (dynamic_cast(&e)) + PyErr_SetString(PyExc_TypeError, e.what()); +} + +struct to_python_optional +{ + static PyObject *convert(const std::optional &obj) + { + if (obj) + return boost::python::incref(boost::python::object(*obj).ptr()); + else + return boost::python::incref(boost::python::object().ptr()); + } +}; + +struct to_python_list_of_floats +{ + static PyObject *convert(const std::vector &v) + { + boost::python::list list; + for (auto value : v) + list.append(value); + return boost::python::incref(boost::python::object(list).ptr()); + } +}; + +struct to_python_point +{ + static PyObject *convert(const std::tuple &v) + { + boost::python::dict p; + p["x"] = std::get<0>(v); + p["y"] = std::get<1>(v); + p["z"] = std::get<2>(v); + return boost::python::incref(boost::python::object(p).ptr()); + } +}; + +struct to_python_partner +{ + static PyObject *convert(const std::tuple &v) + { + boost::python::type_info iv = boost::python::type_id(); + const boost::python::converter::registration* cv = boost::python::converter::registry::query(iv); + assert(cv != nullptr); + if (cv == nullptr) + throw std::runtime_error("Missing registration"); + + if (auto &[ri, e] = v; ri) + { + auto c = cv->to_python(&ri); + return boost::python::incref(boost::python::make_tuple(boost::python::handle<>(c), e).ptr()); + } + else + return boost::python::incref(boost::python::make_tuple(boost::python::object(), 0).ptr()); + } +}; + +struct to_python_bridge_partner +{ + static PyObject *convert(const std::tuple &v) + { + if (auto &[ri, nr, direction] = v; ri) + { + boost::python::type_info iv = boost::python::type_id(); + const boost::python::converter::registration* cv = boost::python::converter::registry::query(iv); + assert(cv != nullptr); + + boost::python::type_info dv = boost::python::type_id(); + const boost::python::converter::registration* ev = boost::python::converter::registry::query(dv); + assert(ev != nullptr); + + auto c = cv->to_python(&ri); + auto e = ev->to_python(&direction); + + return boost::python::incref(boost::python::make_tuple( + boost::python::handle<>(c), nr, boost::python::handle<>(e)).ptr()); + } + else + return boost::python::incref(boost::python::make_tuple(boost::python::object(), 0, boost::python::object()).ptr()); + } +}; + +bool test_bond_between_residues(PyObject *a, PyObject *b) +{ + const auto &a_ri = boost::python::extract(a); + const auto &b_ri = boost::python::extract(b); + + return test_bond(a_ri, b_ri); +} + +BOOST_PYTHON_MODULE(mkdssp) +{ + using namespace boost::python; + + register_exception_translator(&translate); + register_exception_translator(&translate); + + to_python_converter, to_python_optional>(); + to_python_converter, to_python_list_of_floats>(); + to_python_converter, to_python_point>(); + to_python_converter, to_python_partner>(); + to_python_converter, to_python_bridge_partner>(); + + enum_("structure_type") + .value("Loop", dssp::structure_type::Loop) + .value("Alphahelix", dssp::structure_type::Alphahelix) + .value("Betabridge", dssp::structure_type::Betabridge) + .value("Strand", dssp::structure_type::Strand) + .value("Helix_3", dssp::structure_type::Helix_3) + .value("Helix_5", dssp::structure_type::Helix_5) + .value("Helix_PPII", dssp::structure_type::Helix_PPII) + .value("Turn", dssp::structure_type::Turn) + .value("Bend", dssp::structure_type::Bend); + + enum_("helix_type") + .value("_3_10", dssp::helix_type::_3_10) + .value("alpha", dssp::helix_type::alpha) + .value("pi", dssp::helix_type::pi) + .value("pp", dssp::helix_type::pp); + + enum_("helix_position_type") + .value("None", dssp::helix_position_type::None) + .value("Start", dssp::helix_position_type::Start) + .value("End", dssp::helix_position_type::End) + .value("StartAndEnd", dssp::helix_position_type::StartAndEnd) + .value("Middle", dssp::helix_position_type::Middle); + + enum_("ladder_direction_type") + .value("parallel", dssp::ladder_direction_type::parallel) + .value("anti_parallel", dssp::ladder_direction_type::antiparallel); + + enum_("chain_break_type") + .value("None", dssp::chain_break_type::None) + .value("NewChain", dssp::chain_break_type::NewChain) + .value("Gap", dssp::chain_break_type::Gap); + + class_("statistic_counts") + .add_property("residues", &statistics_wrapper::get_residues) + .add_property("chains", &statistics_wrapper::get_chains) + .add_property("SS_bridges", &statistics_wrapper::get_SS_bridges) + .add_property("intra_chain_SS_bridges", &statistics_wrapper::get_intra_chain_SS_bridges) + .add_property("H_bonds", &statistics_wrapper::get_H_bonds) + .add_property("H_bonds_in_antiparallel_bridges", &statistics_wrapper::get_H_bonds_in_antiparallel_bridges) + .add_property("H_bonds_in_parallel_bridges", &statistics_wrapper::get_H_bonds_in_parallel_bridges); + + class_("residue_info") + .add_property("asym_id", &dssp::residue_info::asym_id) + .add_property("seq_id", &dssp::residue_info::seq_id) + .add_property("alt_id", &dssp::residue_info::alt_id) + .add_property("compound_id", &dssp::residue_info::compound_id) + .add_property("compound_letter", &dssp::residue_info::compound_letter) + .add_property("auth_asym_id", &dssp::residue_info::auth_asym_id) + .add_property("auth_seq_id", &dssp::residue_info::auth_seq_id) + .add_property("pdb_strand_id", &dssp::residue_info::pdb_strand_id) + .add_property("pdb_seq_num", &dssp::residue_info::pdb_seq_num) + .add_property("pdb_ins_code", &dssp::residue_info::pdb_ins_code) + .add_property("alpha", &dssp::residue_info::alpha) + .add_property("kappa", &dssp::residue_info::kappa) + .add_property("phi", &dssp::residue_info::phi) + .add_property("psi", &dssp::residue_info::psi) + .add_property("tco", &dssp::residue_info::tco) + .add_property("omega", &dssp::residue_info::omega) + .add_property("is_pre_pro", &dssp::residue_info::is_pre_pro) + .add_property("is_cis", &dssp::residue_info::is_cis) + .add_property("chiral_volume", &dssp::residue_info::chiral_volume) + .add_property("chi", &dssp::residue_info::chi) + .add_property("ca_location", &dssp::residue_info::ca_location) + .add_property("chain_break", &dssp::residue_info::chain_break) + .add_property("nr", &dssp::residue_info::nr) + .add_property("type", &dssp::residue_info::type) + .add_property("ssBridgeNr", &dssp::residue_info::ssBridgeNr) + .def("helix", &dssp::residue_info::helix) + .add_property("is_alpha_helix_end_before_start", &dssp::residue_info::is_alpha_helix_end_before_start) + .add_property("bend", &dssp::residue_info::bend) + .add_property("accessibility", &dssp::residue_info::accessibility) + .def("bridge_partner", &dssp::residue_info::bridge_partner) + .add_property("sheet", &dssp::residue_info::sheet) + .add_property("strand", &dssp::residue_info::strand) + .def("acceptor", &dssp::residue_info::acceptor) + .def("donor", &dssp::residue_info::donor); + + class_("dssp", init>()) + .add_property("statistics", &dssp_wrapper::get_statistics) + .def("__iter__", iterator()) + .def("get", &dssp_wrapper::get); + + def("TestBond", test_bond_between_residues); +} \ No newline at end of file diff --git a/python-module/test-mkdssp.py b/python-module/test-mkdssp.py new file mode 100644 index 0000000..0eaeeda --- /dev/null +++ b/python-module/test-mkdssp.py @@ -0,0 +1,82 @@ +import mkdssp +import os +import gzip + +file_path = os.path.join("..", "test", "1cbs.cif.gz") + +with gzip.open(file_path, "rt") as f: + file_content = f.read() + +dssp = mkdssp.dssp(file_content) + +print("residues: ", dssp.statistics.residues) +print("chains: ", dssp.statistics.chains) +print("SS_bridges: ", dssp.statistics.SS_bridges) +print("intra_chain_SS_bridges: ", dssp.statistics.intra_chain_SS_bridges) +print("H_bonds: ", dssp.statistics.H_bonds) +print("H_bonds_in_antiparallel_bridges: ", dssp.statistics.H_bonds_in_antiparallel_bridges) +print("H_bonds_in_parallel_bridges: ", dssp.statistics.H_bonds_in_parallel_bridges) + +count = 0 +for res in dssp: + count += 1 + print(res.asym_id, res.seq_id, res.compound_id) + print("alt_id", res.alt_id) + print("compound_letter", res.compound_letter) + print("auth_asym_id", res.auth_asym_id) + print("auth_seq_id", res.auth_seq_id) + print("pdb_strand_id", res.pdb_strand_id) + print("pdb_seq_num", res.pdb_seq_num) + print("pdb_ins_code", res.pdb_ins_code) + print("alpha", res.alpha) + print("kappa", res.kappa) + print("phi", res.phi) + print("psi", res.psi) + print("tco", res.tco) + print("omega", res.omega) + print("is_pre_pro", res.is_pre_pro) + print("is_cis", res.is_cis) + print("chiral_volume", res.chiral_volume) + print("chi", res.chi) + print("ca_location", res.ca_location) + print("chain_break", res.chain_break) + print("nr", res.nr) + print("type", res.type) + print("ssBridgeNr", res.ssBridgeNr) + print("helix(_3_10)", res.helix(mkdssp.helix_type._3_10)) + print("helix(alpha)", res.helix(mkdssp.helix_type.alpha)) + print("helix(pi)", res.helix(mkdssp.helix_type.pi)) + print("helix(pp)", res.helix(mkdssp.helix_type.pp)) + print("is_alpha_helix_end_before_start", res.is_alpha_helix_end_before_start) + print("bend", res.bend) + print("sheet", res.sheet) + print("strand", res.strand) + + for i in range(0, 1): + (ri, nr, dir) = res.bridge_partner(i) + if ri != None: + print("bridge partner ", i, ri.asym_id, ri.seq_id, ri.compound_id, nr, dir) + + for i in range(0, 1): + (ri, e) = res.acceptor(i) + if ri != None: + print("acceptor ", i, ri.asym_id, ri.seq_id, ri.compound_id, e) + print("test bond: ", mkdssp.TestBond(res, ri)) + + for i in range(0, 1): + (ri, e) = res.donor(i) + if ri != None: + print("donor ", i, ri.asym_id, ri.seq_id, ri.compound_id, e) + print("test bond: ", mkdssp.TestBond(res, ri)) + + print("accessibility", res.accessibility) + + +print("count: ", count) + +a = dssp.get('A', 137) +b = dssp.get('A', 6) + +print ("a & b", a, b) + +assert(mkdssp.TestBond(a, b))