mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 13:54:25 +08:00
split out pdbx code
fix dangling memory reference
This commit is contained in:
120
CMakeLists.txt
120
CMakeLists.txt
@@ -159,7 +159,7 @@ if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
|
||||
# Test to see if the submodule was loaded
|
||||
find_file(HAVE_BOOST_REGEX_HPP regex.hpp PATHS ${PROJECT_SOURCE_DIR}/regex/include/boost NO_DEFAULT_PATH)
|
||||
|
||||
if (NOT HAVE_BOOST_REGEX_HPP)
|
||||
if(NOT HAVE_BOOST_REGEX_HPP)
|
||||
message(FATAL_ERROR "The submodule regex was not loaded, please run git submodule update --init ")
|
||||
endif()
|
||||
|
||||
@@ -183,85 +183,42 @@ list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
|
||||
include(VersionString)
|
||||
write_version_header("LibCIFPP")
|
||||
|
||||
# SymOp data table
|
||||
if(CIFPP_RECREATE_SYMOP_DATA)
|
||||
# The tool to create the table
|
||||
add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
|
||||
|
||||
target_link_libraries(symop-map-generator Threads::Threads ${CIFPP_REQUIRED_LIBRARIES})
|
||||
|
||||
set($ENV{CLIBD} ${CLIBD})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> ${CLIBD}/syminfo.lib ${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp
|
||||
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Sources
|
||||
set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/cif/category.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/condition.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/datablock.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/dictionary_parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/file.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/item.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/row.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/cif/validate.cpp
|
||||
|
||||
# ${PROJECT_SOURCE_DIR}/src/pdb/Cif2PDB.cpp
|
||||
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2Cif.cpp
|
||||
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2CifRemark3.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/structure/AtomType.cpp
|
||||
# ${PROJECT_SOURCE_DIR}/src/structure/BondMap.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/structure/Compound.cpp
|
||||
# ${PROJECT_SOURCE_DIR}/src/structure/Structure.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/structure/Symmetry.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/structure/TlsParser.cpp
|
||||
|
||||
# ${PROJECT_SOURCE_DIR}/src/point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/category.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/condition.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/datablock.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/dictionary_parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/file.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/item.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/row.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/validate.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/text.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/utilities.cpp
|
||||
)
|
||||
|
||||
set(project_headers
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/item.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/datablock.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/file.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/writer.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/validate.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/list.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/iterator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/forward_decl.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/dictionary_parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/condition.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/category.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/cif/row.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/AtomType.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/BondMap.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/TlsParser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/Symmetry.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/Structure.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/structure/Compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2Cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2CifRemark3.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/Cif2PDB.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/item.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/datablock.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/file.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/validate.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/list.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/iterator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/forward_decl.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/dictionary_parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
|
||||
|
||||
# ${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
|
||||
)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp)
|
||||
add_library(cifpp ${project_sources} ${project_headers})
|
||||
add_library(cifpp::cifpp ALIAS cifpp)
|
||||
|
||||
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
@@ -282,11 +239,6 @@ if(BOOST_REGEX_STANDALONE)
|
||||
target_include_directories(cifpp PRIVATE regex/include)
|
||||
endif()
|
||||
|
||||
# if(BOOST_REGEX_STANDALONE)
|
||||
# target_link_libraries(cifpp PRIVATE Boost::regex)
|
||||
# endif()
|
||||
|
||||
# target_link_libraries(cifpp PRIVATE)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
@@ -325,8 +277,8 @@ if(UNIX)
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
endif()
|
||||
|
||||
generate_export_header(cifpp
|
||||
EXPORT_FILE_NAME cif++/Cif++Export.hpp)
|
||||
# generate_export_header(cifpp
|
||||
# EXPORT_FILE_NAME cif++/Cif++Export.hpp)
|
||||
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
|
||||
@@ -363,20 +315,16 @@ install(
|
||||
COMPONENT Devel
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/cif++/Cif++Export.hpp"
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
|
||||
COMPONENT Devel
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${SHARE_INSTALL_DIR}
|
||||
)
|
||||
|
||||
configure_package_config_file(Config.cmake.in
|
||||
configure_package_config_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
|
||||
@@ -426,12 +374,6 @@ if(CIFPP_BUILD_TESTS)
|
||||
find_package(Boost REQUIRED)
|
||||
|
||||
list(APPEND CIFPP_tests
|
||||
|
||||
# pdb2cif
|
||||
# rename-compound
|
||||
# structure
|
||||
# sugar
|
||||
# unit
|
||||
unit-v2)
|
||||
|
||||
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
@@ -27,5 +27,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
#include <cif++/cif/file.hpp>
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
@@ -28,12 +28,12 @@
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <cif++/cif/forward_decl.hpp>
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/cif/condition.hpp>
|
||||
#include <cif++/cif/iterator.hpp>
|
||||
#include <cif++/cif/row.hpp>
|
||||
#include <cif++/cif/validate.hpp>
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/iterator.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
// TODO: implement all of:
|
||||
// https://en.cppreference.com/w/cpp/named_req/Container
|
||||
@@ -1,168 +0,0 @@
|
||||
// void write(std::ostream &os, const std::vector<size_t> &order, bool includeEmptyColumns)
|
||||
// {
|
||||
// if (empty())
|
||||
// return;
|
||||
|
||||
// // If there are multiple rows in this category, we need a _loop
|
||||
// if (size() == 1)
|
||||
// {
|
||||
// os << "loop_" << std::endl;
|
||||
|
||||
// std::vector<size_t> columnWidths;
|
||||
|
||||
// for (auto cix : order)
|
||||
// {
|
||||
// auto &col = mColumns[cix];
|
||||
// os << '_' << mName << '.' << col.mName << ' ' << std::endl;
|
||||
// columnWidths.push_back(2);
|
||||
// }
|
||||
|
||||
// for (auto Row = mHead; Row != nullptr; Row = Row->mNext)
|
||||
// {
|
||||
// for (auto v = Row->mValues; v != nullptr; v = v->mNext)
|
||||
// {
|
||||
// if (strchr(v->mText, '\n') == nullptr)
|
||||
// {
|
||||
// size_t l = strlen(v->mText);
|
||||
|
||||
// if (not isUnquotedString(v->mText))
|
||||
// l += 2;
|
||||
|
||||
// if (l > 132)
|
||||
// continue;
|
||||
|
||||
// if (columnWidths[v->mColumnIndex] < l + 1)
|
||||
// columnWidths[v->mColumnIndex] = l + 1;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// for (auto Row = mHead; Row != nullptr; Row = Row->mNext) // loop over rows
|
||||
// {
|
||||
// size_t offset = 0;
|
||||
|
||||
// for (size_t cix : order)
|
||||
// {
|
||||
// size_t w = columnWidths[cix];
|
||||
|
||||
// std::string s;
|
||||
// for (auto iv = Row->mValues; iv != nullptr; iv = iv->mNext)
|
||||
// {
|
||||
// if (iv->mColumnIndex == cix)
|
||||
// {
|
||||
// s = iv->mText;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (s.empty())
|
||||
// s = "?";
|
||||
|
||||
// size_t l = s.length();
|
||||
// if (not isUnquotedString(s.c_str()))
|
||||
// l += 2;
|
||||
// if (l < w)
|
||||
// l = w;
|
||||
|
||||
// if (offset + l > 132 and offset > 0)
|
||||
// {
|
||||
// os << std::endl;
|
||||
// offset = 0;
|
||||
// }
|
||||
|
||||
// offset = detail::writeValue(os, s, offset, w);
|
||||
|
||||
// if (offset > 132)
|
||||
// {
|
||||
// os << std::endl;
|
||||
// offset = 0;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (offset > 0)
|
||||
// os << std::endl;
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// // first find the indent level
|
||||
// size_t l = 0;
|
||||
|
||||
// for (auto &col : mColumns)
|
||||
// {
|
||||
// std::string tag = '_' + mName + '.' + col.mName;
|
||||
|
||||
// if (l < tag.length())
|
||||
// l = tag.length();
|
||||
// }
|
||||
|
||||
// l += 3;
|
||||
|
||||
// for (size_t cix : order)
|
||||
// {
|
||||
// auto &col = mColumns[cix];
|
||||
|
||||
// os << '_' << mName << '.' << col.mName << std::string(l - col.mName.length() - mName.length() - 2, ' ');
|
||||
|
||||
// std::string s;
|
||||
// for (auto iv = mHead->mValues; iv != nullptr; iv = iv->mNext)
|
||||
// {
|
||||
// if (iv->mColumnIndex == cix)
|
||||
// {
|
||||
// s = iv->mText;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (s.empty())
|
||||
// s = "?";
|
||||
|
||||
// size_t offset = l;
|
||||
// if (s.length() + l >= kMaxLineLength)
|
||||
// {
|
||||
// os << std::endl;
|
||||
// offset = 0;
|
||||
// }
|
||||
|
||||
// if (detail::writeValue(os, s, offset, 1) != 0)
|
||||
// os << std::endl;
|
||||
// }
|
||||
// }
|
||||
|
||||
// os << "# " << std::endl;
|
||||
// }
|
||||
|
||||
void write(std::ostream &os) const
|
||||
{
|
||||
// std::vector<size_t> order(mColumns.size());
|
||||
// iota(order.begin(), order.end(), 0);
|
||||
// write(os, order, false);
|
||||
|
||||
os << '#' << m_name << std::endl;
|
||||
for (auto &r : *this)
|
||||
{
|
||||
for (auto &f : r)
|
||||
os << '_' << m_name << '.' << f.name() << ' ' << f.value() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// void Category::write(std::ostream &os, const std::vector<std::string> &columns)
|
||||
// {
|
||||
// // make sure all columns are present
|
||||
// for (auto &c : columns)
|
||||
// addColumn(c);
|
||||
|
||||
// std::vector<size_t> order;
|
||||
// order.reserve(mColumns.size());
|
||||
|
||||
// for (auto &c : columns)
|
||||
// order.push_back(getColumnIndex(c));
|
||||
|
||||
// for (size_t i = 0; i < mColumns.size(); ++i)
|
||||
// {
|
||||
// if (std::find(order.begin(), order.end(), i) == order.end())
|
||||
// order.push_back(i);
|
||||
// }
|
||||
|
||||
// write(os, order, true);
|
||||
// }
|
||||
@@ -31,7 +31,7 @@
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
#include <cif++/cif/row.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -26,9 +26,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/cif/forward_decl.hpp>
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/cif/category.hpp>
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/cif/validate.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -28,8 +28,8 @@
|
||||
|
||||
#include <list>
|
||||
|
||||
#include <cif++/cif/datablock.hpp>
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/datablock.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -36,7 +36,7 @@
|
||||
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
#include <cif++/cif/forward_decl.hpp>
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/cif/row.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <cif++/cif/row.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -1,39 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/Cif++.hpp>
|
||||
|
||||
void WritePDBFile(std::ostream &os, const cif::Datablock &data);
|
||||
|
||||
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
|
||||
void WritePDBHeaderLines(std::ostream &os, const cif::Datablock &data);
|
||||
|
||||
std::string GetPDBHEADERLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBCOMPNDLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBSOURCELine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBAUTHORLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
|
||||
@@ -1,60 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/Cif++.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct PDBRecord
|
||||
{
|
||||
PDBRecord *mNext;
|
||||
uint32_t mLineNr;
|
||||
char mName[11];
|
||||
size_t mVlen;
|
||||
char mValue[1];
|
||||
|
||||
PDBRecord(uint32_t lineNr, const std::string &name, const std::string &value);
|
||||
~PDBRecord();
|
||||
|
||||
void *operator new(size_t);
|
||||
void *operator new(size_t size, size_t vLen);
|
||||
|
||||
void operator delete(void *p);
|
||||
void operator delete(void *p, size_t vLen);
|
||||
|
||||
bool is(const char *name) const;
|
||||
|
||||
char vC(size_t column);
|
||||
std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max());
|
||||
int vI(int columnFirst, int columnLast);
|
||||
std::string vF(size_t columnFirst, size_t columnLast);
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void ReadPDBFile(std::istream &pdbFile, cif::File &cifFile);
|
||||
@@ -1,73 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/PDB2Cif.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct TemplateLine;
|
||||
|
||||
class Remark3Parser
|
||||
{
|
||||
public:
|
||||
virtual ~Remark3Parser() {}
|
||||
|
||||
static bool parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db);
|
||||
|
||||
virtual std::string program();
|
||||
virtual std::string version();
|
||||
|
||||
protected:
|
||||
|
||||
Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
|
||||
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
|
||||
|
||||
virtual float parse();
|
||||
std::string nextLine();
|
||||
|
||||
bool match(const char* expr, int nextState);
|
||||
void storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
|
||||
void storeRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
void updateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
|
||||
virtual void fixup() {}
|
||||
|
||||
std::string mName;
|
||||
std::string mExpMethod;
|
||||
PDBRecord* mRec;
|
||||
cif::Datablock mDb;
|
||||
std::string mLine;
|
||||
std::smatch mM;
|
||||
uint32_t mState;
|
||||
|
||||
const TemplateLine* mTemplate;
|
||||
uint32_t mTemplateCount;
|
||||
std::regex mProgramVersion;
|
||||
};
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/cif/item.hpp>
|
||||
#include <cif++/item.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -1,277 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Lib for working with structures as contained in mmCIF and PDB files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
enum AtomType : uint8_t
|
||||
{
|
||||
Nn = 0, // Unknown
|
||||
|
||||
H = 1, // Hydrogen
|
||||
He = 2, // Helium
|
||||
|
||||
Li = 3, // Lithium
|
||||
Be = 4, // Beryllium
|
||||
B = 5, // Boron
|
||||
C = 6, // Carbon
|
||||
N = 7, // Nitrogen
|
||||
O = 8, // Oxygen
|
||||
F = 9, // Fluorine
|
||||
Ne = 10, // Neon
|
||||
|
||||
Na = 11, // Sodium
|
||||
Mg = 12, // Magnesium
|
||||
Al = 13, // Aluminium
|
||||
Si = 14, // Silicon
|
||||
P = 15, // Phosphorus
|
||||
S = 16, // Sulfur
|
||||
Cl = 17, // Chlorine
|
||||
Ar = 18, // Argon
|
||||
|
||||
K = 19, // Potassium
|
||||
Ca = 20, // Calcium
|
||||
Sc = 21, // Scandium
|
||||
Ti = 22, // Titanium
|
||||
V = 23, // Vanadium
|
||||
Cr = 24, // Chromium
|
||||
Mn = 25, // Manganese
|
||||
Fe = 26, // Iron
|
||||
Co = 27, // Cobalt
|
||||
Ni = 28, // Nickel
|
||||
Cu = 29, // Copper
|
||||
Zn = 30, // Zinc
|
||||
Ga = 31, // Gallium
|
||||
Ge = 32, // Germanium
|
||||
As = 33, // Arsenic
|
||||
Se = 34, // Selenium
|
||||
Br = 35, // Bromine
|
||||
Kr = 36, // Krypton
|
||||
|
||||
Rb = 37, // Rubidium
|
||||
Sr = 38, // Strontium
|
||||
Y = 39, // Yttrium
|
||||
Zr = 40, // Zirconium
|
||||
Nb = 41, // Niobium
|
||||
Mo = 42, // Molybdenum
|
||||
Tc = 43, // Technetium
|
||||
Ru = 44, // Ruthenium
|
||||
Rh = 45, // Rhodium
|
||||
Pd = 46, // Palladium
|
||||
Ag = 47, // Silver
|
||||
Cd = 48, // Cadmium
|
||||
In = 49, // Indium
|
||||
Sn = 50, // Tin
|
||||
Sb = 51, // Antimony
|
||||
Te = 52, // Tellurium
|
||||
I = 53, // Iodine
|
||||
Xe = 54, // Xenon
|
||||
Cs = 55, // Caesium
|
||||
Ba = 56, // Barium
|
||||
La = 57, // Lanthanum
|
||||
|
||||
Hf = 72, // Hafnium
|
||||
Ta = 73, // Tantalum
|
||||
W = 74, // Tungsten
|
||||
Re = 75, // Rhenium
|
||||
Os = 76, // Osmium
|
||||
Ir = 77, // Iridium
|
||||
Pt = 78, // Platinum
|
||||
Au = 79, // Gold
|
||||
Hg = 80, // Mercury
|
||||
Tl = 81, // Thallium
|
||||
Pb = 82, // Lead
|
||||
Bi = 83, // Bismuth
|
||||
Po = 84, // Polonium
|
||||
At = 85, // Astatine
|
||||
Rn = 86, // Radon
|
||||
Fr = 87, // Francium
|
||||
Ra = 88, // Radium
|
||||
Ac = 89, // Actinium
|
||||
|
||||
Rf = 104, // Rutherfordium
|
||||
Db = 105, // Dubnium
|
||||
Sg = 106, // Seaborgium
|
||||
Bh = 107, // Bohrium
|
||||
Hs = 108, // Hassium
|
||||
Mt = 109, // Meitnerium
|
||||
Ds = 110, // Darmstadtium
|
||||
Rg = 111, // Roentgenium
|
||||
Cn = 112, // Copernicium
|
||||
Nh = 113, // Nihonium
|
||||
Fl = 114, // Flerovium
|
||||
Mc = 115, // Moscovium
|
||||
Lv = 116, // Livermorium
|
||||
Ts = 117, // Tennessine
|
||||
Og = 118, // Oganesson
|
||||
|
||||
Ce = 58, // Cerium
|
||||
Pr = 59, // Praseodymium
|
||||
Nd = 60, // Neodymium
|
||||
Pm = 61, // Promethium
|
||||
Sm = 62, // Samarium
|
||||
Eu = 63, // Europium
|
||||
Gd = 64, // Gadolinium
|
||||
Tb = 65, // Terbium
|
||||
Dy = 66, // Dysprosium
|
||||
Ho = 67, // Holmium
|
||||
Er = 68, // Erbium
|
||||
Tm = 69, // Thulium
|
||||
Yb = 70, // Ytterbium
|
||||
Lu = 71, // Lutetium
|
||||
|
||||
Th = 90, // Thorium
|
||||
Pa = 91, // Protactinium
|
||||
U = 92, // Uranium
|
||||
Np = 93, // Neptunium
|
||||
Pu = 94, // Plutonium
|
||||
Am = 95, // Americium
|
||||
Cm = 96, // Curium
|
||||
Bk = 97, // Berkelium
|
||||
Cf = 98, // Californium
|
||||
Es = 99, // Einsteinium
|
||||
Fm = 100, // Fermium
|
||||
Md = 101, // Mendelevium
|
||||
No = 102, // Nobelium
|
||||
Lr = 103, // Lawrencium
|
||||
|
||||
D = 129, // Deuterium
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeInfo
|
||||
|
||||
enum class RadiusType
|
||||
{
|
||||
Calculated,
|
||||
Empirical,
|
||||
CovalentEmpirical,
|
||||
|
||||
SingleBond,
|
||||
DoubleBond,
|
||||
TripleBond,
|
||||
|
||||
VanderWaals,
|
||||
|
||||
TypeCount
|
||||
};
|
||||
|
||||
constexpr size_t RadiusTypeCount = static_cast<size_t>(RadiusType::TypeCount);
|
||||
|
||||
enum class IonicRadiusType
|
||||
{
|
||||
Effective, Crystal
|
||||
};
|
||||
|
||||
struct AtomTypeInfo
|
||||
{
|
||||
AtomType type;
|
||||
std::string name;
|
||||
std::string symbol;
|
||||
float weight;
|
||||
bool metal;
|
||||
float radii[RadiusTypeCount];
|
||||
};
|
||||
|
||||
extern const AtomTypeInfo kKnownAtoms[];
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeTraits
|
||||
|
||||
class AtomTypeTraits
|
||||
{
|
||||
public:
|
||||
AtomTypeTraits(AtomType a);
|
||||
AtomTypeTraits(const std::string &symbol);
|
||||
|
||||
AtomType type() const { return mInfo->type; }
|
||||
std::string name() const { return mInfo->name; }
|
||||
std::string symbol() const { return mInfo->symbol; }
|
||||
float weight() const { return mInfo->weight; }
|
||||
|
||||
bool isMetal() const { return mInfo->metal; }
|
||||
|
||||
static bool isElement(const std::string &symbol);
|
||||
static bool isMetal(const std::string &symbol);
|
||||
|
||||
float radius(RadiusType type = RadiusType::SingleBond) const
|
||||
{
|
||||
if (type >= RadiusType::TypeCount)
|
||||
throw std::invalid_argument("invalid radius requested");
|
||||
return mInfo->radii[static_cast<size_t>(type)] / 100.f;
|
||||
}
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a solid crystal
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float crystal_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a non-solid environment
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float effective_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float ionic_radius(int charge, IonicRadiusType type = IonicRadiusType::Effective) const
|
||||
{
|
||||
return type == IonicRadiusType::Effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
|
||||
}
|
||||
|
||||
// data type encapsulating the Waasmaier & Kirfel scattering factors
|
||||
// in a simplified form (only a and b).
|
||||
// Added the electron scattering factors as well
|
||||
struct SFData
|
||||
{
|
||||
double a[6], b[6];
|
||||
};
|
||||
|
||||
// to get the Cval and Siva values, use this constant as charge:
|
||||
enum
|
||||
{
|
||||
kWKSFVal = -99
|
||||
};
|
||||
|
||||
const SFData &wksf(int charge = 0) const;
|
||||
const SFData &elsf() const;
|
||||
|
||||
private:
|
||||
const struct AtomTypeInfo *mInfo;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,101 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <cif++/structure/Structure.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class BondMapException : public std::runtime_error
|
||||
{
|
||||
public:
|
||||
BondMapException(const std::string &msg)
|
||||
: runtime_error(msg)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
class BondMap
|
||||
{
|
||||
public:
|
||||
BondMap(const Structure &p);
|
||||
|
||||
BondMap(const BondMap &) = delete;
|
||||
BondMap &operator=(const BondMap &) = delete;
|
||||
|
||||
bool operator()(const Atom &a, const Atom &b) const
|
||||
{
|
||||
return isBonded(index.at(a.id()), index.at(b.id()));
|
||||
}
|
||||
|
||||
bool is1_4(const Atom &a, const Atom &b) const
|
||||
{
|
||||
uint32_t ixa = index.at(a.id());
|
||||
uint32_t ixb = index.at(b.id());
|
||||
|
||||
return bond_1_4.count(key(ixa, ixb));
|
||||
}
|
||||
|
||||
// links coming from the struct_conn records:
|
||||
std::vector<std::string> linked(const Atom &a) const;
|
||||
|
||||
// This list of atomID's is coming from either CCD or the CCP4 dictionaries loaded
|
||||
static std::vector<std::string> atomIDsForCompound(const std::string &compoundID);
|
||||
|
||||
private:
|
||||
bool isBonded(uint32_t ai, uint32_t bi) const
|
||||
{
|
||||
return bond.count(key(ai, bi)) != 0;
|
||||
}
|
||||
|
||||
uint64_t key(uint32_t a, uint32_t b) const
|
||||
{
|
||||
if (a > b)
|
||||
std::swap(a, b);
|
||||
return static_cast<uint64_t>(a) | (static_cast<uint64_t>(b) << 32);
|
||||
}
|
||||
|
||||
std::tuple<uint32_t, uint32_t> dekey(uint64_t k) const
|
||||
{
|
||||
return std::make_tuple(
|
||||
static_cast<uint32_t>(k >> 32),
|
||||
static_cast<uint32_t>(k));
|
||||
}
|
||||
|
||||
uint32_t dim;
|
||||
std::unordered_map<std::string, uint32_t> index;
|
||||
std::set<uint64_t> bond, bond_1_4;
|
||||
|
||||
std::map<std::string, std::set<std::string>> link;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,197 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/// \file This file contains the definition for the class Compound, encapsulating
|
||||
/// the information found for compounds in the CCD.
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++/structure/AtomType.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Compound;
|
||||
struct CompoundAtom;
|
||||
class CompoundFactoryImpl;
|
||||
|
||||
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
|
||||
enum class BondType
|
||||
{
|
||||
sing, // 'single bond'
|
||||
doub, // 'double bond'
|
||||
trip, // 'triple bond'
|
||||
quad, // 'quadruple bond'
|
||||
arom, // 'aromatic bond'
|
||||
poly, // 'polymeric bond'
|
||||
delo, // 'delocalized double bond'
|
||||
pi, // 'pi bond'
|
||||
};
|
||||
|
||||
std::string to_string(BondType bondType);
|
||||
BondType from_string(const std::string& bondType);
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about an atom in a chemical compound.
|
||||
/// This is a subset of the available information. Contact the author if you need more fields.
|
||||
|
||||
struct CompoundAtom
|
||||
{
|
||||
std::string id;
|
||||
AtomType typeSymbol;
|
||||
int charge = 0;
|
||||
bool aromatic = false;
|
||||
bool leavingAtom = false;
|
||||
bool stereoConfig = false;
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about the bonds
|
||||
|
||||
struct CompoundBond
|
||||
{
|
||||
std::string atomID[2];
|
||||
BondType type;
|
||||
bool aromatic = false, stereoConfig = false;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief a class that contains information about a chemical compound.
|
||||
/// This information is derived from the CCD by default.
|
||||
///
|
||||
/// To create compounds, you use the factory method. You can add your own
|
||||
/// compound definitions by calling the addExtraComponents function and
|
||||
/// pass it a valid CCD formatted file.
|
||||
|
||||
class Compound
|
||||
{
|
||||
public:
|
||||
|
||||
// accessors
|
||||
|
||||
std::string id() const { return mID; }
|
||||
std::string name() const { return mName; }
|
||||
std::string type() const { return mType; }
|
||||
std::string group() const { return mGroup; }
|
||||
std::string formula() const { return mFormula; }
|
||||
float formulaWeight() const { return mFormulaWeight; }
|
||||
int formalCharge() const { return mFormalCharge; }
|
||||
|
||||
const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
|
||||
const std::vector<CompoundBond> &bonds() const { return mBonds; }
|
||||
|
||||
CompoundAtom getAtomByID(const std::string &atomID) const;
|
||||
|
||||
bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
|
||||
// float chiralVolume(const std::string &centreID) const;
|
||||
|
||||
bool isWater() const
|
||||
{
|
||||
return mID == "HOH" or mID == "H2O" or mID == "WAT";
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
friend class CompoundFactoryImpl;
|
||||
friend class CCDCompoundFactoryImpl;
|
||||
friend class CCP4CompoundFactoryImpl;
|
||||
|
||||
Compound(cif::datablock &db);
|
||||
Compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
|
||||
|
||||
std::string mID;
|
||||
std::string mName;
|
||||
std::string mType;
|
||||
std::string mGroup;
|
||||
std::string mFormula;
|
||||
float mFormulaWeight = 0;
|
||||
int mFormalCharge = 0;
|
||||
std::vector<CompoundAtom> mAtoms;
|
||||
std::vector<CompoundBond> mBonds;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Factory class for Compound and Link objects
|
||||
|
||||
CIFPP_EXPORT extern const std::map<std::string, char> kAAMap, kBaseMap;
|
||||
|
||||
class CompoundFactory
|
||||
{
|
||||
public:
|
||||
|
||||
/// \brief Initialise a singleton instance.
|
||||
///
|
||||
/// If you have a multithreaded application and want to have different
|
||||
/// compounds in each thread (e.g. a web service processing user requests
|
||||
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
|
||||
/// flag to true.
|
||||
|
||||
static void init(bool useThreadLocalInstanceOnly);
|
||||
static CompoundFactory &instance();
|
||||
static void clear();
|
||||
|
||||
void setDefaultDictionary(const std::filesystem::path &inDictFile);
|
||||
void pushDictionary(const std::filesystem::path &inDictFile);
|
||||
void popDictionary();
|
||||
|
||||
bool isKnownPeptide(const std::string &res_name) const;
|
||||
bool isKnownBase(const std::string &res_name) const;
|
||||
|
||||
/// \brief Create the Compound object for \a id
|
||||
///
|
||||
/// This will create the Compound instance for \a id if it doesn't exist already.
|
||||
/// The result is owned by this factory and should not be deleted by the user.
|
||||
/// \param id The Compound ID, a three letter code usually
|
||||
/// \result The compound, or nullptr if it could not be created (missing info)
|
||||
const Compound *create(std::string id);
|
||||
|
||||
~CompoundFactory();
|
||||
|
||||
private:
|
||||
CompoundFactory();
|
||||
|
||||
CompoundFactory(const CompoundFactory &) = delete;
|
||||
CompoundFactory &operator=(const CompoundFactory &) = delete;
|
||||
|
||||
static std::unique_ptr<CompoundFactory> sInstance;
|
||||
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
|
||||
static bool sUseThreadLocalInstance;
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> mImpl;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,844 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++/structure/AtomType.hpp>
|
||||
#include <cif++/structure/Compound.hpp>
|
||||
#include <cif++/point.hpp>
|
||||
|
||||
/*
|
||||
To modify a structure, you will have to use actions.
|
||||
|
||||
The currently supported actions are:
|
||||
|
||||
// - Move atom to new location
|
||||
- Remove atom
|
||||
// - Add new atom that was formerly missing
|
||||
// - Add alternate Residue
|
||||
-
|
||||
|
||||
*/
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class Atom;
|
||||
class Residue;
|
||||
class Monomer;
|
||||
class Polymer;
|
||||
class Structure;
|
||||
class File;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Atom
|
||||
{
|
||||
private:
|
||||
struct AtomImpl : public std::enable_shared_from_this<AtomImpl>
|
||||
{
|
||||
AtomImpl(cif::datablock &db, const std::string &id, cif::row_handle row);
|
||||
|
||||
// constructor for a symmetry copy of an atom
|
||||
AtomImpl(const AtomImpl &impl, const Point &loc, const std::string &sym_op);
|
||||
|
||||
AtomImpl(const AtomImpl &i) = default;
|
||||
|
||||
void prefetch();
|
||||
|
||||
int compare(const AtomImpl &b) const;
|
||||
|
||||
bool getAnisoU(float anisou[6]) const;
|
||||
|
||||
int charge() const;
|
||||
|
||||
void moveTo(const Point &p);
|
||||
|
||||
const Compound *compound() const;
|
||||
|
||||
const std::string get_property(const std::string_view name) const;
|
||||
void set_property(const std::string_view name, const std::string &value);
|
||||
|
||||
const cif::datablock &mDb;
|
||||
std::string mID;
|
||||
AtomType mType;
|
||||
|
||||
std::string mAtomID;
|
||||
std::string mCompID;
|
||||
std::string mAsymID;
|
||||
int mSeqID;
|
||||
std::string mAltID;
|
||||
std::string mAuthSeqID;
|
||||
|
||||
Point mLocation;
|
||||
int mRefcount;
|
||||
cif::row_handle mRow;
|
||||
|
||||
// mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs;
|
||||
|
||||
mutable const Compound *mCompound = nullptr;
|
||||
|
||||
bool mSymmetryCopy = false;
|
||||
bool mClone = false;
|
||||
|
||||
std::string mSymmetryOperator = "1_555";
|
||||
};
|
||||
|
||||
public:
|
||||
Atom() {}
|
||||
|
||||
Atom(std::shared_ptr<AtomImpl> impl)
|
||||
: mImpl(impl)
|
||||
{
|
||||
}
|
||||
|
||||
Atom(const Atom &rhs)
|
||||
: mImpl(rhs.mImpl)
|
||||
{
|
||||
}
|
||||
|
||||
Atom(cif::datablock &db, cif::row_handle &row);
|
||||
|
||||
// a special constructor to create symmetry copies
|
||||
Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);
|
||||
|
||||
explicit operator bool() const { return (bool)mImpl; }
|
||||
|
||||
// return a copy of this atom, with data copied instead of referenced
|
||||
Atom clone() const
|
||||
{
|
||||
auto copy = std::make_shared<AtomImpl>(*mImpl);
|
||||
copy->mClone = true;
|
||||
return Atom(copy);
|
||||
}
|
||||
|
||||
Atom &operator=(const Atom &rhs) = default;
|
||||
|
||||
template <typename T>
|
||||
T get_property(const std::string_view name) const;
|
||||
|
||||
void set_property(const std::string_view name, const std::string &value)
|
||||
{
|
||||
if (not mImpl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
mImpl->set_property(name, value);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
|
||||
void property(const std::string_view name, const T &value)
|
||||
{
|
||||
set_property(name, std::to_string(value));
|
||||
}
|
||||
|
||||
const std::string &id() const { return impl().mID; }
|
||||
AtomType type() const { return impl().mType; }
|
||||
|
||||
Point location() const { return impl().mLocation; }
|
||||
void location(Point p)
|
||||
{
|
||||
if (not mImpl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
mImpl->moveTo(p);
|
||||
}
|
||||
|
||||
/// \brief Translate the position of this atom by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the position of this atom by \a q
|
||||
void rotate(Quaternion q);
|
||||
|
||||
/// \brief Translate and rotate the position of this atom by \a t and \a q
|
||||
void translateAndRotate(Point t, Quaternion q);
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates this atom by \a t1 , \a q and \a t2
|
||||
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
|
||||
|
||||
// for direct access to underlying data, be careful!
|
||||
const cif::row_handle getRow() const { return impl().mRow; }
|
||||
const cif::row_handle getRowAniso() const;
|
||||
|
||||
bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
|
||||
std::string symmetry() const { return impl().mSymmetryOperator; }
|
||||
|
||||
const Compound &compound() const;
|
||||
bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }
|
||||
int charge() const;
|
||||
|
||||
float uIso() const;
|
||||
bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
|
||||
float occupancy() const;
|
||||
|
||||
// specifications
|
||||
const std::string &labelAtomID() const { return impl().mAtomID; }
|
||||
const std::string &labelCompID() const { return impl().mCompID; }
|
||||
const std::string &labelAsymID() const { return impl().mAsymID; }
|
||||
std::string labelEntityID() const;
|
||||
int labelSeqID() const { return impl().mSeqID; }
|
||||
const std::string &labelAltID() const { return impl().mAltID; }
|
||||
bool isAlternate() const { return not impl().mAltID.empty(); }
|
||||
|
||||
std::string authAtomID() const;
|
||||
std::string authCompID() const;
|
||||
std::string authAsymID() const;
|
||||
const std::string &authSeqID() const { return impl().mAuthSeqID; }
|
||||
std::string pdbxAuthInsCode() const;
|
||||
std::string pdbxAuthAltID() const;
|
||||
|
||||
std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
|
||||
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
|
||||
|
||||
bool operator==(const Atom &rhs) const;
|
||||
bool operator!=(const Atom &rhs) const
|
||||
{
|
||||
return not operator==(rhs);
|
||||
}
|
||||
|
||||
// access data in compound for this atom
|
||||
|
||||
// convenience routine
|
||||
bool isBackBone() const
|
||||
{
|
||||
auto atomID = labelAtomID();
|
||||
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
|
||||
}
|
||||
|
||||
void swap(Atom &b)
|
||||
{
|
||||
std::swap(mImpl, b.mImpl);
|
||||
}
|
||||
|
||||
int compare(const Atom &b) const { return impl().compare(*b.mImpl); }
|
||||
|
||||
bool operator<(const Atom &rhs) const
|
||||
{
|
||||
return compare(rhs) < 0;
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
|
||||
|
||||
/// \brief Synchronize data with underlying cif data
|
||||
void sync()
|
||||
{
|
||||
if (mImpl)
|
||||
mImpl->prefetch();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class Structure;
|
||||
|
||||
const AtomImpl &impl() const
|
||||
{
|
||||
if (not mImpl)
|
||||
throw std::runtime_error("Uninitialized atom, not found?");
|
||||
return *mImpl;
|
||||
}
|
||||
|
||||
std::shared_ptr<AtomImpl> mImpl;
|
||||
};
|
||||
|
||||
template <>
|
||||
inline std::string Atom::get_property<std::string>(const std::string_view name) const
|
||||
{
|
||||
return impl().get_property(name);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline int Atom::get_property<int>(const std::string_view name) const
|
||||
{
|
||||
auto v = impl().get_property(name);
|
||||
return v.empty() ? 0 : stoi(v);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline float Atom::get_property<float>(const std::string_view name) const
|
||||
{
|
||||
return stof(impl().get_property(name));
|
||||
}
|
||||
|
||||
inline void swap(mmcif::Atom &a, mmcif::Atom &b)
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
inline double Distance(const Atom &a, const Atom &b)
|
||||
{
|
||||
return Distance(a.location(), b.location());
|
||||
}
|
||||
|
||||
inline double DistanceSquared(const Atom &a, const Atom &b)
|
||||
{
|
||||
return DistanceSquared(a.location(), b.location());
|
||||
}
|
||||
|
||||
typedef std::vector<Atom> AtomView;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
enum class EntityType
|
||||
{
|
||||
Polymer, NonPolymer, Macrolide, Water, Branched
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Residue
|
||||
{
|
||||
public:
|
||||
// constructor
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, int seqID, const std::string &authSeqID)
|
||||
: mStructure(&structure)
|
||||
, mCompoundID(compoundID)
|
||||
, mAsymID(asymID)
|
||||
, mSeqID(seqID)
|
||||
, mAuthSeqID(authSeqID)
|
||||
{
|
||||
}
|
||||
|
||||
Residue(const Residue &rhs) = delete;
|
||||
Residue &operator=(const Residue &rhs) = delete;
|
||||
|
||||
Residue(Residue &&rhs);
|
||||
Residue &operator=(Residue &&rhs);
|
||||
|
||||
virtual ~Residue();
|
||||
|
||||
const Compound &compound() const;
|
||||
|
||||
AtomView &atoms();
|
||||
const AtomView &atoms() const;
|
||||
|
||||
void addAtom(Atom &atom);
|
||||
|
||||
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
|
||||
AtomView unique_atoms() const;
|
||||
|
||||
/// \brief The alt ID used for the unique atoms
|
||||
std::string unique_alt_id() const;
|
||||
|
||||
Atom atomByID(const std::string &atomID) const;
|
||||
|
||||
const std::string &compoundID() const { return mCompoundID; }
|
||||
void setCompoundID(const std::string &id) { mCompoundID = id; }
|
||||
|
||||
const std::string &asymID() const { return mAsymID; }
|
||||
int seqID() const { return mSeqID; }
|
||||
std::string entityID() const;
|
||||
|
||||
EntityType entityType() const;
|
||||
|
||||
std::string authAsymID() const;
|
||||
std::string authSeqID() const;
|
||||
std::string authInsCode() const;
|
||||
|
||||
// return a human readable PDB-like auth id (chain+seqnr+iCode)
|
||||
std::string authID() const;
|
||||
|
||||
// similar for mmCIF space
|
||||
std::string labelID() const;
|
||||
|
||||
// Is this residue a single entity?
|
||||
bool isEntity() const;
|
||||
|
||||
/// \brief Return true if this residue's compound ID is one of the water IDs
/// (HOH, H2O or WAT), the same set tested by Compound::isWater and Atom::isWater.
bool isWater() const { return mCompoundID == "HOH" or mCompoundID == "H2O" or mCompoundID == "WAT"; }
|
||||
|
||||
const Structure &structure() const { return *mStructure; }
|
||||
|
||||
bool empty() const { return mStructure == nullptr; }
|
||||
|
||||
bool hasAlternateAtoms() const;
|
||||
|
||||
/// \brief Return the list of unique alt ID's present in this residue
|
||||
std::set<std::string> getAlternateIDs() const;
|
||||
|
||||
/// \brief Return the list of unique atom ID's
|
||||
std::set<std::string> getAtomIDs() const;
|
||||
|
||||
/// \brief Return the list of atoms having ID \a atomID
|
||||
AtomView getAtomsByID(const std::string &atomID) const;
|
||||
|
||||
// some routines for 3d work
|
||||
std::tuple<Point, float> centerAndRadius() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Residue &res);
|
||||
|
||||
friend Structure;
|
||||
|
||||
bool operator==(const mmcif::Residue &rhs) const
|
||||
{
|
||||
return this == &rhs or (
|
||||
mStructure == rhs.mStructure and
|
||||
mSeqID == rhs.mSeqID and
|
||||
mAsymID == rhs.mAsymID and
|
||||
mCompoundID == rhs.mCompoundID and
|
||||
mAuthSeqID == rhs.mAuthSeqID);
|
||||
}
|
||||
|
||||
protected:
|
||||
Residue() {}
|
||||
|
||||
friend class Polymer;
|
||||
|
||||
const Structure *mStructure = nullptr;
|
||||
std::string mCompoundID, mAsymID;
|
||||
int mSeqID = 0;
|
||||
std::string mAuthSeqID;
|
||||
AtomView mAtoms;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a monomer models a single Residue in a protein chain
|
||||
|
||||
class Monomer : public Residue
|
||||
{
|
||||
public:
|
||||
// Monomer();
|
||||
Monomer(const Monomer &rhs) = delete;
|
||||
Monomer &operator=(const Monomer &rhs) = delete;
|
||||
|
||||
Monomer(Monomer &&rhs);
|
||||
Monomer &operator=(Monomer &&rhs);
|
||||
|
||||
Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
|
||||
const std::string &compoundID);
|
||||
|
||||
bool is_first_in_chain() const;
|
||||
bool is_last_in_chain() const;
|
||||
|
||||
// convenience
|
||||
bool has_alpha() const;
|
||||
bool has_kappa() const;
|
||||
|
||||
// Assuming this is really an amino acid...
|
||||
|
||||
float phi() const;
|
||||
float psi() const;
|
||||
float alpha() const;
|
||||
float kappa() const;
|
||||
float tco() const;
|
||||
float omega() const;
|
||||
|
||||
// torsion angles
|
||||
size_t nrOfChis() const;
|
||||
float chi(size_t i) const;
|
||||
|
||||
bool isCis() const;
|
||||
|
||||
/// \brief Returns true if the four atoms C, CA, N and O are present
|
||||
bool isComplete() const;
|
||||
|
||||
/// \brief Returns true if any of the backbone atoms has an alternate
|
||||
bool hasAlternateBackboneAtoms() const;
|
||||
|
||||
Atom CAlpha() const { return atomByID("CA"); }
|
||||
Atom C() const { return atomByID("C"); }
|
||||
Atom N() const { return atomByID("N"); }
|
||||
Atom O() const { return atomByID("O"); }
|
||||
Atom H() const { return atomByID("H"); }
|
||||
|
||||
bool isBondedTo(const Monomer &rhs) const
|
||||
{
|
||||
return this != &rhs and areBonded(*this, rhs);
|
||||
}
|
||||
|
||||
static bool areBonded(const Monomer &a, const Monomer &b, float errorMargin = 0.5f);
|
||||
static bool isCis(const Monomer &a, const Monomer &b);
|
||||
static float omega(const Monomer &a, const Monomer &b);
|
||||
|
||||
// for LEU and VAL
|
||||
float chiralVolume() const;
|
||||
|
||||
bool operator==(const Monomer &rhs) const
|
||||
{
|
||||
return mPolymer == rhs.mPolymer and mIndex == rhs.mIndex;
|
||||
}
|
||||
|
||||
private:
|
||||
const Polymer *mPolymer;
|
||||
size_t mIndex;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Polymer : public std::vector<Monomer>
|
||||
{
|
||||
public:
|
||||
Polymer(const Structure &s, const std::string &entityID, const std::string &asymID);
|
||||
|
||||
Polymer(const Polymer &) = delete;
|
||||
Polymer &operator=(const Polymer &) = delete;
|
||||
|
||||
// Polymer(Polymer&& rhs) = delete;
|
||||
// Polymer& operator=(Polymer&& rhs) = de;
|
||||
|
||||
Monomer &getBySeqID(int seqID);
|
||||
const Monomer &getBySeqID(int seqID) const;
|
||||
|
||||
Structure *structure() const { return mStructure; }
|
||||
|
||||
std::string asymID() const { return mAsymID; }
|
||||
std::string entityID() const { return mEntityID; }
|
||||
|
||||
std::string chainID() const;
|
||||
|
||||
int Distance(const Monomer &a, const Monomer &b) const;
|
||||
|
||||
private:
|
||||
Structure *mStructure;
|
||||
std::string mEntityID;
|
||||
std::string mAsymID;
|
||||
// cif::row_handleSet mPolySeq;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Sugar and Branch, to describe glycosylation sites
|
||||
|
||||
class Branch;
|
||||
|
||||
class Sugar : public Residue
|
||||
{
|
||||
public:
|
||||
Sugar(const Branch &branch, const std::string &compoundID,
|
||||
const std::string &asymID, int authSeqID);
|
||||
|
||||
Sugar(Sugar &&rhs);
|
||||
Sugar &operator=(Sugar &&rhs);
|
||||
|
||||
int num() const { return std::stoi(mAuthSeqID); }
|
||||
std::string name() const;
|
||||
|
||||
/// \brief Return the atom the C1 is linked to
|
||||
Atom getLink() const { return mLink; }
|
||||
void setLink(Atom link) { mLink = link; }
|
||||
|
||||
size_t getLinkNr() const
|
||||
{
|
||||
return mLink ? std::stoi(mLink.authSeqID()) : 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
const Branch *mBranch;
|
||||
Atom mLink;
|
||||
};
|
||||
|
||||
class Branch : public std::vector<Sugar>
|
||||
{
|
||||
public:
|
||||
Branch(Structure &structure, const std::string &asymID);
|
||||
|
||||
void linkAtoms();
|
||||
|
||||
std::string name() const;
|
||||
float weight() const;
|
||||
std::string asymID() const { return mAsymID; }
|
||||
|
||||
Structure &structure() { return *mStructure; }
|
||||
const Structure &structure() const { return *mStructure; }
|
||||
|
||||
Sugar &getSugarByNum(int nr);
|
||||
const Sugar &getSugarByNum(int nr) const;
|
||||
|
||||
private:
|
||||
friend Sugar;
|
||||
|
||||
std::string name(const Sugar &s) const;
|
||||
|
||||
Structure *mStructure;
|
||||
std::string mAsymID;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// file is a reference to the data stored in e.g. the cif file.
|
||||
// This object is not copyable.
|
||||
|
||||
class File : public cif::file
|
||||
{
|
||||
public:
|
||||
File() {}
|
||||
|
||||
// File(const std::filesystem::path &path)
|
||||
// {
|
||||
// load(path);
|
||||
// }
|
||||
|
||||
// File(const char *data, size_t length)
|
||||
// {
|
||||
// load(data, length);
|
||||
// }
|
||||
|
||||
File(const File &) = delete;
|
||||
File &operator=(const File &) = delete;
|
||||
|
||||
// void load(const std::filesystem::path &p) override;
|
||||
// void save(const std::filesystem::path &p) override;
|
||||
|
||||
// using cif::file::load;
|
||||
// using cif::file::save;
|
||||
|
||||
cif::datablock &data() { return front(); }
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
enum class StructureOpenOptions
|
||||
{
|
||||
SkipHydrogen = 1 << 0
|
||||
};
|
||||
|
||||
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
|
||||
{
|
||||
return static_cast<int>(a) bitand static_cast<int>(b);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Structure
|
||||
{
|
||||
public:
|
||||
Structure(cif::file &p, size_t modelNr = 1, StructureOpenOptions options = {})
|
||||
: Structure(p.front(), modelNr, options)
|
||||
{
|
||||
}
|
||||
|
||||
Structure(cif::datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
|
||||
Structure(Structure &&s) = default;
|
||||
|
||||
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
|
||||
Structure(const Structure &);
|
||||
|
||||
Structure &operator=(const Structure &) = delete;
|
||||
// Structure &operator=(Structure &&s) = default;
|
||||
|
||||
~Structure();
|
||||
|
||||
const AtomView &atoms() const { return mAtoms; }
|
||||
// AtomView &atoms() { return mAtoms; }
|
||||
|
||||
EntityType getEntityTypeForEntityID(const std::string entityID) const;
|
||||
EntityType getEntityTypeForAsymID(const std::string asymID) const;
|
||||
|
||||
AtomView waters() const;
|
||||
|
||||
const std::list<Polymer> &polymers() const { return mPolymers; }
|
||||
std::list<Polymer> &polymers() { return mPolymers; }
|
||||
|
||||
Polymer &getPolymerByAsymID(const std::string &asymID);
|
||||
|
||||
const Polymer &getPolymerByAsymID(const std::string &asymID) const
|
||||
{
|
||||
return const_cast<Structure *>(this)->getPolymerByAsymID(asymID);
|
||||
}
|
||||
|
||||
const std::list<Branch> &branches() const { return mBranches; }
|
||||
std::list<Branch> &branches() { return mBranches; }
|
||||
|
||||
Branch &getBranchByAsymID(const std::string &asymID);
|
||||
const Branch &getBranchByAsymID(const std::string &asymID) const;
|
||||
|
||||
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
|
||||
|
||||
Atom getAtomByID(const std::string &id) const;
|
||||
// Atom getAtomByLocation(Point pt, float maxDistance) const;
|
||||
|
||||
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
|
||||
const std::string &compID, int seqID, const std::string &altID = "");
|
||||
|
||||
/// \brief Return the atom closest to point \a p
|
||||
Atom getAtomByPosition(Point p) const;
|
||||
|
||||
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
|
||||
Atom getAtomByPositionAndType(Point p, std::string_view type, std::string_view res_type) const;
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
Residue &getResidue(const std::string &asymID)
|
||||
{
|
||||
return getResidue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
const Residue &getResidue(const std::string &asymID) const
|
||||
{
|
||||
return getResidue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
|
||||
Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get the single residue for an asym with id \a asymID, seq id \a seqID and authSeqID \a authSeqID
|
||||
const Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<Structure *>(this)->getResidue(asymID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<Structure *>(this)->getResidue(asymID, compID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get the residue for atom \a atom
|
||||
Residue &getResidue(const mmcif::Atom &atom)
|
||||
{
|
||||
return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
|
||||
}
|
||||
|
||||
/// \brief Get the residue for atom \a atom
|
||||
const Residue &getResidue(const mmcif::Atom &atom) const
|
||||
{
|
||||
return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
|
||||
}
|
||||
|
||||
// Actions
|
||||
void removeAtom(Atom &a)
|
||||
{
|
||||
removeAtom(a, true);
|
||||
}
|
||||
|
||||
void swapAtoms(Atom a1, Atom a2); // swap the labels for these atoms
|
||||
void moveAtom(Atom a, Point p); // move atom to a new location
|
||||
void changeResidue(Residue &res, const std::string &newCompound,
|
||||
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
|
||||
|
||||
/// \brief Remove a residue, can be monomer or nonpoly
|
||||
///
|
||||
/// \param asym_id The asym ID
|
||||
/// \param seq_id The sequence ID
|
||||
void removeResidue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
|
||||
{
|
||||
removeResidue(getResidue(asym_id, seq_id, auth_seq_id));
|
||||
}
|
||||
|
||||
/// \brief Create a new non-polymer entity, returns new ID
|
||||
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
|
||||
/// \return The ID of the created entity
|
||||
std::string createNonPolyEntity(const std::string &mon_id);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
|
||||
/// This method assumes you are copying data from one cif file to another.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of atom_site rows containing the data.
|
||||
/// \return The newly created asym ID
|
||||
std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
|
||||
/// This method creates new atom records filled with info from the info.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atom_info The array of sets of cif::item data containing the data for the atoms.
|
||||
/// \return The newly created asym ID
|
||||
std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::item>> &atom_info);
|
||||
|
||||
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info
|
||||
Branch &createBranch(std::vector<std::vector<cif::item>> &nag_atom_info);
|
||||
|
||||
/// \brief Extend an existing (sugar) branch identified by \a asym_id with one sugar containing atoms constructed from \a atom_info
|
||||
///
|
||||
/// \param asym_id The asym id of the branch to extend
|
||||
/// \param atom_info Array containing the info for the atoms to construct for the new sugar
|
||||
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
|
||||
/// \param link_atom The atom id of the atom linked in the sugar
|
||||
Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::item>> &atom_info,
|
||||
int link_sugar, const std::string &link_atom);
|
||||
|
||||
/// \brief Remove \a branch
|
||||
void removeBranch(Branch &branch);
|
||||
|
||||
/// \brief Remove residue \a res
|
||||
///
|
||||
/// \param res The residue to remove
|
||||
void removeResidue(mmcif::Residue &res);
|
||||
|
||||
/// \brief Translate the coordinates of all atoms in the structure by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the coordinates of all atoms in the structure by quaternion \a t
|
||||
void rotate(Quaternion t);
|
||||
|
||||
/// \brief Translate and rotate the coordinates of all atoms in the structure by \a t and \a q
|
||||
void translateAndRotate(Point t, Quaternion q);
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
|
||||
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
|
||||
|
||||
const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }
|
||||
|
||||
void cleanupEmptyCategories();
|
||||
|
||||
/// \brief Direct access to underlying data
|
||||
cif::category &category(std::string_view name) const
|
||||
{
|
||||
return mDb[name];
|
||||
}
|
||||
|
||||
cif::datablock &datablock() const
|
||||
{
|
||||
return mDb;
|
||||
}
|
||||
|
||||
void validateAtoms() const;
|
||||
|
||||
private:
|
||||
friend Polymer;
|
||||
friend Residue;
|
||||
|
||||
std::string insertCompound(const std::string &compoundID, bool isEntity);
|
||||
|
||||
std::string createEntityForBranch(Branch &branch);
|
||||
|
||||
void loadData();
|
||||
|
||||
void loadAtomsForModel(StructureOpenOptions options);
|
||||
|
||||
template<typename... Args>
|
||||
Atom& emplace_atom(Args ...args)
|
||||
{
|
||||
return emplace_atom(Atom{std::forward<Args>(args)...});
|
||||
}
|
||||
|
||||
Atom &emplace_atom(Atom &&atom);
|
||||
|
||||
void removeAtom(Atom &a, bool removeFromResidue);
|
||||
void removeSugar(Sugar &sugar);
|
||||
|
||||
cif::datablock &mDb;
|
||||
size_t mModelNr;
|
||||
AtomView mAtoms;
|
||||
std::vector<size_t> mAtomIndex;
|
||||
std::list<Polymer> mPolymers;
|
||||
std::list<Branch> mBranches;
|
||||
std::vector<Residue> mNonPolymers;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,144 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
enum class SpacegroupName
|
||||
{
|
||||
full, xHM, Hall
|
||||
};
|
||||
|
||||
struct Spacegroup
|
||||
{
|
||||
const char* name;
|
||||
const char* xHM;
|
||||
const char* Hall;
|
||||
int nr;
|
||||
};
|
||||
|
||||
CIFPP_EXPORT extern const Spacegroup kSpaceGroups[];
|
||||
CIFPP_EXPORT extern const std::size_t kNrOfSpaceGroups;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Bit-packed storage for the 15 integers describing a symmetry operation.
///
/// The values are stored in the low 36 bits of a single 64 bit word:
/// elements 0..8 take two bits each (bits 35..18) and elements 9..14 take
/// three bits each (bits 17..0). Values are masked to their field width
/// when packed, so data() returns the masked values, not necessarily the
/// original ones (e.g. -1 in a two bit field comes back as 3).
struct SymopData
{
	/// \brief Pack the 15 values in \a data into a single word
	constexpr SymopData(const std::array<int,15>& data)
		: m_packed((data[ 0] & 0x03ULL) << 34 bitor
				   (data[ 1] & 0x03ULL) << 32 bitor
				   (data[ 2] & 0x03ULL) << 30 bitor
				   (data[ 3] & 0x03ULL) << 28 bitor
				   (data[ 4] & 0x03ULL) << 26 bitor
				   (data[ 5] & 0x03ULL) << 24 bitor
				   (data[ 6] & 0x03ULL) << 22 bitor
				   (data[ 7] & 0x03ULL) << 20 bitor
				   (data[ 8] & 0x03ULL) << 18 bitor
				   (data[ 9] & 0x07ULL) << 15 bitor
				   (data[10] & 0x07ULL) << 12 bitor
				   (data[11] & 0x07ULL) <<  9 bitor
				   (data[12] & 0x07ULL) <<  6 bitor
				   (data[13] & 0x07ULL) <<  3 bitor
				   (data[14] & 0x07ULL) <<  0)
	{
	}

	/// \brief Two objects are equal when their packed words are equal
	bool operator==(const SymopData& rhs) const
	{
		return m_packed == rhs.m_packed;
	}

	/// \brief Unpack the stored word into its 15 (masked) values
	std::array<int,15> data() const
	{
		// Mask first, then narrow: the masked value always fits in an int,
		// whereas narrowing the shifted 36 bit word first could truncate.
		return {
			static_cast<int>((m_packed >> 34) bitand 0x03),
			static_cast<int>((m_packed >> 32) bitand 0x03),
			static_cast<int>((m_packed >> 30) bitand 0x03),
			static_cast<int>((m_packed >> 28) bitand 0x03),
			static_cast<int>((m_packed >> 26) bitand 0x03),
			static_cast<int>((m_packed >> 24) bitand 0x03),
			static_cast<int>((m_packed >> 22) bitand 0x03),
			static_cast<int>((m_packed >> 20) bitand 0x03),
			static_cast<int>((m_packed >> 18) bitand 0x03),
			static_cast<int>((m_packed >> 15) bitand 0x07),
			static_cast<int>((m_packed >> 12) bitand 0x07),
			static_cast<int>((m_packed >>  9) bitand 0x07),
			static_cast<int>((m_packed >>  6) bitand 0x07),
			static_cast<int>((m_packed >>  3) bitand 0x07),
			static_cast<int>((m_packed >>  0) bitand 0x07),
		};
	}

  private:

	friend struct SymopDataBlock;

	// Mask selecting the low 36 bits that hold the packed values.
	// This is a class constant rather than a per-object member: as a non-static
	// const member it doubled the object size and made the type non-assignable.
	static constexpr uint64_t kPackMask = (~0ULL >> (64-36));

	/// \brief Construct from an already packed word, keeping only the low 36 bits of \a v
	SymopData(uint64_t v)
		: m_packed(v bitand kPackMask) {}

	uint64_t m_packed;
};
|
||||
|
||||
struct SymopDataBlock
|
||||
{
|
||||
constexpr SymopDataBlock(int spacegroup, int rotational_number, const std::array<int,15>& rt_data)
|
||||
: m_v((spacegroup & 0xffffULL) << 48 bitor
|
||||
(rotational_number & 0xffULL) << 40 bitor
|
||||
SymopData(rt_data).m_packed)
|
||||
{
|
||||
}
|
||||
|
||||
uint16_t spacegroup() const { return m_v >> 48; }
|
||||
SymopData symop() const { return SymopData(m_v); }
|
||||
uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
|
||||
|
||||
private:
|
||||
uint64_t m_v;
|
||||
};
|
||||
|
||||
static_assert(sizeof(SymopDataBlock) == sizeof(uint64_t), "Size of SymopData is wrong");
|
||||
|
||||
CIFPP_EXPORT extern const SymopDataBlock kSymopNrTable[];
|
||||
CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code, using SpacegroupName::full
|
||||
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type); // alternative for clipper's parsing code
|
||||
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
extern const int
|
||||
kResidueNrWildcard,
|
||||
kNoSeqNum;
|
||||
|
||||
struct TLSSelection;
|
||||
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
|
||||
|
||||
struct TLSResidue;
|
||||
|
||||
struct TLSSelection
|
||||
{
|
||||
virtual ~TLSSelection() {}
|
||||
virtual void CollectResidues(cif::datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0;
|
||||
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::datablock& db, bool pdbNamespace) const;
|
||||
};
|
||||
|
||||
// Low level: get the selections
|
||||
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
|
||||
|
||||
}
|
||||
@@ -43,8 +43,6 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <cif++/Cif++Export.hpp>
|
||||
|
||||
#if _MSC_VER
|
||||
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
|
||||
#pragma warning(disable : 4068) // unknown pragma
|
||||
|
||||
3636
src/Cif++.cpp
3636
src/Cif++.cpp
File diff suppressed because it is too large
Load Diff
@@ -26,9 +26,9 @@
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <cif++/cif/category.hpp>
|
||||
#include <cif++/cif/datablock.hpp>
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/category.hpp>
|
||||
#include <cif++/datablock.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
// TODO: Find out what the rules are exactly for linked items, the current implementation
|
||||
// is inconsistent. It all depends whether a link is satisfied if a field taking part in the
|
||||
@@ -539,7 +539,7 @@ category::category(const category &rhs)
|
||||
for (auto r = rhs.m_head; r != nullptr; r = r->m_next)
|
||||
insert_impl(end(), clone_row(*r));
|
||||
|
||||
if (m_validator != nullptr)
|
||||
if (m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
}
|
||||
|
||||
@@ -585,7 +585,7 @@ category &category::operator=(const category &rhs)
|
||||
m_parent_links = rhs.m_parent_links;
|
||||
m_child_links = rhs.m_child_links;
|
||||
|
||||
if (m_validator != nullptr)
|
||||
if (m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
}
|
||||
|
||||
@@ -620,7 +620,6 @@ category &category::operator=(category &&rhs)
|
||||
category::~category()
|
||||
{
|
||||
clear();
|
||||
delete m_index;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -1076,9 +1075,7 @@ void category::clear()
|
||||
m_head = m_tail = nullptr;
|
||||
|
||||
delete m_index;
|
||||
|
||||
if (m_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
m_index = nullptr;
|
||||
}
|
||||
|
||||
void category::erase_orphans(condition &&cond)
|
||||
@@ -1145,7 +1142,7 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie
|
||||
(*col.m_validator)(value);
|
||||
|
||||
// first some sanity checks, what was the old value and is it the same for all rows?
|
||||
std::string_view oldValue = rows.front()[tag].text();
|
||||
std::string oldValue{ rows.front()[tag].text() };
|
||||
for (auto row : rows)
|
||||
{
|
||||
if (oldValue != row[tag].text())
|
||||
@@ -1258,6 +1255,10 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie
|
||||
|
||||
void category::update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate)
|
||||
{
|
||||
// make sure we have an index, if possible
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
|
||||
auto &col = m_columns[column];
|
||||
|
||||
const char *oldValue = nullptr;
|
||||
@@ -1285,7 +1286,6 @@ void category::update_value(row *row, size_t column, std::string_view value, boo
|
||||
// before updating
|
||||
|
||||
bool reinsert = false;
|
||||
|
||||
if (updateLinked and // an update of an Item's value
|
||||
m_index != nullptr and key_field_indices().count(column))
|
||||
{
|
||||
@@ -1498,6 +1498,9 @@ row_handle category::create_copy(row_handle r)
|
||||
// proxy methods for every insertion
|
||||
category::iterator category::insert_impl(const_iterator pos, row *n)
|
||||
{
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
|
||||
assert(n != nullptr);
|
||||
assert(n->m_next == nullptr);
|
||||
|
||||
@@ -24,8 +24,8 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/cif/category.hpp>
|
||||
#include <cif++/cif/condition.hpp>
|
||||
#include <cif++/category.hpp>
|
||||
#include <cif++/condition.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -24,7 +24,7 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/cif/datablock.hpp>
|
||||
#include <cif++/datablock.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -24,10 +24,10 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/cif/condition.hpp>
|
||||
#include <cif++/cif/dictionary_parser.hpp>
|
||||
#include <cif++/cif/file.hpp>
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/cif/file.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -24,7 +24,7 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/cif/row.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -32,9 +32,9 @@
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include <cif++/cif/forward_decl.hpp>
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/cif/file.hpp>
|
||||
#include <cif++/forward_decl.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
3959
src/pdb/Cif2PDB.cpp
3959
src/pdb/Cif2PDB.cpp
File diff suppressed because it is too large
Load Diff
6065
src/pdb/PDB2Cif.cpp
6065
src/pdb/PDB2Cif.cpp
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -24,7 +24,7 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/cif/category.hpp>
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,502 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++/structure/BondMap.hpp>
|
||||
#include <cif++/structure/Compound.hpp>
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
union IDType
|
||||
{
|
||||
IDType()
|
||||
: id_n(0)
|
||||
{
|
||||
}
|
||||
IDType(const IDType &rhs)
|
||||
: id_n(rhs.id_n)
|
||||
{
|
||||
}
|
||||
IDType(const std::string &s)
|
||||
: IDType()
|
||||
{
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
}
|
||||
|
||||
IDType &operator=(const IDType &rhs)
|
||||
{
|
||||
id_n = rhs.id_n;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IDType &operator=(const std::string &s)
|
||||
{
|
||||
id_n = 0;
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator<(const IDType &rhs) const
|
||||
{
|
||||
return id_n < rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator<=(const IDType &rhs) const
|
||||
{
|
||||
return id_n <= rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator==(const IDType &rhs) const
|
||||
{
|
||||
return id_n == rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator!=(const IDType &rhs) const
|
||||
{
|
||||
return id_n != rhs.id_n;
|
||||
}
|
||||
|
||||
char id_s[4];
|
||||
uint32_t id_n;
|
||||
};
|
||||
|
||||
static_assert(sizeof(IDType) == 4, "atom_id_type should be 4 bytes");
|
||||
} // namespace
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct CompoundBondInfo
|
||||
{
|
||||
IDType mID;
|
||||
std::set<std::tuple<uint32_t, uint32_t>> mBonded;
|
||||
|
||||
bool bonded(uint32_t a1, uint32_t a2) const
|
||||
{
|
||||
return mBonded.count({a1, a2}) > 0;
|
||||
}
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CompoundBondMap
|
||||
{
|
||||
public:
|
||||
static CompoundBondMap &instance()
|
||||
{
|
||||
static std::unique_ptr<CompoundBondMap> s_instance(new CompoundBondMap);
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
bool bonded(const std::string &compoundID, const std::string &atomID1, const std::string &atomID2);
|
||||
|
||||
private:
|
||||
CompoundBondMap() {}
|
||||
|
||||
uint32_t getAtomID(const std::string &atomID)
|
||||
{
|
||||
IDType id(atomID);
|
||||
|
||||
uint32_t result;
|
||||
|
||||
auto i = mAtomIDIndex.find(id);
|
||||
if (i == mAtomIDIndex.end())
|
||||
{
|
||||
result = uint32_t(mAtomIDIndex.size());
|
||||
mAtomIDIndex[id] = result;
|
||||
}
|
||||
else
|
||||
result = i->second;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::map<IDType, uint32_t> mAtomIDIndex;
|
||||
std::vector<CompoundBondInfo> mCompounds;
|
||||
std::mutex mMutex;
|
||||
};
|
||||
|
||||
bool CompoundBondMap::bonded(const std::string &compoundID, const std::string &atomID1, const std::string &atomID2)
|
||||
{
|
||||
std::lock_guard lock(mMutex);
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
IDType id(compoundID);
|
||||
uint32_t a1 = getAtomID(atomID1);
|
||||
uint32_t a2 = getAtomID(atomID2);
|
||||
if (a1 > a2)
|
||||
std::swap(a1, a2);
|
||||
|
||||
for (auto &bi : mCompounds)
|
||||
{
|
||||
if (bi.mID != id)
|
||||
continue;
|
||||
|
||||
return bi.bonded(a1, a2);
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
|
||||
// not found in our cache, calculate
|
||||
CompoundBondInfo bondInfo{id};
|
||||
|
||||
auto compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
if (not compound)
|
||||
{
|
||||
if (cif::VERBOSE >= 0)
|
||||
std::cerr << "Missing compound bond info for " << compoundID << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto &atom : compound->bonds())
|
||||
{
|
||||
uint32_t ca1 = getAtomID(atom.atomID[0]);
|
||||
uint32_t ca2 = getAtomID(atom.atomID[1]);
|
||||
if (ca1 > ca2)
|
||||
std::swap(ca1, ca2);
|
||||
|
||||
bondInfo.mBonded.insert({ca1, ca2});
|
||||
result = result or (a1 == ca1 and a2 == ca2);
|
||||
}
|
||||
}
|
||||
|
||||
mCompounds.push_back(bondInfo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
BondMap::BondMap(const Structure &p)
|
||||
{
|
||||
auto &compoundBondInfo = CompoundBondMap::instance();
|
||||
|
||||
auto atoms = p.atoms();
|
||||
dim = uint32_t(atoms.size());
|
||||
|
||||
// bond = std::vector<bool>(dim * (dim - 1), false);
|
||||
|
||||
for (auto &atom : atoms)
|
||||
index[atom.id()] = uint32_t(index.size());
|
||||
|
||||
auto bindAtoms = [this](const std::string &a, const std::string &b)
|
||||
{
|
||||
uint32_t ixa = index[a];
|
||||
uint32_t ixb = index[b];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
};
|
||||
|
||||
auto linkAtoms = [this, &bindAtoms](const std::string &a, const std::string &b)
|
||||
{
|
||||
bindAtoms(a, b);
|
||||
|
||||
link[a].insert(b);
|
||||
link[b].insert(a);
|
||||
};
|
||||
|
||||
cif::v2::datablock &db = p.datablock();
|
||||
|
||||
// collect all compounds first
|
||||
std::set<std::string> compounds;
|
||||
for (auto c : db["chem_comp"])
|
||||
compounds.insert(c["id"].as<std::string>());
|
||||
|
||||
// make sure we also have all residues in the polyseq
|
||||
for (auto m : db["entity_poly_seq"])
|
||||
{
|
||||
std::string c = m["mon_id"].as<std::string>();
|
||||
if (compounds.count(c))
|
||||
continue;
|
||||
|
||||
if (cif::VERBOSE > 1)
|
||||
std::cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << std::endl;
|
||||
compounds.insert(c);
|
||||
}
|
||||
|
||||
cif::Progress progress(compounds.size(), "Creating bond map");
|
||||
|
||||
// some helper indices to speed things up a bit
|
||||
std::map<std::tuple<std::string, int, std::string, std::string>, std::string> atomMapByAsymSeqAndAtom;
|
||||
for (auto &a : p.atoms())
|
||||
{
|
||||
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID(), a.authSeqID());
|
||||
atomMapByAsymSeqAndAtom[key] = a.id();
|
||||
}
|
||||
|
||||
// first link all residues in a polyseq
|
||||
|
||||
std::string lastAsymID, lastAuthSeqID;
|
||||
int lastSeqID = 0;
|
||||
for (const auto &[asymID, seqID, authSeqID] : db["pdbx_poly_seq_scheme"].rows<std::string, int, std::string>("asym_id", "seq_id", "pdb_seq_num"))
|
||||
{
|
||||
if (asymID != lastAsymID) // first in a new sequece
|
||||
{
|
||||
lastAsymID = asymID;
|
||||
lastSeqID = seqID;
|
||||
lastAuthSeqID = authSeqID;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto kc = make_tuple(asymID, lastSeqID, "C", lastAuthSeqID);
|
||||
auto kn = make_tuple(asymID, seqID, "N", authSeqID);
|
||||
|
||||
if (atomMapByAsymSeqAndAtom.count(kc) and atomMapByAsymSeqAndAtom.count(kn))
|
||||
{
|
||||
auto c = atomMapByAsymSeqAndAtom.at(kc);
|
||||
auto n = atomMapByAsymSeqAndAtom.at(kn);
|
||||
|
||||
bindAtoms(c, n);
|
||||
}
|
||||
// if (not(c.empty() or n.empty()))
|
||||
|
||||
lastSeqID = seqID;
|
||||
lastAuthSeqID = authSeqID;
|
||||
}
|
||||
|
||||
for (auto l : db["struct_conn"])
|
||||
{
|
||||
std::string asym1, asym2, atomId1, atomId2;
|
||||
int seqId1 = 0, seqId2 = 0;
|
||||
std::string authSeqId1, authSeqId2;
|
||||
|
||||
cif::v2::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2, authSeqId1, authSeqId2) =
|
||||
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
|
||||
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
|
||||
"ptnr1_label_seq_id", "ptnr2_label_seq_id",
|
||||
"ptnr1_auth_seq_id", "ptnr2_auth_seq_id");
|
||||
|
||||
auto ka = make_tuple(asym1, seqId1, atomId1, authSeqId1);
|
||||
auto kb = make_tuple(asym2, seqId2, atomId2, authSeqId2);
|
||||
|
||||
if (atomMapByAsymSeqAndAtom.count(ka) and atomMapByAsymSeqAndAtom.count(kb))
|
||||
{
|
||||
auto a = atomMapByAsymSeqAndAtom.at(ka);
|
||||
auto b = atomMapByAsymSeqAndAtom.at(kb);
|
||||
|
||||
linkAtoms(a, b);
|
||||
}
|
||||
|
||||
// std::string a = atomMapByAsymSeqAndAtom.at(make_tuple(asym1, seqId1, atomId1, authSeqId1));
|
||||
// std::string b = atomMapByAsymSeqAndAtom.at(make_tuple(asym2, seqId2, atomId2, authSeqId2));
|
||||
// if (not(a.empty() or b.empty()))
|
||||
// linkAtoms(a, b);
|
||||
}
|
||||
|
||||
// then link all atoms in the compounds
|
||||
|
||||
for (auto c : compounds)
|
||||
{
|
||||
if (c == "HOH" or c == "H2O" or c == "WAT")
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::cerr << "skipping water in bond map calculation" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bonded = [c, &compoundBondInfo](const Atom &a, const Atom &b)
|
||||
{
|
||||
auto label_a = a.labelAtomID();
|
||||
auto label_b = b.labelAtomID();
|
||||
|
||||
return compoundBondInfo.bonded(c, label_a, label_b);
|
||||
};
|
||||
|
||||
// loop over poly_seq_scheme
|
||||
for (auto r : db["pdbx_poly_seq_scheme"].find(cif::v2::key("mon_id") == c))
|
||||
{
|
||||
std::string asymID;
|
||||
int seqID;
|
||||
cif::v2::tie(asymID, seqID) = r.get("asym_id", "seq_id");
|
||||
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[&](auto &a)
|
||||
{ return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
bindAtoms(rAtoms[i].id(), rAtoms[j].id());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loop over pdbx_nonpoly_scheme
|
||||
for (auto r : db["pdbx_nonpoly_scheme"].find(cif::v2::key("mon_id") == c))
|
||||
{
|
||||
std::string asymID;
|
||||
cif::v2::tie(asymID) = r.get("asym_id");
|
||||
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[&](auto &a)
|
||||
{ return a.labelAsymID() == asymID; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
{
|
||||
uint32_t ixa = index[rAtoms[i].id()];
|
||||
uint32_t ixb = index[rAtoms[j].id()];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loop over pdbx_branch_scheme
|
||||
for (const auto &[asym_id, pdb_seq_num] : db["pdbx_branch_scheme"].find<std::string, std::string>(cif::v2::key("mon_id") == c, "asym_id", "pdb_seq_num"))
|
||||
{
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[id = asym_id, nr = pdb_seq_num](const Atom &a)
|
||||
{ return a.labelAsymID() == id and a.authSeqID() == nr; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
{
|
||||
uint32_t ixa = index[rAtoms[i].id()];
|
||||
uint32_t ixb = index[rAtoms[j].id()];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// start by creating an index for single bonds
|
||||
|
||||
std::multimap<uint32_t, uint32_t> b1_2;
|
||||
for (auto &bk : bond)
|
||||
{
|
||||
uint32_t a, b;
|
||||
std::tie(a, b) = dekey(bk);
|
||||
|
||||
b1_2.insert({a, b});
|
||||
b1_2.insert({b, a});
|
||||
}
|
||||
|
||||
std::multimap<uint32_t, uint32_t> b1_3;
|
||||
for (uint32_t i = 0; i < dim; ++i)
|
||||
{
|
||||
auto a = b1_2.equal_range(i);
|
||||
|
||||
std::vector<uint32_t> s;
|
||||
for (auto j = a.first; j != a.second; ++j)
|
||||
s.push_back(j->second);
|
||||
|
||||
for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
|
||||
{
|
||||
for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
|
||||
{
|
||||
uint32_t x = s[si1];
|
||||
uint32_t y = s[si2];
|
||||
|
||||
if (isBonded(x, y))
|
||||
continue;
|
||||
|
||||
b1_3.insert({x, y});
|
||||
b1_3.insert({y, x});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dim; ++i)
|
||||
{
|
||||
auto a1 = b1_2.equal_range(i);
|
||||
auto a2 = b1_3.equal_range(i);
|
||||
|
||||
for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
|
||||
{
|
||||
for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
|
||||
{
|
||||
uint32_t b1 = ai1->second;
|
||||
uint32_t b2 = ai2->second;
|
||||
|
||||
if (isBonded(b1, b2))
|
||||
continue;
|
||||
|
||||
bond_1_4.insert(key(b1, b2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> BondMap::linked(const Atom &a) const
|
||||
{
|
||||
auto i = link.find(a.id());
|
||||
|
||||
std::vector<std::string> result;
|
||||
|
||||
if (i != link.end())
|
||||
result = std::vector<std::string>(i->second.begin(), i->second.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> BondMap::atomIDsForCompound(const std::string &compoundID)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
auto *compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
|
||||
if (compound == nullptr)
|
||||
throw BondMapException("Missing bond information for compound " + compoundID);
|
||||
|
||||
for (auto &compAtom : compound->atoms())
|
||||
result.push_back(compAtom.id);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,750 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++/utilities.hpp>
|
||||
#include <cif++/structure/Compound.hpp>
|
||||
// #include <cif++/point.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string to_string(BondType bondType)
|
||||
{
|
||||
switch (bondType)
|
||||
{
|
||||
case BondType::sing: return "sing";
|
||||
case BondType::doub: return "doub";
|
||||
case BondType::trip: return "trip";
|
||||
case BondType::quad: return "quad";
|
||||
case BondType::arom: return "arom";
|
||||
case BondType::poly: return "poly";
|
||||
case BondType::delo: return "delo";
|
||||
case BondType::pi: return "pi";
|
||||
}
|
||||
throw std::invalid_argument("Invalid bondType");
|
||||
}
|
||||
|
||||
// Parse the textual form of a bond type, compared with cif::iequals.
// Throws std::invalid_argument when the text matches none of the known names.
BondType from_string(const std::string &bondType)
{
	struct NamedBondType
	{
		const char *name;
		BondType type;
	};

	// Same names, tried in the same order, as the textual forms produced by to_string
	static const NamedBondType kNamedBondTypes[] = {
		{"sing", BondType::sing},
		{"doub", BondType::doub},
		{"trip", BondType::trip},
		{"quad", BondType::quad},
		{"arom", BondType::arom},
		{"poly", BondType::poly},
		{"delo", BondType::delo},
		{"pi", BondType::pi}};

	for (const auto &candidate : kNamedBondTypes)
	{
		if (cif::iequals(bondType, candidate.name))
			return candidate.type;
	}

	throw std::invalid_argument("Invalid bondType: " + bondType);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound helper classes
|
||||
|
||||
struct CompoundAtomLess
|
||||
{
|
||||
bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
|
||||
{
|
||||
int d = a.id.compare(b.id);
|
||||
if (d == 0)
|
||||
d = a.typeSymbol - b.typeSymbol;
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompoundBondLess
|
||||
{
|
||||
bool operator()(const CompoundBond &a, const CompoundBond &b) const
|
||||
{
|
||||
int d = a.atomID[0].compare(b.atomID[0]);
|
||||
if (d == 0)
|
||||
d = a.atomID[1].compare(b.atomID[1]);
|
||||
if (d == 0)
|
||||
d = static_cast<int>(a.type) - static_cast<int>(b.type);
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound
|
||||
|
||||
// Construct a compound from a datablock holding a single chem_comp row plus
// the chem_comp_atom and chem_comp_bond categories (the column names read
// here are the pdbx_* / model_Cartn_* ones).
// Throws std::runtime_error when chem_comp does not contain exactly one row.
Compound::Compound(cif::datablock &db)
{
	auto &compInfo = db["chem_comp"];
	if (compInfo.size() != 1)
		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

	cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
		compInfo.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

	// The name should not contain newline characters since that triggers validation errors later on
	cif::replace_all(mName, "\n", "");

	mGroup = "non-polymer";

	// atoms
	for (auto atomRow : db["chem_comp_atom"])
	{
		CompoundAtom entry;
		std::string symbol;

		cif::tie(entry.id, symbol, entry.charge, entry.aromatic, entry.leavingAtom, entry.stereoConfig, entry.x, entry.y, entry.z) =
			atomRow.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

		entry.typeSymbol = AtomTypeTraits(symbol).type();
		mAtoms.push_back(std::move(entry));
	}

	// bonds; value_order is converted with from_string, which throws on unknown text
	for (auto bondRow : db["chem_comp_bond"])
	{
		CompoundBond entry;
		std::string order;

		cif::tie(entry.atomID[0], entry.atomID[1], order, entry.aromatic, entry.stereoConfig) =
			bondRow.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

		entry.type = from_string(order);
		mBonds.push_back(std::move(entry));
	}
}
|
||||
|
||||
// Construct a compound from a datablock whose chem_comp_atom / chem_comp_bond
// categories use the plain column names "x", "y", "z", "type" and "aromatic".
// id, name, type and group are taken from the arguments; the formal charge
// and formula weight are accumulated from the atoms.
Compound::Compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
	: mID(id)
	, mName(name)
	, mType(type)
	, mGroup(group)
{
	for (auto atomRow : db["chem_comp_atom"])
	{
		CompoundAtom entry;
		std::string symbol;

		cif::tie(entry.id, symbol, entry.charge, entry.x, entry.y, entry.z) =
			atomRow.get("atom_id", "type_symbol", "charge", "x", "y", "z");
		entry.typeSymbol = AtomTypeTraits(symbol).type();

		mFormalCharge += entry.charge;
		mFormulaWeight += AtomTypeTraits(entry.typeSymbol).weight();

		mAtoms.push_back(std::move(entry));
	}

	using cif::iequals;

	for (auto bondRow : db["chem_comp_bond"])
	{
		CompoundBond entry;
		std::string kind;

		cif::tie(entry.atomID[0], entry.atomID[1], kind, entry.aromatic) =
			bondRow.get("atom_id_1", "atom_id_2", "type", "aromatic");

		if (iequals(kind, "single"))
			entry.type = BondType::sing;
		else if (iequals(kind, "double"))
			entry.type = BondType::doub;
		else if (iequals(kind, "triple"))
			entry.type = BondType::trip;
		else if (iequals(kind, "deloc") or iequals(kind, "aromat") or iequals(kind, "aromatic"))
			entry.type = BondType::delo;
		else
		{
			// Unknown bond type text: report it when verbose and treat the bond as single
			if (cif::VERBOSE > 0)
				std::cerr << "Unimplemented chem_comp_bond.type " << kind << " in " << id << std::endl;
			entry.type = BondType::sing;
		}

		mBonds.push_back(std::move(entry));
	}
}
|
||||
|
||||
// Return a copy of the first atom in this compound whose id equals atomID.
// Throws std::out_of_range when no atom has that id. This now includes the
// case of an empty atomID that matches nothing: the previous implementation
// detected "not found" by comparing the id of a value-initialised result
// with atomID, so an empty atomID silently returned a blank atom.
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{
	for (const auto &a : mAtoms)
	{
		if (a.id == atomID)
			return a;
	}

	throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
}
|
||||
|
||||
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
|
||||
{
|
||||
auto i = find_if(mBonds.begin(), mBonds.end(),
|
||||
[&](const CompoundBond &b) {
|
||||
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
|
||||
});
|
||||
|
||||
return i != mBonds.end();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a factory class to generate compounds
|
||||
|
||||
// Three letter residue code -> one letter code. The keys of this map seed
// the set of known peptides in CompoundFactoryImpl.
CIFPP_EXPORT const std::map<std::string, char> kAAMap{
	{"ALA", 'A'}, {"ARG", 'R'}, {"ASN", 'N'}, {"ASP", 'D'},
	{"CYS", 'C'}, {"GLN", 'Q'}, {"GLU", 'E'}, {"GLY", 'G'},
	{"HIS", 'H'}, {"ILE", 'I'}, {"LEU", 'L'}, {"LYS", 'K'},
	{"MET", 'M'}, {"PHE", 'F'}, {"PRO", 'P'}, {"SER", 'S'},
	{"THR", 'T'}, {"TRP", 'W'}, {"TYR", 'Y'}, {"VAL", 'V'},
	{"GLX", 'Z'}, {"ASX", 'B'}};
|
||||
|
||||
// Residue name of a base -> one letter code. The keys of this map seed the
// set of known bases in CompoundFactoryImpl.
CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
	{"A", 'A'}, {"C", 'C'}, {"G", 'G'}, {"T", 'T'}, {"U", 'U'},
	{"DA", 'A'}, {"DC", 'C'}, {"DG", 'G'}, {"DT", 'T'}};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryImpl>
|
||||
{
|
||||
public:
|
||||
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
|
||||
|
||||
CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next);
|
||||
|
||||
virtual ~CompoundFactoryImpl()
|
||||
{
|
||||
for (auto c: mCompounds)
|
||||
delete c;
|
||||
}
|
||||
|
||||
Compound *get(std::string id)
|
||||
{
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
cif::to_upper(id);
|
||||
|
||||
Compound *result = nullptr;
|
||||
|
||||
// walk the list, see if any of us has the compound already
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
{
|
||||
for (auto cmp : impl->mCompounds)
|
||||
{
|
||||
if (cmp->id() == id)
|
||||
{
|
||||
result = cmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr and mMissing.count(id) == 0)
|
||||
{
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
{
|
||||
result = impl->create(id);
|
||||
if (result != nullptr)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr)
|
||||
mMissing.insert(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> next() const
|
||||
{
|
||||
return mNext;
|
||||
}
|
||||
|
||||
bool isKnownPeptide(const std::string &resName)
|
||||
{
|
||||
return mKnownPeptides.count(resName) or
|
||||
(mNext and mNext->isKnownPeptide(resName));
|
||||
}
|
||||
|
||||
bool isKnownBase(const std::string &resName)
|
||||
{
|
||||
return mKnownBases.count(resName) or
|
||||
(mNext and mNext->isKnownBase(resName));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
virtual Compound *create(const std::string &id)
|
||||
{
|
||||
// For the base class we assume every compound is preloaded
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
std::vector<Compound *> mCompounds;
|
||||
std::set<std::string> mKnownPeptides;
|
||||
std::set<std::string> mKnownBases;
|
||||
std::set<std::string> mMissing;
|
||||
std::shared_ptr<CompoundFactoryImpl> mNext;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Create a factory without preloaded compounds; the known peptides and bases
// are seeded with the keys of kAAMap and kBaseMap.
CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	for (const auto &aminoAcid : kAAMap)
		mKnownPeptides.insert(aminoAcid.first);

	for (const auto &base : kBaseMap)
		mKnownBases.insert(base.first);
}
|
||||
|
||||
// Create a factory with all compounds found in `file` preloaded.
// A file with a non-empty comp_list datablock is handled as a CCP4 restraints
// file; anything else is validated against mmcif_pdbx and every datablock is
// loaded as a compound.
// Throws std::runtime_error("Invalid compound file") when that validation fails.
CompoundFactoryImpl::CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	// Map a CCP4 group name to the compound type used here.
	// Known groups are (counted from ccp4 monomer dictionary):
	//   D-pyranose, DNA, L-PEPTIDE LINKING, L-SACCHARIDE, L-peptide,
	//   L-pyranose, M-peptide, NON-POLYMER, P-peptide, RNA, furanose,
	//   non-polymer, non_polymer, peptide, pyranose, saccharide
	const auto typeForGroup = [](const std::string &compID, const std::string &compGroup) -> std::string
	{
		if (cif::iequals(compID, "gly"))
			return "peptide linking";
		if (cif::iequals(compGroup, "l-peptide") or cif::iequals(compGroup, "L-peptide linking") or cif::iequals(compGroup, "peptide") or cif::iequals(compGroup, "p-peptide"))
			return "L-peptide linking";
		if (cif::iequals(compGroup, "DNA"))
			return "DNA linking";
		if (cif::iequals(compGroup, "RNA"))
			return "RNA linking";
		return "non-polymer";
	};

	cif::file cifFile(file);

	auto &compList = cifFile["comp_list"];
	if (compList.empty())
	{
		// A CCD components file, validate it first
		cifFile.load_dictionary("mmcif_pdbx");

		if (not cifFile.is_valid())
			throw std::runtime_error("Invalid compound file");

		for (auto &db : cifFile)
			mCompounds.push_back(new Compound(db));

		return;
	}

	// So this is a CCP4 restraints file, special handling
	auto &chemComp = compList["chem_comp"];

	for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
	{
		const std::string type = typeForGroup(id, group);

		// the atoms and bonds live in a datablock named after the compound
		auto &db = cifFile["comp_" + id];

		mCompounds.push_back(new Compound(db, id, name, type, group));
	}
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the cached components.cif file from CCD
|
||||
|
||||
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
|
||||
{
|
||||
public:
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next, const fs::path& file)
|
||||
: CompoundFactoryImpl(next)
|
||||
, mCompoundsFile(file)
|
||||
{
|
||||
}
|
||||
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: CompoundFactoryImpl(next)
|
||||
{
|
||||
}
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
|
||||
cif::parser::datablock_index mIndex;
|
||||
fs::path mCompoundsFile;
|
||||
};
|
||||
|
||||
// Load the compound `id` from the CCD components data and store it in
// mCompounds. The first call builds an index of all datablocks in mIndex;
// that index is then used to parse only the requested datablock.
// Returns nullptr when the data contains no datablock named `id`.
// Throws std::runtime_error when no file was configured and the
// "components.cif" resource cannot be loaded.
//
// NOTE(review): mCompounds is appended to while holding only a shared lock on
// mMutex -- confirm against the locking done in CompoundFactoryImpl::get.
Compound *CCDCompoundFactoryImpl::create(const std::string &id)
{
	// Open a fresh stream on the components data, from the resource when no
	// file was configured.
	auto openComponents = [this]() -> std::unique_ptr<std::istream>
	{
		if (not mCompoundsFile.empty())
			return std::unique_ptr<std::istream>(new std::ifstream(mCompoundsFile));

		std::unique_ptr<std::istream> resource = cif::load_resource("components.cif");
		if (not resource)
			throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
		return resource;
	};

	const bool chatty = cif::VERBOSE > 1;

	std::unique_ptr<std::istream> ccd = openComponents();

	cif::file file;

	if (mIndex.empty())
	{
		if (chatty)
		{
			std::cout << "Creating component index "
					  << "...";
			std::cout.flush();
		}

		cif::parser indexer(*ccd, file);
		mIndex = indexer.index_datablocks();

		if (chatty)
			std::cout << " done" << std::endl;

		// reload the resource, perhaps this should be improved...
		ccd = openComponents();
	}

	if (chatty)
	{
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::parser parser(*ccd, file);
	parser.parse_single_datablock(id, mIndex);

	if (chatty)
		std::cout << " done" << std::endl;

	Compound *loaded = nullptr;

	if (not file.empty())
	{
		auto &db = file.front();
		if (db.name() == id)
		{
			loaded = new Compound(db);

			std::shared_lock lock(mMutex);
			mCompounds.push_back(loaded);
		}
	}

	if (loaded == nullptr and cif::VERBOSE > 0)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	return loaded;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the data found in CCP4's monomers lib
|
||||
|
||||
class CCP4CompoundFactoryImpl : public CompoundFactoryImpl
|
||||
{
|
||||
public:
|
||||
CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next = nullptr);
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
cif::file mFile;
|
||||
fs::path mCLIBD_MON;
|
||||
};
|
||||
|
||||
// Load list/mon_lib_list.cif from the monomer library and register the three
// letter code of every entry whose group is a peptide variant as a known
// peptide, and of every DNA or RNA entry as a known base.
CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next)
	: CompoundFactoryImpl(next)
	, mFile((clibd_mon / "list" / "mon_lib_list.cif").string())
	, mCLIBD_MON(clibd_mon)
{
	// matches "peptide" with an optional l-, m- or p- prefix, ignoring case
	const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

	auto &monomers = mFile["comp_list"]["chem_comp"];

	for (const auto &[group, threeLetterCode] : monomers.rows<std::string, std::string>("group", "three_letter_code"))
	{
		if (std::regex_match(group, peptideRx))
		{
			mKnownPeptides.insert(threeLetterCode);
			continue;
		}

		if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
			mKnownBases.insert(threeLetterCode);
	}
}
|
||||
|
||||
// Load the compound `id` from the CCP4 monomer library and store it in
// mCompounds. The compound must have exactly one row in the chem_comp list
// (matched on three_letter_code) and its restraints file must exist as
// <CLIBD_MON>/<first letter of id, lower case>/<id>.cif.
// Returns nullptr when either condition does not hold.
Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
{
	Compound *result = nullptr;

	auto &cat = mFile["comp_list"]["chem_comp"];

	auto rs = cat.find(cif::key("three_letter_code") == id);

	if (rs.size() == 1)
	{
		auto row = rs.front();

		std::string name, group;
		cif::tie(name, group) = row.get("name", "group");

		const fs::path monomerDir = mCLIBD_MON / cif::to_lower_copy(id.substr(0, 1));

		fs::path resFile = monomerDir / (id + ".cif");

		// COM, CON and PRN are stored as e.g. COM_COM.cif, seriously...
		// The last test used to be the bare literal "PRN", which is always
		// true, so the fallback name was tried for every missing file.
		if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN"))
			resFile = monomerDir / (id + '_' + id + ".cif");

		if (fs::exists(resFile))
		{
			cif::file cf(resFile.string());

			// locate the datablock
			auto &db = cf["comp_" + id];

			std::string type;

			// known groups are (counted from ccp4 monomer dictionary)

			//	D-pyranose
			//	DNA
			//	L-PEPTIDE LINKING
			//	L-SACCHARIDE
			//	L-peptide
			//	L-pyranose
			//	M-peptide
			//	NON-POLYMER
			//	P-peptide
			//	RNA
			//	furanose
			//	non-polymer
			//	non_polymer
			//	peptide
			//	pyranose
			//	saccharide

			if (cif::iequals(id, "gly"))
				type = "peptide linking";
			else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
				type = "L-peptide linking";
			else if (cif::iequals(group, "DNA"))
				type = "DNA linking";
			else if (cif::iequals(group, "RNA"))
				type = "RNA linking";
			else
				type = "non-polymer";

			mCompounds.push_back(new Compound(db, id, name, type, group));
			result = mCompounds.back();
		}
	}

	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Storage for the CompoundFactory singletons. instance() and clear() use
// tlInstance when sUseThreadLocalInstance is set and sInstance otherwise;
// the flag is set through CompoundFactory::init().
std::unique_ptr<CompoundFactory> CompoundFactory::sInstance;
|
||||
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
|
||||
bool CompoundFactory::sUseThreadLocalInstance;
|
||||
|
||||
void CompoundFactory::init(bool useThreadLocalInstanceOnly)
|
||||
{
|
||||
sUseThreadLocalInstance = useThreadLocalInstanceOnly;
|
||||
}
|
||||
|
||||
// Set up the default chain of compound factories: the CCD components.cif
// resource when it can be loaded, with the CCP4 monomer library (directory
// named by the CLIBD_MON environment variable) stacked on top when available.
CompoundFactory::CompoundFactory()
	: mImpl(nullptr)
{
	auto ccd = cif::load_resource("components.cif");
	if (ccd)
		mImpl.reset(new CCDCompoundFactoryImpl(mImpl));
	else if (cif::VERBOSE > 0)
		std::cerr << "CCD components.cif file was not found" << std::endl;

	const char *clibd_mon = getenv("CLIBD_MON");
	if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
	{
		// Pass the current implementation as next: without it the CCD based
		// factory created above was dropped as soon as CLIBD_MON was usable.
		mImpl.reset(new CCP4CompoundFactoryImpl(clibd_mon, mImpl));
	}
	else if (cif::VERBOSE > 0)
		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
}
|
||||
|
||||
// Nothing to release explicitly here; kept out of line.
CompoundFactory::~CompoundFactory() = default;
|
||||
|
||||
// Return the singleton, creating it on first use. Which singleton is used
// (thread local or shared) depends on sUseThreadLocalInstance.
CompoundFactory &CompoundFactory::instance()
{
	std::unique_ptr<CompoundFactory> &slot = sUseThreadLocalInstance ? tlInstance : sInstance;

	if (not slot)
		slot.reset(new CompoundFactory());

	return *slot;
}
|
||||
|
||||
void CompoundFactory::clear()
|
||||
{
|
||||
if (sUseThreadLocalInstance)
|
||||
tlInstance.reset(nullptr);
|
||||
else
|
||||
sInstance.reset();
|
||||
}
|
||||
|
||||
// Put a CCD based factory reading from inDictFile in front of the current
// chain of factories.
// Throws std::runtime_error when the file does not exist; any exception from
// constructing the factory is reported on std::cerr and rethrown.
void CompoundFactory::setDefaultDictionary(const fs::path &inDictFile)
{
	const bool found = fs::exists(inDictFile);
	if (not found)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		auto *replacement = new CCDCompoundFactoryImpl(mImpl, inDictFile);
		mImpl.reset(replacement);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;
		throw;
	}
}
|
||||
|
||||
// Put a factory with all compounds from inDictFile preloaded in front of the
// current chain of factories; popDictionary() removes it again.
// Throws std::runtime_error when the file does not exist; any exception from
// loading the file is reported on std::cerr and rethrown.
void CompoundFactory::pushDictionary(const fs::path &inDictFile)
{
	const bool found = fs::exists(inDictFile);
	if (not found)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		auto *pushed = new CompoundFactoryImpl(inDictFile, mImpl);
		mImpl.reset(pushed);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;
		throw;
	}
}
|
||||
|
||||
void CompoundFactory::popDictionary()
|
||||
{
|
||||
if (mImpl)
|
||||
mImpl = mImpl->next();
|
||||
}
|
||||
|
||||
// Return the compound with the given id as found by the chain of factories,
// or nullptr when there is no factory or the compound is unknown.
const Compound *CompoundFactory::create(std::string id)
{
	if (not mImpl)
		return nullptr;

	return mImpl->get(id);
}
|
||||
|
||||
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownPeptide(resName) : kAAMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
bool CompoundFactory::isKnownBase(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownBase(resName) : kBaseMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
} // namespace mmcif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,155 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include <cif++/structure/Symmetry.hpp>
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include "./SymOpTable_data.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Unfortunately, clipper has a different numbering scheme than PDB
|
||||
// for rotation numbers. So we created a table to map those.
|
||||
// Perhaps a bit over the top, but hey....
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
else if (spacegroup.empty())
|
||||
throw std::runtime_error("No spacegroup, cannot continue");
|
||||
|
||||
int result = 0;
|
||||
|
||||
const size_t N = kNrOfSpaceGroups;
|
||||
int32_t L = 0, R = static_cast<int32_t>(N - 1);
|
||||
while (L <= R)
|
||||
{
|
||||
int32_t i = (L + R) / 2;
|
||||
|
||||
int d = spacegroup.compare(kSpaceGroups[i].name);
|
||||
|
||||
if (d > 0)
|
||||
L = i + 1;
|
||||
else if (d < 0)
|
||||
R = i - 1;
|
||||
else
|
||||
{
|
||||
result = kSpaceGroups[i].nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// not found, see if we can find a match based on xHM name
|
||||
if (result == 0)
|
||||
{
|
||||
for (size_t i = 0; i < kNrOfSpaceGroups; ++i)
|
||||
{
|
||||
auto& sp = kSpaceGroups[i];
|
||||
if (sp.xHM == spacegroup)
|
||||
{
|
||||
result = sp.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (result == 0)
|
||||
throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
else if (spacegroup.empty())
|
||||
throw std::runtime_error("No spacegroup, cannot continue");
|
||||
|
||||
int result = 0;
|
||||
|
||||
if (type == SpacegroupName::full)
|
||||
{
|
||||
const size_t N = kNrOfSpaceGroups;
|
||||
int32_t L = 0, R = static_cast<int32_t>(N - 1);
|
||||
while (L <= R)
|
||||
{
|
||||
int32_t i = (L + R) / 2;
|
||||
|
||||
int d = spacegroup.compare(kSpaceGroups[i].name);
|
||||
|
||||
if (d > 0)
|
||||
L = i + 1;
|
||||
else if (d < 0)
|
||||
R = i - 1;
|
||||
else
|
||||
{
|
||||
result = kSpaceGroups[i].nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == SpacegroupName::xHM)
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.xHM == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.Hall == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// not found, see if we can find a match based on xHM name
|
||||
if (result == 0)
|
||||
throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -43,8 +43,8 @@ using std::regex;
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/cif/dictionary_parser.hpp>
|
||||
#include <cif++/cif/validate.hpp>
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++.hpp>
|
||||
#include <cif++/structure/Structure.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++/cif.hpp>
|
||||
#include <cif++.hpp>
|
||||
#include <cif++/structure/Structure.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
|
||||
// #include <cif++/DistanceMap.hpp>
|
||||
#include <cif++/BondMap.hpp>
|
||||
#include <cif++/Cif++.hpp>
|
||||
#include <cif++/CifValidator.hpp>
|
||||
#include <cif++/CifParser.hpp>
|
||||
#include <cif++++.hpp>
|
||||
#include <cif++Validator.hpp>
|
||||
#include <cif++Parser.hpp>
|
||||
|
||||
namespace tt = boost::test_tools;
|
||||
|
||||
|
||||
@@ -31,12 +31,12 @@
|
||||
|
||||
// #include <cif++/DistanceMap.hpp>
|
||||
// #include <cif++/BondMap.hpp>
|
||||
#include <cif++/cif.hpp>
|
||||
// #include <cif++/CifValidator.hpp>
|
||||
// #include <cif++/CifParser.hpp>
|
||||
#include <cif++.hpp>
|
||||
// #include <cif++Validator.hpp>
|
||||
// #include <cif++Parser.hpp>
|
||||
|
||||
#include <cif++/cif/parser.hpp>
|
||||
#include <cif++/cif/dictionary_parser.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
|
||||
namespace tt = boost::test_tools;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user