mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 22:14:24 +08:00
Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1ed704f172 | ||
|
|
398c16eac2 | ||
|
|
fa869bdc7d | ||
|
|
c20d0d2a30 | ||
|
|
000f2736c2 | ||
|
|
cfcc81bb62 | ||
|
|
82eae05868 | ||
|
|
e8fb53c49b | ||
|
|
604c97afe1 | ||
|
|
7e60cdf272 | ||
|
|
9ea7cfcc80 | ||
|
|
a7a4a16f79 | ||
|
|
6717059934 | ||
|
|
714747c280 | ||
|
|
81cd305c80 | ||
|
|
5de872bbb3 | ||
|
|
ce6a75a920 | ||
|
|
874a5cb2f2 | ||
|
|
6e2202d4f1 | ||
|
|
bcf33df701 | ||
|
|
3bdcf21c69 | ||
|
|
4b36bdc58c | ||
|
|
6d9008ee8c | ||
|
|
ee93692707 | ||
|
|
2bcc368bce | ||
|
|
6cc4467d53 | ||
|
|
41c0521480 | ||
|
|
7d33d56c0e | ||
|
|
f86f34e5e1 |
@@ -24,10 +24,15 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.23)
|
||||
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
# set the project name
|
||||
project(
|
||||
libcifpp
|
||||
VERSION 8.0.1
|
||||
VERSION 9.0.0
|
||||
LANGUAGES CXX)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
@@ -63,25 +68,26 @@ elseif(MSVC)
|
||||
endif()
|
||||
|
||||
# Build documentation?
|
||||
option(BUILD_DOCUMENTATION "Build the documentation" OFF)
|
||||
set(BUILD_DOCUMENTATION OFF CACHE BOOL "Build the documentation")
|
||||
|
||||
# Optionally build a version to be installed inside CCP4
|
||||
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4")
|
||||
set(BUILD_FOR_CCP4 OFF CACHE BOOL "Build a version to be installed in CCP4")
|
||||
|
||||
# Optionally use libcurl to fetch compound files
|
||||
set(USE_CURL_FOR_CCD ON CACHE BOOL "Use curl to fetch missing CCD files")
|
||||
|
||||
# Building shared libraries?
|
||||
if(NOT(BUILD_FOR_CCP4 AND WIN32))
|
||||
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
|
||||
endif()
|
||||
|
||||
if(PROJECT_IS_TOP_LEVEL AND NOT BUILD_FOR_CCP4)
|
||||
# Lots of code depend on the availability of the components.cif file
|
||||
option(CIFPP_DOWNLOAD_CCD
|
||||
"Download the CCD file components.cif during installation" ON)
|
||||
set(CIFPP_DOWNLOAD_CCD ON CACHE BOOL "Download the CCD file components.cif during installation")
|
||||
|
||||
# An optional cron script can be installed to keep the data files up-to-date
|
||||
if(UNIX AND NOT APPLE)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT
|
||||
"Install the script to update CCD and dictionary files" ON)
|
||||
set(CIFPP_INSTALL_UPDATE_SCRIPT ON CACHE BOOL "Install the script to update CCD and dictionary files")
|
||||
endif()
|
||||
else()
|
||||
unset(CIFPP_DOWNLOAD_CCD)
|
||||
@@ -91,14 +97,13 @@ endif()
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry
|
||||
# operations table
|
||||
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
|
||||
option(CIFPP_RECREATE_SYMOP_DATA
|
||||
"Recreate SymOp data table in case it is out of date" ON)
|
||||
set(CIFPP_RECREATE_SYMOP_DATA ON CACHE BOOL "Recreate SymOp data table in case it is out of date")
|
||||
endif()
|
||||
|
||||
# CCP4 build
|
||||
if(BUILD_FOR_CCP4)
|
||||
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
|
||||
message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
|
||||
message(FATAL_ERROR "cifpp: A CCP4 built was requested but CCP4 was not sourced")
|
||||
else()
|
||||
list(PREPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
|
||||
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
|
||||
@@ -128,9 +133,6 @@ if(WIN32)
|
||||
add_definitions(-D _WIN32_WINNT=0x0501)
|
||||
endif()
|
||||
|
||||
# Man, this is 2024 we're living in...
|
||||
add_definitions(-DNOMINMAX)
|
||||
|
||||
# We do not want to write an export file for all our symbols...
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
@@ -164,7 +166,7 @@ int main(int argc, char *argv[]) { return 0; }"
|
||||
|
||||
if(GXX_LIBSTDCPP)
|
||||
message(
|
||||
STATUS "Testing for known regex bug, since you're using GNU libstdc++")
|
||||
STATUS "cifpp: Testing for known regex bug, since you're using GNU libstdc++")
|
||||
|
||||
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
|
||||
${CMAKE_CURRENT_BINARY_DIR}/test
|
||||
@@ -173,7 +175,7 @@ if(GXX_LIBSTDCPP)
|
||||
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
|
||||
message(
|
||||
STATUS
|
||||
"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
|
||||
"cifpp: You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
|
||||
)
|
||||
|
||||
find_package(Boost 1.80 QUIET COMPONENTS regex)
|
||||
@@ -221,11 +223,15 @@ if(MSVC)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(USE_CURL_FOR_CCD)
|
||||
find_package(CURL REQUIRED)
|
||||
endif()
|
||||
|
||||
find_package(ZLIB QUIET)
|
||||
find_package(Threads)
|
||||
|
||||
if(NOT ZLIB_FOUND)
|
||||
message(FATAL_ERROR "The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
|
||||
message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
|
||||
endif()
|
||||
|
||||
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
|
||||
@@ -242,12 +248,12 @@ else()
|
||||
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
|
||||
GIT_TAG 3.4.0
|
||||
INSTALL_COMMAND "")
|
||||
|
||||
|
||||
ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
|
||||
set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
|
||||
endif()
|
||||
|
||||
message(STATUS "Eigen include dir is ${EIGEN_INCLUDE_DIR}")
|
||||
message(STATUS "cifpp: Eigen include dir is ${EIGEN_INCLUDE_DIR}")
|
||||
|
||||
# Create a revision file, containing the current git version info
|
||||
include(VersionString)
|
||||
@@ -376,7 +382,13 @@ target_include_directories(
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
|
||||
|
||||
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
|
||||
target_link_libraries(cifpp
|
||||
PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>
|
||||
$<IF:$<BOOL:${USE_CURL_FOR_CCD}>,CURL::libcurl,>)
|
||||
|
||||
if (USE_CURL_FOR_CCD)
|
||||
target_compile_definitions(cifpp PUBLIC HAVE_CURL)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
@@ -390,7 +402,7 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
|
||||
|
||||
if(CCD_FILE_SIZE EQUAL 0)
|
||||
message(STATUS "Removing empty ${COMPONENTS_CIF} file")
|
||||
message(STATUS "cifpp: Removing empty ${COMPONENTS_CIF} file")
|
||||
file(REMOVE "${COMPONENTS_CIF}")
|
||||
endif()
|
||||
endif()
|
||||
@@ -429,7 +441,7 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
|
||||
if(CCD_FETCH_STATUS_CODE)
|
||||
message(
|
||||
FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
|
||||
FATAL_ERROR "cifpp: Error trying to download CCD file: ${CCD_FETCH_STATUS}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
@@ -493,7 +505,7 @@ file(GLOB OLD_CONFIG_FILES
|
||||
|
||||
if(OLD_CONFIG_FILES)
|
||||
message(
|
||||
STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
|
||||
STATUS "cifpp: Installation will remove old config files: ${OLD_CONFIG_FILES}")
|
||||
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
|
||||
endif()
|
||||
|
||||
@@ -559,7 +571,7 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
|
||||
WORLD_READ)
|
||||
else()
|
||||
message(FATAL_ERROR "Don't know where to install the update script")
|
||||
message(FATAL_ERROR "cifpp: Don't know where to install the update script")
|
||||
endif()
|
||||
|
||||
# a config file, to make it complete
|
||||
@@ -573,7 +585,7 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
|
||||
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR})
|
||||
install(
|
||||
CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
|
||||
CODE "message(\"cifpp: A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
|
||||
)
|
||||
|
||||
install(DIRECTORY DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/libcifpp/cache-update.d)
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
Version 9.0.0
|
||||
- Rename fields of cif::mm::polymer to match the naming
|
||||
in mmcif_pdbx.dic. Also, related, fix building mm::structure
|
||||
using the correct mapping between atom_site and residues.
|
||||
- _atom_site.auth_alt_id does not exist, it should be
|
||||
_atom_site.pdbx_auth_alt_id of course.
|
||||
- Added a more lightweight fixup for mmcif_pdbx files
|
||||
that lack certain categories.
|
||||
|
||||
Version 8.0.1
|
||||
- Fix cif::mm::structure::cleanup_empty_categories, removed too much
|
||||
- Add default value for B_iso_or_equiv in residue::create_new_atom
|
||||
|
||||
@@ -8,5 +8,6 @@ include(CMakeFindDependencyMacro)
|
||||
|
||||
find_dependency(Threads)
|
||||
find_dependency(ZLIB REQUIRED)
|
||||
find_dependency(CURL REQUIRED)
|
||||
|
||||
check_required_components(cifpp)
|
||||
|
||||
@@ -223,6 +223,11 @@ class category
|
||||
/// @return Returns true is all validations pass
|
||||
bool validate_links() const;
|
||||
|
||||
/**
|
||||
* @brief Strip removes items from this category that are invalid according to the assigned validator
|
||||
*/
|
||||
void strip();
|
||||
|
||||
/// @brief Equality operator, returns true if @a rhs is equal to this
|
||||
/// @param rhs The object to compare with
|
||||
/// @return True if the data contained is equal
|
||||
|
||||
@@ -196,6 +196,23 @@ class compound
|
||||
// --------------------------------------------------------------------
|
||||
// Factory class for compound and Link objects
|
||||
|
||||
/// @brief Options available to configure a compound factory
|
||||
struct compound_factory_options
|
||||
{
|
||||
/// If you have a multithreaded application and want to have different
|
||||
/// compounds in each thread (e.g. a web service processing user requests
|
||||
/// with different sets of compounds) you can set this flag to true.
|
||||
bool use_thread_local_instance_only = false;
|
||||
|
||||
#if HAVE_CURL
|
||||
// Various locations for chem_comp data files:
|
||||
// - ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp
|
||||
// - https://files.rcsb.org/pub/pdb/refdata/chem_comp/
|
||||
|
||||
std::string remote_chem_comp_url = "ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp";
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Use the compound_factory singleton instance to create compound objects
|
||||
|
||||
class compound_factory
|
||||
@@ -208,8 +225,12 @@ class compound_factory
|
||||
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
|
||||
/// flag to true.
|
||||
|
||||
[[deprecated("Use version with compound_factory_options instead")]]
|
||||
static void init(bool useThreadLocalInstanceOnly);
|
||||
|
||||
/// \brief Initialise a singleton instance.
|
||||
static void init(compound_factory_options options = {});
|
||||
|
||||
/// Return the singleton instance. If initialized with local threads, this is the
|
||||
/// instance for the current thread.
|
||||
static compound_factory &instance();
|
||||
@@ -239,6 +260,8 @@ class compound_factory
|
||||
void push_dictionary(const file &file);
|
||||
|
||||
/// Remove the last pushed dictionary
|
||||
|
||||
// TODO: check if the popped dict is the correct one
|
||||
void pop_dictionary();
|
||||
|
||||
/// Return whether @a res_name is a valid and known peptide
|
||||
@@ -298,7 +321,7 @@ class compound_factory
|
||||
|
||||
static std::unique_ptr<compound_factory> s_instance;
|
||||
static thread_local std::unique_ptr<compound_factory> tl_instance;
|
||||
static bool s_use_thread_local_instance;
|
||||
static compound_factory_options s_options;
|
||||
|
||||
std::shared_ptr<compound_factory_impl> m_impl;
|
||||
};
|
||||
@@ -320,6 +343,9 @@ class compound_factory
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
|
||||
// TODO: check if pushed and popped dicts are the same!
|
||||
|
||||
class compound_source
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -128,15 +128,6 @@ class datablock : public std::list<category>
|
||||
*/
|
||||
bool is_valid() const;
|
||||
|
||||
/**
|
||||
* @brief Validates the content of this datablock and all its content
|
||||
* and updates or removes the audit_conform category to match the result.
|
||||
*
|
||||
* @return true If the content is valid
|
||||
* @return false If the content is not valid
|
||||
*/
|
||||
bool is_valid();
|
||||
|
||||
/**
|
||||
* @brief Validates all contained data for valid links between parents and children
|
||||
* as defined in the validator
|
||||
@@ -146,6 +137,14 @@ class datablock : public std::list<category>
|
||||
*/
|
||||
bool validate_links() const;
|
||||
|
||||
/**
|
||||
* @brief Strip removes all categories and items that are invalid according
|
||||
* to the assigned validator. Will also add a valid audit_conform block.
|
||||
*
|
||||
* @return true if the remaining datablock is valid
|
||||
*/
|
||||
bool strip();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
@@ -184,6 +183,15 @@ class datablock : public std::list<category>
|
||||
*/
|
||||
const category *get(std::string_view name) const;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Return true if this datablock contains a non-empty category
|
||||
*/
|
||||
bool contains(std::string_view name) const
|
||||
{
|
||||
return get(name) != nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Tries to find a category with name @a name and will create a
|
||||
* new one if it is not found. The result is a tuple of an iterator
|
||||
|
||||
@@ -106,8 +106,6 @@ class atom
|
||||
|
||||
atom_impl(const atom_impl &i) = default;
|
||||
|
||||
void prefetch();
|
||||
|
||||
int compare(const atom_impl &b) const;
|
||||
|
||||
// bool getAnisoU(float anisou[6]) const;
|
||||
@@ -345,7 +343,7 @@ class atom
|
||||
std::string get_auth_asym_id() const { return get_property("auth_asym_id"); } ///< Return the auth_asym_id property
|
||||
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); } ///< Return the auth_seq_id property
|
||||
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); } ///< Return the auth_atom_id property
|
||||
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); } ///< Return the auth_alt_id property
|
||||
std::string get_auth_alt_id() const { return get_property("pdbx_auth_alt_id"); } ///< Return the auth_alt_id property
|
||||
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); } ///< Return the auth_comp_id property
|
||||
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } ///< Return the pdb_ins_code property
|
||||
|
||||
@@ -481,8 +479,8 @@ class residue
|
||||
, m_compound_id(compoundID)
|
||||
, m_asym_id(asymID)
|
||||
, m_seq_id(seqID)
|
||||
, m_auth_asym_id(authAsymID)
|
||||
, m_auth_seq_id(authSeqID)
|
||||
, m_pdb_strand_id(authAsymID)
|
||||
, m_pdb_seq_num(authSeqID)
|
||||
, m_pdb_ins_code(pdbInsCode)
|
||||
{
|
||||
}
|
||||
@@ -509,9 +507,9 @@ class residue
|
||||
const std::string &get_asym_id() const { return m_asym_id; } ///< Return the asym_id
|
||||
int get_seq_id() const { return m_seq_id; } ///< Return the seq_id
|
||||
|
||||
const std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the auth_asym_id
|
||||
const std::string get_auth_seq_id() const { return m_auth_seq_id; } ///< Return the auth_seq_id
|
||||
std::string get_pdb_ins_code() const { return m_pdb_ins_code; } ///< Return the pdb_ins_code
|
||||
const std::string get_pdb_strand_id() const { return m_pdb_strand_id; } ///< Return the pdb_strand_id
|
||||
const std::string get_pdb_seq_num() const { return m_pdb_seq_num; } ///< Return the pdb_seq_num
|
||||
std::string get_pdb_ins_code() const { return m_pdb_ins_code; } ///< Return the pdb_ins_code
|
||||
|
||||
const std::string &get_compound_id() const { return m_compound_id; } ///< Return the compound_id
|
||||
void set_compound_id(const std::string &id) { m_compound_id = id; } ///< Set the compound_id to @a id
|
||||
@@ -580,7 +578,7 @@ class residue
|
||||
m_seq_id == rhs.m_seq_id and
|
||||
m_asym_id == rhs.m_asym_id and
|
||||
m_compound_id == rhs.m_compound_id and
|
||||
m_auth_seq_id == rhs.m_auth_seq_id);
|
||||
m_pdb_seq_num == rhs.m_pdb_seq_num);
|
||||
}
|
||||
|
||||
/// @brief Create a new atom and add it to the list
|
||||
@@ -594,7 +592,7 @@ class residue
|
||||
structure *m_structure = nullptr;
|
||||
std::string m_compound_id, m_asym_id;
|
||||
int m_seq_id = 0;
|
||||
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
|
||||
std::string m_pdb_strand_id, m_pdb_seq_num, m_pdb_ins_code;
|
||||
std::vector<atom> m_atoms;
|
||||
/** @endcond */
|
||||
};
|
||||
@@ -714,15 +712,15 @@ class polymer : public std::vector<monomer>
|
||||
|
||||
structure *get_structure() const { return m_structure; } ///< Return the structure
|
||||
|
||||
std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
|
||||
std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the PDB chain ID, actually
|
||||
std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
|
||||
std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
|
||||
std::string get_pdb_strand_id() const { return m_pdb_strand_id; } ///< Return the PDB chain ID, actually
|
||||
std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
|
||||
|
||||
private:
|
||||
structure *m_structure;
|
||||
std::string m_entity_id;
|
||||
std::string m_asym_id;
|
||||
std::string m_auth_asym_id;
|
||||
std::string m_pdb_strand_id;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -760,7 +758,7 @@ class sugar : public residue
|
||||
int num() const
|
||||
{
|
||||
int result;
|
||||
auto r = std::from_chars(m_auth_seq_id.data(), m_auth_seq_id.data() + m_auth_seq_id.length(), result);
|
||||
auto r = std::from_chars(m_pdb_seq_num.data(), m_pdb_seq_num.data() + m_pdb_seq_num.length(), result);
|
||||
if ((bool)r.ec)
|
||||
throw std::runtime_error("The auth_seq_id should be a number for a sugar");
|
||||
return result;
|
||||
@@ -859,19 +857,38 @@ class branch : public std::vector<sugar>
|
||||
std::string m_asym_id, m_entity_id;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief A still very limited set of options for reading structures
|
||||
enum class StructureOpenOptions
|
||||
/** @brief Enumeration for controlling atom selection based on occupancy. */
|
||||
enum class occupancy_policy
|
||||
{
|
||||
SkipHydrogen = 1 << 0 ///< Do not include hydrogen atoms in the structure object
|
||||
/** @brief Include all atoms regardless of their occupancy factor. */
|
||||
ALL = 0,
|
||||
|
||||
/** @brief Select only alternate atoms with the maximum occupancy factor.
|
||||
* If multiple atoms have the same maximum occupancy, choose the one with the minimum B-factor.
|
||||
* If multiple atoms share both the maximum occupancy and the minimum B-factor, select the first encountered atom.
|
||||
*/
|
||||
MAX = 1,
|
||||
|
||||
/** @brief Select only alternate atoms with the minimum occupancy factor.
|
||||
* Similar to MAX, if multiple atoms have the same minimum occupancy, choose the one with the minimum B-factor.
|
||||
* If multiple atoms share both the minimum occupancy and the minimum B-factor, select the first encountered atom.
|
||||
*/
|
||||
MIN = 2,
|
||||
|
||||
/** @brief Exclude all atoms with an occupancy factor greater than zero. */
|
||||
UNOCCUPIED = 3
|
||||
};
|
||||
|
||||
/// \brief A way to combine two options. Not very useful as there is only one...
|
||||
constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
|
||||
struct structure_open_options
|
||||
{
|
||||
return static_cast<int>(a) bitand static_cast<int>(b);
|
||||
}
|
||||
bool skip_hydrogen = false; ///< Do not include hydrogen atoms in the structure object
|
||||
bool skip_hetatom = false; ///< Do not include HET atoms in the structure object
|
||||
bool skip_water = false; ///< Do not include water atoms in the structure object
|
||||
occupancy_policy occupancy_mode = occupancy_policy::ALL; ///< By default, the occupancy policy is set to occupancy_policy::ALL
|
||||
std::vector<std::string> asyms; ///< The asyms to load, if empty load all
|
||||
std::optional<float> min_b_factor; ///< Only load atoms with at least this b_factor
|
||||
std::optional<float> max_b_factor; ///< Only load atoms with at most this b_factor
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
@@ -885,10 +902,10 @@ class structure
|
||||
{
|
||||
public:
|
||||
/// \brief Read the structure from cif::file @a p
|
||||
structure(file &p, std::size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
structure(file &p, std::size_t modelNr = 1, structure_open_options options = {});
|
||||
|
||||
/// \brief Load the structure from already parsed mmCIF data in @a db
|
||||
structure(datablock &db, std::size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
structure(datablock &db, std::size_t modelNr = 1, structure_open_options options = {});
|
||||
|
||||
/** @cond */
|
||||
structure(structure &&s) = default;
|
||||
@@ -1118,7 +1135,7 @@ class structure
|
||||
friend polymer;
|
||||
friend residue;
|
||||
|
||||
void load_atoms_for_model(StructureOpenOptions options);
|
||||
void load_atoms_for_model(structure_open_options options);
|
||||
|
||||
std::string insert_compound(const std::string &compoundID, bool is_entity);
|
||||
|
||||
|
||||
@@ -104,6 +104,27 @@ inline void write(const std::filesystem::path &p, const file &f)
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @brief Quickly fix a PDB file that lacks some often needed categories
|
||||
*
|
||||
* This differs from reconstruct_pdbx which does a much more thorough job
|
||||
*
|
||||
* \param pdbx_file The cif::file that hopefully contains some valid data
|
||||
*/
|
||||
|
||||
void fixup_pdbx(file &pdbx_file);
|
||||
|
||||
/**
|
||||
* @brief Quickly fix a PDB file that lacks some often needed categories
|
||||
*
|
||||
* This differs from reconstruct_pdbx which does a much more thorough job
|
||||
*
|
||||
* \param pdbx_file The cif::file that hopefully contains some valid data
|
||||
* \param v The validator to use
|
||||
*/
|
||||
|
||||
void fixup_pdbx(file &pdbx_file, const validator &v);
|
||||
|
||||
/** \brief Reconstruct all missing categories for an assumed PDBx file.
|
||||
*
|
||||
* Some people believe that simply dumping some atom records is enough.
|
||||
|
||||
@@ -9,7 +9,7 @@ _datablock.description
|
||||
#
|
||||
_dictionary.title mmcif_pdbx.dic
|
||||
_dictionary.datablock_id mmcif_pdbx.dic
|
||||
_dictionary.version 5.403
|
||||
_dictionary.version 5.404
|
||||
#
|
||||
loop_
|
||||
_dictionary_history.version
|
||||
@@ -3292,6 +3292,18 @@ Changes (ep):
|
||||
+ Add 'M' to _em_software.name enumeration.
|
||||
;
|
||||
|
||||
5.404 2025-06-01
|
||||
;
|
||||
Changes (ep):
|
||||
+ Add DeepEMhancer to _em_software.name enumeration
|
||||
+ Add HexAuFoil to _em_sample_support.grid_type enumeration
|
||||
+ Add "PSI JUNGFRAU 9M" and "PSI JUNGFRAU 10M" detectors to
|
||||
_diffrn_detector.type
|
||||
+ Add "N6-benzoyllysine", "N6-isonicotinyllysine",
|
||||
and "N6-methacryllysine" to enumeration list for
|
||||
_pdbx_chem_comp_pcm.type and _pdbx_modification_feature.type
|
||||
;
|
||||
|
||||
#
|
||||
loop_
|
||||
_sub_category.id
|
||||
@@ -3838,13 +3850,13 @@ _pdbx_dictionary_component.datablock_id
|
||||
_pdbx_dictionary_component.dictionary_component_id
|
||||
_pdbx_dictionary_component.title
|
||||
_pdbx_dictionary_component.version
|
||||
mmcif_pdbx-base.dic mmcif_pdbx-base.dic "mmCIF/PDBx base dictionary" 0.39
|
||||
mmcif_pdbx-base.dic mmcif_pdbx-base.dic "mmCIF/PDBx base dictionary" 0.40
|
||||
mmcif_xfel_extensions-v3.dic mmcif_xfel_extensions-v3.dic "PDBx/mmCIF XFELDictionary License Extension" 0.0.2
|
||||
mmcif_pdbx_audit_support-extension.dic mmcif_pdbx_audit_support-extension.dic "mmCIF/PDBx Audit support extension" 0.24
|
||||
mmcif_pdbx_sifts.dic mmcif_pdbx_sifts.dic "PDBx/mmCIF Dictionary Sifts Extension" 0.0.2
|
||||
mmcif_pdbx_license.dic mmcif_pdbx_license.dic "PDBx/mmCIF Dictionary License Extension" 0.0.1
|
||||
initial-model-extension.dic initial-model-extension.dic "PDBx/mmCIF Initial model extension" 0.10
|
||||
ptm-extension.dic ptm-extension.dic "PDBx/mmCIF PTM extension" 0.10
|
||||
ptm-extension.dic ptm-extension.dic "PDBx/mmCIF PTM extension" 0.11
|
||||
#
|
||||
loop_
|
||||
_pdbx_dictionary_component_history.dictionary_component_id
|
||||
@@ -4181,6 +4193,15 @@ Changes (ep):
|
||||
+ Add 'M' to _em_software.name enumeration.
|
||||
;
|
||||
|
||||
mmcif_pdbx-base.dic 0.40 2025-06-01
|
||||
;
|
||||
Changes (ep):
|
||||
+ Add DeepEMhancer to _em_software.name enumeration
|
||||
+ Add HexAuFoil to _em_sample_support.grid_type enumeration
|
||||
+ Add "PSI JUNGFRAU 9M" and "PSI JUNGFRAU 10M" detectors to
|
||||
_diffrn_detector.type
|
||||
;
|
||||
|
||||
mmcif_xfel_extensions-v3.dic 0.0.1 2023-05-31
|
||||
;
|
||||
Changes (ep):
|
||||
@@ -4606,6 +4627,16 @@ ptm-extension.dic 0.10 2024-11-26
|
||||
_pdbx_modification_feature.type
|
||||
;
|
||||
|
||||
ptm-extension.dic 0.11 2024-06-01
|
||||
;
|
||||
Changes (dh/ep)
|
||||
+ added "N6-benzoyllysine", "N6-isonicotinyllysine",
|
||||
and "N6-methacryllysine"
|
||||
type to enumeration list for
|
||||
_pdbx_chem_comp_pcm.type
|
||||
_pdbx_modification_feature.type
|
||||
;
|
||||
|
||||
#
|
||||
loop_
|
||||
_pdbx_item_linked_group.category_id
|
||||
@@ -21911,6 +21942,8 @@ save__diffrn_detector.type
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 1M" PIXEL
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 4M" PIXEL
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 8M" PIXEL
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 9M" PIXEL
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 10M" PIXEL
|
||||
"_diffrn_detector.type" "PSI JUNGFRAU 16M" PIXEL
|
||||
"_diffrn_detector.type" "PSI PILATUS 6M" PIXEL
|
||||
"_diffrn_detector.type" "RAYONIX MX-225" CCD
|
||||
@@ -117761,6 +117794,7 @@ save__em_sample_support.grid_type
|
||||
"_em_sample_support.grid_type" "Quantifoil Active R2/1" .
|
||||
"_em_sample_support.grid_type" "Quantifoil Active R1.6/0.9" .
|
||||
"_em_sample_support.grid_type" "Quantifoil Active R1.2/0.8" .
|
||||
"_em_sample_support.grid_type" HexAuFoil .
|
||||
"_em_sample_support.grid_type" UltrAuFoil .
|
||||
"_em_sample_support.grid_type" "UltrAuFoil R0./1" .
|
||||
"_em_sample_support.grid_type" "UltrAuFoil R1.2/1.3" .
|
||||
@@ -126147,6 +126181,7 @@ save__em_software.name
|
||||
"_em_software.name" CTFPHASEFLIP .
|
||||
"_em_software.name" CTFTILT .
|
||||
"_em_software.name" DE-IM .
|
||||
"_em_software.name" DeepEMhancer .
|
||||
"_em_software.name" DIALS .
|
||||
"_em_software.name" DigitalMicrograph .
|
||||
"_em_software.name" DireX .
|
||||
@@ -168351,6 +168386,9 @@ save__pdbx_chem_comp_pcm.type
|
||||
Methylsulfanylation .
|
||||
Methylsulfation .
|
||||
Myristoylation .
|
||||
N6-benzoyllysine .
|
||||
N6-isonicotinyllysine .
|
||||
N6-methacryllysine .
|
||||
"N-pyruvic acid 2-iminylation" .
|
||||
N-methylcarbamoylation .
|
||||
Nitration .
|
||||
@@ -169337,6 +169375,9 @@ save__pdbx_modification_feature.type
|
||||
Methylsulfanylation .
|
||||
Methylsulfation .
|
||||
Myristoylation .
|
||||
N6-benzoyllysine .
|
||||
N6-isonicotinyllysine .
|
||||
N6-methacryllysine .
|
||||
"N-pyruvic acid 2-iminylation" .
|
||||
N-methylcarbamoylation .
|
||||
Nitration .
|
||||
|
||||
@@ -914,6 +914,24 @@ bool category::validate_links() const
|
||||
return result;
|
||||
}
|
||||
|
||||
void category::strip()
|
||||
{
|
||||
std::vector<std::string> to_be_removed;
|
||||
|
||||
for (auto &item : m_items)
|
||||
{
|
||||
if (item.m_validator == nullptr)
|
||||
to_be_removed.push_back(item.m_name);
|
||||
}
|
||||
|
||||
for (auto item : to_be_removed)
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::clog << "Dropping item " << m_name << '.' << item << '\n';
|
||||
remove_item(item);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
row_handle category::operator[](const key_type &key)
|
||||
|
||||
@@ -26,6 +26,10 @@
|
||||
|
||||
#include "cif++.hpp"
|
||||
|
||||
#if HAVE_CURL
|
||||
# include <curl/curl.h>
|
||||
#endif
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
@@ -140,7 +144,7 @@ compound::compound(cif::datablock &db)
|
||||
|
||||
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
|
||||
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
|
||||
|
||||
|
||||
if (one_letter_code.length() == 1)
|
||||
m_one_letter_code = one_letter_code.front();
|
||||
|
||||
@@ -159,7 +163,7 @@ compound::compound(cif::datablock &db)
|
||||
if (stereo_config.empty())
|
||||
atom.stereo_config = stereo_config_type::N;
|
||||
else
|
||||
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
|
||||
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
|
||||
m_atoms.push_back(std::move(atom));
|
||||
}
|
||||
|
||||
@@ -172,7 +176,7 @@ compound::compound(cif::datablock &db)
|
||||
if (valueOrder.empty())
|
||||
bond.type = bond_type::sing;
|
||||
else
|
||||
bond.type = parse_bond_type_from_string(valueOrder);
|
||||
bond.type = parse_bond_type_from_string(valueOrder);
|
||||
m_bonds.push_back(std::move(bond));
|
||||
}
|
||||
}
|
||||
@@ -231,12 +235,12 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
|
||||
|
||||
bool compound::is_peptide() const
|
||||
{
|
||||
return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
|
||||
return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
|
||||
}
|
||||
|
||||
bool compound::is_base() const
|
||||
{
|
||||
return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
|
||||
return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -299,7 +303,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
compound *result = nullptr;
|
||||
|
||||
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
|
||||
{
|
||||
result = impl->create(id);
|
||||
@@ -363,7 +367,9 @@ compound *compound_factory_impl::create(const std::string &id)
|
||||
if (m_missing.contains(id))
|
||||
return nullptr;
|
||||
|
||||
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
|
||||
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
|
||||
{ return c->id() == id; });
|
||||
i != m_compounds.end())
|
||||
return *i;
|
||||
|
||||
compound *result = nullptr;
|
||||
@@ -454,7 +460,6 @@ class local_compound_factory_impl : public compound_factory_impl
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
|
||||
compound *construct_compound(const datablock &db, const std::string &id, const std::string &name, const std::string &three_letter_code, const std::string &group);
|
||||
|
||||
cif::file m_local_file;
|
||||
@@ -465,7 +470,9 @@ compound *local_compound_factory_impl::create(const std::string &id)
|
||||
if (m_missing.contains(id))
|
||||
return nullptr;
|
||||
|
||||
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
|
||||
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
|
||||
{ return c->id() == id; });
|
||||
i != m_compounds.end())
|
||||
return *i;
|
||||
|
||||
compound *result = nullptr;
|
||||
@@ -507,12 +514,10 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
|
||||
|
||||
float formula_weight = 0;
|
||||
int formal_charge = 0;
|
||||
std::map<std::string,std::size_t> formula_data;
|
||||
std::map<std::string, std::size_t> formula_data;
|
||||
|
||||
for (std::size_t ord = 1; const auto &[atom_id, type_symbol, type, charge, x, y, z, xi, yi, zi] :
|
||||
rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int,
|
||||
std::optional<float>, std::optional<float>, std::optional<float>,
|
||||
std::optional<float>, std::optional<float>, std::optional<float>>(
|
||||
rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>>(
|
||||
"atom_id", "type_symbol", "type", "charge",
|
||||
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z",
|
||||
"pdbx_model_Cartn_x_ideal", "pdbx_model_Cartn_y_ideal", "pdbx_model_Cartn_z_ideal"))
|
||||
@@ -522,16 +527,14 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
|
||||
|
||||
formula_data[type_symbol] += 1;
|
||||
|
||||
db["chem_comp_atom"].emplace({
|
||||
{ "comp_id", id },
|
||||
db["chem_comp_atom"].emplace({ { "comp_id", id },
|
||||
{ "atom_id", atom_id },
|
||||
{ "type_symbol", type_symbol },
|
||||
{ "charge", charge },
|
||||
{ "model_Cartn_x", x.has_value() ? x : xi, 3 },
|
||||
{ "model_Cartn_y", y.has_value() ? y : yi, 3 },
|
||||
{ "model_Cartn_z", z.has_value() ? z : zi, 3 },
|
||||
{ "pdbx_ordinal", ord++ }
|
||||
});
|
||||
{ "model_Cartn_x", x.has_value() ? x : xi, 3 },
|
||||
{ "model_Cartn_y", y.has_value() ? y : yi, 3 },
|
||||
{ "model_Cartn_z", z.has_value() ? z : zi, 3 },
|
||||
{ "pdbx_ordinal", ord++ } });
|
||||
|
||||
formal_charge += charge;
|
||||
}
|
||||
@@ -548,21 +551,19 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
|
||||
else if (cif::iequals(type, "triple") or cif::iequals(type, "trip"))
|
||||
value_order = "TRIP";
|
||||
|
||||
db["chem_comp_bond"].emplace({
|
||||
{ "comp_id", id },
|
||||
db["chem_comp_bond"].emplace({ { "comp_id", id },
|
||||
{ "atom_id_1", atom_id_1 },
|
||||
{ "atom_id_2", atom_id_2 },
|
||||
{ "value_order", value_order },
|
||||
{ "pdbx_aromatic_flag", aromatic },
|
||||
// TODO: fetch stereo_config info from chem_comp_chir
|
||||
{ "pdbx_ordinal", ord++ }
|
||||
});
|
||||
{ "pdbx_ordinal", ord++ } });
|
||||
}
|
||||
|
||||
db.emplace_back(rdb["pdbx_chem_comp_descriptor"]);
|
||||
|
||||
std::string formula;
|
||||
for (bool first = true; const auto &[symbol, count]: formula_data)
|
||||
for (bool first = true; const auto &[symbol, count] : formula_data)
|
||||
{
|
||||
if (std::exchange(first, false))
|
||||
formula += ' ';
|
||||
@@ -581,15 +582,13 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
|
||||
else
|
||||
type = "NON-POLYMER";
|
||||
|
||||
db["chem_comp"].emplace({
|
||||
{ "id", id },
|
||||
db["chem_comp"].emplace({ { "id", id },
|
||||
{ "name", name },
|
||||
{ "type", type },
|
||||
{ "formula", formula },
|
||||
{ "pdbx_formal_charge", formal_charge },
|
||||
{ "formula_weight", formula_weight },
|
||||
{ "three_letter_code", three_letter_code }
|
||||
});
|
||||
{ "three_letter_code", three_letter_code } });
|
||||
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
@@ -602,11 +601,16 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
|
||||
|
||||
std::unique_ptr<compound_factory> compound_factory::s_instance;
|
||||
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
|
||||
bool compound_factory::s_use_thread_local_instance;
|
||||
compound_factory_options compound_factory::s_options;
|
||||
|
||||
void compound_factory::init(bool useThreadLocalInstanceOnly)
|
||||
{
|
||||
s_use_thread_local_instance = useThreadLocalInstanceOnly;
|
||||
init({ .use_thread_local_instance_only = useThreadLocalInstanceOnly });
|
||||
}
|
||||
|
||||
void compound_factory::init(compound_factory_options options)
|
||||
{
|
||||
s_options = options;
|
||||
}
|
||||
|
||||
compound_factory::compound_factory()
|
||||
@@ -625,7 +629,7 @@ compound_factory::~compound_factory()
|
||||
|
||||
compound_factory &compound_factory::instance()
|
||||
{
|
||||
if (s_use_thread_local_instance)
|
||||
if (s_options.use_thread_local_instance_only)
|
||||
{
|
||||
if (not tl_instance)
|
||||
tl_instance.reset(new compound_factory());
|
||||
@@ -641,7 +645,7 @@ compound_factory &compound_factory::instance()
|
||||
|
||||
void compound_factory::clear()
|
||||
{
|
||||
if (s_use_thread_local_instance)
|
||||
if (s_options.use_thread_local_instance_only)
|
||||
tl_instance.reset(nullptr);
|
||||
else
|
||||
s_instance.reset();
|
||||
@@ -719,7 +723,7 @@ bool compound_factory::is_peptide(std::string_view res_name) const
|
||||
bool result = is_std_peptide(res_name);
|
||||
if (not result and m_impl)
|
||||
{
|
||||
auto compound = const_cast<compound_factory&>(*this).create(res_name);
|
||||
auto compound = const_cast<compound_factory &>(*this).create(res_name);
|
||||
result = compound != nullptr and compound->is_peptide();
|
||||
}
|
||||
return result;
|
||||
@@ -731,7 +735,7 @@ bool compound_factory::is_base(std::string_view res_name) const
|
||||
bool result = is_std_base(res_name);
|
||||
if (not result and m_impl)
|
||||
{
|
||||
auto compound = const_cast<compound_factory&>(*this).create(res_name);
|
||||
auto compound = const_cast<compound_factory &>(*this).create(res_name);
|
||||
result = compound != nullptr and compound->is_base();
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -78,17 +78,41 @@ bool datablock::is_valid() const
|
||||
return result;
|
||||
}
|
||||
|
||||
bool datablock::is_valid()
|
||||
bool datablock::validate_links() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
throw std::runtime_error("Validator not specified for datablock data_" + name());
|
||||
|
||||
bool result = true;
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.is_valid() and result;
|
||||
const_cast<category &>(cat).update_links(*this);
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.validate_links() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool datablock::strip()
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
// remove all categories that have no validator
|
||||
erase(std::remove_if(begin(), end(), [](category &c) {
|
||||
bool result = false;
|
||||
if (c.get_cat_validator() == nullptr)
|
||||
{
|
||||
if (cif::VERBOSE > 0)
|
||||
std::clog << "Dropping category " << c.name() << '\n';
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}), end());
|
||||
|
||||
// then strip the remaining categories
|
||||
for (auto &cat : *this)
|
||||
cat.strip();
|
||||
|
||||
// Add or remove the audit_conform block here.
|
||||
if (result)
|
||||
if (is_valid())
|
||||
{
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
@@ -101,22 +125,7 @@ bool datablock::is_valid()
|
||||
}
|
||||
}
|
||||
else
|
||||
erase(std::find_if(begin(), end(), [](category &cat)
|
||||
{ return cat.name() == "audit_conform"; }),
|
||||
end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool datablock::validate_links() const
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
for (auto &cat : *this)
|
||||
const_cast<category &>(cat).update_links(*this);
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.validate_links() and result;
|
||||
result = false;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
398
src/model.cpp
398
src/model.cpp
@@ -102,18 +102,18 @@ void atom::atom_impl::set_property(const std::string_view name, const std::strin
|
||||
r.assign(name, value, true, true);
|
||||
}
|
||||
|
||||
// int atom::atom_impl::compare(const atom_impl &b) const
|
||||
// {
|
||||
// int d = m_asym_id.compare(b.m_asym_id);
|
||||
// if (d == 0)
|
||||
// d = m_seq_id - b.m_seq_id;
|
||||
// if (d == 0)
|
||||
// d = m_auth_seq_id.compare(b.m_auth_seq_id);
|
||||
// if (d == 0)
|
||||
// d = mAtom_id.compare(b.mAtom_id);
|
||||
int atom::atom_impl::compare(const atom_impl &b) const
|
||||
{
|
||||
int d = get_property("label_asym_id").compare(b.get_property("label_asym_id"));
|
||||
if (d == 0)
|
||||
d = get_property_int("label_seq_id") - b.get_property_int("label_seq_id");
|
||||
if (d == 0)
|
||||
d = get_property_int("auth_seq_id") - b.get_property_int("auth_seq_id");
|
||||
if (d == 0)
|
||||
d = get_property("label_atom_id").compare(b.get_property("label_atom_id"));
|
||||
|
||||
// return d;
|
||||
// }
|
||||
return d;
|
||||
}
|
||||
|
||||
// bool atom::atom_impl::getAnisoU(float anisou[6]) const
|
||||
// {
|
||||
@@ -149,145 +149,6 @@ int atom::atom_impl::get_charge() const
|
||||
return formalCharge.value_or(0);
|
||||
}
|
||||
|
||||
// const Compound *atom::atom_impl::compound() const
|
||||
// {
|
||||
// if (mCompound == nullptr)
|
||||
// {
|
||||
// std::string compID = get_property("label_comp_id");
|
||||
|
||||
// mCompound = compound_factory::instance().create(compID);
|
||||
// }
|
||||
|
||||
// return mCompound;
|
||||
// }
|
||||
|
||||
// const std::string atom::atom_impl::get_property(const std::string_view name) const
|
||||
// {
|
||||
// for (auto &&[item_name, ref] : mCachedRefs)
|
||||
// {
|
||||
// if (item_name == name)
|
||||
// return ref.as<std::string>();
|
||||
// }
|
||||
|
||||
// mCachedRefs.emplace_back(name, const_cast<Row &>(mRow)[name]);
|
||||
// return std::get<1>(mCachedRefs.back()).as<std::string>();
|
||||
// }
|
||||
|
||||
// void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
|
||||
// {
|
||||
// for (auto &&[item_name, ref] : mCachedRefs)
|
||||
// {
|
||||
// if (item_name != name)
|
||||
// continue;
|
||||
|
||||
// ref = value;
|
||||
// return;
|
||||
// }
|
||||
|
||||
// mCachedRefs.emplace_back(name, mRow[name]);
|
||||
// std::get<1>(mCachedRefs.back()) = value;
|
||||
// }
|
||||
|
||||
// const Row atom::getRowAniso() const
|
||||
// {
|
||||
// auto &db = m_impl->m_db;
|
||||
// auto cat = db.get("atom_site_anisotrop");
|
||||
// if (not cat)
|
||||
// return {};
|
||||
// else
|
||||
// return cat->find1(key("id") == m_impl->m_id);
|
||||
// }
|
||||
|
||||
// float atom::uIso() const
|
||||
// {
|
||||
// float result;
|
||||
|
||||
// if (not get_property<std::string>("U_iso_or_equiv").empty())
|
||||
// result = get_property<float>("U_iso_or_equiv");
|
||||
// else if (not get_property<std::string>("B_iso_or_equiv").empty())
|
||||
// result = get_property<float>("B_iso_or_equiv") / static_cast<float>(8 * kPI * kPI);
|
||||
// else
|
||||
// throw std::runtime_error("Missing B_iso or U_iso");
|
||||
|
||||
// return result;
|
||||
// }
|
||||
|
||||
// const Compound &atom::compound() const
|
||||
// {
|
||||
// auto result = impl().compound();
|
||||
|
||||
// if (result == nullptr)
|
||||
// {
|
||||
// if (VERBOSE > 0)
|
||||
// std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << '\n';
|
||||
|
||||
// throw std::runtime_error("no compound");
|
||||
// }
|
||||
|
||||
// return *result;
|
||||
// }
|
||||
|
||||
// std::string atom::labelEntityID() const
|
||||
// {
|
||||
// return get_property<std::string>("label_entity_id");
|
||||
// }
|
||||
|
||||
// std::string atom::authAtom_id() const
|
||||
// {
|
||||
// return get_property<std::string>("auth_atom_id");
|
||||
// }
|
||||
|
||||
// std::string atom::authCompID() const
|
||||
// {
|
||||
// return get_property<std::string>("auth_comp_id");
|
||||
// }
|
||||
|
||||
// std::string atom::get_auth_asym_id() const
|
||||
// {
|
||||
// return get_property<std::string>("auth_asym_id");
|
||||
// }
|
||||
|
||||
// std::string atom::get_pdb_ins_code() const
|
||||
// {
|
||||
// return get_property<std::string>("pdbx_PDB_ins_code");
|
||||
// }
|
||||
|
||||
// std::string atom::pdbxAuthAltID() const
|
||||
// {
|
||||
// return get_property<std::string>("pdbx_auth_alt_id");
|
||||
// }
|
||||
|
||||
// void atom::translate(point t)
|
||||
// {
|
||||
// auto loc = location();
|
||||
// loc += t;
|
||||
// location(loc);
|
||||
// }
|
||||
|
||||
// void atom::rotate(quaternion q)
|
||||
// {
|
||||
// auto loc = location();
|
||||
// loc.rotate(q);
|
||||
// location(loc);
|
||||
// }
|
||||
|
||||
// void atom::translate_and_rotate(point t, quaternion q)
|
||||
// {
|
||||
// auto loc = location();
|
||||
// loc += t;
|
||||
// loc.rotate(q);
|
||||
// location(loc);
|
||||
// }
|
||||
|
||||
// void atom::translate_rotate_and_translate(point t1, quaternion q, point t2)
|
||||
// {
|
||||
// auto loc = location();
|
||||
// loc += t1;
|
||||
// loc.rotate(q);
|
||||
// loc += t2;
|
||||
// location(loc);
|
||||
// }
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const atom &atom)
|
||||
{
|
||||
if (atom.is_water())
|
||||
@@ -319,8 +180,8 @@ residue::residue(structure &structure, const std::vector<atom> &atoms)
|
||||
m_compound_id = a.get_label_comp_id();
|
||||
m_asym_id = a.get_label_asym_id();
|
||||
m_seq_id = a.get_label_seq_id();
|
||||
m_auth_asym_id = a.get_auth_asym_id();
|
||||
m_auth_seq_id = a.get_auth_seq_id();
|
||||
m_pdb_strand_id = a.get_auth_asym_id();
|
||||
m_pdb_seq_num = a.get_auth_seq_id();
|
||||
m_pdb_ins_code = a.get_pdb_ins_code();
|
||||
|
||||
for (auto atom : atoms)
|
||||
@@ -371,10 +232,10 @@ atom residue::create_new_atom(atom_type inType, const std::string &inAtomID, poi
|
||||
{ "label_alt_id", "." },
|
||||
{ "label_comp_id", m_compound_id },
|
||||
{ "label_seq_id", m_seq_id },
|
||||
{ "auth_asym_id", m_auth_asym_id },
|
||||
{ "auth_asym_id", m_pdb_strand_id },
|
||||
{ "auth_atom_id", inAtomID },
|
||||
{ "auth_comp_id", m_compound_id },
|
||||
{ "auth_seq_id", m_auth_seq_id },
|
||||
{ "auth_seq_id", m_pdb_seq_num },
|
||||
{ "occupancy", 1.0f, 2 },
|
||||
{ "B_iso_or_equiv", 20.0f },
|
||||
{ "pdbx_PDB_model_num", m_structure->get_model_nr() },
|
||||
@@ -541,8 +402,8 @@ std::ostream &operator<<(std::ostream &os, const residue &res)
|
||||
{
|
||||
os << res.get_compound_id() << ' ' << res.get_asym_id() << ':' << res.get_seq_id();
|
||||
|
||||
if (res.get_auth_asym_id() != res.get_asym_id() or res.get_auth_seq_id() != std::to_string(res.get_seq_id()))
|
||||
os << " [" << res.get_auth_asym_id() << ':' << res.get_auth_seq_id() << ']';
|
||||
if (res.get_pdb_strand_id() != res.get_asym_id() or res.get_pdb_seq_num() != std::to_string(res.get_seq_id()))
|
||||
os << " [" << res.get_pdb_strand_id() << ':' << res.get_pdb_seq_num() << ']';
|
||||
|
||||
return os;
|
||||
}
|
||||
@@ -551,7 +412,7 @@ std::ostream &operator<<(std::ostream &os, const residue &res)
|
||||
// monomer
|
||||
|
||||
monomer::monomer(const polymer &polymer, std::size_t index, int seqID, const std::string &authSeqID, const std::string &pdbInsCode, const std::string &compoundID)
|
||||
: residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_auth_asym_id(), authSeqID, pdbInsCode)
|
||||
: residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_pdb_strand_id(), authSeqID, pdbInsCode)
|
||||
, m_polymer(&polymer)
|
||||
, m_index(index)
|
||||
{
|
||||
@@ -970,7 +831,7 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
|
||||
: m_structure(const_cast<structure *>(&s))
|
||||
, m_entity_id(entityID)
|
||||
, m_asym_id(asym_id)
|
||||
, m_auth_asym_id(auth_asym_id)
|
||||
, m_pdb_strand_id(auth_asym_id)
|
||||
{
|
||||
using namespace cif::literals;
|
||||
|
||||
@@ -982,12 +843,8 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
|
||||
for (auto r : poly_seq_scheme.find("asym_id"_key == asym_id))
|
||||
{
|
||||
int seqID;
|
||||
std::optional<int> pdbSeqNum;
|
||||
std::string compoundID, authSeqID, pdbInsCode;
|
||||
cif::tie(seqID, authSeqID, compoundID, pdbInsCode, pdbSeqNum) = r.get("seq_id", "auth_seq_num", "mon_id", "pdb_ins_code", "pdb_seq_num");
|
||||
|
||||
if (authSeqID.empty() and pdbSeqNum.has_value())
|
||||
authSeqID = std::to_string(*pdbSeqNum);
|
||||
std::string compoundID, pdbSeqNum, pdbInsCode;
|
||||
cif::tie(seqID, pdbSeqNum, compoundID, pdbInsCode) = r.get("seq_id", "pdb_seq_num", "mon_id", "pdb_ins_code");
|
||||
|
||||
std::size_t index = size();
|
||||
|
||||
@@ -995,11 +852,11 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
|
||||
if (not ix.count(seqID))
|
||||
{
|
||||
ix[seqID] = index;
|
||||
emplace_back(*this, index, seqID, authSeqID, pdbInsCode, compoundID);
|
||||
emplace_back(*this, index, seqID, pdbSeqNum, pdbInsCode, compoundID);
|
||||
}
|
||||
else if (VERBOSE > 0)
|
||||
{
|
||||
monomer m{ *this, index, seqID, authSeqID, pdbInsCode, compoundID };
|
||||
monomer m{ *this, index, seqID, pdbSeqNum, pdbInsCode, compoundID };
|
||||
std::cerr << "Dropping alternate residue " << m << '\n';
|
||||
}
|
||||
}
|
||||
@@ -1139,7 +996,7 @@ cif::mm::atom sugar::add_atom(row_initializer atom_info)
|
||||
atom_info.set_value({ "label_alt_id", "." });
|
||||
atom_info.set_value({ "auth_asym_id", m_branch->get_asym_id() });
|
||||
atom_info.set_value({ "auth_comp_id", m_compound_id });
|
||||
atom_info.set_value({ "auth_seq_id", m_auth_seq_id });
|
||||
atom_info.set_value({ "auth_seq_id", m_pdb_seq_num });
|
||||
atom_info.set_value({ "occupancy", 1.0, 2 });
|
||||
atom_info.set_value({ "B_iso_or_equiv", 30.0, 2 });
|
||||
atom_info.set_value({ "pdbx_PDB_model_num", 1 });
|
||||
@@ -1255,12 +1112,12 @@ sugar &branch::construct_sugar(const std::string &compound_id)
|
||||
{ "mon_id", result.get_compound_id() },
|
||||
|
||||
{ "pdb_asym_id", result.get_asym_id() },
|
||||
{ "pdb_seq_num", result.num() },
|
||||
{ "pdb_seq_num", result.get_pdb_seq_num() },
|
||||
{ "pdb_mon_id", result.get_compound_id() },
|
||||
|
||||
{ "auth_asym_id", result.get_auth_asym_id() },
|
||||
{ "auth_asym_id", result.get_pdb_strand_id() },
|
||||
{ "auth_mon_id", result.get_compound_id() },
|
||||
{ "auth_seq_num", result.get_auth_seq_id() },
|
||||
{ "auth_seq_num", result.get_pdb_seq_num() },
|
||||
|
||||
{ "hetero", "n" } });
|
||||
|
||||
@@ -1303,7 +1160,7 @@ std::string branch::name(const sugar &s) const
|
||||
|
||||
for (auto &sn : *this)
|
||||
{
|
||||
if (not sn.get_link() or sn.get_link().get_auth_seq_id() != s.get_auth_seq_id())
|
||||
if (not sn.get_link() or sn.get_link().get_auth_seq_id() != s.get_pdb_seq_num())
|
||||
continue;
|
||||
|
||||
auto n = name(sn) + "-(1-" + sn.get_link().get_label_atom_id().substr(1) + ')';
|
||||
@@ -1330,19 +1187,19 @@ float branch::weight() const
|
||||
// --------------------------------------------------------------------
|
||||
// structure
|
||||
|
||||
structure::structure(file &p, std::size_t modelNr, StructureOpenOptions options)
|
||||
structure::structure(file &p, std::size_t modelNr, structure_open_options options)
|
||||
: structure(p.front(), modelNr, options)
|
||||
{
|
||||
}
|
||||
|
||||
structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions options)
|
||||
structure::structure(datablock &db, std::size_t modelNr, structure_open_options options)
|
||||
: m_db(db)
|
||||
, m_model_nr(modelNr)
|
||||
{
|
||||
if (db.get_validator() == nullptr)
|
||||
db.load_dictionary();
|
||||
|
||||
auto &atomCat = db["atom_site"];
|
||||
auto &atom_site = db["atom_site"];
|
||||
|
||||
load_atoms_for_model(options);
|
||||
|
||||
@@ -1350,7 +1207,7 @@ structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions op
|
||||
if (m_atoms.empty() and m_model_nr == 1)
|
||||
{
|
||||
std::optional<std::size_t> model_nr;
|
||||
cif::tie(model_nr) = atomCat.front().get("pdbx_PDB_model_num");
|
||||
cif::tie(model_nr) = atom_site.front().get("pdbx_PDB_model_num");
|
||||
if (model_nr and *model_nr != m_model_nr)
|
||||
{
|
||||
if (VERBOSE > 0)
|
||||
@@ -1369,42 +1226,133 @@ structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions op
|
||||
load_data();
|
||||
}
|
||||
|
||||
void structure::load_atoms_for_model(StructureOpenOptions options)
|
||||
void structure::load_atoms_for_model(structure_open_options options)
|
||||
{
|
||||
using namespace literals;
|
||||
|
||||
auto &atomCat = m_db["atom_site"];
|
||||
auto &atom_site = m_db["atom_site"];
|
||||
|
||||
condition c = "pdbx_PDB_model_num"_key == null or "pdbx_PDB_model_num"_key == m_model_nr;
|
||||
if (options bitand StructureOpenOptions::SkipHydrogen)
|
||||
c = std::move(c) and ("type_symbol"_key != "H" and "type_symbol"_key != "D");
|
||||
|
||||
for (auto id : atomCat.find<std::string>(std::move(c), "id"))
|
||||
emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
|
||||
if (options.skip_hydrogen)
|
||||
c = std::move(c) and (cif::key("type_symbol") != "H" and cif::key("type_symbol") != "D");
|
||||
|
||||
if (options.skip_water)
|
||||
c = std::move(c) and (cif::key("auth_comp_id") != "HOH" and cif::key("auth_comp_id") != "H20" and cif::key("auth_comp_id") != "WAT");
|
||||
|
||||
if (options.skip_hetatom)
|
||||
{
|
||||
if (options.skip_water)
|
||||
c = std::move(c) and cif::key("group_PDB") != "HETATM";
|
||||
else
|
||||
c = std::move(c) and (cif::key("group_PDB") != "HETATM" or (cif::key("auth_comp_id") == "HOH" or cif::key("auth_comp_id") == "H20" or cif::key("auth_comp_id") == "WAT"));
|
||||
}
|
||||
|
||||
if (options.min_b_factor.has_value())
|
||||
c = std::move(c) and cif::key("B_iso_or_equiv") >= *options.min_b_factor;
|
||||
|
||||
if (options.max_b_factor.has_value())
|
||||
c = std::move(c) and cif::key("B_iso_or_equiv") <= *options.max_b_factor;
|
||||
|
||||
if (not options.asyms.empty())
|
||||
{
|
||||
condition tmp_c;
|
||||
for (auto asym_id : options.asyms)
|
||||
tmp_c = std::move(tmp_c) or cif::key("label_asym_id") == asym_id;
|
||||
c = std::move(c) and std::move(tmp_c);
|
||||
}
|
||||
|
||||
if (options.occupancy_mode == occupancy_policy::ALL)
|
||||
{
|
||||
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
|
||||
emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
|
||||
}
|
||||
else if (options.occupancy_mode == occupancy_policy::UNOCCUPIED)
|
||||
{
|
||||
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
|
||||
{
|
||||
auto a = std::make_shared<atom::atom_impl>(m_db, id);
|
||||
if (a->get_property_float("occupancy") > 0)
|
||||
continue;
|
||||
emplace_atom(a);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cif::mm::atom> atoms;
|
||||
std::map<std::tuple<std::string,int>, std::map<std::string, float>> alts;
|
||||
|
||||
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
|
||||
{
|
||||
auto a = atoms.emplace_back(std::make_shared<atom::atom_impl>(m_db, id));
|
||||
|
||||
if (a.is_alternate())
|
||||
{
|
||||
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
|
||||
auto alt_id = a.get_label_alt_id();
|
||||
|
||||
if (auto i = alts.find(key); i != alts.end())
|
||||
i->second[alt_id] += a.get_occupancy();
|
||||
else
|
||||
alts[key][alt_id] = a.get_occupancy();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &&[key, value] : alts)
|
||||
{
|
||||
const auto &[asym_id, seq_id] = key;
|
||||
|
||||
// select highest occupancy for this residue's alternates
|
||||
std::string alt_id;
|
||||
float occupancy = options.occupancy_mode == occupancy_policy::MAX ? 0.f : std::numeric_limits<float>::max();
|
||||
for (const auto &[alt_key, alt_value] : value)
|
||||
{
|
||||
if (options.occupancy_mode == occupancy_policy::MAX)
|
||||
{
|
||||
if (occupancy < alt_value)
|
||||
{
|
||||
alt_id = alt_key;
|
||||
occupancy = alt_value;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (occupancy > alt_value)
|
||||
{
|
||||
alt_id = alt_key;
|
||||
occupancy = alt_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value.clear();
|
||||
value.emplace(alt_id, occupancy);
|
||||
}
|
||||
|
||||
for (auto a : atoms)
|
||||
{
|
||||
if (a.is_alternate())
|
||||
{
|
||||
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
|
||||
|
||||
if (alts[key].contains(a.get_label_alt_id()))
|
||||
emplace_atom(a);
|
||||
}
|
||||
else
|
||||
emplace_atom(a);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// structure::structure(const structure &s)
|
||||
// : m_db(s.m_db)
|
||||
// , m_model_nr(s.m_model_nr)
|
||||
// {
|
||||
// m_atoms.reserve(s.m_atoms.size());
|
||||
// for (auto &atom : s.m_atoms)
|
||||
// emplace_atom(atom.clone());
|
||||
|
||||
// load_data();
|
||||
// }
|
||||
|
||||
// structure::~structure()
|
||||
// {
|
||||
// }
|
||||
|
||||
void structure::load_data()
|
||||
{
|
||||
auto &polySeqScheme = m_db["pdbx_poly_seq_scheme"];
|
||||
|
||||
for (const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string, std::string, std::string>("asym_id", "pdb_strand_id", "entity_id"))
|
||||
{
|
||||
if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id or m_polymers.back().get_entity_id() != entityID)
|
||||
if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id)
|
||||
m_polymers.emplace_back(*this, entityID, asym_id, auth_asym_id);
|
||||
}
|
||||
|
||||
@@ -1430,18 +1378,18 @@ void structure::load_data()
|
||||
for (auto &poly : m_polymers)
|
||||
{
|
||||
for (auto &res : poly)
|
||||
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;
|
||||
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_pdb_seq_num() }] = &res;
|
||||
}
|
||||
|
||||
for (auto &res : m_non_polymers)
|
||||
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;
|
||||
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_pdb_seq_num() }] = &res;
|
||||
|
||||
std::set<std::string> sugars;
|
||||
for (auto &branch : m_branches)
|
||||
{
|
||||
for (auto &sugar : branch)
|
||||
{
|
||||
resMap[{ sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id() }] = &sugar;
|
||||
resMap[{ sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_pdb_seq_num() }] = &sugar;
|
||||
sugars.insert(sugar.get_compound_id());
|
||||
}
|
||||
}
|
||||
@@ -1516,30 +1464,6 @@ EntityType structure::get_entity_type_for_asym_id(const std::string asym_id) con
|
||||
return get_entity_type_for_entity_id(entityID);
|
||||
}
|
||||
|
||||
// std::vector<atom> structure::waters() const
|
||||
// {
|
||||
// using namespace literals;
|
||||
|
||||
// std::vector<atom> result;
|
||||
|
||||
// auto &db = datablock();
|
||||
|
||||
// // Get the entity id for water. Watch out, structure may not have water at all
|
||||
// auto &entityCat = db["entity"];
|
||||
// for (const auto &[waterEntityID] : entityCat.find<std::string>("type"_key == "water", "id"))
|
||||
// {
|
||||
// for (auto &a : m_atoms)
|
||||
// {
|
||||
// if (a.get_property("label_entity_id") == waterEntityID)
|
||||
// result.push_back(a);
|
||||
// }
|
||||
|
||||
// break;
|
||||
// }
|
||||
|
||||
// return result;
|
||||
// }
|
||||
|
||||
bool structure::has_atom_id(const std::string &id) const
|
||||
{
|
||||
assert(m_atoms.size() == m_atom_index.size());
|
||||
@@ -1688,7 +1612,7 @@ residue &structure::get_residue(const std::string &asym_id, int seqID, const std
|
||||
{
|
||||
for (auto &res : m_non_polymers)
|
||||
{
|
||||
if (res.get_asym_id() == asym_id and (authSeqID.empty() or res.get_auth_seq_id() == authSeqID))
|
||||
if (res.get_asym_id() == asym_id and (authSeqID.empty() or res.get_pdb_seq_num() == authSeqID))
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@@ -1712,7 +1636,7 @@ residue &structure::get_residue(const std::string &asym_id, int seqID, const std
|
||||
|
||||
for (auto &sugar : branch)
|
||||
{
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID)
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == authSeqID)
|
||||
return sugar;
|
||||
}
|
||||
}
|
||||
@@ -1734,7 +1658,7 @@ residue &structure::get_residue(const std::string &asym_id, const std::string &c
|
||||
{
|
||||
for (auto &res : m_non_polymers)
|
||||
{
|
||||
if (res.get_asym_id() == asym_id and res.get_auth_seq_id() == authSeqID and res.get_compound_id() == compID)
|
||||
if (res.get_asym_id() == asym_id and res.get_pdb_seq_num() == authSeqID and res.get_compound_id() == compID)
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@@ -1758,7 +1682,7 @@ residue &structure::get_residue(const std::string &asym_id, const std::string &c
|
||||
|
||||
for (auto &sugar : branch)
|
||||
{
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID and sugar.get_compound_id() == compID)
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == authSeqID and sugar.get_compound_id() == compID)
|
||||
return sugar;
|
||||
}
|
||||
}
|
||||
@@ -2108,7 +2032,7 @@ void structure::remove_residue(const std::string &asym_id, int seq_id, const std
|
||||
{
|
||||
for (auto &res : m_non_polymers)
|
||||
{
|
||||
if (res.get_asym_id() == asym_id and (auth_seq_id.empty() or res.get_auth_seq_id() == auth_seq_id))
|
||||
if (res.get_asym_id() == asym_id and (auth_seq_id.empty() or res.get_pdb_seq_num() == auth_seq_id))
|
||||
{
|
||||
remove_residue(res);
|
||||
return;
|
||||
@@ -2138,7 +2062,7 @@ void structure::remove_residue(const std::string &asym_id, int seq_id, const std
|
||||
|
||||
for (auto &sugar : branch)
|
||||
{
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == auth_seq_id)
|
||||
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == auth_seq_id)
|
||||
{
|
||||
remove_residue(sugar);
|
||||
return;
|
||||
@@ -2271,7 +2195,7 @@ void structure::remove_sugar(sugar &s)
|
||||
// TODO: need fix, collect from nag_atoms?
|
||||
{ "auth_asym_id", asym_id },
|
||||
{ "auth_mon_id", sugar.get_compound_id() },
|
||||
{ "auth_seq_num", sugar.get_auth_seq_id() },
|
||||
{ "auth_seq_num", sugar.get_pdb_seq_num() },
|
||||
|
||||
{ "hetero", "n" } });
|
||||
}
|
||||
@@ -2357,8 +2281,8 @@ std::string structure::create_non_poly(const std::string &entity_id, const std::
|
||||
{ "entity_id", entity_id },
|
||||
{ "mon_id", comp_id },
|
||||
{ "ndb_seq_num", ndb_nr },
|
||||
{ "pdb_seq_num", res.get_auth_seq_id() },
|
||||
{ "auth_seq_num", res.get_auth_seq_id() },
|
||||
{ "pdb_seq_num", res.get_pdb_seq_num() },
|
||||
{ "auth_seq_num", res.get_pdb_seq_num() },
|
||||
{ "pdb_mon_id", comp_id },
|
||||
{ "auth_mon_id", comp_id },
|
||||
{ "pdb_strand_id", asym_id },
|
||||
@@ -2418,8 +2342,8 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
|
||||
{ "entity_id", entity_id },
|
||||
{ "mon_id", comp_id },
|
||||
{ "ndb_seq_num", ndb_nr },
|
||||
{ "pdb_seq_num", res.get_auth_seq_id() },
|
||||
{ "auth_seq_num", res.get_auth_seq_id() },
|
||||
{ "pdb_seq_num", res.get_pdb_seq_num() },
|
||||
{ "auth_seq_num", res.get_pdb_seq_num() },
|
||||
{ "pdb_mon_id", comp_id },
|
||||
{ "auth_mon_id", comp_id },
|
||||
{ "pdb_strand_id", asym_id },
|
||||
@@ -2733,11 +2657,11 @@ std::string structure::create_entity_for_branch(branch &branch)
|
||||
|
||||
pdbx_entity_branch_link.emplace({ { "link_id", pdbx_entity_branch_link.get_unique_id("") },
|
||||
{ "entity_id", entityID },
|
||||
{ "entity_branch_list_num_1", s1.get_auth_seq_id() },
|
||||
{ "entity_branch_list_num_1", s1.get_pdb_seq_num() },
|
||||
{ "comp_id_1", s1.get_compound_id() },
|
||||
{ "atom_id_1", l1.get_label_atom_id() },
|
||||
{ "leaving_atom_id_1", "O1" },
|
||||
{ "entity_branch_list_num_2", s2.get_auth_seq_id() },
|
||||
{ "entity_branch_list_num_2", s2.get_pdb_seq_num() },
|
||||
{ "comp_id_2", s2.get_compound_id() },
|
||||
{ "atom_id_2", l2.get_label_atom_id() },
|
||||
{ "leaving_atom_id_2", "H" + l2.get_label_atom_id() },
|
||||
|
||||
@@ -6419,15 +6419,24 @@ file read(std::istream &is)
|
||||
std::throw_with_nested(std::runtime_error("Since the file did not start with a valid PDB HEADER line mmCIF was assumed, but that failed."));
|
||||
}
|
||||
|
||||
// Try to see if we can create an mm::structure out of this data.
|
||||
// If that fails, we need to reconstruct a PDBx file out of it.
|
||||
try
|
||||
if (not(result.empty() or result.front().empty()))
|
||||
{
|
||||
cif::mm::structure s(result);
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
{
|
||||
reconstruct_pdbx(result);
|
||||
if (auto &db = result.front(); db.get("audit_conform") == nullptr)
|
||||
reconstruct_pdbx(result);
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
// Try to see if we can create an mm::structure out of this data.
|
||||
// If that fails, we need to reconstruct a PDBx file out of it.
|
||||
|
||||
cif::mm::structure s(result);
|
||||
}
|
||||
catch (const std::exception &e)
|
||||
{
|
||||
reconstruct_pdbx(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1554,4 +1554,92 @@ bool reconstruct_pdbx(file &file, const validator &validator)
|
||||
return valid and is_valid_pdbx_file(file, validator);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Convenience overload of fixup_pdbx that selects the validator itself.
//
// The validator is looked up from the audit_conform category of the first
// datablock when that category is present; otherwise the "mmcif_pdbx.dic"
// dictionary is used.
//
// Throws std::runtime_error when the file contains no datablocks.
void fixup_pdbx(file &file)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	const auto *audit_conform = file.front().get("audit_conform");

	// No audit_conform category: fall back to the default dictionary
	if (audit_conform == nullptr)
	{
		fixup_pdbx(file, validator_factory::instance().get("mmcif_pdbx.dic"));
		return;
	}

	fixup_pdbx(file, validator_factory::instance().get(*audit_conform));
}
|
||||
|
||||
// Repair the first datablock of a PDBx file in place, using the supplied
// validator. Missing categories are created and existing records are checked,
// in the order of the steps below.
//
// The first datablock is assumed to contain the entry; any additional
// datablock is assumed to contain compound information.
//
// Throws std::runtime_error when:
//  - the file contains no datablocks,
//  - the atom_site category is missing or empty,
//  - the entry category exists but does not contain exactly one record.
void fixup_pdbx(file &file, const validator &validator)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	// assuming the first datablock contains the entry ...
	auto &db = file.front();

	// ... and any additional datablock will contain compound information
	cif::compound_source cs(file);

	// True when the named category is absent from db or has no records
	auto missing_or_empty = [&db](const char *name)
	{
		auto cat = db.get(name);
		return cat == nullptr or cat->empty();
	};

	if (missing_or_empty("atom_site"))
		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

	// Phenix files do not have an entry record
	if (missing_or_empty("entry"))
	{
		// Use the datablock name as entry ID
		std::string entry_id = db.name();

		category entry("entry");
		entry.emplace({ { "id", entry_id } });
		db.emplace_back(std::move(entry));
	}
	else if (db["entry"].size() != 1)
		throw std::runtime_error("Unexpected size of entry category");

	// Start with chem_comp, it is often missing many fields
	// that can easily be filled in.
	checkChemCompRecords(db);

	// If the data is really horrible, it might not contain entities
	if (not db["atom_site"].find_first(key("label_entity_id") != null))
		createEntityIDs(db);

	// Now see if atom records make sense at all
	checkAtomRecords(db);

	db["chem_comp"].reorder_by_index();

	// The validator is attached only after the checks above have run
	db.set_validator(&validator);

	// Now create any missing categories, starting with struct_asym
	if (missing_or_empty("struct_asym"))
		createStructAsym(db);

	if (missing_or_empty("entity"))
		createEntity(db);

	if (missing_or_empty("pdbx_poly_seq_scheme"))
		createPdbxPolySeqScheme(db);

	// Comparing the two schemes is only meaningful when an
	// ndb_poly_seq_scheme is actually present; the previous condition
	// invoked the comparison only when it was missing or empty.
	// NOTE(review): presumes comparePolySeqSchemes needs both categories
	// to contain data -- confirm against its implementation.
	if (not missing_or_empty("ndb_poly_seq_scheme"))
		comparePolySeqSchemes(db);

	createPdbxNonpolyScheme(db);

	// Create a minimal set of branch records
	createPdbxBranchScheme(db);

	// fill in missing formula_weight, e.g.
	checkEntities(db);
}
|
||||
|
||||
} // namespace cif::pdb
|
||||
|
||||
@@ -552,6 +552,9 @@ const validator &validator_factory::get(const category &audit_conform)
|
||||
validator validator_factory::construct_validator(std::string_view name, std::optional<std::string> version)
|
||||
{
|
||||
auto data = load_resource(name);
|
||||
if (not data and name == "mmcif_pdbx_v50")
|
||||
data = load_resource("mmcif_pdbx.dic");
|
||||
|
||||
if (not data)
|
||||
throw std::runtime_error("Could not load dictionary " + std::string{ name });
|
||||
|
||||
|
||||
@@ -28,7 +28,8 @@ list(
|
||||
sugar
|
||||
spinner
|
||||
# reconstruction
|
||||
validate-pdbx)
|
||||
validate-pdbx
|
||||
)
|
||||
|
||||
add_library(test-main OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/test-main.cpp")
|
||||
|
||||
|
||||
@@ -431,3 +431,169 @@ TEST_CASE("remove_residue_1")
|
||||
|
||||
REQUIRE_NOTHROW(s.validate_atoms());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Tests for structure_open_options
|
||||
|
||||
// Each section opens model 1 of the 1cbs example with a single filter option
// set and verifies that every remaining atom satisfies that filter.
//
// Every section first requires the atom list to be non-empty: without that
// a filter that dropped all atoms would let the per-atom checks pass
// vacuously.
TEST_CASE("options_1")
{
	const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(example.string());

	auto &cf = cif::compound_factory::instance();

	SECTION("skip_water")
	{
		cif::mm::structure s(file, 1, { .skip_water = true });

		REQUIRE_NOTHROW(s.validate_atoms());
		REQUIRE_FALSE(s.atoms().empty());

		for (const auto &a : s.atoms())
			CHECK_FALSE(a.is_water());
	}

	SECTION("skip_hetatom")
	{
		cif::mm::structure s(file, 1, { .skip_hetatom = true });

		REQUIRE_NOTHROW(s.validate_atoms());
		REQUIRE_FALSE(s.atoms().empty());

		// Only water, peptide and base atoms are expected to remain
		for (const auto &a : s.atoms())
			CHECK((a.is_water() or cf.is_peptide(a.get_label_comp_id()) or cf.is_base(a.get_label_comp_id())));
	}

	SECTION("selected_asyms")
	{
		cif::mm::structure s(file, 1, { .asyms = { "A" } });

		REQUIRE_NOTHROW(s.validate_atoms());
		REQUIRE_FALSE(s.atoms().empty());

		for (const auto &a : s.atoms())
			CHECK(a.get_label_asym_id() == "A");
	}

	SECTION("min-b-factor")
	{
		cif::mm::structure s(file, 1, { .min_b_factor = 20.f });

		REQUIRE_NOTHROW(s.validate_atoms());
		REQUIRE_FALSE(s.atoms().empty());

		for (const auto &a : s.atoms())
			CHECK(a.get_property_float("B_iso_or_equiv") >= 20.f);
	}

	SECTION("max-b-factor")
	{
		cif::mm::structure s(file, 1, { .max_b_factor = 20.f });

		REQUIRE_NOTHROW(s.validate_atoms());
		REQUIRE_FALSE(s.atoms().empty());

		for (const auto &a : s.atoms())
			CHECK(a.get_property_float("B_iso_or_equiv") <= 20.f);
	}
}
|
||||
|
||||
// Occupancy policy tests. The embedded mmCIF holds a single HEM residue with
// four atoms, each present in two alternates: alt A at occupancy 0.75 and
// alt B at occupancy 0.25.
TEST_CASE("options_2")
{
	auto hem_file = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A A A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 0.75 7.67 ? 1 HEM CHA 1
3 A A A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 0.75 7.05 ? 1 HEM CHB 1
2 A A A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 0.75 7.69 ? 1 HEM CHC 1
4 A A A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 0.75 8.00 ? 1 HEM CHD 1
5 A B A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 0.25 7.67 ? 1 HEM CHA 1
6 A B A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 0.25 7.05 ? 1 HEM CHB 1
7 A B A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 0.25 7.69 ? 1 HEM CHC 1
8 A B A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 0.25 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;

	// Attach the mmcif_pdbx dictionary to the datablock before building structures
	hem_file.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));

	// MAX: four atoms are kept and the first one carries alt ID A
	SECTION("max")
	{
		cif::mm::structure model(hem_file, 1, { .occupancy_mode = cif::mm::occupancy_policy::MAX });

		REQUIRE(model.atoms().size() == 4);
		CHECK(model.atoms().front().get_label_alt_id() == "A");
	}

	// MIN: four atoms are kept and the first one carries alt ID B
	SECTION("min")
	{
		cif::mm::structure model(hem_file, 1, { .occupancy_mode = cif::mm::occupancy_policy::MIN });

		REQUIRE(model.atoms().size() == 4);
		CHECK(model.atoms().front().get_label_alt_id() == "B");
	}

	// UNOCCUPIED: no atoms are expected to remain
	SECTION("unoccupied")
	{
		cif::mm::structure model(hem_file, 1, { .occupancy_mode = cif::mm::occupancy_policy::UNOCCUPIED });

		CHECK(model.atoms().empty());
	}
}
|
||||
Reference in New Issue
Block a user