Compare commits

..

29 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
1ed704f172 begin with curl 2025-08-13 10:10:45 +02:00
Maarten L. Hekkelman
398c16eac2 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-08-13 09:02:10 +02:00
Maarten L. Hekkelman
fa869bdc7d lightweight fixup 2025-08-13 09:01:50 +02:00
Maarten L. Hekkelman
c20d0d2a30 lightweight fixup 2025-08-12 16:45:19 +02:00
Maarten L. Hekkelman
000f2736c2 Merge branch 'develop' into clebreto-feature/enrich_structure_options 2025-08-11 10:13:20 +02:00
Maarten L. Hekkelman
cfcc81bb62 verbose messages 2025-08-11 10:13:07 +02:00
Maarten L. Hekkelman
82eae05868 changed b-factor options for structure loading 2025-06-11 14:17:50 +02:00
Maarten L. Hekkelman
e8fb53c49b Alternate implementation of structure_open_options 2025-06-11 13:35:58 +02:00
Maarten L. Hekkelman
604c97afe1 Merge branch 'develop' into clebreto-feature/enrich_structure_options 2025-06-11 11:43:01 +02:00
Maarten L. Hekkelman
7e60cdf272 remove redundant statement 2025-06-11 11:42:26 +02:00
Maarten L. Hekkelman
9ea7cfcc80 Remove test 2025-06-11 09:56:26 +02:00
Maarten L. Hekkelman
a7a4a16f79 remove debug code 2025-06-11 09:45:45 +02:00
Maarten L. Hekkelman
6717059934 Revert renaming compound_id to mon_id in residue 2025-06-11 09:41:49 +02:00
Maarten L. Hekkelman
714747c280 version bump 2025-06-11 09:32:36 +02:00
Maarten L. Hekkelman
81cd305c80 rename mm::polymer fields and methods to better match mmcif_pdbx naming.
fix building mm::structure using pdb_seq_num instead of auth_seq_num
2025-06-11 09:30:54 +02:00
Maarten L. Hekkelman
5de872bbb3 Version bump, update mmcif_pdbx.dic 2025-06-10 09:17:30 +02:00
Maarten L. Hekkelman
ce6a75a920 right... 2025-06-10 09:11:26 +02:00
Maarten L. Hekkelman
874a5cb2f2 missing code added 2025-06-02 15:09:49 +02:00
Maarten L. Hekkelman
6e2202d4f1 More verbose strip 2025-06-02 09:10:58 +02:00
Maarten L. Hekkelman
bcf33df701 Added strip, removed dangerous datablock::is_valid (non-const version) 2025-06-02 08:52:58 +02:00
Maarten L. Hekkelman
3bdcf21c69 Merge commit '4b36bdc' into develop 2025-05-29 16:14:15 +02:00
Maarten L. Hekkelman
4b36bdc58c work around incorrect mmcif_pdbx name 2025-05-29 16:13:28 +02:00
Maarten L. Hekkelman
6d9008ee8c Merge branch 'trunk' into develop 2025-05-29 15:15:41 +02:00
Maarten L. Hekkelman
ee93692707 comment formatting 2025-05-29 14:15:13 +02:00
Maarten L. Hekkelman
2bcc368bce reconstruct when audit_conform is missing 2025-05-29 14:07:45 +02:00
Maarten L. Hekkelman
6cc4467d53 options 2025-05-27 13:55:13 +02:00
Maarten L. Hekkelman
41c0521480 Merge branch 'feature/enrich_structure_options' of github.com:clebreto/libcifpp into clebreto-feature/enrich_structure_options 2025-04-02 13:58:01 +02:00
LE BRETON Come
7d33d56c0e Update docs 2025-03-07 15:59:19 +01:00
LE BRETON Come
f86f34e5e1 WIP Enrich StructureOpenOptions 2025-03-07 15:54:30 +01:00
18 changed files with 730 additions and 368 deletions

View File

@@ -24,10 +24,15 @@
cmake_minimum_required(VERSION 3.23)
# Default to a Release build when libcifpp is the top-level project and the
# user did not pick a build type. This runs before project(), where
# CMAKE_CONFIGURATION_TYPES has not been populated by the generator yet, so the
# GENERATOR_IS_MULTI_CONFIG global property is consulted as well to avoid
# forcing CMAKE_BUILD_TYPE on multi-config generators.
get_property(CIFPP_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}"
   AND NOT CIFPP_GENERATOR_IS_MULTI_CONFIG
   AND NOT CMAKE_BUILD_TYPE
   AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
  # Offer the standard configurations as choices in cmake-gui / ccmake
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
# set the project name
project(
libcifpp
VERSION 8.0.1
VERSION 9.0.0
LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -63,25 +68,26 @@ elseif(MSVC)
endif()
# Build documentation?
option(BUILD_DOCUMENTATION "Build the documentation" OFF)
set(BUILD_DOCUMENTATION OFF CACHE BOOL "Build the documentation")
# Optionally build a version to be installed inside CCP4
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4")
set(BUILD_FOR_CCP4 OFF CACHE BOOL "Build a version to be installed in CCP4")
# Optionally use libcurl to fetch compound files
set(USE_CURL_FOR_CCD ON CACHE BOOL "Use curl to fetch missing CCD files")
# Building shared libraries?
if(NOT(BUILD_FOR_CCP4 AND WIN32))
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
endif()
if(PROJECT_IS_TOP_LEVEL AND NOT BUILD_FOR_CCP4)
# Lots of code depend on the availability of the components.cif file
option(CIFPP_DOWNLOAD_CCD
"Download the CCD file components.cif during installation" ON)
set(CIFPP_DOWNLOAD_CCD ON CACHE BOOL "Download the CCD file components.cif during installation")
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
option(CIFPP_INSTALL_UPDATE_SCRIPT
"Install the script to update CCD and dictionary files" ON)
set(CIFPP_INSTALL_UPDATE_SCRIPT ON CACHE BOOL "Install the script to update CCD and dictionary files")
endif()
else()
unset(CIFPP_DOWNLOAD_CCD)
@@ -91,14 +97,13 @@ endif()
# When CCP4 is sourced in the environment, we can recreate the symmetry
# operations table
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
option(CIFPP_RECREATE_SYMOP_DATA
"Recreate SymOp data table in case it is out of date" ON)
set(CIFPP_RECREATE_SYMOP_DATA ON CACHE BOOL "Recreate SymOp data table in case it is out of date")
endif()
# CCP4 build
if(BUILD_FOR_CCP4)
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
message(FATAL_ERROR "cifpp: A CCP4 built was requested but CCP4 was not sourced")
else()
list(PREPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
@@ -128,9 +133,6 @@ if(WIN32)
add_definitions(-D _WIN32_WINNT=0x0501)
endif()
# Man, this is 2024 we're living in...
add_definitions(-DNOMINMAX)
# We do not want to write an export file for all our symbols...
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
@@ -164,7 +166,7 @@ int main(int argc, char *argv[]) { return 0; }"
if(GXX_LIBSTDCPP)
message(
STATUS "Testing for known regex bug, since you're using GNU libstdc++")
STATUS "cifpp: Testing for known regex bug, since you're using GNU libstdc++")
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
${CMAKE_CURRENT_BINARY_DIR}/test
@@ -173,7 +175,7 @@ if(GXX_LIBSTDCPP)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(
STATUS
"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
"cifpp: You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
)
find_package(Boost 1.80 QUIET COMPONENTS regex)
@@ -221,11 +223,15 @@ if(MSVC)
endforeach()
endif()
# libcurl is a hard requirement only when USE_CURL_FOR_CCD is enabled
if(USE_CURL_FOR_CCD)
find_package(CURL REQUIRED)
endif()
find_package(ZLIB QUIET)
find_package(Threads)
if(NOT ZLIB_FOUND)
message(FATAL_ERROR "The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
endif()
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
@@ -242,12 +248,12 @@ else()
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
GIT_TAG 3.4.0
INSTALL_COMMAND "")
ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
endif()
message(STATUS "Eigen include dir is ${EIGEN_INCLUDE_DIR}")
message(STATUS "cifpp: Eigen include dir is ${EIGEN_INCLUDE_DIR}")
# Create a revision file, containing the current git version info
include(VersionString)
@@ -376,7 +382,13 @@ target_include_directories(
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
# Libraries every build of cifpp links against; CURL::libcurl is appended only
# when USE_CURL_FOR_CCD evaluates to true.
target_link_libraries(
  cifpp
  PUBLIC Threads::Threads
         ZLIB::ZLIB
         $<$<TARGET_EXISTS:std::atomic>:std::atomic>
         $<$<BOOL:${USE_CURL_FOR_CCD}>:CURL::libcurl>)

# HAVE_CURL is a PUBLIC definition, so it is also passed on to targets that
# link against cifpp.
if(USE_CURL_FOR_CCD)
  target_compile_definitions(cifpp PUBLIC HAVE_CURL)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -390,7 +402,7 @@ if(CIFPP_DOWNLOAD_CCD)
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
if(CCD_FILE_SIZE EQUAL 0)
message(STATUS "Removing empty ${COMPONENTS_CIF} file")
message(STATUS "cifpp: Removing empty ${COMPONENTS_CIF} file")
file(REMOVE "${COMPONENTS_CIF}")
endif()
endif()
@@ -429,7 +441,7 @@ if(CIFPP_DOWNLOAD_CCD)
if(CCD_FETCH_STATUS_CODE)
message(
FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
FATAL_ERROR "cifpp: Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
endif()
@@ -493,7 +505,7 @@ file(GLOB OLD_CONFIG_FILES
if(OLD_CONFIG_FILES)
message(
STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
STATUS "cifpp: Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
endif()
@@ -559,7 +571,7 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
else()
message(FATAL_ERROR "Don't know where to install the update script")
message(FATAL_ERROR "cifpp: Don't know where to install the update script")
endif()
# a config file, to make it complete
@@ -573,7 +585,7 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR})
install(
CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
CODE "message(\"cifpp: A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
install(DIRECTORY DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/libcifpp/cache-update.d)

View File

@@ -1,3 +1,12 @@
Version 9.0.0
- Rename fields of cif::mm::polymer to match the naming
in mmcif_pdbx.dic. Also, related, fix building mm::structure
using the correct mapping between atom_site and residues.
- _atom_site.auth_alt_id does not exist, it should be
_atom_site.pdbx_auth_alt_id of course.
- Added a more lightweight fixup for mmcif_pdbx files
that lack certain categories.
Version 8.0.1
- Fix cif::mm::structure::cleanup_empty_categories, removed too much
- Add default value for B_iso_or_equiv in residue::create_new_atom

View File

@@ -8,5 +8,6 @@ include(CMakeFindDependencyMacro)
find_dependency(Threads)
find_dependency(ZLIB REQUIRED)

# cifpp only links against libcurl when it was configured with
# USE_CURL_FOR_CCD, so consumers should not be forced to have CURL installed
# when that option was switched off. The value is substituted when this
# template is configured.
set(CIFPP_USE_CURL_FOR_CCD "@USE_CURL_FOR_CCD@")
if(CIFPP_USE_CURL_FOR_CCD)
  find_dependency(CURL REQUIRED)
endif()

check_required_components(cifpp)

View File

@@ -223,6 +223,11 @@ class category
/// @return Returns true if all validations pass
bool validate_links() const;
/**
* @brief Strip removes items from this category that are invalid according to the assigned validator
*/
void strip();
/// @brief Equality operator, returns true if @a rhs is equal to this
/// @param rhs The object to compare with
/// @return True if the data contained is equal

View File

@@ -196,6 +196,23 @@ class compound
// --------------------------------------------------------------------
// Factory class for compound and Link objects
/// @brief Options available to configure a compound factory
///
/// All members have defaults, so a default constructed instance can be used as is.
struct compound_factory_options
{
/// If you have a multithreaded application and want to have different
/// compounds in each thread (e.g. a web service processing user requests
/// with different sets of compounds) you can set this flag to true.
bool use_thread_local_instance_only = false;
// NOTE(review): the member below is only declared when HAVE_CURL evaluates to
// non-zero, so the layout of this struct depends on that macro; all code using
// this struct should be compiled with the same setting — confirm for consumers
// that do not pick up the compile definitions of the cifpp target.
#if HAVE_CURL
// Various locations for chem_comp data files:
// - ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp
// - https://files.rcsb.org/pub/pdb/refdata/chem_comp/
std::string remote_chem_comp_url = "ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp";
#endif
};
/// Use the compound_factory singleton instance to create compound objects
class compound_factory
@@ -208,8 +225,12 @@ class compound_factory
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
/// flag to true.
[[deprecated("Use version with compound_factory_options instead")]]
static void init(bool useThreadLocalInstanceOnly);
/// \brief Initialise a singleton instance.
static void init(compound_factory_options options = {});
/// Return the singleton instance. If initialized with local threads, this is the
/// instance for the current thread.
static compound_factory &instance();
@@ -239,6 +260,8 @@ class compound_factory
void push_dictionary(const file &file);
/// Remove the last pushed dictionary
// TODO: check if the popped dict is the correct one
void pop_dictionary();
/// Return whether @a res_name is a valid and known peptide
@@ -298,7 +321,7 @@ class compound_factory
static std::unique_ptr<compound_factory> s_instance;
static thread_local std::unique_ptr<compound_factory> tl_instance;
static bool s_use_thread_local_instance;
static compound_factory_options s_options;
std::shared_ptr<compound_factory_impl> m_impl;
};
@@ -320,6 +343,9 @@ class compound_factory
* @endcode
*/
// TODO: check if pushed and popped dicts are the same!
class compound_source
{
public:

View File

@@ -128,15 +128,6 @@ class datablock : public std::list<category>
*/
bool is_valid() const;
/**
* @brief Validates the content of this datablock and all its content
* and updates or removes the audit_conform category to match the result.
*
* @return true If the content is valid
* @return false If the content is not valid
*/
bool is_valid();
/**
* @brief Validates all contained data for valid links between parents and children
* as defined in the validator
@@ -146,6 +137,14 @@ class datablock : public std::list<category>
*/
bool validate_links() const;
/**
* @brief Strip removes all categories and items that are invalid according
* to the assigned validator. Will also add a valid audit_conform block.
*
* @return true if the remaining datablock is valid
*/
bool strip();
// --------------------------------------------------------------------
/**
@@ -184,6 +183,15 @@ class datablock : public std::list<category>
*/
const category *get(std::string_view name) const;
/**
* @brief Return true if this datablock contains a non-empty category
*/
bool contains(std::string_view name) const
{
return get(name) != nullptr;
}
/**
* @brief Tries to find a category with name @a name and will create a
* new one if it is not found. The result is a tuple of an iterator

View File

@@ -106,8 +106,6 @@ class atom
atom_impl(const atom_impl &i) = default;
void prefetch();
int compare(const atom_impl &b) const;
// bool getAnisoU(float anisou[6]) const;
@@ -345,7 +343,7 @@ class atom
std::string get_auth_asym_id() const { return get_property("auth_asym_id"); } ///< Return the auth_asym_id property
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); } ///< Return the auth_seq_id property
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); } ///< Return the auth_atom_id property
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); } ///< Return the auth_alt_id property
std::string get_auth_alt_id() const { return get_property("pdbx_auth_alt_id"); } ///< Return the auth_alt_id property
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); } ///< Return the auth_comp_id property
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } ///< Return the pdb_ins_code property
@@ -481,8 +479,8 @@ class residue
, m_compound_id(compoundID)
, m_asym_id(asymID)
, m_seq_id(seqID)
, m_auth_asym_id(authAsymID)
, m_auth_seq_id(authSeqID)
, m_pdb_strand_id(authAsymID)
, m_pdb_seq_num(authSeqID)
, m_pdb_ins_code(pdbInsCode)
{
}
@@ -509,9 +507,9 @@ class residue
const std::string &get_asym_id() const { return m_asym_id; } ///< Return the asym_id
int get_seq_id() const { return m_seq_id; } ///< Return the seq_id
const std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the auth_asym_id
const std::string get_auth_seq_id() const { return m_auth_seq_id; } ///< Return the auth_seq_id
std::string get_pdb_ins_code() const { return m_pdb_ins_code; } ///< Return the pdb_ins_code
const std::string get_pdb_strand_id() const { return m_pdb_strand_id; } ///< Return the pdb_strand_id
const std::string get_pdb_seq_num() const { return m_pdb_seq_num; } ///< Return the pdb_seq_num
std::string get_pdb_ins_code() const { return m_pdb_ins_code; } ///< Return the pdb_ins_code
const std::string &get_compound_id() const { return m_compound_id; } ///< Return the compound_id
void set_compound_id(const std::string &id) { m_compound_id = id; } ///< Set the compound_id to @a id
@@ -580,7 +578,7 @@ class residue
m_seq_id == rhs.m_seq_id and
m_asym_id == rhs.m_asym_id and
m_compound_id == rhs.m_compound_id and
m_auth_seq_id == rhs.m_auth_seq_id);
m_pdb_seq_num == rhs.m_pdb_seq_num);
}
/// @brief Create a new atom and add it to the list
@@ -594,7 +592,7 @@ class residue
structure *m_structure = nullptr;
std::string m_compound_id, m_asym_id;
int m_seq_id = 0;
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
std::string m_pdb_strand_id, m_pdb_seq_num, m_pdb_ins_code;
std::vector<atom> m_atoms;
/** @endcond */
};
@@ -714,15 +712,15 @@ class polymer : public std::vector<monomer>
structure *get_structure() const { return m_structure; } ///< Return the structure
std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the PDB chain ID, actually
std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
std::string get_pdb_strand_id() const { return m_pdb_strand_id; } ///< Return the PDB chain ID, actually
std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
private:
structure *m_structure;
std::string m_entity_id;
std::string m_asym_id;
std::string m_auth_asym_id;
std::string m_pdb_strand_id;
};
// --------------------------------------------------------------------
@@ -760,7 +758,7 @@ class sugar : public residue
int num() const
{
int result;
auto r = std::from_chars(m_auth_seq_id.data(), m_auth_seq_id.data() + m_auth_seq_id.length(), result);
auto r = std::from_chars(m_pdb_seq_num.data(), m_pdb_seq_num.data() + m_pdb_seq_num.length(), result);
if ((bool)r.ec)
throw std::runtime_error("The auth_seq_id should be a number for a sugar");
return result;
@@ -859,19 +857,38 @@ class branch : public std::vector<sugar>
std::string m_asym_id, m_entity_id;
};
// --------------------------------------------------------------------
/// \brief A still very limited set of options for reading structures
enum class StructureOpenOptions
/** @brief Enumeration for controlling atom selection based on occupancy. */
enum class occupancy_policy
{
SkipHydrogen = 1 << 0 ///< Do not include hydrogen atoms in the structure object
/** @brief Include all atoms regardless of their occupancy factor. */
ALL = 0,
/** @brief Select only alternate atoms with the maximum occupancy factor.
* If multiple atoms have the same maximum occupancy, choose the one with the minimum B-factor.
* If multiple atoms share both the maximum occupancy and the minimum B-factor, select the first encountered atom.
*/
MAX = 1,
/** @brief Select only alternate atoms with the minimum occupancy factor.
* Similar to MAX, if multiple atoms have the same minimum occupancy, choose the one with the minimum B-factor.
* If multiple atoms share both the minimum occupancy and the minimum B-factor, select the first encountered atom.
*/
MIN = 2,
/** @brief Exclude all atoms with an occupancy factor greater than zero. */
UNOCCUPIED = 3
};
/// \brief A way to combine two options. Not very useful as there is only one...
constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
struct structure_open_options
{
return static_cast<int>(a) bitand static_cast<int>(b);
}
bool skip_hydrogen = false; ///< Do not include hydrogen atoms in the structure object
bool skip_hetatom = false; ///< Do not include HET atoms in the structure object
bool skip_water = false; ///< Do not include water atoms in the structure object
occupancy_policy occupancy_mode = occupancy_policy::ALL; ///< By default, the occupancy policy is set to occupancy_policy::ALL
std::vector<std::string> asyms; ///< The asyms to load, if empty load all
std::optional<float> min_b_factor; ///< Only load atoms with at least this b_factor
std::optional<float> max_b_factor; ///< Only load atoms with at most this b_factor
};
// --------------------------------------------------------------------
@@ -885,10 +902,10 @@ class structure
{
public:
/// \brief Read the structure from cif::file @a p
structure(file &p, std::size_t modelNr = 1, StructureOpenOptions options = {});
structure(file &p, std::size_t modelNr = 1, structure_open_options options = {});
/// \brief Load the structure from already parsed mmCIF data in @a db
structure(datablock &db, std::size_t modelNr = 1, StructureOpenOptions options = {});
structure(datablock &db, std::size_t modelNr = 1, structure_open_options options = {});
/** @cond */
structure(structure &&s) = default;
@@ -1118,7 +1135,7 @@ class structure
friend polymer;
friend residue;
void load_atoms_for_model(StructureOpenOptions options);
void load_atoms_for_model(structure_open_options options);
std::string insert_compound(const std::string &compoundID, bool is_entity);

View File

@@ -104,6 +104,27 @@ inline void write(const std::filesystem::path &p, const file &f)
// --------------------------------------------------------------------
/**
* @brief Quickly fix a PDB file that lacks some often needed categories
*
* This differs from reconstruct_pdbx which does a much more thorough job
*
* \param pdbx_file The cif::file that hopefully contains some valid data
*/
void fixup_pdbx(file &pdbx_file);
/**
* @brief Quickly fix a PDB file that lacks some often needed categories
*
* This differs from reconstruct_pdbx which does a much more thorough job
*
* \param pdbx_file The cif::file that hopefully contains some valid data
* \param v The validator to use
*/
void fixup_pdbx(file &pdbx_file, const validator &v);
/** \brief Reconstruct all missing categories for an assumed PDBx file.
*
* Some people believe that simply dumping some atom records is enough.

View File

@@ -9,7 +9,7 @@ _datablock.description
#
_dictionary.title mmcif_pdbx.dic
_dictionary.datablock_id mmcif_pdbx.dic
_dictionary.version 5.403
_dictionary.version 5.404
#
loop_
_dictionary_history.version
@@ -3292,6 +3292,18 @@ Changes (ep):
+ Add 'M' to _em_software.name enumeration.
;
5.404 2025-06-01
;
Changes (ep):
+ Add DeepEMhancer to _em_software.name enumeration
+ Add HexAuFoil to _em_sample_support.grid_type enumeration
+ Add "PSI JUNGFRAU 9M" and "PSI JUNGFRAU 10M" detectors to
_diffrn_detector.type
+ Add "N6-benzoyllysine", "N6-isonicotinyllysine",
and "N6-methacryllysine" to enumeration list for
_pdbx_chem_comp_pcm.type and _pdbx_modification_feature.type
;
#
loop_
_sub_category.id
@@ -3838,13 +3850,13 @@ _pdbx_dictionary_component.datablock_id
_pdbx_dictionary_component.dictionary_component_id
_pdbx_dictionary_component.title
_pdbx_dictionary_component.version
mmcif_pdbx-base.dic mmcif_pdbx-base.dic "mmCIF/PDBx base dictionary" 0.39
mmcif_pdbx-base.dic mmcif_pdbx-base.dic "mmCIF/PDBx base dictionary" 0.40
mmcif_xfel_extensions-v3.dic mmcif_xfel_extensions-v3.dic "PDBx/mmCIF XFELDictionary License Extension" 0.0.2
mmcif_pdbx_audit_support-extension.dic mmcif_pdbx_audit_support-extension.dic "mmCIF/PDBx Audit support extension" 0.24
mmcif_pdbx_sifts.dic mmcif_pdbx_sifts.dic "PDBx/mmCIF Dictionary Sifts Extension" 0.0.2
mmcif_pdbx_license.dic mmcif_pdbx_license.dic "PDBx/mmCIF Dictionary License Extension" 0.0.1
initial-model-extension.dic initial-model-extension.dic "PDBx/mmCIF Initial model extension" 0.10
ptm-extension.dic ptm-extension.dic "PDBx/mmCIF PTM extension" 0.10
ptm-extension.dic ptm-extension.dic "PDBx/mmCIF PTM extension" 0.11
#
loop_
_pdbx_dictionary_component_history.dictionary_component_id
@@ -4181,6 +4193,15 @@ Changes (ep):
+ Add 'M' to _em_software.name enumeration.
;
mmcif_pdbx-base.dic 0.40 2025-06-01
;
Changes (ep):
+ Add DeepEMhancer to _em_software.name enumeration
+ Add HexAuFoil to _em_sample_support.grid_type enumeration
+ Add "PSI JUNGFRAU 9M" and "PSI JUNGFRAU 10M" detectors to
_diffrn_detector.type
;
mmcif_xfel_extensions-v3.dic 0.0.1 2023-05-31
;
Changes (ep):
@@ -4606,6 +4627,16 @@ ptm-extension.dic 0.10 2024-11-26
_pdbx_modification_feature.type
;
ptm-extension.dic 0.11 2024-06-01
;
Changes (dh/ep)
+ added "N6-benzoyllysine", "N6-isonicotinyllysine",
and "N6-methacryllysine"
type to enumeration list for
_pdbx_chem_comp_pcm.type
_pdbx_modification_feature.type
;
#
loop_
_pdbx_item_linked_group.category_id
@@ -21911,6 +21942,8 @@ save__diffrn_detector.type
"_diffrn_detector.type" "PSI JUNGFRAU 1M" PIXEL
"_diffrn_detector.type" "PSI JUNGFRAU 4M" PIXEL
"_diffrn_detector.type" "PSI JUNGFRAU 8M" PIXEL
"_diffrn_detector.type" "PSI JUNGFRAU 9M" PIXEL
"_diffrn_detector.type" "PSI JUNGFRAU 10M" PIXEL
"_diffrn_detector.type" "PSI JUNGFRAU 16M" PIXEL
"_diffrn_detector.type" "PSI PILATUS 6M" PIXEL
"_diffrn_detector.type" "RAYONIX MX-225" CCD
@@ -117761,6 +117794,7 @@ save__em_sample_support.grid_type
"_em_sample_support.grid_type" "Quantifoil Active R2/1" .
"_em_sample_support.grid_type" "Quantifoil Active R1.6/0.9" .
"_em_sample_support.grid_type" "Quantifoil Active R1.2/0.8" .
"_em_sample_support.grid_type" HexAuFoil .
"_em_sample_support.grid_type" UltrAuFoil .
"_em_sample_support.grid_type" "UltrAuFoil R0./1" .
"_em_sample_support.grid_type" "UltrAuFoil R1.2/1.3" .
@@ -126147,6 +126181,7 @@ save__em_software.name
"_em_software.name" CTFPHASEFLIP .
"_em_software.name" CTFTILT .
"_em_software.name" DE-IM .
"_em_software.name" DeepEMhancer .
"_em_software.name" DIALS .
"_em_software.name" DigitalMicrograph .
"_em_software.name" DireX .
@@ -168351,6 +168386,9 @@ save__pdbx_chem_comp_pcm.type
Methylsulfanylation .
Methylsulfation .
Myristoylation .
N6-benzoyllysine .
N6-isonicotinyllysine .
N6-methacryllysine .
"N-pyruvic acid 2-iminylation" .
N-methylcarbamoylation .
Nitration .
@@ -169337,6 +169375,9 @@ save__pdbx_modification_feature.type
Methylsulfanylation .
Methylsulfation .
Myristoylation .
N6-benzoyllysine .
N6-isonicotinyllysine .
N6-methacryllysine .
"N-pyruvic acid 2-iminylation" .
N-methylcarbamoylation .
Nitration .

View File

@@ -914,6 +914,24 @@ bool category::validate_links() const
return result;
}
void category::strip()
{
std::vector<std::string> to_be_removed;
for (auto &item : m_items)
{
if (item.m_validator == nullptr)
to_be_removed.push_back(item.m_name);
}
for (auto item : to_be_removed)
{
if (cif::VERBOSE > 0)
std::clog << "Dropping item " << m_name << '.' << item << '\n';
remove_item(item);
}
}
// --------------------------------------------------------------------
row_handle category::operator[](const key_type &key)

View File

@@ -26,6 +26,10 @@
#include "cif++.hpp"
#if HAVE_CURL
# include <curl/curl.h>
#endif
#include <filesystem>
#include <fstream>
#include <map>
@@ -140,7 +144,7 @@ compound::compound(cif::datablock &db)
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
if (one_letter_code.length() == 1)
m_one_letter_code = one_letter_code.front();
@@ -159,7 +163,7 @@ compound::compound(cif::datablock &db)
if (stereo_config.empty())
atom.stereo_config = stereo_config_type::N;
else
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
m_atoms.push_back(std::move(atom));
}
@@ -172,7 +176,7 @@ compound::compound(cif::datablock &db)
if (valueOrder.empty())
bond.type = bond_type::sing;
else
bond.type = parse_bond_type_from_string(valueOrder);
bond.type = parse_bond_type_from_string(valueOrder);
m_bonds.push_back(std::move(bond));
}
}
@@ -231,12 +235,12 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
bool compound::is_peptide() const
{
return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
}
bool compound::is_base() const
{
return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
}
// --------------------------------------------------------------------
@@ -299,7 +303,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
std::shared_lock lock(mMutex);
compound *result = nullptr;
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
{
result = impl->create(id);
@@ -363,7 +367,9 @@ compound *compound_factory_impl::create(const std::string &id)
if (m_missing.contains(id))
return nullptr;
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
{ return c->id() == id; });
i != m_compounds.end())
return *i;
compound *result = nullptr;
@@ -454,7 +460,6 @@ class local_compound_factory_impl : public compound_factory_impl
compound *create(const std::string &id) override;
private:
compound *construct_compound(const datablock &db, const std::string &id, const std::string &name, const std::string &three_letter_code, const std::string &group);
cif::file m_local_file;
@@ -465,7 +470,9 @@ compound *local_compound_factory_impl::create(const std::string &id)
if (m_missing.contains(id))
return nullptr;
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
{ return c->id() == id; });
i != m_compounds.end())
return *i;
compound *result = nullptr;
@@ -507,12 +514,10 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
float formula_weight = 0;
int formal_charge = 0;
std::map<std::string,std::size_t> formula_data;
std::map<std::string, std::size_t> formula_data;
for (std::size_t ord = 1; const auto &[atom_id, type_symbol, type, charge, x, y, z, xi, yi, zi] :
rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int,
std::optional<float>, std::optional<float>, std::optional<float>,
std::optional<float>, std::optional<float>, std::optional<float>>(
rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>>(
"atom_id", "type_symbol", "type", "charge",
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z",
"pdbx_model_Cartn_x_ideal", "pdbx_model_Cartn_y_ideal", "pdbx_model_Cartn_z_ideal"))
@@ -522,16 +527,14 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
formula_data[type_symbol] += 1;
db["chem_comp_atom"].emplace({
{ "comp_id", id },
db["chem_comp_atom"].emplace({ { "comp_id", id },
{ "atom_id", atom_id },
{ "type_symbol", type_symbol },
{ "charge", charge },
{ "model_Cartn_x", x.has_value() ? x : xi, 3 },
{ "model_Cartn_y", y.has_value() ? y : yi, 3 },
{ "model_Cartn_z", z.has_value() ? z : zi, 3 },
{ "pdbx_ordinal", ord++ }
});
{ "model_Cartn_x", x.has_value() ? x : xi, 3 },
{ "model_Cartn_y", y.has_value() ? y : yi, 3 },
{ "model_Cartn_z", z.has_value() ? z : zi, 3 },
{ "pdbx_ordinal", ord++ } });
formal_charge += charge;
}
@@ -548,21 +551,19 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
else if (cif::iequals(type, "triple") or cif::iequals(type, "trip"))
value_order = "TRIP";
db["chem_comp_bond"].emplace({
{ "comp_id", id },
db["chem_comp_bond"].emplace({ { "comp_id", id },
{ "atom_id_1", atom_id_1 },
{ "atom_id_2", atom_id_2 },
{ "value_order", value_order },
{ "pdbx_aromatic_flag", aromatic },
// TODO: fetch stereo_config info from chem_comp_chir
{ "pdbx_ordinal", ord++ }
});
{ "pdbx_ordinal", ord++ } });
}
db.emplace_back(rdb["pdbx_chem_comp_descriptor"]);
std::string formula;
for (bool first = true; const auto &[symbol, count]: formula_data)
for (bool first = true; const auto &[symbol, count] : formula_data)
{
if (std::exchange(first, false))
formula += ' ';
@@ -581,15 +582,13 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
else
type = "NON-POLYMER";
db["chem_comp"].emplace({
{ "id", id },
db["chem_comp"].emplace({ { "id", id },
{ "name", name },
{ "type", type },
{ "formula", formula },
{ "pdbx_formal_charge", formal_charge },
{ "formula_weight", formula_weight },
{ "three_letter_code", three_letter_code }
});
{ "three_letter_code", three_letter_code } });
std::shared_lock lock(mMutex);
@@ -602,11 +601,16 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
std::unique_ptr<compound_factory> compound_factory::s_instance;
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
bool compound_factory::s_use_thread_local_instance;
compound_factory_options compound_factory::s_options;
void compound_factory::init(bool useThreadLocalInstanceOnly)
{
s_use_thread_local_instance = useThreadLocalInstanceOnly;
init({ .use_thread_local_instance_only = useThreadLocalInstanceOnly });
}
void compound_factory::init(compound_factory_options options)
{
s_options = options;
}
compound_factory::compound_factory()
@@ -625,7 +629,7 @@ compound_factory::~compound_factory()
compound_factory &compound_factory::instance()
{
if (s_use_thread_local_instance)
if (s_options.use_thread_local_instance_only)
{
if (not tl_instance)
tl_instance.reset(new compound_factory());
@@ -641,7 +645,7 @@ compound_factory &compound_factory::instance()
void compound_factory::clear()
{
if (s_use_thread_local_instance)
if (s_options.use_thread_local_instance_only)
tl_instance.reset(nullptr);
else
s_instance.reset();
@@ -719,7 +723,7 @@ bool compound_factory::is_peptide(std::string_view res_name) const
bool result = is_std_peptide(res_name);
if (not result and m_impl)
{
auto compound = const_cast<compound_factory&>(*this).create(res_name);
auto compound = const_cast<compound_factory &>(*this).create(res_name);
result = compound != nullptr and compound->is_peptide();
}
return result;
@@ -731,7 +735,7 @@ bool compound_factory::is_base(std::string_view res_name) const
bool result = is_std_base(res_name);
if (not result and m_impl)
{
auto compound = const_cast<compound_factory&>(*this).create(res_name);
auto compound = const_cast<compound_factory &>(*this).create(res_name);
result = compound != nullptr and compound->is_base();
}
return result;

View File

@@ -78,17 +78,41 @@ bool datablock::is_valid() const
return result;
}
bool datablock::is_valid()
bool datablock::validate_links() const
{
if (m_validator == nullptr)
throw std::runtime_error("Validator not specified for datablock data_" + name());
bool result = true;
for (auto &cat : *this)
result = cat.is_valid() and result;
const_cast<category &>(cat).update_links(*this);
for (auto &cat : *this)
result = cat.validate_links() and result;
return result;
}
bool datablock::strip()
{
bool result = true;
// remove all categories that have no validator
erase(std::remove_if(begin(), end(), [](category &c) {
bool result = false;
if (c.get_cat_validator() == nullptr)
{
if (cif::VERBOSE > 0)
std::clog << "Dropping category " << c.name() << '\n';
result = true;
}
return result;
}), end());
// then strip the remaining categories
for (auto &cat : *this)
cat.strip();
// Add or remove the audit_conform block here.
if (result)
if (is_valid())
{
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
@@ -101,22 +125,7 @@ bool datablock::is_valid()
}
}
else
erase(std::find_if(begin(), end(), [](category &cat)
{ return cat.name() == "audit_conform"; }),
end());
return result;
}
bool datablock::validate_links() const
{
bool result = true;
for (auto &cat : *this)
const_cast<category &>(cat).update_links(*this);
for (auto &cat : *this)
result = cat.validate_links() and result;
result = false;
return result;
}

View File

@@ -102,18 +102,18 @@ void atom::atom_impl::set_property(const std::string_view name, const std::strin
r.assign(name, value, true, true);
}
// int atom::atom_impl::compare(const atom_impl &b) const
// {
// int d = m_asym_id.compare(b.m_asym_id);
// if (d == 0)
// d = m_seq_id - b.m_seq_id;
// if (d == 0)
// d = m_auth_seq_id.compare(b.m_auth_seq_id);
// if (d == 0)
// d = mAtom_id.compare(b.mAtom_id);
int atom::atom_impl::compare(const atom_impl &b) const
{
int d = get_property("label_asym_id").compare(b.get_property("label_asym_id"));
if (d == 0)
d = get_property_int("label_seq_id") - b.get_property_int("label_seq_id");
if (d == 0)
d = get_property_int("auth_seq_id") - b.get_property_int("auth_seq_id");
if (d == 0)
d = get_property("label_atom_id").compare(b.get_property("label_atom_id"));
// return d;
// }
return d;
}
// bool atom::atom_impl::getAnisoU(float anisou[6]) const
// {
@@ -149,145 +149,6 @@ int atom::atom_impl::get_charge() const
return formalCharge.value_or(0);
}
// const Compound *atom::atom_impl::compound() const
// {
// if (mCompound == nullptr)
// {
// std::string compID = get_property("label_comp_id");
// mCompound = compound_factory::instance().create(compID);
// }
// return mCompound;
// }
// const std::string atom::atom_impl::get_property(const std::string_view name) const
// {
// for (auto &&[item_name, ref] : mCachedRefs)
// {
// if (item_name == name)
// return ref.as<std::string>();
// }
// mCachedRefs.emplace_back(name, const_cast<Row &>(mRow)[name]);
// return std::get<1>(mCachedRefs.back()).as<std::string>();
// }
// void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
// {
// for (auto &&[item_name, ref] : mCachedRefs)
// {
// if (item_name != name)
// continue;
// ref = value;
// return;
// }
// mCachedRefs.emplace_back(name, mRow[name]);
// std::get<1>(mCachedRefs.back()) = value;
// }
// const Row atom::getRowAniso() const
// {
// auto &db = m_impl->m_db;
// auto cat = db.get("atom_site_anisotrop");
// if (not cat)
// return {};
// else
// return cat->find1(key("id") == m_impl->m_id);
// }
// float atom::uIso() const
// {
// float result;
// if (not get_property<std::string>("U_iso_or_equiv").empty())
// result = get_property<float>("U_iso_or_equiv");
// else if (not get_property<std::string>("B_iso_or_equiv").empty())
// result = get_property<float>("B_iso_or_equiv") / static_cast<float>(8 * kPI * kPI);
// else
// throw std::runtime_error("Missing B_iso or U_iso");
// return result;
// }
// const Compound &atom::compound() const
// {
// auto result = impl().compound();
// if (result == nullptr)
// {
// if (VERBOSE > 0)
// std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << '\n';
// throw std::runtime_error("no compound");
// }
// return *result;
// }
// std::string atom::labelEntityID() const
// {
// return get_property<std::string>("label_entity_id");
// }
// std::string atom::authAtom_id() const
// {
// return get_property<std::string>("auth_atom_id");
// }
// std::string atom::authCompID() const
// {
// return get_property<std::string>("auth_comp_id");
// }
// std::string atom::get_auth_asym_id() const
// {
// return get_property<std::string>("auth_asym_id");
// }
// std::string atom::get_pdb_ins_code() const
// {
// return get_property<std::string>("pdbx_PDB_ins_code");
// }
// std::string atom::pdbxAuthAltID() const
// {
// return get_property<std::string>("pdbx_auth_alt_id");
// }
// void atom::translate(point t)
// {
// auto loc = location();
// loc += t;
// location(loc);
// }
// void atom::rotate(quaternion q)
// {
// auto loc = location();
// loc.rotate(q);
// location(loc);
// }
// void atom::translate_and_rotate(point t, quaternion q)
// {
// auto loc = location();
// loc += t;
// loc.rotate(q);
// location(loc);
// }
// void atom::translate_rotate_and_translate(point t1, quaternion q, point t2)
// {
// auto loc = location();
// loc += t1;
// loc.rotate(q);
// loc += t2;
// location(loc);
// }
std::ostream &operator<<(std::ostream &os, const atom &atom)
{
if (atom.is_water())
@@ -319,8 +180,8 @@ residue::residue(structure &structure, const std::vector<atom> &atoms)
m_compound_id = a.get_label_comp_id();
m_asym_id = a.get_label_asym_id();
m_seq_id = a.get_label_seq_id();
m_auth_asym_id = a.get_auth_asym_id();
m_auth_seq_id = a.get_auth_seq_id();
m_pdb_strand_id = a.get_auth_asym_id();
m_pdb_seq_num = a.get_auth_seq_id();
m_pdb_ins_code = a.get_pdb_ins_code();
for (auto atom : atoms)
@@ -371,10 +232,10 @@ atom residue::create_new_atom(atom_type inType, const std::string &inAtomID, poi
{ "label_alt_id", "." },
{ "label_comp_id", m_compound_id },
{ "label_seq_id", m_seq_id },
{ "auth_asym_id", m_auth_asym_id },
{ "auth_asym_id", m_pdb_strand_id },
{ "auth_atom_id", inAtomID },
{ "auth_comp_id", m_compound_id },
{ "auth_seq_id", m_auth_seq_id },
{ "auth_seq_id", m_pdb_seq_num },
{ "occupancy", 1.0f, 2 },
{ "B_iso_or_equiv", 20.0f },
{ "pdbx_PDB_model_num", m_structure->get_model_nr() },
@@ -541,8 +402,8 @@ std::ostream &operator<<(std::ostream &os, const residue &res)
{
os << res.get_compound_id() << ' ' << res.get_asym_id() << ':' << res.get_seq_id();
if (res.get_auth_asym_id() != res.get_asym_id() or res.get_auth_seq_id() != std::to_string(res.get_seq_id()))
os << " [" << res.get_auth_asym_id() << ':' << res.get_auth_seq_id() << ']';
if (res.get_pdb_strand_id() != res.get_asym_id() or res.get_pdb_seq_num() != std::to_string(res.get_seq_id()))
os << " [" << res.get_pdb_strand_id() << ':' << res.get_pdb_seq_num() << ']';
return os;
}
@@ -551,7 +412,7 @@ std::ostream &operator<<(std::ostream &os, const residue &res)
// monomer
monomer::monomer(const polymer &polymer, std::size_t index, int seqID, const std::string &authSeqID, const std::string &pdbInsCode, const std::string &compoundID)
: residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_auth_asym_id(), authSeqID, pdbInsCode)
: residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_pdb_strand_id(), authSeqID, pdbInsCode)
, m_polymer(&polymer)
, m_index(index)
{
@@ -970,7 +831,7 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
: m_structure(const_cast<structure *>(&s))
, m_entity_id(entityID)
, m_asym_id(asym_id)
, m_auth_asym_id(auth_asym_id)
, m_pdb_strand_id(auth_asym_id)
{
using namespace cif::literals;
@@ -982,12 +843,8 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
for (auto r : poly_seq_scheme.find("asym_id"_key == asym_id))
{
int seqID;
std::optional<int> pdbSeqNum;
std::string compoundID, authSeqID, pdbInsCode;
cif::tie(seqID, authSeqID, compoundID, pdbInsCode, pdbSeqNum) = r.get("seq_id", "auth_seq_num", "mon_id", "pdb_ins_code", "pdb_seq_num");
if (authSeqID.empty() and pdbSeqNum.has_value())
authSeqID = std::to_string(*pdbSeqNum);
std::string compoundID, pdbSeqNum, pdbInsCode;
cif::tie(seqID, pdbSeqNum, compoundID, pdbInsCode) = r.get("seq_id", "pdb_seq_num", "mon_id", "pdb_ins_code");
std::size_t index = size();
@@ -995,11 +852,11 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
if (not ix.count(seqID))
{
ix[seqID] = index;
emplace_back(*this, index, seqID, authSeqID, pdbInsCode, compoundID);
emplace_back(*this, index, seqID, pdbSeqNum, pdbInsCode, compoundID);
}
else if (VERBOSE > 0)
{
monomer m{ *this, index, seqID, authSeqID, pdbInsCode, compoundID };
monomer m{ *this, index, seqID, pdbSeqNum, pdbInsCode, compoundID };
std::cerr << "Dropping alternate residue " << m << '\n';
}
}
@@ -1139,7 +996,7 @@ cif::mm::atom sugar::add_atom(row_initializer atom_info)
atom_info.set_value({ "label_alt_id", "." });
atom_info.set_value({ "auth_asym_id", m_branch->get_asym_id() });
atom_info.set_value({ "auth_comp_id", m_compound_id });
atom_info.set_value({ "auth_seq_id", m_auth_seq_id });
atom_info.set_value({ "auth_seq_id", m_pdb_seq_num });
atom_info.set_value({ "occupancy", 1.0, 2 });
atom_info.set_value({ "B_iso_or_equiv", 30.0, 2 });
atom_info.set_value({ "pdbx_PDB_model_num", 1 });
@@ -1255,12 +1112,12 @@ sugar &branch::construct_sugar(const std::string &compound_id)
{ "mon_id", result.get_compound_id() },
{ "pdb_asym_id", result.get_asym_id() },
{ "pdb_seq_num", result.num() },
{ "pdb_seq_num", result.get_pdb_seq_num() },
{ "pdb_mon_id", result.get_compound_id() },
{ "auth_asym_id", result.get_auth_asym_id() },
{ "auth_asym_id", result.get_pdb_strand_id() },
{ "auth_mon_id", result.get_compound_id() },
{ "auth_seq_num", result.get_auth_seq_id() },
{ "auth_seq_num", result.get_pdb_seq_num() },
{ "hetero", "n" } });
@@ -1303,7 +1160,7 @@ std::string branch::name(const sugar &s) const
for (auto &sn : *this)
{
if (not sn.get_link() or sn.get_link().get_auth_seq_id() != s.get_auth_seq_id())
if (not sn.get_link() or sn.get_link().get_auth_seq_id() != s.get_pdb_seq_num())
continue;
auto n = name(sn) + "-(1-" + sn.get_link().get_label_atom_id().substr(1) + ')';
@@ -1330,19 +1187,19 @@ float branch::weight() const
// --------------------------------------------------------------------
// structure
structure::structure(file &p, std::size_t modelNr, StructureOpenOptions options)
structure::structure(file &p, std::size_t modelNr, structure_open_options options)
: structure(p.front(), modelNr, options)
{
}
structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions options)
structure::structure(datablock &db, std::size_t modelNr, structure_open_options options)
: m_db(db)
, m_model_nr(modelNr)
{
if (db.get_validator() == nullptr)
db.load_dictionary();
auto &atomCat = db["atom_site"];
auto &atom_site = db["atom_site"];
load_atoms_for_model(options);
@@ -1350,7 +1207,7 @@ structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions op
if (m_atoms.empty() and m_model_nr == 1)
{
std::optional<std::size_t> model_nr;
cif::tie(model_nr) = atomCat.front().get("pdbx_PDB_model_num");
cif::tie(model_nr) = atom_site.front().get("pdbx_PDB_model_num");
if (model_nr and *model_nr != m_model_nr)
{
if (VERBOSE > 0)
@@ -1369,42 +1226,133 @@ structure::structure(datablock &db, std::size_t modelNr, StructureOpenOptions op
load_data();
}
void structure::load_atoms_for_model(StructureOpenOptions options)
void structure::load_atoms_for_model(structure_open_options options)
{
using namespace literals;
auto &atomCat = m_db["atom_site"];
auto &atom_site = m_db["atom_site"];
condition c = "pdbx_PDB_model_num"_key == null or "pdbx_PDB_model_num"_key == m_model_nr;
if (options bitand StructureOpenOptions::SkipHydrogen)
c = std::move(c) and ("type_symbol"_key != "H" and "type_symbol"_key != "D");
for (auto id : atomCat.find<std::string>(std::move(c), "id"))
emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
if (options.skip_hydrogen)
c = std::move(c) and (cif::key("type_symbol") != "H" and cif::key("type_symbol") != "D");
if (options.skip_water)
c = std::move(c) and (cif::key("auth_comp_id") != "HOH" and cif::key("auth_comp_id") != "H20" and cif::key("auth_comp_id") != "WAT");
if (options.skip_hetatom)
{
if (options.skip_water)
c = std::move(c) and cif::key("group_PDB") != "HETATM";
else
c = std::move(c) and (cif::key("group_PDB") != "HETATM" or (cif::key("auth_comp_id") == "HOH" or cif::key("auth_comp_id") == "H20" or cif::key("auth_comp_id") == "WAT"));
}
if (options.min_b_factor.has_value())
c = std::move(c) and cif::key("B_iso_or_equiv") >= *options.min_b_factor;
if (options.max_b_factor.has_value())
c = std::move(c) and cif::key("B_iso_or_equiv") <= *options.max_b_factor;
if (not options.asyms.empty())
{
condition tmp_c;
for (auto asym_id : options.asyms)
tmp_c = std::move(tmp_c) or cif::key("label_asym_id") == asym_id;
c = std::move(c) and std::move(tmp_c);
}
if (options.occupancy_mode == occupancy_policy::ALL)
{
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
}
else if (options.occupancy_mode == occupancy_policy::UNOCCUPIED)
{
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
{
auto a = std::make_shared<atom::atom_impl>(m_db, id);
if (a->get_property_float("occupancy") > 0)
continue;
emplace_atom(a);
}
}
else
{
std::vector<cif::mm::atom> atoms;
std::map<std::tuple<std::string,int>, std::map<std::string, float>> alts;
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
{
auto a = atoms.emplace_back(std::make_shared<atom::atom_impl>(m_db, id));
if (a.is_alternate())
{
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
auto alt_id = a.get_label_alt_id();
if (auto i = alts.find(key); i != alts.end())
i->second[alt_id] += a.get_occupancy();
else
alts[key][alt_id] = a.get_occupancy();
}
}
for (auto &&[key, value] : alts)
{
const auto &[asym_id, seq_id] = key;
// select highest occupancy for this residue's alternates
std::string alt_id;
float occupancy = options.occupancy_mode == occupancy_policy::MAX ? 0.f : std::numeric_limits<float>::max();
for (const auto &[alt_key, alt_value] : value)
{
if (options.occupancy_mode == occupancy_policy::MAX)
{
if (occupancy < alt_value)
{
alt_id = alt_key;
occupancy = alt_value;
}
}
else
{
if (occupancy > alt_value)
{
alt_id = alt_key;
occupancy = alt_value;
}
}
}
value.clear();
value.emplace(alt_id, occupancy);
}
for (auto a : atoms)
{
if (a.is_alternate())
{
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
if (alts[key].contains(a.get_label_alt_id()))
emplace_atom(a);
}
else
emplace_atom(a);
}
}
}
// structure::structure(const structure &s)
// : m_db(s.m_db)
// , m_model_nr(s.m_model_nr)
// {
// m_atoms.reserve(s.m_atoms.size());
// for (auto &atom : s.m_atoms)
// emplace_atom(atom.clone());
// load_data();
// }
// structure::~structure()
// {
// }
void structure::load_data()
{
auto &polySeqScheme = m_db["pdbx_poly_seq_scheme"];
for (const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string, std::string, std::string>("asym_id", "pdb_strand_id", "entity_id"))
{
if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id or m_polymers.back().get_entity_id() != entityID)
if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id)
m_polymers.emplace_back(*this, entityID, asym_id, auth_asym_id);
}
@@ -1430,18 +1378,18 @@ void structure::load_data()
for (auto &poly : m_polymers)
{
for (auto &res : poly)
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_pdb_seq_num() }] = &res;
}
for (auto &res : m_non_polymers)
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;
resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_pdb_seq_num() }] = &res;
std::set<std::string> sugars;
for (auto &branch : m_branches)
{
for (auto &sugar : branch)
{
resMap[{ sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id() }] = &sugar;
resMap[{ sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_pdb_seq_num() }] = &sugar;
sugars.insert(sugar.get_compound_id());
}
}
@@ -1516,30 +1464,6 @@ EntityType structure::get_entity_type_for_asym_id(const std::string asym_id) con
return get_entity_type_for_entity_id(entityID);
}
// std::vector<atom> structure::waters() const
// {
// using namespace literals;
// std::vector<atom> result;
// auto &db = datablock();
// // Get the entity id for water. Watch out, structure may not have water at all
// auto &entityCat = db["entity"];
// for (const auto &[waterEntityID] : entityCat.find<std::string>("type"_key == "water", "id"))
// {
// for (auto &a : m_atoms)
// {
// if (a.get_property("label_entity_id") == waterEntityID)
// result.push_back(a);
// }
// break;
// }
// return result;
// }
bool structure::has_atom_id(const std::string &id) const
{
assert(m_atoms.size() == m_atom_index.size());
@@ -1688,7 +1612,7 @@ residue &structure::get_residue(const std::string &asym_id, int seqID, const std
{
for (auto &res : m_non_polymers)
{
if (res.get_asym_id() == asym_id and (authSeqID.empty() or res.get_auth_seq_id() == authSeqID))
if (res.get_asym_id() == asym_id and (authSeqID.empty() or res.get_pdb_seq_num() == authSeqID))
return res;
}
}
@@ -1712,7 +1636,7 @@ residue &structure::get_residue(const std::string &asym_id, int seqID, const std
for (auto &sugar : branch)
{
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID)
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == authSeqID)
return sugar;
}
}
@@ -1734,7 +1658,7 @@ residue &structure::get_residue(const std::string &asym_id, const std::string &c
{
for (auto &res : m_non_polymers)
{
if (res.get_asym_id() == asym_id and res.get_auth_seq_id() == authSeqID and res.get_compound_id() == compID)
if (res.get_asym_id() == asym_id and res.get_pdb_seq_num() == authSeqID and res.get_compound_id() == compID)
return res;
}
}
@@ -1758,7 +1682,7 @@ residue &structure::get_residue(const std::string &asym_id, const std::string &c
for (auto &sugar : branch)
{
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID and sugar.get_compound_id() == compID)
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == authSeqID and sugar.get_compound_id() == compID)
return sugar;
}
}
@@ -2108,7 +2032,7 @@ void structure::remove_residue(const std::string &asym_id, int seq_id, const std
{
for (auto &res : m_non_polymers)
{
if (res.get_asym_id() == asym_id and (auth_seq_id.empty() or res.get_auth_seq_id() == auth_seq_id))
if (res.get_asym_id() == asym_id and (auth_seq_id.empty() or res.get_pdb_seq_num() == auth_seq_id))
{
remove_residue(res);
return;
@@ -2138,7 +2062,7 @@ void structure::remove_residue(const std::string &asym_id, int seq_id, const std
for (auto &sugar : branch)
{
if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == auth_seq_id)
if (sugar.get_asym_id() == asym_id and sugar.get_pdb_seq_num() == auth_seq_id)
{
remove_residue(sugar);
return;
@@ -2271,7 +2195,7 @@ void structure::remove_sugar(sugar &s)
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", sugar.get_compound_id() },
{ "auth_seq_num", sugar.get_auth_seq_id() },
{ "auth_seq_num", sugar.get_pdb_seq_num() },
{ "hetero", "n" } });
}
@@ -2357,8 +2281,8 @@ std::string structure::create_non_poly(const std::string &entity_id, const std::
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.get_auth_seq_id() },
{ "auth_seq_num", res.get_auth_seq_id() },
{ "pdb_seq_num", res.get_pdb_seq_num() },
{ "auth_seq_num", res.get_pdb_seq_num() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
@@ -2418,8 +2342,8 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.get_auth_seq_id() },
{ "auth_seq_num", res.get_auth_seq_id() },
{ "pdb_seq_num", res.get_pdb_seq_num() },
{ "auth_seq_num", res.get_pdb_seq_num() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
@@ -2733,11 +2657,11 @@ std::string structure::create_entity_for_branch(branch &branch)
pdbx_entity_branch_link.emplace({ { "link_id", pdbx_entity_branch_link.get_unique_id("") },
{ "entity_id", entityID },
{ "entity_branch_list_num_1", s1.get_auth_seq_id() },
{ "entity_branch_list_num_1", s1.get_pdb_seq_num() },
{ "comp_id_1", s1.get_compound_id() },
{ "atom_id_1", l1.get_label_atom_id() },
{ "leaving_atom_id_1", "O1" },
{ "entity_branch_list_num_2", s2.get_auth_seq_id() },
{ "entity_branch_list_num_2", s2.get_pdb_seq_num() },
{ "comp_id_2", s2.get_compound_id() },
{ "atom_id_2", l2.get_label_atom_id() },
{ "leaving_atom_id_2", "H" + l2.get_label_atom_id() },

View File

@@ -6419,15 +6419,24 @@ file read(std::istream &is)
std::throw_with_nested(std::runtime_error("Since the file did not start with a valid PDB HEADER line mmCIF was assumed, but that failed."));
}
// Try to see if we can create an mm::structure out of this data.
// If that fails, we need to reconstruct a PDBx file out of it.
try
if (not(result.empty() or result.front().empty()))
{
cif::mm::structure s(result);
}
catch (const std::exception &e)
{
reconstruct_pdbx(result);
if (auto &db = result.front(); db.get("audit_conform") == nullptr)
reconstruct_pdbx(result);
else
{
try
{
// Try to see if we can create an mm::structure out of this data.
// If that fails, we need to reconstruct a PDBx file out of it.
cif::mm::structure s(result);
}
catch (const std::exception &e)
{
reconstruct_pdbx(result);
}
}
}
}
}

View File

@@ -1554,4 +1554,92 @@ bool reconstruct_pdbx(file &file, const validator &validator)
return valid and is_valid_pdbx_file(file, validator);
}
// --------------------------------------------------------------------
/// Fix up the PDBx data in @a file using a validator that is looked up
/// automatically: the one described by the audit_conform category of the
/// first datablock if that category exists, the one for mmcif_pdbx.dic
/// otherwise.
///
/// Throws std::runtime_error when @a file contains no datablocks.
void fixup_pdbx(file &file)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	auto audit_conform = file.front().get("audit_conform");

	// No audit_conform record, fall back to the default dictionary
	if (audit_conform == nullptr)
	{
		fixup_pdbx(file, validator_factory::instance().get("mmcif_pdbx.dic"));
		return;
	}

	fixup_pdbx(file, validator_factory::instance().get(*audit_conform));
}
/// Fix up the PDBx data in the first datablock of @a file and attach
/// @a validator to that datablock.
///
/// The steps, in order: make sure there is an entry record, check the
/// chem_comp records, create entity IDs when no atom has one, check the
/// atom records, set the validator and finally create the struct_asym,
/// entity and scheme categories that are missing.
///
/// Throws std::runtime_error when @a file is empty, when the atom_site
/// category is missing or empty, or when the entry category does not
/// contain exactly one record.
void fixup_pdbx(file &file, const validator &validator)
{
	if (file.empty())
		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");

	// assuming the first datablock contains the entry ...
	auto &db = file.front();

	// ... and any additional datablock will contain compound information
	// NOTE(review): cs is not referenced below, presumably it makes these
	// compounds available for as long as it is in scope -- confirm
	cif::compound_source cs(file);

	if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

	// Phenix files do not have an entry record
	if (auto cat = db.get("entry"); cat == nullptr or cat->empty())
	{
		// use the name of the datablock as entry ID
		const std::string entry_id = db.name();

		category entry("entry");
		entry.emplace({ { "id", entry_id } });
		db.emplace_back(std::move(entry));
	}
	else if (cat->size() != 1)
		throw std::runtime_error("Unexpected size of entry category");

	// Start with chem_comp, it is often missing many fields
	// that can easily be filled in.
	checkChemCompRecords(db);

	// If the data is really horrible, it might not contain entities
	if (not db["atom_site"].find_first(key("label_entity_id") != null))
		createEntityIDs(db);

	// Now see if atom records make sense at all
	checkAtomRecords(db);

	db["chem_comp"].reorder_by_index();

	// From here on the datablock has a validator
	db.set_validator(&validator);

	// Now create any missing categories

	// Next make sure we have struct_asym records
	if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
		createStructAsym(db);

	if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
		createEntity(db);

	if (auto cat = db.get("pdbx_poly_seq_scheme"); cat == nullptr or cat->empty())
		createPdbxPolySeqScheme(db);

	if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
		comparePolySeqSchemes(db);

	createPdbxNonpolyScheme(db);

	// Create a minimal set of branch records
	createPdbxBranchScheme(db);

	// fill in missing formula_weight, e.g.
	checkEntities(db);

	// That's it
}
} // namespace cif::pdb

View File

@@ -552,6 +552,9 @@ const validator &validator_factory::get(const category &audit_conform)
validator validator_factory::construct_validator(std::string_view name, std::optional<std::string> version)
{
auto data = load_resource(name);
if (not data and name == "mmcif_pdbx_v50")
data = load_resource("mmcif_pdbx.dic");
if (not data)
throw std::runtime_error("Could not load dictionary " + std::string{ name });

View File

@@ -28,7 +28,8 @@ list(
sugar
spinner
# reconstruction
validate-pdbx)
validate-pdbx
)
add_library(test-main OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/test-main.cpp")

View File

@@ -431,3 +431,169 @@ TEST_CASE("remove_residue_1")
REQUIRE_NOTHROW(s.validate_atoms());
}
// --------------------------------------------------------------------
// Tests for structure_open_options
// Each section opens model 1 of the 1cbs example with a single
// structure_open_options field set and checks that none of the
// loaded atoms violates the requested filter.
TEST_CASE("options_1")
{
	using namespace cif::literals;

	const std::filesystem::path cbs_path(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file cbs_file(cbs_path.string());

	auto &factory = cif::compound_factory::instance();

	SECTION("skip_water")
	{
		cif::mm::structure structure(cbs_file, 1, { .skip_water = true });
		REQUIRE_NOTHROW(structure.validate_atoms());

		for (auto atom : structure.atoms())
			CHECK_FALSE(atom.is_water());
	}

	SECTION("skip_hetatom")
	{
		cif::mm::structure structure(cbs_file, 1, { .skip_hetatom = true });
		REQUIRE_NOTHROW(structure.validate_atoms());

		// what is left should be water, peptide or base
		for (auto atom : structure.atoms())
			CHECK((atom.is_water() or factory.is_peptide(atom.get_label_comp_id()) or factory.is_base(atom.get_label_comp_id())));
	}

	SECTION("selected_asyms")
	{
		cif::mm::structure structure(cbs_file, 1, { .asyms = { "A" } });
		REQUIRE_NOTHROW(structure.validate_atoms());

		for (auto atom : structure.atoms())
			CHECK(atom.get_label_asym_id() == "A");
	}

	SECTION("min-b-factor")
	{
		cif::mm::structure structure(cbs_file, 1, { .min_b_factor = 20.f });
		REQUIRE_NOTHROW(structure.validate_atoms());

		// the lower bound is inclusive
		for (auto atom : structure.atoms())
			CHECK(atom.get_property_float("B_iso_or_equiv") >= 20.f);
	}

	SECTION("max-b-factor")
	{
		cif::mm::structure structure(cbs_file, 1, { .max_b_factor = 20.f });
		REQUIRE_NOTHROW(structure.validate_atoms());

		// the upper bound is inclusive
		for (auto atom : structure.atoms())
			CHECK(atom.get_property_float("B_iso_or_equiv") <= 20.f);
	}
}
// Checks structure_open_options::occupancy_mode on a minimal in-memory file.
// The fixture holds a single HEM residue whose four atoms (CHA, CHB, CHC, CHD)
// each exist in two alternates: alt A with occupancy 0.75 and alt B with
// occupancy 0.25. The MAX and MIN sections verify every retained atom, not
// only the first one, so a wrong alternate anywhere in the list is reported.
TEST_CASE("options_2")
{
	auto data = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A A A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 0.75 7.67 ? 1 HEM CHA 1
3 A A A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 0.75 7.05 ? 1 HEM CHB 1
2 A A A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 0.75 7.69 ? 1 HEM CHC 1
4 A A A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 0.75 8.00 ? 1 HEM CHD 1
5 A B A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 0.25 7.67 ? 1 HEM CHA 1
6 A B A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 0.25 7.05 ? 1 HEM CHB 1
7 A B A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 0.25 7.69 ? 1 HEM CHC 1
8 A B A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 0.25 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;

	// Attach the mmcif_pdbx dictionary to the parsed datablock before building structures.
	data.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));

	SECTION("max")
	{
		cif::mm::structure s(data, 1, {
			.occupancy_mode = cif::mm::occupancy_policy::MAX
		});

		// Expect one alternate per atom, and for each the 0.75-occupancy alt A.
		REQUIRE(s.atoms().size() == 4);
		for (const auto &atom : s.atoms())
			CHECK(atom.get_label_alt_id() == "A");
	}

	SECTION("min")
	{
		cif::mm::structure s(data, 1, {
			.occupancy_mode = cif::mm::occupancy_policy::MIN
		});

		// Expect one alternate per atom, and for each the 0.25-occupancy alt B.
		REQUIRE(s.atoms().size() == 4);
		for (const auto &atom : s.atoms())
			CHECK(atom.get_label_alt_id() == "B");
	}

	SECTION("unoccupied")
	{
		cif::mm::structure s(data, 1, {
			.occupancy_mode = cif::mm::occupancy_policy::UNOCCUPIED
		});

		// Every atom in the fixture has a non-zero occupancy; the test expects
		// this policy to retain none of them.
		CHECK(s.atoms().empty());
	}
}