Compare commits

...

48 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
d638d634ba Remove gxrio and replace it with stripped down version 2022-11-09 14:23:51 +01:00
Maarten L. Hekkelman
35196789e0 updated makefile, use system regex if good enough 2022-11-09 10:33:44 +01:00
Maarten L. Hekkelman
e907ce6c29 exclude regex from tar 2022-11-09 09:19:39 +01:00
Maarten L. Hekkelman
b80bc20d17 Use system installed boost headers for regex, when available 2022-11-09 08:54:56 +01:00
Maarten L. Hekkelman
3a87eaa435 include utility
exclude from all for gxrio
2022-11-08 15:46:03 +01:00
Maarten L. Hekkelman
143eb57f04 Removed remaining mrc_add_resources call 2022-11-08 15:38:06 +01:00
Maarten L. Hekkelman
6cc550bf18 oops 2022-11-08 15:20:29 +01:00
Maarten L. Hekkelman
7f5336661b Error reporting 2022-11-08 14:53:15 +01:00
Maarten L. Hekkelman
e44539ef2c Checks before building indices. Better error reporting 2022-11-08 13:45:06 +01:00
Maarten L. Hekkelman
a2f5850173 avoid crash on empty branches 2022-11-08 09:29:15 +01:00
Maarten L. Hekkelman
283f4883f7 Fix makefile 2022-11-08 08:43:49 +01:00
Maarten L. Hekkelman
ce9842f671 Fix in PDB export
Better add_git_submodule
2022-11-08 08:28:02 +01:00
Maarten L. Hekkelman
b784433fd7 less verbose parser 2022-11-07 17:00:49 +01:00
Maarten L. Hekkelman
8c064e7c0a version bump 2022-11-07 12:37:01 +01:00
Maarten L. Hekkelman
c15a8bd127 export source tarballs 2022-11-07 12:32:14 +01:00
Maarten L. Hekkelman
64e40e7b31 Fix writing PDB CISPEP records
Better checking for open files
More verbose parser
2022-11-07 11:06:06 +01:00
Maarten L. Hekkelman
06d254e0de Revert "Use system version of boost regex, when available"
This reverts commit eaa342ca32.
2022-11-03 16:52:09 +01:00
Maarten L. Hekkelman
eaa342ca32 Use system version of boost regex, when available 2022-11-03 16:36:14 +01:00
Maarten L. Hekkelman
782f7c467b Support for cifv1.0 (empty category names) 2022-11-03 15:48:58 +01:00
Maarten L. Hekkelman
c45d02cb70 sigh 2022-11-03 13:09:04 +01:00
Maarten L. Hekkelman
5b4c131eea More verbose 2022-11-03 13:00:11 +01:00
Maarten L. Hekkelman
bbe71af99e Accept invalid CCD component files, for now 2022-11-03 12:18:26 +01:00
Maarten L. Hekkelman
49912d019f Better error reporting 2022-11-03 11:43:57 +01:00
Maarten L. Hekkelman
d4758e09d7 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-03 09:37:38 +01:00
Maarten L. Hekkelman
f0a913cc07 Fixes for deuterium...
Fixes for sugar branches
2022-11-03 09:37:31 +01:00
Maarten L. Hekkelman
8b0b8e3688 removed submodule 2022-11-01 16:52:42 +01:00
Maarten L. Hekkelman
de4fc8a015 Added necessary include 2022-11-01 16:50:54 +01:00
Maarten L. Hekkelman
bf1e56ec53 Another attempt 2022-11-01 16:29:59 +01:00
Maarten L. Hekkelman
040b4e4ff9 clean up 2022-11-01 14:54:26 +01:00
Maarten L. Hekkelman
4666ee3145 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 14:54:10 +01:00
Maarten L. Hekkelman
2958c56a92 change parser to use streambuf directly 2022-11-01 14:53:17 +01:00
Maarten L. Hekkelman
9cff8768ab Merge branch 'potential-performance-gain' into trunk 2022-11-01 13:41:06 +01:00
Maarten L. Hekkelman
cc671b8006 fixes in numeric conversions 2022-11-01 13:41:01 +01:00
Maarten L. Hekkelman
728abe6d0e less verbose pdb2cif 2022-11-01 12:11:04 +01:00
Maarten L. Hekkelman
7b8f3f2538 optimise retract buffer 2022-11-01 11:56:18 +01:00
Maarten L. Hekkelman
98db98f916 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 09:54:20 +01:00
Maarten L. Hekkelman
96a67b23ca Fix loading dictionaries 2022-11-01 09:53:36 +01:00
Maarten L. Hekkelman
2c3d7542e5 no submodule for gxrio 2022-11-01 08:45:52 +01:00
Maarten L. Hekkelman
f84d83b723 Add gxrio dependency again 2022-10-31 10:50:53 +01:00
Maarten L. Hekkelman
b1837ba029 for freebsd 2022-10-31 10:35:28 +01:00
Maarten L. Hekkelman
260438fa44 fix for meta project 2022-10-30 19:51:29 +01:00
Maarten L. Hekkelman
23d82beb04 Fix version string 2022-10-30 13:02:53 +01:00
Maarten L. Hekkelman
19db5d736b Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:14:16 +01:00
Maarten L. Hekkelman
6946c40657 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:12:30 +01:00
Maarten L. Hekkelman
bd3723ee20 Do not crash on empty rows (find result) 2022-10-30 11:12:19 +01:00
Maarten L. Hekkelman
1f078d4827 update for meta project 2022-10-30 10:28:01 +01:00
Maarten L. Hekkelman
3c62a38667 write out PDB files 2022-10-28 16:13:33 +02:00
Maarten L. Hekkelman
7ffda74e3d pdb2cif, avoid duplicate key violation on REMARK 350 2022-10-26 16:43:21 +02:00
35 changed files with 109097 additions and 89443 deletions

11
.gitignore vendored
View File

@@ -1,17 +1,8 @@
build/
.vscode/
.vs/
.pc/
tools/symop-map-generator
test/unit-test
test/pdb2cif-test
test/rename-compound-test
tools/update-libcifpp-data
data/components.cif*
CMakeSettings.json
msvc/
Testing/
rsrc/feature-request.txt
test/test-create_sugar_?.cif
test/oprofile_data/
test/perf.data*
src/revision.hpp

View File

@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 5.0.0 LANGUAGES CXX)
project(cifpp VERSION 5.0.2 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -35,8 +35,6 @@ include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(CheckCXXSourceCompiles)
# include(Dart)
include(AddGitSubmodule)
set(CXX_EXTENSIONS OFF)
@@ -63,7 +61,7 @@ option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installat
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX)
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
endif()
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
@@ -135,8 +133,21 @@ if(GXX_LIBSTDCPP)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will try to use boost::regex instead")
set(BOOST_REGEX_STANDALONE ON)
add_git_submodule(regex EXCLUDE_FROM_ALL)
find_package(Boost COMPONENTS regex QUIET)
if(Boost_FOUND)
if(Boost_VERSION VERSION_GREATER_EQUAL 1.77)
set(BOOST_REGEX_STANDALONE ON)
set(BOOST_REGEX_INCLUDE ${Boost_INCLUDE_DIRS})
else()
list(APPEND CIFPP_REQUIRED_LIBRARIES Boost::regex)
set(BOOST_REGEX_SYSTEM ON)
endif()
else()
add_git_submodule("https://github.com/boostorg/regex" regex EXCLUDE_FROM_ALL)
set(BOOST_REGEX_STANDALONE ON)
set(BOOST_REGEX_INCLUDE regex/include)
endif()
endif()
endif()
@@ -144,9 +155,7 @@ set(CMAKE_THREAD_PREFER_PTHREAD)
set(THREADS_PREFER_PTHREAD_FLAG)
find_package(Threads)
add_git_submodule(gxrio EXCLUDE_FROM_ALL)
find_package(ZLIB REQUIRED)
find_package(LibLZMA REQUIRED)
include(FindFilesystem)
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})
@@ -156,12 +165,12 @@ list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
# Create a revision file, containing the current git version info
include(VersionString)
write_version_header("LibCIFPP")
write_version_header(${PROJECT_SOURCE_DIR}/src/ "LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
# The tool to create the table
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/tools/symop-map-generator.cpp")
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
@@ -229,7 +238,7 @@ set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
)
)
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_library(cifpp::cifpp ALIAS cifpp)
@@ -237,7 +246,9 @@ add_library(cifpp::cifpp ALIAS cifpp)
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(BOOST_REGEX_STANDALONE)
target_compile_definitions(cifpp PUBLIC USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
elseif(BOOST_REGEX_SYSTEM)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1)
endif()
target_include_directories(cifpp
@@ -246,17 +257,10 @@ target_include_directories(cifpp
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
target_link_libraries(cifpp PUBLIC
Threads::Threads
ZLIB::ZLIB
LibLZMA::LibLZMA
${CIFPP_REQUIRED_LIBRARIES})
get_target_property(GXRIO_INCLUDE_DIR gxrio::gxrio INTERFACE_INCLUDE_DIRECTORIES)
target_include_directories(cifpp PRIVATE ${GXRIO_INCLUDE_DIR})
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
if(BOOST_REGEX_STANDALONE)
target_include_directories(cifpp PRIVATE regex/include)
target_include_directories(cifpp PRIVATE ${BOOST_REGEX_INCLUDE})
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
@@ -343,8 +347,14 @@ install(FILES
DESTINATION ${CIFPP_DATA_DIR}
)
# Select the package config template that gets installed. Consumers only have
# to locate Boost themselves when cifpp links the compiled Boost::regex library
# (BOOST_REGEX_SYSTEM). The header-only standalone regex is a private include
# and a plain std::regex build uses no Boost at all, so both of those use the
# default template without the Boost dependency.
if(BOOST_REGEX_SYSTEM)
	set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig-boost-regex.cmake.in)
else()
	set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)
endif()
configure_package_config_file(
${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in
${CONFIG_TEMPLATE_FILE}
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
@@ -388,7 +398,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTAL
if(ENABLE_TESTING)
enable_testing()
find_package(Boost REQUIRED headers)
find_package(Boost REQUIRED)
list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar
)
@@ -399,11 +409,7 @@ if(ENABLE_TESTING)
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE})
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Boost::headers)
if(CIFPP_USE_RSRC)
mrc_target_resources(${CIFPP_TEST} ${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic)
endif()
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Boost::boost)
if(MSVC)
# Specify unwind semantics so that MSVC knows how to handle exceptions
@@ -448,3 +454,13 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif()
# Packaging configuration; all variables must be set before include(CPack) below.
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
# Source package generators: only the gzipped tarball is switched on.
set(CPACK_SOURCE_TGZ ON)
set(CPACK_SOURCE_TBZ2 OFF)
set(CPACK_SOURCE_TXZ OFF)
set(CPACK_SOURCE_TZ OFF)
# Patterns for paths that are left out of the source package.
set(CPACK_SOURCE_IGNORE_FILES "/data/components.cif;/build;/.vscode;/.git;/regex")
# Binary and source packages share the same <name>-<version> base file name.
set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME})
include(CPack)

View File

@@ -1,3 +1,11 @@
Version 5.0.2
- Fix export of CISPEP records in PDB format
- Better support for exporting package_source
Version 5.0.1
- Fix loading dictionaries
- Support for cifv1.0 files
Version 5.0.0
- Total rewrite of cif part
- Removed DSSP code, moved into dssp project itself

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.16..3.19)
function(add_git_submodule dir)
function(add_git_submodule repo dir)
# add a Git submodule directory to CMake, assuming the
# Git submodule directory is a CMake project.
#
@@ -8,20 +8,33 @@ function(add_git_submodule dir)
#
# include(AddGitSubmodule.cmake)
# add_git_submodule(mysubmod_dir)
find_package(Git REQUIRED)
find_package(Git QUIET)
if(NOT EXISTS ${dir}/CMakeLists.txt)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND_ERROR_IS_FATAL ANY)
if(NOT EXISTS "${PROJECT_SOURCE_DIR}/${dir}/CMakeLists.txt")
if(NOT(GIT_FOUND))
message(FATAL_ERROR "${CMAKE_CURRENT_SOURCE_DIR} is not a git repository and the submodule ${dir} is not complete. Cannot continue.")
elseif(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.git") # We're in a git repo, we can use submodules
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND_ERROR_IS_FATAL ANY)
else()
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
endif()
else()
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
execute_process(COMMAND ${GIT_EXECUTABLE} clone "${repo}" --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND_ERROR_IS_FATAL ANY)
else()
execute_process(COMMAND ${GIT_EXECUTABLE} clone "${repo}" --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
endif()
endif()
endif()
set(ENABLE_TESTING OFF)
add_subdirectory(${dir} ${ARGV})
add_subdirectory(${dir} ${ARGV2})
endfunction(add_git_submodule)

View File

@@ -25,7 +25,12 @@
cmake_minimum_required(VERSION 3.15)
# Create a revision file, containing the current git version info, if any
function(write_version_header)
function(write_version_header dir)
# parameter check
if(NOT IS_DIRECTORY ${dir})
message(FATAL_ERROR "First parameter to write_version_header should be a directory where the final revision.hpp file will be placed")
endif()
include(GetGitRevisionDescription)
if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND))
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
@@ -39,17 +44,16 @@ function(write_version_header)
endif()
endif()
else()
set(BUILD_VERSION_STRING "no git info available")
message(WARNING "no git info available, cannot update version string")
endif()
include_directories(${CMAKE_BINARY_DIR} PRIVATE)
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
if(ARGC GREATER 0)
set(VAR_PREFIX "${ARGV0}")
if(ARGC GREATER 1)
set(VAR_PREFIX "${ARGV1}")
endif()
file(WRITE "${CMAKE_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
file(WRITE "${PROJECT_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
#pragma once
@@ -72,6 +76,6 @@ inline void write_version_string(std::ostream &os, bool verbose)
}
}
]])
configure_file("${CMAKE_BINARY_DIR}/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
configure_file("${PROJECT_BINARY_DIR}/revision.hpp.in" "${dir}/revision.hpp" @ONLY)
endfunction()

View File

@@ -0,0 +1,16 @@
@PACKAGE_INIT@

# Dependencies of the exported cifpp targets. find_dependency() forwards the
# QUIET/REQUIRED arguments of the calling find_package(cifpp ...), so they are
# not repeated here: a missing dependency then marks cifpp as not found
# instead of aborting a lookup that the consumer declared optional.
include(CMakeFindDependencyMacro)
find_dependency(Threads)
find_dependency(ZLIB)
# NOTE(review): confirm LibLZMA is still part of the link interface; drop this
# line if it is not.
find_dependency(LibLZMA)
# This variant of the config file is meant for builds that link the compiled
# Boost regex library.
find_dependency(Boost COMPONENTS regex)

include("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")

# Installed locations, validated to exist by set_and_check()
set_and_check(CIFPP_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(CIFPP_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@")
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_SHARE_INSTALL_DIR@")

check_required_components(cifpp)

View File

@@ -2579,4 +2579,137 @@ NAG "Other modification" 2019-12-19 RCSB
NAG "Other modification" 2020-07-03 RCSB
NAG "Modify name" 2020-07-17 RCSB
NAG "Modify synonyms" 2020-07-17 RCSB
##
##
data_HIS
#
_chem_comp.id HIS
_chem_comp.name HISTIDINE
_chem_comp.type "L-PEPTIDE LINKING"
_chem_comp.pdbx_type ATOMP
_chem_comp.formula "C6 H10 N3 O2"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 1
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2011-06-04
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces ?
_chem_comp.formula_weight 156.162
_chem_comp.one_letter_code H
_chem_comp.three_letter_code HIS
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details "OpenEye/OEToolkits V1.4.2"
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code ?
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site EBI
#
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
HIS N N N 0 1 N N N 33.472 42.685 -4.610 -0.040 -1.210 0.053 N HIS 1
HIS CA CA C 0 1 N N S 33.414 41.686 -5.673 1.172 -1.709 0.652 CA HIS 2
HIS C C C 0 1 N N N 33.773 42.279 -7.040 1.083 -3.207 0.905 C HIS 3
HIS O O O 0 1 N N N 33.497 43.444 -7.337 0.040 -3.770 1.222 O HIS 4
HIS CB CB C 0 1 N N N 32.005 41.080 -5.734 1.484 -0.975 1.962 CB HIS 5
HIS CG CG C 0 1 Y N N 31.888 39.902 -6.651 2.940 -1.060 2.353 CG HIS 6
HIS ND1 ND1 N 1 1 Y N N 32.539 38.710 -6.414 3.380 -2.075 3.129 ND1 HIS 7
HIS CD2 CD2 C 0 1 Y N N 31.199 39.734 -7.804 3.960 -0.251 2.046 CD2 HIS 8
HIS CE1 CE1 C 0 1 Y N N 32.251 37.857 -7.382 4.693 -1.908 3.317 CE1 HIS 9
HIS NE2 NE2 N 0 1 Y N N 31.439 38.453 -8.237 5.058 -0.801 2.662 NE2 HIS 10
HIS OXT OXT O 0 1 N Y N 34.382 41.455 -7.879 2.247 -3.882 0.744 OXT HIS 11
HIS H H H 0 1 N N N 33.485 42.227 -3.721 -0.102 -1.155 -0.950 H HIS 12
HIS H2 HN2 H 0 1 N Y N 34.301 43.234 -4.714 -0.715 -0.741 0.634 H2 HIS 13
HIS HA HA H 0 1 N N N 34.155 40.908 -5.439 1.965 -1.558 -0.089 HA HIS 14
HIS HB2 1HB H 0 1 N N N 31.733 40.750 -4.721 1.215 0.087 1.879 HB2 HIS 15
HIS HB3 2HB H 0 1 N N N 31.337 41.860 -6.127 0.859 -1.368 2.775 HB3 HIS 16
HIS HD1 HD1 H 0 1 N N N 33.135 38.521 -5.633 2.828 -2.838 3.511 HD1 HIS 17
HIS HD2 HD2 H 0 1 N N N 30.577 40.470 -8.292 4.108 0.647 1.479 HD2 HIS 18
HIS HE1 HE1 H 0 1 N N N 32.618 36.844 -7.461 5.340 -2.550 3.892 HE1 HIS 19
HIS HE2 HE2 H 0 1 N N N 31.061 38.039 -9.065 6.002 -0.428 2.627 HE2 HIS 20
HIS HXT HXT H 0 1 N Y N 34.553 41.905 -8.698 2.188 -4.848 0.901 HXT HIS 21
#
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
HIS N CA SING N N 1
HIS N H SING N N 2
HIS N H2 SING N N 3
HIS CA C SING N N 4
HIS CA CB SING N N 5
HIS CA HA SING N N 6
HIS C O DOUB N N 7
HIS C OXT SING N N 8
HIS CB CG SING N N 9
HIS CB HB2 SING N N 10
HIS CB HB3 SING N N 11
HIS CG ND1 SING Y N 12
HIS CG CD2 DOUB Y N 13
HIS ND1 CE1 DOUB Y N 14
HIS ND1 HD1 SING N N 15
HIS CD2 NE2 SING Y N 16
HIS CD2 HD2 SING N N 17
HIS CE1 NE2 SING Y N 18
HIS CE1 HE1 SING N N 19
HIS NE2 HE2 SING N N 20
HIS OXT HXT SING N N 21
#
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
HIS SMILES ACDLabs 10.04 "O=C(O)C(N)Cc1cnc[nH+]1"
HIS SMILES_CANONICAL CACTVS 3.341 "N[C@@H](Cc1c[nH]c[nH+]1)C(O)=O"
HIS SMILES CACTVS 3.341 "N[CH](Cc1c[nH]c[nH+]1)C(O)=O"
HIS SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "c1c([nH+]c[nH]1)C[C@@H](C(=O)O)N"
HIS SMILES "OpenEye OEToolkits" 1.5.0 "c1c([nH+]c[nH]1)CC(C(=O)O)N"
HIS InChI InChI 1.03 "InChI=1S/C6H9N3O2/c7-5(6(10)11)1-4-2-8-3-9-4/h2-3,5H,1,7H2,(H,8,9)(H,10,11)/p+1/t5-/m0/s1"
HIS InChIKey InChI 1.03 HNDVDQJCIGZPNO-YFKPBYRVSA-O
#
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
HIS "SYSTEMATIC NAME" ACDLabs 10.04 "3-(1H-imidazol-3-ium-4-yl)-L-alanine"
HIS "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 "(2S)-2-amino-3-(1H-imidazol-3-ium-4-yl)propanoic acid"
#
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
HIS "Create component" 1999-07-08 EBI
HIS "Modify descriptor" 2011-06-04 RCSB
#

1
gxrio

Submodule gxrio deleted from a7bb5b5c4b

View File

@@ -166,7 +166,7 @@ enum atom_type : uint8_t
No = 102, // Nobel­ium
Lr = 103, // Lawren­cium
D = 129, // Deuterium
D = 119, // Deuterium
};
// --------------------------------------------------------------------

View File

@@ -43,6 +43,15 @@
namespace cif
{
// --------------------------------------------------------------------
// special exception
/// \brief Exception thrown when a row is inserted whose key values are
/// already present in a category index.
///
/// Derives from std::runtime_error, so existing handlers that catch
/// std::runtime_error or std::exception keep working.
class duplicate_key_error : public std::runtime_error
{
  public:
	/// \param msg Description of the violation, returned by what()
	/// The constructor is explicit so a std::string is never silently
	/// converted into an exception object.
	explicit duplicate_key_error(const std::string &msg)
		: std::runtime_error(msg)
	{
	}
};
// --------------------------------------------------------------------
class category
@@ -77,7 +86,7 @@ class category
const std::string &name() const { return m_name; }
iset fields() const;
iset key_fields() const;
std::set<uint16_t> key_field_indices() const;
@@ -473,6 +482,8 @@ class category
return get_column_ix(name) < m_columns.size();
}
iset get_columns() const;
// --------------------------------------------------------------------
void sort(std::function<int(row_handle,row_handle)> f);

1036
include/cif++/gzio.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -33,6 +33,7 @@
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <cif++/forward_decl.hpp>
#include <cif++/text.hpp>

View File

@@ -608,7 +608,13 @@ class sugar : public residue
sugar(sugar &&rhs);
sugar &operator=(sugar &&rhs);
int num() const { return std::stoi(m_auth_seq_id); }
/// \brief Return the auth_seq_id of this sugar as an integer
/// \throws std::runtime_error when the stored auth_seq_id does not start with a number
int num() const
{
	const char *first = m_auth_seq_id.data();
	const char *last = first + m_auth_seq_id.length();

	// from_chars is used for a locale independent conversion; only its
	// error code is inspected here
	int seq_nr;
	if (std::from_chars(first, last, seq_nr).ec != std::errc())
		throw std::runtime_error("The auth_seq_id should be a number for a sugar");

	return seq_nr;
}
std::string name() const;
/// \brief Return the atom the C1 is linked to

View File

@@ -212,12 +212,16 @@ class sac_parser
/// \brief Report a fatal parse problem
///
/// Echoes \a msg to std::cerr when cif::VERBOSE is larger than zero and
/// then always throws a parse_error carrying the current line number.
void error(const std::string &msg)
{
	const bool verbose = cif::VERBOSE > 0;
	if (verbose)
		std::cerr << "Error parsing mmCIF: " << msg << std::endl;

	throw parse_error(m_line_nr, msg);
}
void warning(const std::string &msg)
{
std::cerr << "parser warning at line" << m_line_nr << ": " << msg << std::endl;
if (cif::VERBOSE > 0)
std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
}
// production methods, these are pure virtual here
@@ -248,7 +252,7 @@ class sac_parser
SAVE
};
std::istream &m_source;
std::streambuf &m_source;
// Parser state
bool m_validate;
@@ -257,7 +261,7 @@ class sac_parser
CIFToken m_lookahead;
std::string m_token_value;
CIFValue mTokenType;
std::stack<int> m_buffer;
std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
};
// --------------------------------------------------------------------

View File

@@ -31,14 +31,30 @@
namespace cif::pdb
{
file read(std::istream &is);
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(const std::filesystem::path &file);
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(std::istream &is);
/// \brief Write out a file in PDB format
void write(std::ostream &os, const datablock &db);
/// \brief Write out a file in PDB format
///
/// Convenience overload: forwards the first datablock of \a f (f.front())
/// to the datablock overload of write.
/// NOTE(review): no emptiness check is done here, presumably \a f must
/// contain at least one datablock -- confirm against callers.
inline void write(std::ostream &os, const file &f)
{
write(os, f.front());
}
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
void write(const std::filesystem::path &file, const datablock &db);
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
///
/// Convenience overload: forwards the first datablock of \a f (f.front())
/// to the datablock overload of write taking a path.
/// NOTE(review): no emptiness check is done here, presumably \a f must
/// contain at least one datablock -- confirm against callers.
inline void write(const std::filesystem::path &p, const file &f)
{
write(p, f.front());
}
}

View File

@@ -81,6 +81,11 @@ struct symop_data
return m_packed == rhs.m_packed;
}
/// \brief Ordering based solely on the m_packed member, which allows
/// symop_data to be used where a less-than comparison is required
bool operator<(const symop_data &rhs) const
{
return m_packed < rhs.m_packed;
}
std::array<int, 15> data() const
{
return {

File diff suppressed because it is too large Load Diff

View File

@@ -1077,9 +1077,13 @@ bool atom_type_traits::is_metal(const std::string& symbol)
auto atom_type_traits::wksf(int charge) const -> const SFData&
{
auto type = m_info->type;
if (type == D)
type = H;
for (auto& sf: data::kWKSFData)
{
if (sf.symbol == m_info->type and sf.charge == charge)
if (sf.symbol == type and sf.charge == charge)
return sf.sf;
}
@@ -1092,7 +1096,7 @@ auto atom_type_traits::wksf(int charge) const -> const SFData&
for (auto& sf: data::kWKSFData)
{
if (sf.symbol == m_info->type and sf.charge == 0)
if (sf.symbol == type and sf.charge == 0)
return sf.sf;
}
}
@@ -1102,9 +1106,13 @@ auto atom_type_traits::wksf(int charge) const -> const SFData&
auto atom_type_traits::elsf() const -> const SFData&
{
auto type = m_info->type;
if (type == D)
type = H;
for (auto& sf: data::kELSFData)
{
if (sf.symbol == m_info->type)
if (sf.symbol == type)
return sf.sf;
}

View File

@@ -410,13 +410,13 @@ category_index::entry *category_index::insert(entry *h, row *v)
row_handle rh(m_category, *v);
std::ostringstream os;
for (auto col : m_category.fields())
for (auto col : m_category.key_fields())
{
if (rh[col])
os << col << ": " << std::quoted(rh[col].text()) << "; ";
}
throw std::runtime_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
throw duplicate_key_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
}
if (is_red(h->m_right) and not is_red(h->m_left))
@@ -686,7 +686,17 @@ category::~category()
// --------------------------------------------------------------------
iset category::fields() const
/// \brief Return the names of all columns currently stored in this category
iset category::get_columns() const
{
	iset names;

	// m_columns holds one entry per column, collect the name of each
	for (const auto &column : m_columns)
		names.insert(column.m_name);

	return names;
}
iset category::key_fields() const
{
if (m_validator == nullptr)
throw std::runtime_error("No Validator specified");
@@ -733,7 +743,26 @@ void category::set_validator(const validator *v, datablock &db)
m_cat_validator = m_validator->get_validator_for_category(m_name);
if (m_cat_validator != nullptr)
m_index = new category_index(this);
{
std::set<std::string> missing;
if (not empty())
{
std::vector<uint16_t> kix;
for (auto k : m_cat_validator->m_keys)
{
kix.push_back(get_column_ix(k));
if (kix.back() >= m_columns.size())
missing.insert(k);
}
}
if (missing.empty())
m_index = new category_index(this);
else if (VERBOSE > 0)
std::cerr << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " "
<< cif::join(missing, ", ") + " in " + m_name + " " + (missing.size() == 1 ? "is" : "are") << " missing" << std::endl;
}
}
else
m_cat_validator = nullptr;
@@ -813,6 +842,20 @@ bool category::is_valid() const
result = false;
}
if (m_cat_validator->m_keys.empty() == false and m_index == nullptr)
{
std::set<std::string> missing;
for (auto k : m_cat_validator->m_keys)
{
if (get_column_ix(k) >= m_columns.size())
missing.insert(k);
}
m_validator->report_error("In category " + m_name + " the index is missing, likely due to missing key fields: " + join(missing, ", "), false);
result = false;
}
#if not defined(NDEBUG)
// check index?
if (m_index)
@@ -857,6 +900,7 @@ bool category::is_valid() const
}
catch (const std::exception &e)
{
result = false;
m_validator->report_error("Error validating " + m_columns[cix].m_name + ": " + e.what(), false);
continue;
}
@@ -1852,7 +1896,10 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
for (auto cix : order)
{
auto &col = m_columns[cix];
os << '_' << m_name << '.' << col.m_name << ' ' << '\n';
os << '_';
if (not m_name.empty())
os << m_name << '.';
os << col.m_name << ' ' << '\n';
columnWidths[cix] = 2;
}
@@ -1940,7 +1987,10 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
{
auto &col = m_columns[cix];
os << '_' << m_name << '.' << col.m_name << std::string(l - col.m_name.length() - m_name.length() - 2, ' ');
os << '_';
if (not m_name.empty())
os << m_name << '.';
os << col.m_name << std::string(l - col.m_name.length() - m_name.length() - 2, ' ');
std::string_view s;
auto iv = m_head->get(cix);
@@ -1977,29 +2027,44 @@ bool category::operator==(const category &rhs) const
// if (tagsA != tagsB)
// std::cout << "Unequal number of fields" << std::endl;
const category_validator *catValidator = nullptr;
auto validator = a.get_validator();
auto catValidator = validator->get_validator_for_category(a.name());
if (catValidator == nullptr)
throw std::runtime_error("missing cat validator");
if (validator != nullptr)
catValidator = validator->get_validator_for_category(a.name());
typedef std::function<int(std::string_view,std::string_view)> compType;
std::vector<std::tuple<std::string,compType>> tags;
auto keys = catValidator->m_keys;
std::vector<std::string> keys;
std::vector<size_t> keyIx;
for (auto& tag: a.fields())
if (catValidator == nullptr)
{
auto iv = catValidator->get_validator_for_item(tag);
if (iv == nullptr)
throw std::runtime_error("missing item validator");
auto tv = iv->m_type;
if (tv == nullptr)
throw std::runtime_error("missing type validator");
tags.push_back(std::make_tuple(tag, std::bind(&cif::type_validator::compare, tv, std::placeholders::_1, std::placeholders::_2)));
auto pred = [tag](const std::string& s) -> bool { return cif::iequals(tag, s) == 0; };
if (find_if(keys.begin(), keys.end(), pred) == keys.end())
keyIx.push_back(tags.size() - 1);
for (auto& tag: a.get_columns())
{
tags.push_back(std::make_tuple(tag, [](std::string_view va, std::string_view vb) { return va.compare(vb); }));
keyIx.push_back(keys.size());
keys.push_back(tag);
}
}
else
{
keys = catValidator->m_keys;
for (auto& tag: a.key_fields())
{
auto iv = catValidator->get_validator_for_item(tag);
if (iv == nullptr)
throw std::runtime_error("missing item validator");
auto tv = iv->m_type;
if (tv == nullptr)
throw std::runtime_error("missing type validator");
tags.push_back(std::make_tuple(tag, std::bind(&cif::type_validator::compare, tv, std::placeholders::_1, std::placeholders::_2)));
auto pred = [tag](const std::string& s) -> bool { return cif::iequals(tag, s) == 0; };
if (find_if(keys.begin(), keys.end(), pred) == keys.end())
keyIx.push_back(tags.size() - 1);
}
}
// a.reorderByIndex();

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -215,7 +215,8 @@ compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(m_bonds.begin(), m_bonds.end(),
[&](const compound_bond &b) {
[&](const compound_bond &b)
{
return (b.atom_id[0] == atomId_1 and b.atom_id[1] == atomId_2) or (b.atom_id[0] == atomId_2 and b.atom_id[1] == atomId_1);
});
@@ -226,39 +227,41 @@ bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atom
// known amino acids and bases
const std::map<std::string, char> compound_factory::kAAMap{
{"ALA", 'A'},
{"ARG", 'R'},
{"ASN", 'N'},
{"ASP", 'D'},
{"CYS", 'C'},
{"GLN", 'Q'},
{"GLU", 'E'},
{"GLY", 'G'},
{"HIS", 'H'},
{"ILE", 'I'},
{"LEU", 'L'},
{"LYS", 'K'},
{"MET", 'M'},
{"PHE", 'F'},
{"PRO", 'P'},
{"SER", 'S'},
{"THR", 'T'},
{"TRP", 'W'},
{"TYR", 'Y'},
{"VAL", 'V'},
{"GLX", 'Z'},
{"ASX", 'B'}};
{ "ALA", 'A' },
{ "ARG", 'R' },
{ "ASN", 'N' },
{ "ASP", 'D' },
{ "CYS", 'C' },
{ "GLN", 'Q' },
{ "GLU", 'E' },
{ "GLY", 'G' },
{ "HIS", 'H' },
{ "ILE", 'I' },
{ "LEU", 'L' },
{ "LYS", 'K' },
{ "MET", 'M' },
{ "PHE", 'F' },
{ "PRO", 'P' },
{ "SER", 'S' },
{ "THR", 'T' },
{ "TRP", 'W' },
{ "TYR", 'Y' },
{ "VAL", 'V' },
{ "GLX", 'Z' },
{ "ASX", 'B' }
};
const std::map<std::string, char> compound_factory::kBaseMap{
{"A", 'A'},
{"C", 'C'},
{"G", 'G'},
{"T", 'T'},
{"U", 'U'},
{"DA", 'A'},
{"DC", 'C'},
{"DG", 'G'},
{"DT", 'T'}};
{ "A", 'A' },
{ "C", 'C' },
{ "G", 'G' },
{ "T", 'T' },
{ "U", 'U' },
{ "DA", 'A' },
{ "DC", 'C' },
{ "DG", 'G' },
{ "DT", 'T' }
};
// --------------------------------------------------------------------
// a factory class to generate compounds
@@ -272,16 +275,16 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
virtual ~compound_factory_impl()
{
for (auto c: m_compounds)
for (auto c : m_compounds)
delete c;
}
compound *get(std::string id)
{
std::shared_lock lock(mMutex);
cif::to_upper(id);
std::shared_lock lock(mMutex);
compound *result = nullptr;
// walk the list, see if any of us has the compound already
@@ -324,17 +327,16 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
bool is_known_peptide(const std::string &resName)
{
return m_known_peptides.count(resName) or
(m_next and m_next->is_known_peptide(resName));
(m_next and m_next->is_known_peptide(resName));
}
bool is_known_base(const std::string &resName)
{
return m_known_bases.count(resName) or
(m_next and m_next->is_known_base(resName));
(m_next and m_next->is_known_base(resName));
}
protected:
virtual compound *create(const std::string &id)
{
// For the base class we assume every compound is preloaded
@@ -414,10 +416,22 @@ compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_p
else
{
// A CCD components file, validate it first
cifFile.load_dictionary("mmcif_pdbx");
try
{
cifFile.load_dictionary("mmcif_pdbx.dic");
if (not cifFile.is_valid())
throw std::runtime_error("Invalid compound file");
if (not cifFile.is_valid())
{
std::cerr << "The components file " << file << " is not valid" << std::endl;
if (cif::VERBOSE < 1)
std::cerr << "(use --verbose to see why)" << std::endl;
}
}
catch (const std::exception &e)
{
std::cerr << "When trying to load the components file " << file << " there was an exception:" << std::endl
<< e.what() << std::endl;
}
for (auto &db : cifFile)
m_compounds.push_back(new compound(db));
@@ -430,7 +444,7 @@ compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_p
class CCD_compound_factory_impl : public compound_factory_impl
{
public:
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path& file)
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path &file)
: compound_factory_impl(next)
, mCompoundsFile(file)
{
@@ -457,7 +471,10 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
{
ccd = cif::load_resource("components.cif");
if (not ccd)
throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
{
std::cerr << "Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data." << std::endl;
return nullptr;
}
}
else
ccd.reset(new std::ifstream(mCompoundsFile));
@@ -647,7 +664,6 @@ compound_factory::compound_factory()
m_impl.reset(new CCP4_compound_factory_impl(clibd_mon));
else if (cif::VERBOSE > 0)
std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
}
compound_factory::~compound_factory()
@@ -689,9 +705,7 @@ void compound_factory::set_default_dictionary(const fs::path &inDictFile)
}
catch (const std::exception &)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
throw;
std::throw_with_nested(std::runtime_error("Error loading dictionary " + inDictFile.string()));
}
}
@@ -700,19 +714,13 @@ void compound_factory::push_dictionary(const fs::path &inDictFile)
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile.string());
// ifstream file(inDictFile);
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
try
{
m_impl.reset(new compound_factory_impl(inDictFile, m_impl));
}
catch (const std::exception &)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
throw;
std::throw_with_nested(std::runtime_error("Error loading dictionary " + inDictFile.string()));
}
}
@@ -724,14 +732,6 @@ void compound_factory::pop_dictionary()
/// Look up the compound with identifier @a id.
///
/// The request is handed to the currently installed implementation
/// (m_impl) via its get() member.
///
/// @param id  Identifier of the requested compound, passed on to get().
/// @return    Whatever m_impl->get(id) returns, or nullptr when no
///            implementation is installed.
const compound *compound_factory::create(std::string id)
{
	// Without an implementation there is nothing to consult.
	if (not m_impl)
		return nullptr;

	return m_impl->get(id);
}
@@ -745,4 +745,4 @@ bool compound_factory::is_known_base(const std::string &resName) const
return m_impl ? m_impl->is_known_base(resName) : kBaseMap.count(resName) > 0;
}
} // namespace pdbx
} // namespace cif

View File

@@ -32,7 +32,7 @@ namespace cif
iset get_category_fields(const category &cat)
{
return cat.fields();
return cat.key_fields();
}
uint16_t get_column_ix(const category &cat, std::string_view col)

View File

@@ -33,8 +33,15 @@ void datablock::set_validator(const validator *v)
{
m_validator = v;
for (auto &cat : *this)
cat.set_validator(v, *this);
try
{
for (auto &cat : *this)
cat.set_validator(v, *this);
}
catch(const std::exception& e)
{
throw_with_nested(std::runtime_error("Error while setting validator in datablock " + m_name));
}
}
const validator *datablock::get_validator() const

View File

@@ -24,9 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <gxrio.hpp>
#include <cif++/file.hpp>
#include <cif++/gzio.hpp>
namespace cif
{
@@ -97,6 +96,9 @@ void file::load_dictionary()
{
std::string name = audit_conform->front().get<std::string>("dict_name");
if (name == "mmcif_pdbx_v50")
name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...
if (not name.empty())
{
try
@@ -112,8 +114,8 @@ void file::load_dictionary()
}
}
if (not m_validator)
load_dictionary("mmcif_ddl");
// if (not m_validator)
// load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve?
}
void file::load_dictionary(std::string_view name)
@@ -180,8 +182,18 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
void file::load(const std::filesystem::path &p)
{
gxrio::ifstream in(p);
load(in);
try
{
gzio::ifstream in(p);
if (not in.is_open())
throw std::runtime_error("Could not open file " + p.string());
load(in);
}
catch (const std::exception &ex)
{
throw_with_nested(std::runtime_error("Error reading file " + p.string()));
}
}
void file::load(std::istream &is)
@@ -200,7 +212,7 @@ void file::load(std::istream &is)
void file::save(const std::filesystem::path &p) const
{
gxrio::ofstream outFile(p);
gzio::ofstream outFile(p);
save(outFile);
}

View File

@@ -31,8 +31,6 @@
#include <iomanip>
#include <numeric>
#include <gxrio.hpp>
namespace fs = std::filesystem;
extern int VERBOSE;
@@ -1118,8 +1116,8 @@ branch::branch(structure &structure, const std::string &asym_id)
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
// if (not iequals(atom1, "c1"))
// throw std::runtime_error("invalid pdbx_entity_branch_link");
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
@@ -1133,23 +1131,26 @@ branch::branch(structure &structure, const std::string &asym_id)
void branch::link_atoms()
{
using namespace literals;
auto &db = m_structure->get_datablock();
auto &branch_link = db["pdbx_entity_branch_link"];
auto entity_id = front().get_entity_id();
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
if (not empty())
{
if (not iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
using namespace literals;
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
auto &db = m_structure->get_datablock();
auto &branch_link = db["pdbx_entity_branch_link"];
s1.set_link(s2.get_atom_by_atom_id(atom2));
auto entity_id = front().get_entity_id();
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
// if (not iequals(atom1, "c1"))
// throw std::runtime_error("invalid pdbx_entity_branch_link");
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
s1.set_link(s2.get_atom_by_atom_id(atom2));
}
}
}
@@ -1190,42 +1191,6 @@ float branch::weight() const
return sum; });
}
// // --------------------------------------------------------------------
// // File
// void File::load(const std::filesystem::path &path)
// {
// gxrio::ifstream in(path);
// auto ext = path.extension().string();
// if (ext == ".gz" or ext = ".xz")
// ext = path.stem().extension().string();
// if (ext == ".pdb" or ext == ".ent")
// ReadPDBFile(in, *this);
// else
// file::load(in);
// // validate, otherwise lots of functionality won't work
// loadDictionary("mmcif_pdbx");
// if (not isValid() and VERBOSE >= 0)
// std::cerr << "Invalid mmCIF file" << (VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
// }
// void File::save(const std::filesystem::path &path)
// {
// gxrio::ostream outFile(path);
// auto ext = path.extension().string();
// if (ext == ".gz" or ext = ".xz")
// ext = path.stem().extension().string();
// if (ext == ".pdb" or ext == ".ent")
// WritePDBFile(outFile, data());
// else
// file::save(outFile);
// }
// --------------------------------------------------------------------
// structure
@@ -1279,7 +1244,7 @@ void structure::load_atoms_for_model(StructureOpenOptions options)
if (model_nr and *model_nr != m_model_nr)
continue;
if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H")
if ((options bitand StructureOpenOptions::SkipHydrogen) and (type_symbol == "H" or type_symbol == "D"))
continue;
emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
@@ -1375,6 +1340,9 @@ void structure::load_data()
ri->second->add_atom(atom);
}
// what the ...
m_branches.erase(std::remove_if(m_branches.begin(), m_branches.end(), [](const branch &b) { return b.empty(); }), m_branches.end());
for (auto &branch : m_branches)
branch.link_atoms();
}

View File

@@ -47,8 +47,11 @@ namespace cif
// --------------------------------------------------------------------
sac_parser::sac_parser(std::istream &is, bool init)
: m_source(is)
: m_source(*is.rdbuf())
{
if (is.rdbuf() == nullptr)
throw std::runtime_error("Attempt to read from uninitialised stream");
m_validate = true;
m_line_nr = 1;
m_bol = true;
@@ -62,26 +65,29 @@ sac_parser::sac_parser(std::istream &is, bool init)
// translation.
int sac_parser::get_next_char()
{
int result;
int result = std::char_traits<char>::eof();
if (m_buffer.empty())
result = m_source.get();
result = m_source.sbumpc();
else
{
result = m_buffer.top();
m_buffer.pop();
result = m_buffer.back();
m_buffer.pop_back();
}
// very simple CR/LF translation into LF
if (result == '\r')
{
int lookahead = m_source.get();
int lookahead = m_source.sbumpc();
if (lookahead != '\n')
m_buffer.push(lookahead);
m_buffer.push_back(lookahead);
result = '\n';
}
m_token_value += static_cast<char>(result);
if (result == std::char_traits<char>::eof())
m_token_value.push_back(0);
else
m_token_value.push_back(std::char_traits<char>::to_char_type(result));
if (result == '\n')
++m_line_nr;
@@ -106,7 +112,7 @@ void sac_parser::retract()
if (ch == '\n')
--m_line_nr;
m_buffer.push(ch);
m_buffer.push_back(ch == 0 ? std::char_traits<char>::eof() : std::char_traits<char>::to_int_type(ch));
m_token_value.pop_back();
}
@@ -216,7 +222,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
error("unterminated textfield");
// else if (ch == '\\')
// state = State::Esc;
else if (not is_any_print(ch))
else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break;
@@ -246,8 +252,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
error("unterminated quoted string");
else if (ch == quoteChar)
state = State::QuotedStringQuote;
else if (not is_any_print(ch))
warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + '\'');
else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break;
case State::QuotedStringQuote:
@@ -453,8 +459,6 @@ void sac_parser::match(CIFToken token)
bool sac_parser::parse_single_datablock(const std::string &datablock)
{
// first locate the start, as fast as we can
auto &sb = *m_source.rdbuf();
enum
{
start,
@@ -471,7 +475,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
std::string::size_type si = 0;
bool found = false;
for (auto ch = sb.sbumpc(); not found and ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
for (auto ch = m_source.sbumpc(); not found and ch != std::streambuf::traits_type::eof(); ch = m_source.sbumpc())
{
switch (state)
{
@@ -544,8 +548,6 @@ sac_parser::datablock_index sac_parser::index_datablocks()
datablock_index index;
// first locate the start, as fast as we can
auto &sb = *m_source.rdbuf();
enum
{
start,
@@ -563,7 +565,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
std::string::size_type si = 0;
std::string datablock;
for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
for (auto ch = m_source.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = m_source.sbumpc())
{
switch (state)
{
@@ -626,7 +628,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
else if (isspace(ch))
{
if (not datablock.empty())
index[datablock] = m_source.tellg();
index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in);
state = start;
}
@@ -648,7 +650,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock, const data
auto i = index.find(datablock);
if (i != index.end())
{
m_source.seekg(i->second);
m_source.pubseekpos(i->second, std::ios_base::in);
produce_datablock(datablock);
m_lookahead = get_next_token();
@@ -696,7 +698,8 @@ void sac_parser::parse_global()
void sac_parser::parse_datablock()
{
std::string cat;
static const std::string kUnitializedCategory("<invalid>");
std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
{
@@ -704,7 +707,7 @@ void sac_parser::parse_datablock()
{
case CIFToken::LOOP:
{
cat.clear(); // should start a new category
cat = kUnitializedCategory; // should start a new category
match(CIFToken::LOOP);
@@ -715,7 +718,7 @@ void sac_parser::parse_datablock()
std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value);
if (cat.empty())
if (cat == kUnitializedCategory)
{
produce_category(catName);
cat = catName;
@@ -801,6 +804,9 @@ void parser::produce_row()
if (VERBOSE >= 4)
std::cerr << "producing row for category " << m_category->name() << std::endl;
if (m_category == nullptr)
error("inconsistent categories in loop_");
m_category->emplace({});
m_row = m_category->back();
// m_row.lineNr(m_line_nr);
@@ -811,7 +817,7 @@ void parser::produce_item(const std::string &category, const std::string &item,
if (VERBOSE >= 4)
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
if (not iequals(category, m_category->name()))
if (m_category == nullptr or not iequals(category, m_category->name()))
error("inconsistent categories in loop_");
m_row[item] = m_token_value;

View File

@@ -32,6 +32,7 @@
#include <cif++.hpp>
#include <cif++/pdb/cif2pdb.hpp>
#include <cif++/gzio.hpp>
namespace cif::pdb
{
@@ -703,16 +704,17 @@ class Ff : public FBase
else
{
std::string s{ text() };
try
double d = 0;
auto r = cif::from_chars(s.data(), s.data() + s.length(), d);
if (r.ec != std::errc())
{
os << std::stod(s);
}
catch (const std::exception &ex)
{
if (VERBOSE >= 0)
if (VERBOSE > 0)
std::cerr << "Failed to write '" << s << "' as a double, this indicates an error in the code for writing PDB files" << std::endl;
os << s;
}
else
os << d;
}
}
};
@@ -2456,36 +2458,39 @@ void WriteRemark350(std::ostream &pdbFile, const datablock &db)
auto gen = db["pdbx_struct_assembly_gen"].find1(key("assembly_id") == id);
std::string asym_id_list, oper_id_list;
cif::tie(asym_id_list, oper_id_list) = gen.get("asym_id_list", "oper_expression");
auto asyms = split<std::string>(asym_id_list, ",");
std::vector<std::string> chains = MapAsymIDs2ChainIDs(asyms, db);
pdbFile << RM("APPLY THE FOLLOWING TO CHAINS: ") << join(chains, ", ") << std::endl;
for (auto oper_id : split<std::string>(oper_id_list, ",", true))
if (gen)
{
auto r = db["pdbx_struct_oper_list"].find1(key("id") == oper_id);
std::string asym_id_list, oper_id_list;
cif::tie(asym_id_list, oper_id_list) = gen.get("asym_id_list", "oper_expression");
pdbFile << RM(" BIOMT1 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[1]")
<< std::endl
<< RM(" BIOMT2 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[2]")
<< std::endl
<< RM(" BIOMT3 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[3]")
<< std::endl;
auto asyms = split<std::string>(asym_id_list, ",");
std::vector<std::string> chains = MapAsymIDs2ChainIDs(asyms, db);
pdbFile << RM("APPLY THE FOLLOWING TO CHAINS: ") << join(chains, ", ") << std::endl;
for (auto oper_id : split<std::string>(oper_id_list, ",", true))
{
auto r = db["pdbx_struct_oper_list"].find1(key("id") == oper_id);
pdbFile << RM(" BIOMT1 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[1]")
<< std::endl
<< RM(" BIOMT2 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[2][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[2]")
<< std::endl
<< RM(" BIOMT3 ", -3) << Fs(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][1]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][2]")
<< SEP(" ", -9, 6) << Ff(r, "matrix[3][3]")
<< SEP(" ", -14, 5) << Ff(r, "vector[3]")
<< std::endl;
}
}
}
}
@@ -3224,7 +3229,8 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
"pdbx_label_comp_id_2", "pdbx_auth_asym_id_2", "pdbx_auth_seq_id_2", "pdbx_PDB_ins_code_2",
"pdbx_PDB_model_num", "pdbx_omega_angle");
pdbFile << format("CISPEP %3d %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3d %6.2f", serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << std::endl;
pdbFile << format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3.3s %6.2f",
serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << std::endl;
}
}
@@ -3723,4 +3729,21 @@ void write(std::ostream &os, const datablock &db)
<< "END" << std::endl;
}
/// Write datablock @a db to the file at path @a p.
///
/// The output format is derived from the file name. When the relevant
/// extension compares equal to ".pdb" using iequals, the stream based
/// write(out, db) overload is used; for every other name the datablock
/// is asked to write itself with db.write(out). For a name ending in
/// ".gz" the relevant extension is the one that precedes ".gz".
///
/// @param p   Path of the file to create.
/// @param db  The datablock to write.
void write(const std::filesystem::path &p, const datablock &db)
{
	gzio::ofstream out(p);

	// Look beneath a trailing ".gz" to find the extension that selects the format.
	const auto ext = p.extension() == ".gz" ? p.stem().extension() : p.extension();

	if (iequals(ext.string(), ".pdb"))
		write(out, db);
	else
		db.write(out);
}
} // namespace cif::pdb

View File

@@ -30,10 +30,10 @@
#include <stack>
#include <cif++.hpp>
#include <gxrio.hpp>
#include <cif++/pdb/pdb2cif.hpp>
#include <cif++/pdb/pdb2cif_remark_3.hpp>
#include <cif++/gzio.hpp>
using cif::category;
using cif::datablock;
@@ -1136,16 +1136,15 @@ void PDBFileParser::PreParseInput(std::istream &is)
{
std::string cs = lookahead.substr(offset, len);
cif::trim(cs);
int result;
int result = 0;
try
if (not cs.empty())
{
result = cs.empty() ? 0 : stoi(cs);
}
catch (...)
{
throw std::runtime_error("Continuation std::string '" + cs + "' is not valid");
auto r = std::from_chars(cs.data(), cs.data() + cs.length(), result);
if (r.ec != std::errc())
throw std::runtime_error("Continuation std::string '" + cs + "' is not valid");
}
return result;
};
@@ -1402,7 +1401,12 @@ void PDBFileParser::PreParseInput(std::istream &is)
link.symOpB = cur->vS(67, 72); // 67 - 72 SymOP sym2 Symmetry operator atom 2.
if (type == "LINK") // 1 - 6 Record name "LINK "
link.distance = std::stof(cur->vF(74, 78));
{
auto f = cur->vF(74, 78);
auto r = cif::from_chars(f.data(), f.data() + f.length(), link.distance);
if (r.ec != std::errc() and cif::VERBOSE > 0)
std::cerr << "Error parsing link distance at line " << cur->mLineNr << std::endl;
}
// 74 78 Real(5.2) Length Link distance
mLinks.push_back(link);
@@ -3248,23 +3252,37 @@ void PDBFileParser::ParseRemark350()
values.clear();
}
getCategory("pdbx_struct_oper_list")->emplace({
{ "id", operID },
{ "type", mat == std::vector<double>{ 1, 0, 0, 0, 1, 0, 0, 0, 1 } and vec == std::vector<double>{ 0, 0, 0 } ? "identity operation" : "crystal symmetry operation" },
// { "name", "" },
// { "symmetryOperation", "" },
{ "matrix[1][1]", format("%12.10f", mat[0]).str() },
{ "matrix[1][2]", format("%12.10f", mat[1]).str() },
{ "matrix[1][3]", format("%12.10f", mat[2]).str() },
{ "vector[1]", format("%12.10f", vec[0]).str() },
{ "matrix[2][1]", format("%12.10f", mat[3]).str() },
{ "matrix[2][2]", format("%12.10f", mat[4]).str() },
{ "matrix[2][3]", format("%12.10f", mat[5]).str() },
{ "vector[2]", format("%12.10f", vec[1]).str() },
{ "matrix[3][1]", format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", format("%12.10f", mat[8]).str() },
{ "vector[3]", format("%12.10f", vec[2]).str() } });
std::string type = mat == std::vector<double>{ 1, 0, 0, 0, 1, 0, 0, 0, 1 } and vec == std::vector<double>{ 0, 0, 0 } ? "identity operation" : "crystal symmetry operation";
// if (type == "identity operation")
// {
// }
// else
try
{
getCategory("pdbx_struct_oper_list")->emplace({
{ "id", operID },
{ "type", type },
// { "name", "" },
// { "symmetryOperation", "" },
{ "matrix[1][1]", format("%12.10f", mat[0]).str() },
{ "matrix[1][2]", format("%12.10f", mat[1]).str() },
{ "matrix[1][3]", format("%12.10f", mat[2]).str() },
{ "vector[1]", format("%12.10f", vec[0]).str() },
{ "matrix[2][1]", format("%12.10f", mat[3]).str() },
{ "matrix[2][2]", format("%12.10f", mat[4]).str() },
{ "matrix[2][3]", format("%12.10f", mat[5]).str() },
{ "vector[2]", format("%12.10f", vec[1]).str() },
{ "matrix[3][1]", format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", format("%12.10f", mat[8]).str() },
{ "vector[3]", format("%12.10f", vec[2]).str() } });
}
catch (duplicate_key_error &ex)
{
// so what?
}
mat.clear();
vec.clear();
@@ -5093,11 +5111,10 @@ void PDBFileParser::ParseConnectivtyAnnotation()
if (mRec->is("LINK "))
{
distance = vS(74, 78);
try
{
stod(distance);
}
catch (const std::invalid_argument &)
double d;
auto r = cif::from_chars(distance.data(), distance.data() + distance.length(), d);
if (r.ec != std::errc())
{
if (cif::VERBOSE > 0)
std::cerr << "Distance value '" << distance << "' is not a valid float in LINK record" << std::endl;
@@ -5825,6 +5842,9 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
auto a1 = atom_site.find1("label_asym_id"_key == asym1 and "label_seq_id"_key == seq1 and "label_atom_id"_key == atom1);
auto a2 = atom_site.find1("label_asym_id"_key == asym2 and "label_seq_id"_key == seq2 and "label_atom_id"_key == atom2);
if (not a1 or not a2)
throw std::runtime_error("cannot find atom");
const auto &[x1, y1, z1] = a1.get<float, float, float>("cartn_x", "cartn_y", "cartn_z");
const auto &[x2, y2, z2] = a2.get<float, float, float>("cartn_x", "cartn_y", "cartn_z");
@@ -6165,7 +6185,7 @@ void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile)
{
PDBFileParser p;
cifFile.load_dictionary("mmcif_pdbx");
cifFile.load_dictionary("mmcif_pdbx.dic");
p.Parse(pdbFile, cifFile);
@@ -6194,16 +6214,27 @@ file read(std::istream &is)
result.load(is);
}
// Must be a PDB like file, right?
if (result.get_validator() == nullptr)
result.load_dictionary("mmcif_pdbx.dic");
return result;
}
file read(const std::filesystem::path &file)
{
gxrio::ifstream in(file);
if (not in.is_open())
throw std::runtime_error("Could not open file " + file.string() + " for input");
return read(in);
try
{
gzio::ifstream in(file);
if (not in.is_open())
throw std::runtime_error("Could not open file " + file.string() + " for input");
return read(in);
}
catch (const std::exception &ex)
{
throw_with_nested(std::runtime_error("Error reading file " + file.string()));
}
}
} // namespace pdbx

View File

@@ -1207,13 +1207,15 @@ void Remark3Parser::storeCapture(const char *category, std::initializer_list<con
}
else if (iequals(category, "pdbx_refine_tls_group"))
{
std::string tlsGroupID;
std::string tlsID;
if (not mDb["pdbx_refine_tls"].empty())
tlsGroupID = mDb["pdbx_refine_tls"].back()["id"].as<std::string>();
tlsID = mDb["pdbx_refine_tls"].back()["id"].as<std::string>();
std::string tlsGroupID = cat.get_unique_id("");
cat.emplace({ { "pdbx_refine_id", mExpMethod },
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "id", tlsGroupID },
{ "refine_tls_id", tlsGroupID } });
{ "refine_tls_id", tlsID } });
}
else if (iequals(category, "pdbx_refine_tls"))
{
@@ -1425,10 +1427,9 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
sort(scores.begin(), scores.end());
bool guessProgram = scores.empty() or scores.front().score < 0.9f;
;
if (guessProgram)
{
if (cif::VERBOSE >= 0)
if (cif::VERBOSE > 0)
std::cerr << "Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match" << std::endl;
tryParser(new BUSTER_TNT_Remark3Parser("BUSTER-TNT", expMethod, r, db));
@@ -1482,7 +1483,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
auto r1 = cat1.front();
auto r2 = cat2.front();
for (auto column : cat1.fields())
for (auto column : cat1.key_fields())
r2[column] = r1[column].text();
}
}

View File

@@ -31,30 +31,41 @@ namespace cif
/// Assign @a value to the item in column @a column of this row.
///
/// The work is delegated to update_value() of the category this handle
/// refers to; @a updateLinked and @a validate are forwarded unchanged.
///
/// @throws std::runtime_error ("uninitialized row") when the handle has no
///         category.
void row_handle::assign(size_t column, std::string_view value, bool updateLinked, bool validate)
{
	// No assert here: an assert placed before this check would abort in
	// debug builds and make the documented exception unreachable.
	if (not m_category)
		throw std::runtime_error("uninitialized row");

	m_category->update_value(m_row, column, value, updateLinked, validate);
}
/// Return the index of the column called @a name.
///
/// The lookup is delegated to get_column_ix() of the category this handle
/// refers to.
///
/// @throws std::runtime_error ("uninitialized row") when the handle has no
///         category.
uint16_t row_handle::get_column_ix(std::string_view name) const
{
	// No assert here: an assert placed before this check would abort in
	// debug builds and make the documented exception unreachable.
	if (not m_category)
		throw std::runtime_error("uninitialized row");

	return m_category->get_column_ix(name);
}
/// Return the name of the column with index @a ix.
///
/// The lookup is delegated to get_column_name() of the category this handle
/// refers to.
///
/// @throws std::runtime_error ("uninitialized row") when the handle has no
///         category.
std::string_view row_handle::get_column_name(uint16_t ix) const
{
	// No assert here: an assert placed before this check would abort in
	// debug builds and make the documented exception unreachable.
	if (not m_category)
		throw std::runtime_error("uninitialized row");

	return m_category->get_column_name(ix);
}
/// Add a column called @a name and return the value produced by the
/// category's add_column().
///
/// The request is delegated to the category this handle refers to.
///
/// @throws std::runtime_error ("uninitialized row") when the handle has no
///         category.
uint16_t row_handle::add_column(std::string_view name)
{
	// No assert here: an assert placed before this check would abort in
	// debug builds and make the documented exception unreachable.
	if (not m_category)
		throw std::runtime_error("uninitialized row");

	return m_category->add_column(name);
}
/// Swap the item in column @a column between this row and row @a b.
///
/// The exchange itself is carried out by swap_item() of the category this
/// handle refers to.
///
/// @throws std::runtime_error ("uninitialized row") when the handle has no
///         category.
void row_handle::swap(size_t column, row_handle &b)
{
	// A handle without a category has nothing to delegate to.
	if (m_category == nullptr)
		throw std::runtime_error("uninitialized row");

	m_category->swap_item(column, *this, b);
}
@@ -62,7 +73,9 @@ void row_handle::swap(size_t column, row_handle &b)
row_initializer::row_initializer(row_handle rh)
{
assert(rh.m_category);
if (not rh.m_category)
throw std::runtime_error("uninitialized row");
assert(rh.m_row);
row *r = rh.get_row();

View File

@@ -24,8 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <atomic>
#include <mutex>
#include <stdexcept>
#include <cif++/symmetry.hpp>

View File

@@ -220,13 +220,14 @@ std::tuple<std::string, std::string> split_tag_name(std::string_view tag)
if (tag.empty())
throw std::runtime_error("empty tag");
if (tag[0] != '_')
throw std::runtime_error("tag does not start with underscore");
throw std::runtime_error("tag '" + std::string { tag } + "' does not start with underscore");
auto s = tag.find('.');
if (s == std::string::npos)
throw std::runtime_error("tag does not contain dot");
return std::tuple<std::string, std::string>{
tag.substr(1, s - 1), tag.substr(s + 1)};
// throw std::runtime_error("tag does not contain dot (" + std::string{ tag } + ')');
return std::tuple<std::string, std::string>{ "", tag.substr(1) };
else
return std::tuple<std::string, std::string>{tag.substr(1, s - 1), tag.substr(s + 1)};
}
// --------------------------------------------------------------------

View File

@@ -41,12 +41,11 @@ using boost::regex;
using std::regex;
#endif
#include <gxrio.hpp>
#include <cif++/dictionary_parser.hpp>
#include <cif++/validate.hpp>
#include <cif++/utilities.hpp>
#include <cif++/gzio.hpp>
namespace cif
{
@@ -411,11 +410,25 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
return validator;
}
// not found, add it
// not found, try to see if it helps if we tweak the name a little
// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());
if (dictionary.extension() != ".dic")
{
auto dict_name = dictionary.filename().string() + ".dic";
for (auto &validator : m_validators)
{
if (iequals(validator.name(), dict_name))
return validator;
}
}
// not found, add it
auto data = load_resource(dictionary_name);
if (not data and dictionary.extension().string() != ".dic")
@@ -458,7 +471,7 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
if (std::filesystem::exists(p, ec) and not ec)
{
gxrio::ifstream in(p);
gzio::ifstream in(p);
if (not in.is_open())
throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

View File

@@ -106,7 +106,7 @@ _atom_site.pdbx_formal_charge
# that's enough to test with
)"_cf;
atoms.load_dictionary("mmcif_pdbx");
atoms.load_dictionary("mmcif_pdbx.dic");
auto &hem_data = atoms["HEM"];
auto &atom_site = hem_data["atom_site"];

View File

@@ -2970,3 +2970,91 @@ _cat_1.id_2
for (const auto &[key, test] : TESTS)
BOOST_CHECK_EQUAL((bool)cat1[key], test);
}
// --------------------------------------------------------------------
// Parse a loop_ whose tags carry no category part ("_id", "_name"), verify
// the values that were read and check that writing the datablock and reading
// it back yields an equal datablock.
BOOST_AUTO_TEST_CASE(cifv1_0_1)
{
	auto parsed = R"(data_TEST
#
loop_
_id
_name
1 aap
2 noot
3 mies
4 ?
5 .
)"_cf;

	auto &db = parsed.front();

	// The items above are looked up in the category with the empty name.
	auto &cat = db[""];

	for (auto row : cat)
	{
		int id;
		std::optional<std::string> name;

		cif::tie(id, name) = row.get("id", "name");

		if (id == 1)
		{
			BOOST_CHECK_EQUAL(*name, "aap");
		}
		else if (id == 2)
		{
			BOOST_CHECK_EQUAL(*name, "noot");
		}
		else if (id == 3)
		{
			BOOST_CHECK_EQUAL(*name, "mies");
		}
		else
		{
			// rows 4 and 5 hold '?' and '.', which must come back without a value
			BOOST_CHECK(name.has_value() == false);
		}
	}

	// Round trip: serialise, parse again and compare.
	std::stringstream buffer;
	buffer << db;

	auto reread = cif::file(buffer);
	auto &db2 = reread.front();

	BOOST_TEST(db == db2);
}
// BOOST_AUTO_TEST_CASE(cifv1_0_2)
// {
// BOOST_CHECK_THROW(R"(data_TEST
// #
// _version 1.0
// loop_
// _id
// _name
// 1 aap
// 2 noot
// 3 mies
// 4 ?
// 5 .
// )"_cf, cif::parse_error);
// }
// Parse two stand-alone items whose tags carry no category part
// ("_version", "_date"), verify their values and check that writing the
// datablock and reading it back yields an equal datablock.
BOOST_AUTO_TEST_CASE(cifv1_0_3)
{
	auto parsed = R"(data_TEST
#
_version 1.0
_date today
)"_cf;

	auto &db = parsed.front();

	// The items above are looked up in the category with the empty name.
	auto &cat = db[""];

	BOOST_CHECK(not cat.empty());

	auto r = cat.front();
	BOOST_CHECK_EQUAL(r["version"].as<std::string>(), "1.0");
	BOOST_CHECK_EQUAL(r["date"].as<std::string>(), "today");

	// Round trip: serialise, parse again and compare.
	std::stringstream buffer;
	buffer << db;

	auto reread = cif::file(buffer);
	auto &db2 = reread.front();

	BOOST_TEST(db == db2);
}